From 4355fd0c27f17e08d28b8be9b58c651ab7ed9864 Mon Sep 17 00:00:00 2001 From: "[1000i100] Millicent Billette" <git@1000i100.fr> Date: Wed, 31 Mar 2021 04:16:34 +0200 Subject: [PATCH] WiP: dictionary stuff (dictionary-tree) --- src/dictionary-parser.mjs | 58 ++++++++++++++++----- src/dictionary-parser.test.mjs | 2 +- src/dictionary-tree.mjs | 94 +++++++++++++++++++++------------- src/dictionary-tree.test.mjs | 32 +++++------- 4 files changed, 116 insertions(+), 70 deletions(-) diff --git a/src/dictionary-parser.mjs b/src/dictionary-parser.mjs index b263a66..71f208b 100644 --- a/src/dictionary-parser.mjs +++ b/src/dictionary-parser.mjs @@ -1,25 +1,59 @@ +import * as tree from './dictionary-tree.mjs'; + export function parse(dictionaryString) { const allLines = dictionaryString.split('\n'); const parsedLines = parseAllLines(allLines); - return combineUnspecifiedLines2IdSecPwdLines(parsedLines); + return combineUnspecified2IdSecPwd(parsedLines); } -function parseAllLines(lines){ + +function parseAllLines(lines) { return lines; } -function parseLine(theLine,allLines){ + +function parseLine(theLine, allLines) { return theLine; } -function combineUnspecifiedLines2IdSecPwdLines(lines){ - const unspecifiedLines = []; + +function combineUnspecified2IdSecPwd(lines) { const idSecPwdLines = []; - lines.forEach(line=>line.includes('@@@@') ? idSecPwdLines.push(line) : unspecifiedLines.push(line)); - if(unspecifiedLines.length) idSecPwdLines.push(`${mergeLines(unspecifiedLines)}@@@@${mergeLines(unspecifiedLines)}`) + const unspecifiedLines = []; + lines.forEach(line => line.includes('@@@@') ? deepUnspecifiedExtractor(line, unspecifiedLines, idSecPwdLines) : unspecifiedLines.push(line)); + if (unspecifiedLines.length) idSecPwdLines.push(`${mergeLines(unspecifiedLines)}@@@@${mergeLines(unspecifiedLines)}`) return mergeLines(idSecPwdLines); } -function mergeLines(lines){ - console.log('lines:',lines); - if(!lines.length) return ''; - lines = lines.filter(l=>l.length); - if(lines.length===1) return lines[0]; + +function deepUnspecifiedExtractor(line, unspecifiedLines, idSecPwdLines) { + const hybridTree = tree.buildTreeStruct(line); + + /* + buildTree (à créer dans un module à part) + parcours en profondeur jusqu'à trouver des branches unspecified. + extraction de celles-ci pour les ajouter (avec leur chaine de parents) aux unspecifiedlines + ajout de l'arbre restant aux idSecPwdLines + */ + idSecPwdLines.push(line); +} + +function mergeLines(lines) { + //console.log('lines:',lines); + if (!lines.length) return ''; + lines = lines.filter(l => l.length); + if (lines.length === 1) return lines[0]; return `(${lines.join('|')})`; } + +/* +const json1 = '(a(b@@@@c|d)|(e|f)|g|h@@@@i)' +const json2 = { + 'a':'a'{ + 'b@@@@c':'b@@@@c', + 'd':'d' + }, + { + 'e':'e', + 'f':'f' + }, + 'g':'g', + 'h@@@@i':'h@@@@i' +} +*/ diff --git a/src/dictionary-parser.test.mjs b/src/dictionary-parser.test.mjs index 1bf239e..fdd9aae 100644 --- a/src/dictionary-parser.test.mjs +++ b/src/dictionary-parser.test.mjs @@ -16,5 +16,5 @@ h@@@@(i|j) `), '(a@@@@(b|c)|h@@@@(i|j)|((d|e)|(f|g))@@@@((d|e)|(f|g)))'); }); test.skip('no @@@@ cases combined with @@@@ case on same line recombine to allways finish with @@@@ case', t => { - t.is(app.parse('(a|b|c@@@@d)'), '(c@@@@d|(a|b)@@@@(a|b))'); + t.is(app.parse('(a(b@@@@c|d)|(e|f)|g|h@@@@i)'), '(g@@@@h|(a|b|c)@@@@(a|b|c))'); }); diff --git a/src/dictionary-tree.mjs b/src/dictionary-tree.mjs index 9e1667e..c07d603 100644 --- a/src/dictionary-tree.mjs +++ b/src/dictionary-tree.mjs @@ -2,10 +2,10 @@ export function build(dictionaryString) { } export function buildTreeStruct(monoLineString) { - console.log('srcTree', monoLineString); + //console.log('srcTree', monoLineString); const stringAsArray = monoLineString.split(''); const rawTree = leftParser(stringAsArray); - console.log('rawTree', JSON.stringify(rawTree)); + //console.log('rawTree', JSON.stringify(rawTree)); const outOfScope = stringAsArray.length; if (outOfScope) throw `fail to build tree from : "${monoLineString}" parsed: ${JSON.stringify(rawTree)} unparsed/failed: ${stringAsArray.join('')}`; //return rawTree;//trivialDedup(flattenTree(rawTree)) @@ -15,70 +15,82 @@ export function buildTreeStruct(monoLineString) { /** * leftParser stuff */ -function flushNoEmptyString(data){ - if (data.str.length) data.tree[data.tree.length-1].step.push(data.str); +function flushNoEmptyString(data) { + if (data.str.length) data.tree[data.tree.length - 1].step.push(data.str); data.str = ''; } -function appendToString(chr,data){ + +function appendToString(chr, data) { data.str += chr; } -function nextAlt(data){ + +function nextAlt(data) { data.tree.push({step: []}); } -function goIntoNewAlt(data){ - data.tree[data.tree.length-1].step.push(leftParser(data.input)); + +function goIntoNewAlt(data) { + data.tree[data.tree.length - 1].step.push(leftParser(data.input)); +} + +function returnTrue() { + return true; +} + +function doTheses() { } -function returnTrue(){return true;} -function doTheses(){} + const leftParserFunc = { - ')':(chr,data)=>returnTrue(flushNoEmptyString(data)), - '|':(chr,data)=>doTheses(flushNoEmptyString(data), nextAlt(data)), - '(':(chr,data)=>doTheses(flushNoEmptyString(data), goIntoNewAlt(data)), - 'default':(chr,data)=>appendToString(chr,data) + ')': (chr, data) => returnTrue(flushNoEmptyString(data)), + '|': (chr, data) => doTheses(flushNoEmptyString(data), nextAlt(data)), + '(': (chr, data) => doTheses(flushNoEmptyString(data), goIntoNewAlt(data)), + 'default': (chr, data) => appendToString(chr, data) } + function leftParser(inputChrArray) { const data = { - input:inputChrArray, - str:'', - tree:[{step: []}], + input: inputChrArray, + str: '', + tree: [{step: []}], }; while (data.input.length) { const chr = data.input.shift(); - if(typeof leftParserFunc[chr]!=='undefined') { - const out = leftParserFunc[chr](chr,data); - if(out) return data.tree; - } else leftParserFunc['default'](chr,data); + if (typeof leftParserFunc[chr] !== 'undefined') { + const out = leftParserFunc[chr](chr, data); + if (out) return data.tree; + } else leftParserFunc['default'](chr, data); } flushNoEmptyString(data); return data.tree; } + // end leftParser -function concatStrings(array){ +function concatStrings(array) { const data = { - input:array, - str:'', - tree:[{step: []}], + input: array, + str: '', + tree: [{step: []}], }; - array.forEach(e=>{ - if(typeof e === 'string') appendToString(e,data); - else{ + array.forEach(e => { + if (typeof e === 'string') appendToString(e, data); + else { flushNoEmptyString(data) - data.tree[data.tree.length-1].step.push(e); + data.tree[data.tree.length - 1].step.push(e); } }); flushNoEmptyString(data) - return data.tree[data.tree.length-1].step; + return data.tree[data.tree.length - 1].step; } + function flattenTree(tree) { if (typeof tree === 'string') return tree; - if (Array.isArray(tree) && tree.length===0) return ''; - if (Array.isArray(tree) && tree.length===1) return flattenTree(tree[0]); + if (Array.isArray(tree) && tree.length === 0) return ''; + if (Array.isArray(tree) && tree.length === 1) return flattenTree(tree[0]); if (isStep(tree)) { tree.step = tree.step.map(flattenTree); if (!Array.isArray(tree.step)) return flattenTree(tree.step); if (tree.step.length === 1) return flattenTree(tree.step); if (tree.step.length === 0) return ''; - if(!tree.step.reduce((acc,cur)=>Array.isArray(cur)||acc,false)) return concatStrings(tree.step); + if (!tree.step.reduce((acc, cur) => Array.isArray(cur) || acc, false)) return concatStrings(tree.step); return tree; } @@ -93,7 +105,7 @@ function flattenTree(tree) { //console.log('tree:', JSON.stringify(tree)); ///if(isStep(tree[0])) tree[0].step = flattenTree(tree[0].step); tree = trivialDedup(tree.map(flattenTree)); - tree = [].concat(...tree.map(e=>Array.isArray(e) ? e : [e])); + tree = [].concat(...tree.map(e => Array.isArray(e) ? e : [e])); return tree; } @@ -101,13 +113,21 @@ function flattenTree(tree) { function isStep(element) { return typeof element === 'object' && typeof element.step !== 'undefined'; } + function trivialDedup(tree) { if (typeof tree === 'string') return tree; - if (isStep(tree)) return {step:tree.step.map(subTree=>trivialDedup(subTree))}; - if (Array.isArray(tree)){ + if (isStep(tree)) return {step: tree.step.map(subTree => trivialDedup(subTree))}; + if (Array.isArray(tree)) { const dedupKeys = {}; tree.forEach(v => dedupKeys[JSON.stringify(v)] = v); // eslint-disable-line no-return-assign - return Object.keys(dedupKeys).map(json=>trivialDedup(JSON.parse(json))); + return Object.keys(dedupKeys).map(json => trivialDedup(JSON.parse(json))); } throw `unknown case : ${tree}`; } + +export function serialize(treeStruct) { + if (typeof treeStruct === 'string') return treeStruct; + if (Array.isArray(treeStruct)) return `(${treeStruct.map(serialize).join('|')})`; + if (isStep(treeStruct)) return treeStruct.step.map(serialize).join(''); + throw new Error(`Error: how to serialize ${JSON.stringify(treeStruct)} RAW: ${treeStruct}`); +} diff --git a/src/dictionary-tree.test.mjs b/src/dictionary-tree.test.mjs index 73cf5c5..620877b 100644 --- a/src/dictionary-tree.test.mjs +++ b/src/dictionary-tree.test.mjs @@ -2,27 +2,19 @@ import test from 'ava'; import * as app from './dictionary-tree.mjs'; const buildTreeStruct = str => JSON.stringify(app.buildTreeStruct(str)); -const expected= str => JSON.stringify(str); +const expected = str => JSON.stringify(str); -test('simple string still simple string',t=>t.is(buildTreeStruct('abc'),expected('abc'))); -test('(a|b) alt as array',t=>t.is(buildTreeStruct('(a|b)'),expected(['a','b']))); +test('simple string still simple string', t => t.is(buildTreeStruct('abc'), expected('abc'))); +test('(a|b) alt as array', t => t.is(buildTreeStruct('(a|b)'), expected(['a', 'b']))); //test('a) throw',t=>t.throws(()=>buildTreeStruct('a)'))); //test('(a throw',t=>t.throws(()=>buildTreeStruct('(a'))); //test('a|b throw',t=>t.throws(()=>buildTreeStruct('a|b'))); -test('(|b) empty choice is choice', t=>t.is(buildTreeStruct('(|b)'),expected(['','b']))); -test('(b|b) trivial dedup', t=>t.is(buildTreeStruct('(|b||b|)'),expected(['','b']))); -test('a(b|c) ordered part in step obj',t=>t.is(buildTreeStruct('a(b|c)'),expected({step:['a',['b','c']]}))); -test('a(b) flat merge when no alt',t=>t.is(buildTreeStruct('a(b)'),expected('ab'))); - -test('build tree with (|) pattern', t => t.is(buildTreeStruct('(a(|b@@@@c|d|)|(e|f)|g|h@@@@i)'), - expected([ - {step:['a',[ - '', - 'b@@@@c', - 'd' - ]]}, - 'e', - 'f', - 'g', - 'h@@@@i' - ]))); +test('(|b) empty choice is choice', t => t.is(buildTreeStruct('(|b)'), expected(['', 'b']))); +test('(b|b) trivial dedup', t => t.is(buildTreeStruct('(|b||b|)'), expected(['', 'b']))); +test('a(b|c) ordered part in step obj', t => t.is(buildTreeStruct('a(b|c)'), expected({step: ['a', ['b', 'c']]}))); +test('a(b) flat merge when no alt', t => t.is(buildTreeStruct('a(b)'), expected('ab'))); +test('build complexe tree with (|) pattern', t => t.is(buildTreeStruct('(a(|b@@@@c|d|)|(e|f)|g|h@@@@i)'), + expected([{step: ['a', ['', 'b@@@@c', 'd']]}, 'e', 'f', 'g', 'h@@@@i']) +)); +test('serialize tree to a(b|c)', t => t.is(app.serialize({step: ['a', ['b', 'c']]}), 'a(b|c)')); +test('serialize incorrect tree throw', t => t.throws(() => app.serialize({plop: ['a']}))); -- GitLab