diff --git a/package.json b/package.json
index 90231a020fa48db6dfdb9f3444fa879992708b6d..c3957a7d63c455dd20283ce7b091c0c63ae5f47f 100644
--- a/package.json
+++ b/package.json
@@ -31,7 +31,8 @@
     "test:production:testMinified": "cd generated/minified/ && ava",
     "test:production:clean": "rm -rf generated/minified/*.test.mjs",
     "test:production:minified2npm": "cp -rf generated/minified/* generated/npm/",
-    "watch": "chokidar src/* -c \"npm run test:dev:runTests 2>/dev/null\""
+    "watch": "chokidar src/* -c \"npm run test:dev:runTests\"",
+    "watch2null": "chokidar src/* -c \"npm run test:dev:runTests 2>/dev/null\""
   },
   "dependencies": {
     "@thi.ng/base-n": "^0.1.6",
diff --git a/src/dictionary-builder.test.mjs b/src/dictionary-builder.test.mjs
index 9a613810efe91e66454d8b4edd3ac05fadd31dd3..2ba749b055ef1458b906b34abbaca043e8802d18 100644
--- a/src/dictionary-builder.test.mjs
+++ b/src/dictionary-builder.test.mjs
@@ -67,3 +67,8 @@ test('regLikeVariants handle [\\]*]', t => {
 test('regLikeVariants handle escaping common chr \\a', t => {
   t.deepEqual(app.regLikeVariants('\\a'), ['a']);
 });
+/*
+test('how do you handle that ? -> breadth-first traversal. Is it relevant ? -> actually, a traversal with redundancy and duplicate removal that mimics breadth-first traversal', t => {
+  t.deepEqual(app.regLikeVariants('(a(b@@@@c|d)|(e|f)|g|h@@@@i)'), []);
+});
+*/
diff --git a/src/dictionary-tree.mjs b/src/dictionary-tree.mjs
new file mode 100644
index 0000000000000000000000000000000000000000..9e1667e6a157480a0f4276d60ccc45611a734915
--- /dev/null
+++ b/src/dictionary-tree.mjs
@@ -0,0 +1,165 @@
+/**
+ * Placeholder for building a dictionary from its source string.
+ * Not implemented yet: the body is empty, so it returns undefined.
+ *
+ * @param {string} dictionaryString - Dictionary source (currently unused).
+ * @returns {undefined}
+ */
+export function build(dictionaryString) {
+}
+
+/**
+ * Parse a one-line pattern such as `a(b|c)` into a tree of alternatives.
+ * Plain text stays a string, a `(x|y)` group becomes an array of choices and
+ * text followed by a multi-choice group becomes a `{step: [...]}` object.
+ *
+ * @param {string} monoLineString - Pattern using `(`, `|` and `)` as syntax.
+ * @returns {string|Array|{step: Array}} The flattened tree.
+ * @throws {Error} When part of the input is left unparsed (e.g. `a)b`).
+ */
+export function buildTreeStruct(monoLineString) {
+  const stringAsArray = monoLineString.split('');
+  // leftParser consumes stringAsArray in place: leftovers mean a parse failure.
+  const rawTree = leftParser(stringAsArray);
+  if (stringAsArray.length > 0) {
+    throw new Error(`fail to build tree from : "${monoLineString}" parsed: ${JSON.stringify(rawTree)} unparsed/failed: ${stringAsArray.join('')}`);
+  }
+  // One flattenTree pass can leave collapsible wrappers behind (`a(b)` needs
+  // two passes), hence the repeated application.
+  return flattenTree(flattenTree(flattenTree(rawTree)));
+}
+
+/**
+ * leftParser helpers. `data` is the parser state: `input` (characters left),
+ * `str` (pending text) and `tree` (one `{step}` entry per alternative).
+ */
+function flushNoEmptyString(data) {
+  if (data.str.length) data.tree[data.tree.length - 1].step.push(data.str);
+  data.str = '';
+}
+function appendToString(chr, data) {
+  data.str += chr;
+}
+function nextAlt(data) {
+  data.tree.push({step: []});
+}
+function goIntoNewAlt(data) {
+  data.tree[data.tree.length - 1].step.push(leftParser(data.input));
+}
+// Handlers for syntax characters; a truthy return closes the current group.
+const leftParserFunc = {
+  ')': (chr, data) => {
+    flushNoEmptyString(data);
+    return true;
+  },
+  '|': (chr, data) => {
+    flushNoEmptyString(data);
+    nextAlt(data);
+  },
+  '(': (chr, data) => {
+    flushNoEmptyString(data);
+    goIntoNewAlt(data);
+  },
+  'default': (chr, data) => appendToString(chr, data),
+};
+
+/**
+ * Consume characters from inputChrArray (mutated in place) until it is empty
+ * or a `)` closes the current group; nested groups recurse on the same array.
+ *
+ * @param {string[]} inputChrArray - Characters left to parse.
+ * @returns {Array} One `{step: [...]}` object per `|` alternative.
+ */
+function leftParser(inputChrArray) {
+  const data = {
+    input: inputChrArray,
+    str: '',
+    tree: [{step: []}],
+  };
+  while (data.input.length) {
+    const chr = data.input.shift();
+    const handler = leftParserFunc[chr] || leftParserFunc.default;
+    if (handler(chr, data)) return data.tree;
+  }
+  flushNoEmptyString(data);
+  return data.tree;
+}
+// end leftParser
+
+/**
+ * Merge runs of adjacent strings of a step list, keeping other entries as is.
+ *
+ * @param {Array} array - Step entries (strings and sub-trees).
+ * @returns {Array} The entries with consecutive strings concatenated.
+ */
+function concatStrings(array) {
+  const data = {
+    input: array,
+    str: '',
+    tree: [{step: []}],
+  };
+  array.forEach(e => {
+    if (typeof e === 'string') appendToString(e, data);
+    else {
+      flushNoEmptyString(data);
+      data.tree[data.tree.length - 1].step.push(e);
+    }
+  });
+  flushNoEmptyString(data);
+  return data.tree[data.tree.length - 1].step;
+}
+
+/**
+ * Simplify a parsed tree: unwrap single-element arrays, turn empty arrays and
+ * empty steps into '', merge the strings of steps without nested arrays and
+ * dedupe/splice alternative arrays. Step objects are updated in place.
+ *
+ * @param {string|Array|{step: Array}} tree - Raw or partially flattened tree.
+ * @returns {string|Array|{step: Array}} The simplified tree.
+ * @throws {Error} When tree is neither a string, an array nor a step object.
+ */
+function flattenTree(tree) {
+  if (typeof tree === 'string') return tree;
+  if (Array.isArray(tree) && tree.length === 0) return '';
+  if (Array.isArray(tree) && tree.length === 1) return flattenTree(tree[0]);
+  if (isStep(tree)) {
+    tree.step = tree.step.map(flattenTree);
+    if (tree.step.length === 1) return flattenTree(tree.step);
+    if (tree.step.length === 0) return '';
+    // No nested alternative array left: merge the adjacent strings.
+    if (!tree.step.some(Array.isArray)) return concatStrings(tree.step);
+    return tree;
+  }
+  if (!Array.isArray(tree)) throw new Error(`Error : ${tree} should be an Array. ${JSON.stringify(tree)}`);
+  const flattened = trivialDedup(tree.map(flattenTree));
+  // Splice nested alternative arrays into their parent list.
+  return [].concat(...flattened.map(e => (Array.isArray(e) ? e : [e])));
+}
+
+/**
+ * @param {*} element - Any tree node.
+ * @returns {boolean} True for non-null objects exposing a `step` property.
+ */
+function isStep(element) {
+  // typeof null is 'object', so exclude null before reading `.step`.
+  return element !== null && typeof element === 'object' && typeof element.step !== 'undefined';
+}
+
+/**
+ * Recursively drop duplicated entries from the alternative arrays of a tree.
+ * Entries are compared through their JSON form; first occurrences are kept in
+ * order and step lists themselves are not deduplicated.
+ *
+ * @param {string|Array|{step: Array}} tree - Tree to deduplicate.
+ * @returns {string|Array|{step: Array}} Deduplicated tree (strings as is).
+ * @throws {Error} When a node is neither a string, an array nor a step object.
+ */
+function trivialDedup(tree) {
+  if (typeof tree === 'string') return tree;
+  if (isStep(tree)) return {step: tree.step.map(subTree => trivialDedup(subTree))};
+  if (Array.isArray(tree)) {
+    const uniqueJson = new Set(tree.map(v => JSON.stringify(v)));
+    return [...uniqueJson].map(json => trivialDedup(JSON.parse(json)));
+  }
+  throw new Error(`unknown case : ${tree}`);
+}
diff --git a/src/dictionary-tree.test.mjs b/src/dictionary-tree.test.mjs
new file mode 100644
index 0000000000000000000000000000000000000000..73cf5c5e1e889f8f56b65602ceca40d65305a0f4
--- /dev/null
+++ b/src/dictionary-tree.test.mjs
@@ -0,0 +1,28 @@
+import test from 'ava';
+import * as app from './dictionary-tree.mjs';
+
+const buildTreeStruct = str => JSON.stringify(app.buildTreeStruct(str));
+const expected= str => JSON.stringify(str);
+
+test('simple string still simple string',t=>t.is(buildTreeStruct('abc'),expected('abc')));
+test('(a|b) alt as array',t=>t.is(buildTreeStruct('(a|b)'),expected(['a','b'])));
+//test('a) throw',t=>t.throws(()=>buildTreeStruct('a)')));
+//test('(a throw',t=>t.throws(()=>buildTreeStruct('(a')));
+//test('a|b throw',t=>t.throws(()=>buildTreeStruct('a|b')));
+test('(|b) empty choice is choice', t=>t.is(buildTreeStruct('(|b)'),expected(['','b'])));
+test('(b|b) trivial dedup', t=>t.is(buildTreeStruct('(|b||b|)'),expected(['','b'])));
+test('a(b|c) ordered part in step obj',t=>t.is(buildTreeStruct('a(b|c)'),expected({step:['a',['b','c']]})));
+test('a(b) flat merge when no alt',t=>t.is(buildTreeStruct('a(b)'),expected('ab')));
+
+test('build tree with (|) pattern', t => t.is(buildTreeStruct('(a(|b@@@@c|d|)|(e|f)|g|h@@@@i)'),
+  expected([
+    {step:['a',[
+      '',
+      'b@@@@c',
+      'd'
+    ]]},
+    'e',
+    'f',
+    'g',
+    'h@@@@i'
+  ])));