Skip to content
Snippets Groups Projects
Commit e62e0e5d authored by Millicent Billette's avatar Millicent Billette
Browse files

REFACTO: cleaning dictionary for demo

parent 3dace612
No related branches found
No related tags found
No related merge requests found
......@@ -17,43 +17,14 @@ function parseLine(theLine, allLines) {
/**
 * Combine dictionary lines into one merged pattern in which every entry
 * contains the `@@@@` separator.
 *
 * Lines that already contain `@@@@` are kept as they are. All remaining lines
 * are merged together and turned into a single `X@@@@X` entry, where `X` is
 * the merged pattern, so they end up on both sides of the separator.
 *
 * @param {string[]} lines - Dictionary lines to combine.
 * @returns {string} The merged pattern, as produced by `mergeLines`.
 */
function combineUnspecified2IdSecPwd(lines) {
  const idSecPwdLines = [];
  const unspecifiedLines = [];
  // Classify each line exactly once: a second classification pass would
  // duplicate every entry in the merged output.
  for (const line of lines) {
    if (line.includes('@@@@')) {
      idSecPwdLines.push(line);
    } else {
      unspecifiedLines.push(line);
    }
  }
  if (unspecifiedLines.length) {
    const mergedUnspecified = mergeLines(unspecifiedLines);
    idSecPwdLines.push(`${mergedUnspecified}@@@@${mergedUnspecified}`);
  }
  return mergeLines(idSecPwdLines);
}
/**
 * Work-in-progress stub for extracting "unspecified" branches out of a line
 * that contains `@@@@`.
 *
 * For now it only builds a tree from the line (the result is not used) and
 * pushes the line unchanged onto `idSecPwdLines`; `unspecifiedLines` is not
 * touched.
 *
 * @param {string} line - Line to process.
 * @param {string[]} unspecifiedLines - Collector for unspecified lines (currently unused).
 * @param {string[]} idSecPwdLines - Collector that receives `line`.
 */
function deepUnspecifiedExtractor(line, unspecifiedLines, idSecPwdLines) {
// NOTE(review): hybridTree is never read; the call is kept as-is because
// buildTreeStruct may throw on malformed input - confirm before removing.
const hybridTree = tree.buildTreeStruct(line);
/*
Planned algorithm (not implemented yet):
buildTree (to be created in a separate module)
depth-first traversal until unspecified branches are found.
extract those branches and add them (with their chain of parents) to the unspecifiedLines
add the remaining tree to the idSecPwdLines
*/
idSecPwdLines.push(line);
}
/**
 * Merge several pattern lines into a single alternation pattern.
 *
 * Empty lines are ignored. No remaining line yields `''`, a single remaining
 * line is returned unchanged, and several lines are joined as `(a|b|...)`.
 *
 * @param {string[]} lines - Pattern lines; the array is not modified.
 * @returns {string} The merged pattern.
 */
function mergeLines(lines) {
  // Filter first so that an input made only of empty lines yields '' rather
  // than an empty group '()'.
  const nonEmptyLines = lines.filter(l => l.length);
  if (nonEmptyLines.length === 0) return '';
  if (nonEmptyLines.length === 1) return nonEmptyLines[0];
  return `(${nonEmptyLines.join('|')})`;
}
/*
const json1 = '(a(b@@@@c|d)|(e|f)|g|h@@@@i)'
const json2 = {
'a':'a'{
'b@@@@c':'b@@@@c',
'd':'d'
},
{
'e':'e',
'f':'f'
},
'g':'g',
'h@@@@i':'h@@@@i'
}
*/
......@@ -15,6 +15,3 @@ a@@@@(b|c)
h@@@@(i|j)
`), '(a@@@@(b|c)|h@@@@(i|j)|((d|e)|(f|g))@@@@((d|e)|(f|g)))');
});
test.skip('no @@@@ cases combined with @@@@ case on same line recombine to allways finish with @@@@ case', t => {
t.is(app.parse('(a(b@@@@c|d)|(e|f)|g|h@@@@i)'), '(g@@@@h|(a|b|c)@@@@(a|b|c))');
});
......@@ -2,21 +2,23 @@ export function build(dictionaryString) {
}
/**
 * Parse a single-line dictionary pattern into its flattened tree structure.
 *
 * The string is split into characters and handed to `leftParser`, which
 * consumes the characters it understands. Anything left over means the
 * pattern could not be parsed completely. The raw tree is then passed through
 * `flattenTree` repeatedly until its JSON form stops changing.
 *
 * @param {string} monoLineString - Pattern to parse.
 * @returns {*} The flattened tree returned by the last `flattenTree` pass.
 * @throws {Error} When some characters remain unparsed.
 */
export function buildTreeStruct(monoLineString) {
  const stringAsArray = monoLineString.split('');
  const rawTree = leftParser(stringAsArray);
  if (stringAsArray.length) {
    throw new Error(`fail to build tree from : "${monoLineString}" parsed: ${JSON.stringify(rawTree)} unparsed/failed: ${stringAsArray.join('')}`);
  }
  // Flatten until a fixed point is reached: a hard-coded number of passes is
  // not enough for deeply nested patterns.
  let tree = rawTree;
  let lastTree;
  do {
    lastTree = JSON.stringify(tree);
    tree = flattenTree(tree);
  } while (JSON.stringify(tree) !== lastTree);
  return tree;
}
/**
* leftParser stuff
*/
/**
 * Append the pending string buffer to the step list of the last alternative,
 * then reset the buffer. Nothing is appended when the buffer is empty.
 *
 * @param {{str: string, tree: {alt: Array<{step: Array}>}}} data - Parser state; mutated in place.
 */
function flushNoEmptyString(data) {
  if (data.str.length) {
    // data.tree is an {alt: [...]} node, so the current alternative is the
    // last entry of data.tree.alt.
    const alternatives = data.tree.alt;
    alternatives[alternatives.length - 1].step.push(data.str);
  }
  data.str = '';
}
......@@ -25,11 +27,11 @@ function appendToString(chr, data) {
}
/**
 * Open a new, empty alternative at the end of the current alt node.
 *
 * @param {{tree: {alt: Array<{step: Array}>}}} data - Parser state; mutated in place.
 */
function nextAlt(data) {
  // data.tree is an {alt: [...]} object, not an array: push onto its alt list.
  data.tree.alt.push({step: []});
}
/**
 * Parse a nested group from the remaining input and append the resulting
 * sub-tree to the step list of the last alternative.
 *
 * @param {{input: string[], tree: {alt: Array<{step: Array}>}}} data - Parser state; mutated in place.
 */
function goIntoNewAlt(data) {
  // The remaining input is parsed exactly once; the sub-parser consumes the
  // characters it handles from data.input.
  const subTree = leftParser(data.input);
  const alternatives = data.tree.alt;
  alternatives[alternatives.length - 1].step.push(subTree);
}
function returnTrue() {
......@@ -50,7 +52,7 @@ function leftParser(inputChrArray) {
const data = {
input: inputChrArray,
str: '',
tree: [{step: []}],
tree: {alt: [{step: []}]},
};
while (data.input.length) {
const chr = data.input.shift();
......@@ -68,90 +70,69 @@ function concatStrings(array) {
const data = {
input: array,
str: '',
tree: [{step: []}],
tree: {alt: [{step: []}]}, // Output array
};
array.forEach(e => {
if (typeof e === 'string') appendToString(e, data);
if (isString(e)) appendToString(e, data);
else {
flushNoEmptyString(data)
data.tree[data.tree.length - 1].step.push(e);
data.tree.alt[data.tree.alt.length - 1].step.push(e);
}
});
flushNoEmptyString(data)
return data.tree[data.tree.length - 1].step;
return data.tree.alt[data.tree.alt.length - 1].step;
}
/**
 * Perform one simplification pass over a parsed tree.
 *
 * A tree node is a string, an `{alt: [...]}` node (alternatives) or a
 * `{step: [...]}` node (ordered sequence). Children are flattened first, then:
 * - a node with no child collapses to `''`;
 * - a node with a single child collapses to that child;
 * - an alt node is de-duplicated and absorbs the alternatives of directly
 *   nested alt nodes;
 * - a step node gets its children passed through `concatStrings`.
 *
 * The node is updated in place and returned. One pass may not be enough to
 * reach the simplest form, so callers may need to apply it repeatedly.
 *
 * @param {string|{alt: Array}|{step: Array}} tree - Node to flatten.
 * @returns {string|{alt: Array}|{step: Array}} The flattened node.
 * @throws {Error} When the node is neither a string, an alt nor a step.
 */
function flattenTree(tree) {
  if (isString(tree)) return tree;
  if (!isAlt(tree) && !isStep(tree)) {
    throw new Error(`Error: how to flatten ${JSON.stringify(tree)}`);
  }
  const objType = isAlt(tree) ? 'alt' : 'step';
  // Wrap the callback so map's extra (index, array) arguments are not forwarded.
  tree[objType] = tree[objType].map(child => flattenTree(child));
  if (tree[objType].length === 0) return '';
  if (tree[objType].length === 1) return tree[objType][0];
  if (objType === 'alt') {
    tree.alt = trivialDedup(tree.alt);
    // (a|(b|c)) is equivalent to (a|b|c): lift nested alternatives one level.
    tree.alt = [].concat(...tree.alt.map(e => (isAlt(e) ? e.alt : [e])));
    return tree;
  }
  tree.step = concatStrings(tree.step);
  return tree;
}
/**
 * Tell whether a tree node is a primitive string.
 *
 * @param {*} element - Value to inspect.
 * @returns {boolean} `true` only when `typeof element` is `'string'`.
 */
function isString(element) {
  const kind = typeof element;
  return kind === 'string';
}
/**
 * Tell whether a tree node is a step node, i.e. an object that has a defined
 * `step` property.
 *
 * @param {*} element - Value to inspect.
 * @returns {boolean} `true` for objects with a defined `step` property.
 */
function isStep(element) {
  // typeof null is 'object', so null must be excluded before reading `.step`.
  return typeof element === 'object' && element !== null && element.step !== undefined;
}
/**
 * Tell whether a tree node is an alt node, i.e. an object that has a defined
 * `alt` property.
 *
 * @param {*} element - Value to inspect.
 * @returns {boolean} `true` for objects with a defined `alt` property.
 */
function isAlt(element) {
  // typeof null is 'object', so null must be excluded before reading `.alt`.
  return typeof element === 'object' && element !== null && element.alt !== undefined;
}
/**
 * Remove structurally identical entries from a list of tree nodes.
 *
 * Entries are sorted with the default comparator (string coercion, so every
 * object entry compares equal to the others and keeps its relative order),
 * then two entries are considered duplicates when their JSON form is equal.
 * The returned entries are fresh copies rebuilt from that JSON form.
 *
 * @param {Array} tree - List of nodes (strings or plain objects); not modified.
 * @returns {Array} Sorted list without duplicates.
 */
function trivialDedup(tree) {
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array.
  const sortedNodes = [...tree].sort();
  const uniqueJson = new Set(sortedNodes.map(node => JSON.stringify(node)));
  return [...uniqueJson].map(json => JSON.parse(json));
}
/**
 * Turn a tree structure back into its pattern string.
 *
 * - a string is returned unchanged;
 * - an array or an `{alt: [...]}` node becomes `(x|y|...)`;
 * - a `{step: [...]}` node becomes the concatenation of its serialized parts.
 *
 * @param {string|Array|{alt: Array}|{step: Array}} treeStruct - Tree to serialize.
 * @returns {string} The serialized pattern.
 * @throws {Error} When the node has none of the shapes above.
 */
export function serialize(treeStruct) {
  if (typeof treeStruct === 'string') return treeStruct;

  const asAlternation = nodes => {
    const parts = nodes.map(serialize);
    return `(${parts.join('|')})`;
  };

  if (Array.isArray(treeStruct)) return asAlternation(treeStruct);
  if (isStep(treeStruct)) {
    const parts = treeStruct.step.map(serialize);
    return parts.join('');
  }
  if (isAlt(treeStruct)) return asAlternation(treeStruct.alt);

  throw new Error(`Error: how to serialize ${JSON.stringify(treeStruct)} RAW: ${treeStruct}`);
}
/*
export function extract(treeNavArray,fromTreeWrapped){
if(!Array.isArray(treeNavArray)) throw new Error(`treeNav: ${JSON.stringify(treeNavArray)} should be an integer array.`);
let extractedTree = '';
if(treeNavArray.length === 0) return fromTreeWrapped.pop();
if (typeof fromTreeWrapped[0] === 'string') throw new Error(`invalid treeNav pointer ${JSON.stringify(treeNavArray)} in ${serialize(fromTreeWrapped[0])} no branch ${treeNavArray[0]}.`);
if (Array.isArray(fromTreeWrapped[0])) {
const branch = treeNavArray.shift();
const extractedTree = fromTreeWrapped[0][branch]; // recursive here ?
//fromTreeWrapped[0] = [].concat(branch>0?fromTreeWrapped[0].slice(0,branch-1):[],fromTreeWrapped[0].slice(branch+1,fromTreeWrapped[0].length));
return extractedTree;
}
if (isStep(fromTreeWrapped[0])) {
const extractedTree = {step:[]};
fromTreeWrapped[0].step.forEach()
f
}
export function altCount(treeStruct) {
if (isString(treeStruct)) return 1;
if (isStep(treeStruct)) return treeStruct.step.reduce((acc, cur) => acc * altCount(cur), 1);
if (isAlt(treeStruct)) return treeStruct.alt.reduce((acc, cur) => acc + altCount(cur), 0);
}
return extractedTree;
export function getAlternative(altNumber, tree) {
return 'abc';
}
*/
import test from 'ava';
import * as app from './dictionary-tree.mjs';
// Builds the tree for `str` with app.buildTreeStruct and returns it as a JSON
// string, so that trees can be compared with `t.is`.
const buildTreeStruct = str => JSON.stringify(app.buildTreeStruct(str));
// JSON-encodes an expected tree literal so it is comparable with the output
// of the `buildTreeStruct` helper above.
const expected = str => JSON.stringify(str);
// Round-trips `str`: builds the tree with app.buildTreeStruct, then turns it
// back into a pattern string with app.serialize.
const buildTreeThenSerialize = str => app.serialize(app.buildTreeStruct(str));
test('simple string still simple string', t => t.is(buildTreeStruct('abc'), expected('abc')));
test('(a|b) alt as array', t => t.is(buildTreeStruct('(a|b)'), expected(['a', 'b'])));
// Ok to be permissive test('a) throw',t=>t.throws(()=>buildTreeStruct('a)')));
test('simple string still simple string', t => t.is(buildTreeThenSerialize('abc'), 'abc'));
test('(a|b) alt still (a|b)', t => t.is(buildTreeThenSerialize('(a|b)'), '(a|b)'));
test('a)b throw', t => t.throws(() => buildTreeStruct('a)b')));
// Ok to be permissive test('(a throw',t=>t.throws(()=>buildTreeStruct('(a')));
// Ok to be permissive test('a|b throw',t=>t.throws(()=>buildTreeStruct('a|b')));
test('(|b) empty choice is choice', t => t.is(buildTreeStruct('(|b)'), expected(['', 'b'])));
test('(b|b) trivial dedup', t => t.is(buildTreeStruct('(|b||b|)'), expected(['', 'b'])));
test('a(b|c) ordered part in step obj', t => t.is(buildTreeStruct('a(b|c)'), expected({step: ['a', ['b', 'c']]})));
test('a(b) flat merge when no alt', t => t.is(buildTreeStruct('a(b)'), expected('ab')));
test('build complexe tree with (|) pattern', t => t.is(buildTreeStruct('(a(|b@@@@c|d|)|(e|f)|g|h@@@@i)'),
expected([{step: ['a', ['', 'b@@@@c', 'd']]}, 'e', 'f', 'g', 'h@@@@i'])
));
test('serialize tree to a(b|c)', t => t.is(app.serialize({step: ['a', ['b', 'c']]}), 'a(b|c)'));
test('serialize incorrect tree throw', t => t.throws(() => app.serialize({plop: ['a']})));
test('(|b) keep empty choice', t => t.is(buildTreeThenSerialize('(|b)'), '(|b)'));
test('(b|b) trivial dedup', t => t.is(buildTreeThenSerialize('(|b||b|)'), '(|b)'));
test('a(b|c) mix fix and alt', t => t.is(buildTreeThenSerialize('a(b|c)'), 'a(b|c)'));
test('a(b) flat merge when no alt', t => t.is(buildTreeThenSerialize('a(b)'), 'ab'));
test('build complexe tree with (|) pattern', t => t.is(buildTreeThenSerialize('(a(|b@@@@c|d|)|(e|f)|g|h@@@@i)'), '(a(|b@@@@c|d)|e|f|g|h@@@@i)'));
const exempleTree = ()=>app.buildTreeStruct('a(b(c|d)|e(f|g|h)ij(k|l)|@@@@m)')
test.skip('extract subtree ae(f|g|h)ij(k|l) from a(b(c|d)|e(f|g|h)ij(k|l)|@@@@m)',t =>{
const srcTree = exempleTree();
const treePointer = [1];
const extractedTree = app.extract(treePointer,[srcTree]);
test('serialize incorrect tree throw', t => t.throws(() => app.serialize({plop: ['a']})));
t.is(JSON.stringify(extractedTree), expected({step: ['ae', ['f', 'g', 'h'], 'ij']}))
t.is(app.serialize(srcTree), 'a(b(c|d)|@@@@k)')
});
test.skip('extract subtree aehij from a(b(c|d)|e(f|g|h)ij(k|l)|@@@@m)',t =>{
const srcTree = exempleTree();
const treePointer = [1,2];
const extractedTree = app.extract(treePointer,[srcTree]);
t.is(extractedTree, 'aehij');
t.is(app.serialize(srcTree), 'a(b(c|d)|e(f|g)ij|@@@@k)')
});
const exampleTree = () => app.buildTreeStruct('a(b(c|d)|e(f|g|h)ij(k|l)|@@@@m)')
test.skip('mono altCount', t => t.is(app.altCount(app.buildTreeStruct('ipsum')), 1));
test.skip('simple altCount', t => t.is(app.altCount(app.buildTreeStruct('(lore|ipsu)m')), 2));
test.skip('multi altCount', t => t.is(app.altCount(app.buildTreeStruct('(a|b|c)(d|e|f)g(h|i|j|k)')), 36));
test.skip('multi level tree altCount', t => t.is(app.altCount(app.buildTreeStruct('a(b(c|d)|e(f|g|h)ij(k|l)|@@@@m)')), 9));
test.skip('getAlternative 0', t => t.is(app.getAlternative(0, exampleTree()), 'abc'));
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment