diff --git a/CHANGELOG.fr.md b/CHANGELOG.fr.md index cec15433ff8846a1e7d46891366227fcc88b844b..adc74a3625d02cc4cf3cb147838aaa1406088919 100644 --- a/CHANGELOG.fr.md +++ b/CHANGELOG.fr.md @@ -14,6 +14,15 @@ et ce projet adhère au [versionnage sémantique](https://semver.org/spec/v2.0.0 ## [Non-publié/Non-Stabilisé] (par [1000i100]) +## [Version 3.5.4] - 2022-12-17 (par [1000i100]) +### Corrections +- Dictionary bascule en mode sans échec (non regex) si l'interprétation du texte fourni échoue (ou si options.escapeAll=1) +- trivialDedup trie désormais les alternatives pour mieux les dédoublonner. +- dictionary-parse prend en paramètres un tableau associatif 'options' au lieu d'une liste de paramètres + +- '\' est désormais considéré comme un caractère spécial +- Les sérialisations internes (rawSerialize) de Dictionary ne reconvertissent plus les caractères spéciaux + ## [Version 3.5.3] - 2022-12-16 (par [1000i100]) ### Corrections - '\' est désormais considéré comme un caractère spécial @@ -135,8 +144,9 @@ et ce projet adhère au [versionnage sémantique](https://semver.org/spec/v2.0.0 - intégration des librairies de crypto nécessaires - calcul de la clef publique correspondant à chaque combinaison de secrets saisie, et comparaison à la clef publique de référence. 
-[Non-publié/Non-Stabilisé]: https://git.duniter.org/libs/g1lib.js/-/compare/v3.5.3...main +[Non-publié/Non-Stabilisé]: https://git.duniter.org/libs/g1lib.js/-/compare/v3.5.4...main +[Version 3.5.4]: https://git.duniter.org/libs/g1lib.js/-/compare/v3.5.3...v3.5.4 [Version 3.5.3]: https://git.duniter.org/libs/g1lib.js/-/compare/v3.5.2...v3.5.3 [Version 3.5.2]: https://git.duniter.org/libs/g1lib.js/-/compare/v3.5.1...v3.5.2 [Version 3.5.1]: https://git.duniter.org/libs/g1lib.js/-/compare/v3.5.0...v3.5.1 diff --git a/npm/package.json b/npm/package.json index 29e580696b1315e776e699c6e0761d8de380bf47..9691042abd7e3184cddc2a10a7801cc07a9108d1 100644 --- a/npm/package.json +++ b/npm/package.json @@ -1,6 +1,6 @@ { "name": "g1lib", - "version": "3.5.3", + "version": "3.5.4", "description": "An ubiquitous static javascript toolbox lib for Ǧ1 / Duniter ecosystem with reliability in mind.", "main": "nodejs/all.mjs", "browser": "browser/all.mjs", diff --git a/src/dictionary-escaper.mjs b/src/dictionary-escaper.mjs index 2925dff93b467b3936fbe8b72950e788c9b2095e..11155e58229b9db7b5024e2b7d8a513d96e6796b 100644 --- a/src/dictionary-escaper.mjs +++ b/src/dictionary-escaper.mjs @@ -1,19 +1,19 @@ const specialMap = { - '(': String.fromCharCode(0x01), - ')': String.fromCharCode(0x02), - '|': String.fromCharCode(0x03), - '{': String.fromCharCode(0x04), - '}': String.fromCharCode(0x05), - ',': String.fromCharCode(0x06), - '[': String.fromCharCode(0x07), - ']': String.fromCharCode(0x08), - '-': String.fromCharCode(0x09), - '<': String.fromCharCode(0x0a), - '>': String.fromCharCode(0x0b), - ':': String.fromCharCode(0x0c), - '=': String.fromCharCode(0x0d), - '@': String.fromCharCode(0x0e), - '\\': String.fromCharCode(0x0f), + '(': String.fromCharCode(0x2001), + ')': String.fromCharCode(0x2002), + '|': String.fromCharCode(0x2003), + '{': String.fromCharCode(0x2004), + '}': String.fromCharCode(0x2005), + ',': String.fromCharCode(0x2006), + '[': String.fromCharCode(0x2007), + ']': 
String.fromCharCode(0x2008), + '-': String.fromCharCode(0x2009), + '<': String.fromCharCode(0x200a), + '>': String.fromCharCode(0x200b), + ':': String.fromCharCode(0x200c), + '=': String.fromCharCode(0x200d), + '@': String.fromCharCode(0x200e), + '\\': String.fromCharCode(0x200f), }; const revertSpecial = swapKeyValue(specialMap); function swapKeyValue(object) { @@ -33,3 +33,6 @@ export function utfSpecial2unEscaped(str) { export function utfSpecial2escaped(str) { return str.split('').map(chr => revertSpecial[chr] ? `\\${revertSpecial[chr]}` : chr).join(''); } +export function escapeAll(str){ + return str.replace(/./g, chr => specialMap[chr] ? specialMap[chr] : chr); +} diff --git a/src/dictionary-escaper.test.mjs b/src/dictionary-escaper.test.mjs index ce00c23cb1d258369f535ca0abfe381af5f2b908..6583e87581dba2f3e6c3f3793a6a12f4d817af4d 100644 --- a/src/dictionary-escaper.test.mjs +++ b/src/dictionary-escaper.test.mjs @@ -8,6 +8,7 @@ test('unescape special characters & re-escape them generate identical string', t '[\\]*]', '\\{\\}', '\\\\', + '8\\<9<', ] cases.forEach(c=>t.is(app.utfSpecial2escaped(app.escape2utfSpecial(c)), c)); }); @@ -17,3 +18,6 @@ test('unescape special characters & reconvert without escaping remove escaping', test('unescape usual characters & re-escape them generate usual string', t => { t.is(app.utfSpecial2escaped(app.escape2utfSpecial('\\a')), 'a'); }); +test('escapeAll convert any special character of a string', t => { + t.is(app.utfSpecial2escaped(app.escapeAll('a:[]{}()<>-=\\b')), 'a\\:\\[\\]\\{\\}\\(\\)\\<\\>\\-\\=\\\\b'); +}); diff --git a/src/dictionary-parser.mjs b/src/dictionary-parser.mjs index cb400616d4d1183fb11940ee88ade6bf0f1cb0f8..c160e0e46300722e5ee71342af60f1a61ae03ffa 100644 --- a/src/dictionary-parser.mjs +++ b/src/dictionary-parser.mjs @@ -1,10 +1,16 @@ import latinize from '../node_modules/latinize-to-ascii/latinize.mjs'; import * as tree from './dictionary-tree.mjs'; -import {escape2utfSpecial, utfSpecial2escaped} from 
"./dictionary-escaper.mjs"; +import {escape2utfSpecial, escapeAll, utfSpecial2escaped} from "./dictionary-escaper.mjs"; -export function parse(dictionaryString,idSecPwd=true,accent=0,lowerCase=0,leet=0) { +export function parse(dictionaryString,options={}) { resetCache(); - const escapedString = escape2utfSpecial(dictionaryString); + if(typeof options.idSecPwd === 'undefined') options.idSecPwd = true; + if(typeof options.escapeAll === 'undefined') options.escapeAll = 0; + if(typeof options.accent === 'undefined') options.accent = 0; + if(typeof options.lowerCase === 'undefined') options.lowerCase = 0; + if(typeof options.leetSpeak === 'undefined') options.leetSpeak = 0; + const escapeAllString = options.escapeAll===1?escapeAll(dictionaryString):options.escapeAll===2?`${dictionaryString}\n${escapeAll(dictionaryString)}`:dictionaryString; + const escapedString = escape2utfSpecial(escapeAllString); const unbracketed = bracketsHandler(escapedString); const allLines = unbracketed.split('\n'); const parsedLines = parseAllLines(allLines); @@ -13,27 +19,27 @@ export function parse(dictionaryString,idSecPwd=true,accent=0,lowerCase=0,leet=0 .replace(/\|§void§/g,'') .replace(/§void§/g,''); if(monoLined.includes('§infiniteRecursion§')) throw new Error(`Unable to parse : ${utfSpecial2escaped(flattenIt(monoLined))}`); - if(!idSecPwd) return parseEnd(monoLined,allLines,accent,lowerCase,leet); + if(!options.idSecPwd) return parseEnd(monoLined,allLines,options); const parts = tree.splitAround('@@',tree.buildTreeStruct(monoLined)); const flatParts = []; if(parts.matching) flatParts.push(parts.matching); if(parts.notMatching) flatParts.push(parts.notMatching); const combined = combineUnspecified2IdSecPwd(flatParts); - return parseEnd(combined,allLines,accent,lowerCase,leet); + return parseEnd(combined,allLines,options); } -function parseEnd(str,allLines,accent,lowerCase,leet) { +function parseEnd(str,allLines,options) { let theString = syncRefHandler(str,allLines) 
if(theString.match(/=[^>]+>/)) throw new Error(`Unable to parse : ${utfSpecial2escaped(flattenIt(theString))}`); theString = qtyHandler(theString); - if(accent===1) theString = zeroAccent(theString); - if(accent===2) theString = optionalAccent(theString); - if(lowerCase===1) theString = zeroUpperCase(theString); - if(lowerCase===2) theString = optionalUpperCase(theString); - if(lowerCase===3) theString = allUpperCase(allCapitalized(theString)); - if(lowerCase===4) theString = optionalCapitalized(theString); - if(leet===1) theString = optionalLeetSpeak(theString); - if(leet===2) theString = allLeetSpeak(theString); - if(leet===3) theString = everyLeetSpeak(theString); + if(options.accent===1) theString = zeroAccent(theString); + if(options.accent===2) theString = optionalAccent(theString); + if(options.lowerCase===1) theString = zeroUpperCase(theString); + if(options.lowerCase===2) theString = optionalUpperCase(theString); + if(options.lowerCase===3) theString = allUpperCase(allCapitalized(theString)); + if(options.lowerCase===4) theString = optionalCapitalized(theString); + if(options.leetSpeak===1) theString = optionalLeetSpeak(theString); + if(options.leetSpeak===2) theString = allLeetSpeak(theString); + if(options.leetSpeak===3) theString = everyLeetSpeak(theString); //TODO: bigLeet Variants return utfSpecial2escaped(flattenIt(theString)); } diff --git a/src/dictionary-parser.test.mjs b/src/dictionary-parser.test.mjs index 6e8fffe0e7558bdff2f98caa96e922c9ad8d27b0..1b572c6d7a2dde1cfaf8fcfdf8fe46a39beb1fb1 100644 --- a/src/dictionary-parser.test.mjs +++ b/src/dictionary-parser.test.mjs @@ -2,73 +2,73 @@ import test from 'ava'; import * as app from './dictionary-parser.mjs'; test('parse remove ref:: lines', t => { - t.is(app.parse('ref::truc',false), ''); + t.is(app.parse('ref::truc',{idSecPwd:false}), ''); }); test('parse handle <ref>', t => { - t.is(app.parse(`ref::truc\nref::bidule\n<ref> <ref>`,false), '(truc|bidule) (truc|bidule)'); - 
t.is(app.parse(`ref::(truc|bidule)\n<ref> <ref>`,false), '(truc|bidule) (truc|bidule)'); + t.is(app.parse(`ref::truc\nref::bidule\n<ref> <ref>`,{idSecPwd:false}), '(bidule|truc) (bidule|truc)'); + t.is(app.parse(`ref::(truc|bidule)\n<ref> <ref>`,{idSecPwd:false}), '(bidule|truc) (bidule|truc)'); }); test('parse handle <ref> complex inclusion', t => { - t.is(app.parse(`ref::a<ref2>\n<ref>\nref2::(b|c)`,false), 'a(b|c)'); + t.is(app.parse(`ref::a<ref2>\n<ref>\nref2::(b|c)`,{idSecPwd:false}), 'a(b|c)'); }); test('parse detect infinite recursion with <ref> and throw', t => { - t.throws(()=>app.parse(`ref::a<ref>\n<ref>`,false)); + t.throws(()=>app.parse(`ref::a<ref>\n<ref>`,{idSecPwd:false})); }); test('parse detect infinite recursion with nested <ref> and throw', t => { - t.throws(()=>app.parse(`ref::a<ref2>\n<ref>\nref2::b<ref>`,false)); + t.throws(()=>app.parse(`ref::a<ref2>\n<ref>\nref2::b<ref>`,{idSecPwd:false})); }); test('parse handle [ -_]', t => { - t.is(app.parse('[ -_]',false), '( |-|_)'); + t.is(app.parse('[ -_]',{idSecPwd:false}), '( |-|_)'); }); test('parse handle [a-f]', t => { - t.is(app.parse('[a-f]',false), '(a|b|c|d|e|f)'); - t.is(app.parse('[7-9]',false), '(7|8|9)'); - t.is(app.parse('[C-F]',false), '(C|D|E|F)'); + t.is(app.parse('[a-f]',{idSecPwd:false}), '(a|b|c|d|e|f)'); + t.is(app.parse('[7-9]',{idSecPwd:false}), '(7|8|9)'); + t.is(app.parse('[C-F]',{idSecPwd:false}), '(C|D|E|F)'); }); test('parse handle [a-c-]', t => { - t.is(app.parse('[a-c-]',false), '(a|b|c|-)'); + t.is(app.parse('[a-c-]',{idSecPwd:false}), '(-|a|b|c)'); }); test('parse handle {qty}', t => { - t.is(app.parse('a{5}',false), 'aaaaa'); + t.is(app.parse('a{5}',{idSecPwd:false}), 'aaaaa'); }); test('parse handle {min,max}', t => { - t.is(app.parse('b{3,5}',false), 'bbb(|b)(|b)'); + t.is(app.parse('b{3,5}',{idSecPwd:false}), 'bbb(|b)(|b)'); }); test('parse handle (string){qty}', t => { - t.is(app.parse(`c'est (toto|tata){0,2}`,false), `c'est (|toto|tata)(|toto|tata)`); + 
t.is(app.parse(`c'est (toto|tata){0,2}`,{idSecPwd:false}), `c'est (|tata|toto)(|tata|toto)`); }); test('parse handle nested (s|t(ri|ng)){qty}', t => { - t.is(app.parse(`(s|t(ri|ng)){1,2}`,false), `(s|t(ri|ng))(|s|t(ri|ng))`); + t.is(app.parse(`(s|t(ri|ng)){1,2}`,{idSecPwd:false}), `(t(ng|ri)|s)(t(ng|ri)||s)`); }); test('parse handle plop:\\:', t => { - t.is(app.parse('plop:\\:ici',false), 'plop:\\:ici'); - t.is(app.parse('plop\\::ici',false), 'plop\\::ici'); - t.is(app.parse('plop::ici',false), ''); + t.is(app.parse('plop:\\:ici',{idSecPwd:false}), 'plop:\\:ici'); + t.is(app.parse('plop\\::ici',{idSecPwd:false}), 'plop\\::ici'); + t.is(app.parse('plop::ici',{idSecPwd:false}), ''); }); test('parse handle [\\]*]', t => { - t.is(app.parse('[\\]*]',false), '(\\]|*)'); + t.is(app.parse('[\\]*]',{idSecPwd:false}), '(*|\\])'); }); test('parse handle =ref>', t => { - t.is(app.parse(`ref::truc\nref::bidule\n=ref> =ref>`,false), '(truc truc|bidule bidule)'); - t.is(app.parse(`ref::(truc|bidule)\n=ref> =ref>`,false), '(truc truc|bidule bidule)'); + t.is(app.parse(`ref::truc\nref::bidule\n=ref> =ref>`,{idSecPwd:false}), '(bidule bidule|truc truc)'); + t.is(app.parse(`ref::(truc|bidule)\n=ref> =ref>`,{idSecPwd:false}), '(bidule bidule|truc truc)'); }); test('parse handle =ref> without generating duplication', t => { - t.is(app.parse(`ref::(truc|bidule)\n(=ref> =ref>|machin)`,false), '(truc truc|machin|bidule bidule)'); + t.is(app.parse(`ref::(truc|bidule)\n(=ref> =ref>|machin)`,{idSecPwd:false}), '(bidule bidule|machin|truc truc)'); }); test('parse handle multiple =ref>', t => { t.is( - app.parse(`=ref> =ref2> =ref> =ref2>\nref::(truc|bidule)\nref2::(machin|chose)`,false), - '(truc machin truc machin|bidule machin bidule machin|truc chose truc chose|bidule chose bidule chose)'); + app.parse(`=ref> =ref2> =ref> =ref2>\nref::(truc|bidule)\nref2::(machin|chose)`,{idSecPwd:false}), + '(bidule chose bidule chose|bidule machin bidule machin|truc chose truc chose|truc machin truc 
machin)'); }); test('parse handle multi-level =ref>', t => { - t.is(app.parse(`=ref> =ref> =ref2>\nref::(truc|=ref2>)\nref2::(machin|chose)`,false), - '(truc truc machin|machin machin machin|truc truc chose|chose chose chose)'); + t.is(app.parse(`=ref> =ref> =ref2>\nref::(truc|=ref2>)\nref2::(machin|chose)`,{idSecPwd:false}), + '(chose chose chose|machin machin machin|truc truc chose|truc truc machin)'); }); -test('parse throw if unconverted =ref>', t => t.throws(()=>app.parse(`=ref>\nref::=ref>`,false))); +test('parse throw if unconverted =ref>', t => t.throws(()=>app.parse(`=ref>\nref::=ref>`,{idSecPwd:false}))); /*TODO: test('parse handle multi-level =ref> in all case', t => { t.is(app.parse(`=ref2> =ref> =ref>\nref::(truc|=ref2>)\nref2::(machin|chose)`,false), '(machin truc truc|chose truc truc|machin machin machin|chose chose chose)'); @@ -82,11 +82,11 @@ test('@@ present, do not add it', t => { }); test('no @@ on each line ? combine no @@ lines', t => { t.is(app.parse(` -a@@(b|c) -(d|e) -(f|g) +e@@(f|g) +(a|b) +(c|d) h@@(i|j) -`), '(a@@(b|c)|h@@(i|j)|(d|e|f|g)@@(d|e|f|g))'); +`), '((a|b|c|d)@@(a|b|c|d)|e@@(f|g)|h@@(i|j))'); }); test('add @@ on part without it', t => { t.is(app.parse(`(a@@b|c)`), '(a@@b|c@@c)'); @@ -97,13 +97,13 @@ test('add @@ on part without it complex case', t => { }); test('throw if multiple @@ in the same sequence', t => t.throws(() => app.parse(`a@@(b|c)@@d`))); -test('add no accents variant', t => t.is(app.parse('Ǧ1ǦdiT ici',false,1), '(Ǧ1ǦdiT ici|G1GdiT ici)')); -test('add optional accents variants', t => t.is(app.parse('Ǧ1ǦdiT ici',false,2), '(Ǧ|G)1(Ǧ|G)diT ici')); -test('add lowercase variant', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,1), '(Ǧ1ǦdiT ici|ǧ1ǧdit ici)')); -test('add optional lowercase variant', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,2), '(Ǧ|ǧ)1(Ǧ|ǧ)di(T|t) ici')); -test('add uppercase variants', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,3), '(Ǧ1ǦdiT ici|Ǧ1ǦdiT Ici|Ǧ1ǧdit Ici|Ǧ1ǦDIT ICI)')); -//test.skip('add optional 
capitalized variants', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,4), '(Ǧ|ǧ)1(Ǧ|ǧ)di(T|t) (I|i)ci')); -test('add no leetSpeak variants', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,0,1), 'Ǧ(1|i|l|I|L|t|T)ǦdiT ici')); -test('add leetSpeak variants', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,0,2), '(Ǧ1ǦdiT ici|Ǧ1Ǧd1(7|1) 1c1)')); -test('add every leetSpeak variants', t => t.is(app.parse('Ǧ1ǦdiT ici',false,0,0,3), 'Ǧ(1|i|l|I|L|t|T)Ǧd(i|1|l|I|L|t|T)(T|7|t|y|Y|1|i|l|I|L) (i|1|l|I|L|t|T)c(i|1|l|I|L|t|T)')); -//test('add all variants', t => t.true(app.parse('Ǧ1ǦdiT ici',true,2,4,3).length>500)); +test('add no accents variant', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, accent:1}), '(G1GdiT ici|Ǧ1ǦdiT ici)')); +test('add optional accents variants', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, accent:2}), '(G|Ǧ)1(G|Ǧ)diT ici')); +test('add lowercase variant', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, lowerCase:1}), '(Ǧ1ǦdiT ici|ǧ1ǧdit ici)')); +test('add optional lowercase variant', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, lowerCase:2}), '(Ǧ|ǧ)1(Ǧ|ǧ)di(T|t) ici')); +test('add uppercase variants', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, lowerCase:3}), '(Ǧ1ǦDIT ICI|Ǧ1ǦdiT Ici|Ǧ1ǦdiT ici|Ǧ1ǧdit Ici)')); +//test('add optional capitalized variants', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, lowerCase:4}), '(Ǧ|ǧ)1(Ǧ|ǧ)di(T|t) (I|i)ci')); +test('add no leetSpeak variants', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, leetSpeak:1}), 'Ǧ(1|I|L|T|i|l|t)ǦdiT ici')); +test('add leetSpeak variants', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, leetSpeak:2}), '(Ǧ1Ǧd1(1|7) 1c1|Ǧ1ǦdiT ici)')); +test('add every leetSpeak variants', t => t.is(app.parse('Ǧ1ǦdiT ici',{idSecPwd:false, leetSpeak:3}), 'Ǧ(1|I|L|T|i|l|t)Ǧd(1|I|L|T|i|l|t)(1|7|I|L|T|Y|i|l|t|y) (1|I|L|T|i|l|t)c(1|I|L|T|i|l|t)')); +//test('add all variants', t => t.true(app.parse('Ǧ1ǦdiT ici',{idSecPwd:true, accent:2, lowerCase:4, leetSpeak:3}).length>500)); diff --git 
a/src/dictionary-tree.mjs b/src/dictionary-tree.mjs index 4e0eee0980579ca7d6cc3a92aaf835bd340573a6..b820759242236593ac81b1f045294050ae7372ac 100644 --- a/src/dictionary-tree.mjs +++ b/src/dictionary-tree.mjs @@ -115,9 +115,8 @@ function isAlt(element) { function trivialDedup(tree) { const dedupKeys = {}; - tree.sort(); tree.forEach(v => dedupKeys[JSON.stringify(v)] = v); // eslint-disable-line no-return-assign - return Object.keys(dedupKeys).map(JSON.parse); + return Object.keys(dedupKeys).sort().map(JSON.parse); } export function splitAround(pattern,treeStruct){ function recSplitter(treeStruct){ @@ -160,7 +159,7 @@ export function splitAround(pattern,treeStruct){ if(matching.length) return {matching: `(${matching.join('|')})`} if(notMatching.length) return {notMatching: `(${notMatching.join('|')})`} } - throw new Error(`Error: how to splitAround ${pattern} with ${JSON.stringify(treeStruct)} RAW: ${treeStruct}`); + throw new Error(`Error: how to splitAround ${pattern} with ${JSON.stringify(treeStruct)}`); } const res = recSplitter(treeStruct); if(res.matching) res.matching = rawSerialize(buildTreeStruct(res.matching)); diff --git a/src/dictionary-tree.test.mjs b/src/dictionary-tree.test.mjs index 48e0cdcf731379f8440ccf1783716ef6e107faed..b8bdfecf7a4051b333652da2b1efa7657e3d3d15 100644 --- a/src/dictionary-tree.test.mjs +++ b/src/dictionary-tree.test.mjs @@ -19,6 +19,7 @@ test('serialize incorrect tree throw', t => t.throws(() => app.serialize({plop: test('splitAround throw when to many split', t => t.throws(() => app.splitAround('@@',app.buildTreeStruct('z@@a(b|c@@d)')))); test('splitAround throw with @@@@', t => t.throws(() => app.splitAround('@@',app.buildTreeStruct('@@@@')))); +test('splitAround throw with invalid tree', t => t.throws(() => app.splitAround('@@',[]))); test('splitAround return notMatching case', t => t.deepEqual(app.splitAround('@',app.buildTreeStruct('a(b|c)')),{notMatching:'a(b|c)'})); test('splitAround return matching case', t => 
t.deepEqual(app.splitAround('@',app.buildTreeStruct('a@b')),{matching:'a@b'})); test('splitAround return both matching case and not matching one', t => t.deepEqual(app.splitAround('@',app.buildTreeStruct('a@b|c')),{matching:'a@b',notMatching:'c'})); @@ -28,46 +29,48 @@ test('simple altCount', t => t.is(app.altCount(app.buildTreeStruct('(lore|ipsu)m test('multi altCount', t => t.is(app.altCount(app.buildTreeStruct('(a|b|c)(d|e|f)g(h|i|j|k)')), 36)); test('multi level tree altCount', t => t.is(app.altCount(app.buildTreeStruct('a(b(c|d)|e(f|g|h)ij(k|l)|@@m)')), 9)); -const exampleTree = () => app.buildTreeStruct('a(b(c|d)|e(f|g(h|i)|j)kl(m|n(o|p)|q(r|s)|t)|(u|v)w)'); +const exampleTree = () => app.buildTreeStruct('a((b|c)d|e(f|g)|h(i(j|k)|l|m)no(p(q|r)|s(t|u)|v|w))'); // console.log(JSON.stringify(exampleTree())); // console.log(app.serialize(exampleTree())); /* exampleTree detailed + a( 0-27 - b(c|d) 0 & 1 - |e( 2-25 - f 2-7 - |g(h|i) 8-13 &14-19 - |j 20-25 - )kl( 2-25 - m 2 & 8 & 14 & 20 - |n(o|p) 3-4 & 9-10 & 15-16 & 21-22 - |q(r|s) 5-6 & 11-12 & 17-18 & 23-24 - |t 7 & 13 & 19 & 25 - ) - |(u|v)w 26-27 + (b|c)d 0-1 + |e(f|g) 2-3 + |h( 4-27 + i(j|k) 4-9 & 10-15 + |l 16-21 + |m 22-27 + )no( 4-27 + p(q|r) 4-5 & 10-11 & 16-17 & 22-23 + |s(t|u) 6-7 & 12-13 & 18-19 & 24-25 + |v 8 & 14 & 20 & 26 + |w 9 & 15 & 21 & 27 + ) ) */ -test('getAlternative 0', t => t.is(app.getAlternative(0, exampleTree()), 'abc')); -test('getAlternative 1', t => t.is(app.getAlternative(1, exampleTree()), 'abd')); -test('getAlternative 2', t => t.is(app.getAlternative(2, exampleTree()), 'aefklm')); -test('getAlternative 3', t => t.is(app.getAlternative(3, exampleTree()), 'aefklno')); -test('getAlternative 4', t => t.is(app.getAlternative(4, exampleTree()), 'aefklnp')); -test('getAlternative 5', t => t.is(app.getAlternative(5, exampleTree()), 'aefklqr')); -test('getAlternative 6', t => t.is(app.getAlternative(6, exampleTree()), 'aefklqs')); -test('getAlternative 7', t => t.is(app.getAlternative(7, 
exampleTree()), 'aefklt')); -test('getAlternative 8', t => t.is(app.getAlternative(8, exampleTree()), 'aeghklm')); -test('getAlternative 9', t => t.is(app.getAlternative(9, exampleTree()), 'aeghklno')); -test('getAlternative 14', t => t.is(app.getAlternative(14, exampleTree()), 'aegiklm')); -test('getAlternative 20', t => t.is(app.getAlternative(20, exampleTree()), 'aejklm')); -test('getAlternative 26', t => t.is(app.getAlternative(26, exampleTree()), 'auw')); -test('getAlternative 27', t => t.is(app.getAlternative(27, exampleTree()), 'avw')); +test('getAlternative 0', t => t.is(app.getAlternative(0, exampleTree()), 'abd')); +test('getAlternative 1', t => t.is(app.getAlternative(1, exampleTree()), 'acd')); +test('getAlternative 2', t => t.is(app.getAlternative(2, exampleTree()), 'aef')); +test('getAlternative 3', t => t.is(app.getAlternative(3, exampleTree()), 'aeg')); +test('getAlternative 4', t => t.is(app.getAlternative(4, exampleTree()), 'ahijnopq')); +test('getAlternative 5', t => t.is(app.getAlternative(5, exampleTree()), 'ahijnopr')); +test('getAlternative 6', t => t.is(app.getAlternative(6, exampleTree()), 'ahijnost')); +test('getAlternative 7', t => t.is(app.getAlternative(7, exampleTree()), 'ahijnosu')); +test('getAlternative 8', t => t.is(app.getAlternative(8, exampleTree()), 'ahijnov')); +test('getAlternative 9', t => t.is(app.getAlternative(9, exampleTree()), 'ahijnow')); +test('getAlternative 10', t => t.is(app.getAlternative(10, exampleTree()), 'ahiknopq')); +test('getAlternative 15', t => t.is(app.getAlternative(15, exampleTree()), 'ahiknow')); +test('getAlternative 16', t => t.is(app.getAlternative(16, exampleTree()), 'ahlnopq')); +test('getAlternative 27', t => t.is(app.getAlternative(27, exampleTree()), 'ahmnow')); test('getAlternative 28 or more throw', t => t.throws(() => app.getAlternative(28, exampleTree()))); test('escaped special characters are reconverted with getAlternative but not with getRawAlternative', t => { const tree = 
app.buildTreeStruct('a\\(b(c|d)@@e'); t.is(app.getAlternative(0,tree), 'a(bc@@e'); - t.is(app.getRawAlternative(0,tree), `a${String.fromCharCode(0x01)}bc@@e`); + t.is(app.getRawAlternative(0,tree), `a${String.fromCharCode(0x2001)}bc@@e`); const treeWhereRawIsUseful = app.buildTreeStruct('a\\@\\@b(c|d)@@e'); t.is(app.getAlternative(0,treeWhereRawIsUseful), 'a@@bc@@e'); - t.is(app.getRawAlternative(0,treeWhereRawIsUseful), `a${String.fromCharCode(0x0e)+String.fromCharCode(0x0e)}bc@@e`); + const escAro = String.fromCharCode(0x200e); + t.is(app.getRawAlternative(0,treeWhereRawIsUseful), `a${escAro+escAro}bc@@e`); }); diff --git a/src/dictionary.mjs b/src/dictionary.mjs index 1aa138be80eb92840d2dbe88140c7b1cc489162e..9006de7148bd99b95785e3fb6b702c7a5079a13c 100644 --- a/src/dictionary.mjs +++ b/src/dictionary.mjs @@ -10,7 +10,14 @@ export class Dictionary { if(typeof this.config.cache === 'undefined') this.config.cache = true; if(typeof this.config.cacheMax === 'undefined') this.config.cacheMax = 1_000_000; if(typeof this.config.idSecPwd === 'undefined') this.config.idSecPwd = true; - this.tree = buildTreeStruct(parse(dictionaryString,this.config.idSecPwd,this.config.accent,this.config.lowerCase,this.config.leetSpeak)); + if(this.originalConfig.escapeAll !== 1) try{ + this.config.escapeAll = 0; + this.tree = buildTreeStruct(parse(dictionaryString,this.config)); + this.config.escapeAll = this.originalConfig.escapeAll; + } catch (e){ + this.config.escapeAll = 1; + } + this.tree = buildTreeStruct(parse(dictionaryString,this.config)); this.length = this.tree.altCount; this.estimateDuration = ()=>this.length/(this.config.speed || 1) this.estimateRemaining = ()=>(this.length-this.tried)/(this.config.speed || 1) @@ -60,12 +67,17 @@ function adjustVariant(self){ if (self.estimateDuration() >= durationGoal) return; if (self.length >= self.config.cacheMax) return; function rebuildTree(self){ - self.tree = 
buildTreeStruct(parse(self.originalConfig.dictionaryString,self.config.idSecPwd,self.config.accent,self.config.lowerCase,self.config.leetSpeak)); + self.tree = buildTreeStruct(parse(self.originalConfig.dictionaryString,self.config)); self.length = self.tree.altCount; } let lastLess1hConfig; while (self.estimateDuration() < durationGoal && self.length < self.config.cacheMax){ lastLess1hConfig = JSON.parse(JSON.stringify(self.config)); + if(self.config.escapeAll !== 2 && (typeof self.originalConfig.escapeAll === 'undefined' || self.originalConfig.escapeAll === "auto")) { + self.config.escapeAll = 2; + rebuildTree(self); + continue; + } if(self.config.accent !== 2 && (typeof self.originalConfig.accent === 'undefined' || self.originalConfig.accent === "auto")) { if(!self.config.accent) self.config.accent = 1; else self.config.accent++; @@ -84,7 +96,6 @@ function adjustVariant(self){ rebuildTree(self); continue; } - console.log('strange, not an hour yet with all variants ?'); break; } self.config = lastLess1hConfig || self.config; diff --git a/src/dictionary.test-e2e.mjs b/src/dictionary.test-e2e.mjs index db6491d87ca83eecfbeb2c181018ff18366d09ab..6fe0cbd786c771f9f960e7c80e5ff22209f96d9b 100644 --- a/src/dictionary.test-e2e.mjs +++ b/src/dictionary.test-e2e.mjs @@ -3,6 +3,6 @@ import * as app from './dictionary.mjs'; test("alt < 1_000_000 & estimateDuration < 1h search for variant and use cache to fine duplicate", t => { const dico = new app.Dictionary(`[0-9]{4}@@[a-z0-8][01]`, {speed:200}); - t.is(dico.length,700_000); + t.is(dico.length,700_001); t.is(dico.duplicateTotal,0); }); diff --git a/src/dictionary.test.mjs b/src/dictionary.test.mjs index 924c5e9839ad22ee8c1ccd1bec8574069db92c0d..352212e11cf24fd2d0fb61d74d85bf3b94893584 100644 --- a/src/dictionary.test.mjs +++ b/src/dictionary.test.mjs @@ -34,8 +34,8 @@ test('_\\@\\@_@@_@\\@_ can be ambiguous with get, dryGet or not with rawGet, raw t.is(dico.dryGet(0), '_@@)@@'); t.is(dico.dryGet(1), '@@@@@@'); 
t.is(dico.get(1), '@@@@@@'); - const escAro = String.fromCharCode(0x0e); - t.is(dico.rawGet(0), `_@@${String.fromCharCode(0x02)}@${escAro}`); + const escAro = String.fromCharCode(0x200e); + t.is(dico.rawGet(0), `_@@${String.fromCharCode(0x2002)}@${escAro}`); t.is(dico.rawDryGet(1), `${escAro+escAro}@@${escAro+escAro}`); t.is(dico.splitGet(0)[0], '_'); t.is(dico.splitGet(0)[1], ')@@'); @@ -51,6 +51,12 @@ test('\\{\\} work fine', t => { t.is(dico.serialize(), '\\{\\}@@\\{\\}'); t.is(dico.dryGet(0), '{}@@{}'); }); +test('8\\<Z work fine', t => { + const dico = new app.Dictionary('8\\<Z'); + //t.is(dico.length, 1); + t.is(dico.serialize(), '8\\<Z@@8\\<Z'); + t.is(dico.dryGet(0), '8<Z@@8<Z'); +}); test('get is time tracked', async t => { const dico = new app.Dictionary('(a|b|c)d(e|f|g)'); dico.get(1); @@ -76,13 +82,13 @@ test('get duplicated found in past iteration (from dictionary)', t => { const dictionaryString = '((a|b)cd|(a|b)c(d|e)|bc(d|e))'; const dico = new app.Dictionary(dictionaryString, {idSecPwd:false}); for(let i = 0; i < dico.length;i++) dico.get(i); - t.deepEqual(dico.duplicatedFound(), [ {alt:"bcd",index: [ 1, 4, 6 ]}, {alt:"acd",index: [ 0, 2 ]}, {alt:"bce",index: [ 5, 7 ]} ]); + t.deepEqual(dico.duplicatedFound(), [ {alt:"bcd",index: [ 2, 5, 6 ]}, {alt:"acd",index: [ 0, 4 ]}, {alt:"bce",index: [ 3, 7 ]} ]); }); test('dictionary can dryRun to find all duplicate', t => { const dictionaryString = '((a|b)cd|(a|b)c(d|e)|bc(d|e))'; const dico = new app.Dictionary(dictionaryString, {idSecPwd:false}); const duplicate = dico.dryRunDedup(); - t.deepEqual(duplicate, [ {alt:"bcd",index: [ 1, 4, 6 ]}, {alt:"acd",index: [ 0, 2 ]}, {alt:"bce",index: [ 5, 7 ]} ]); + t.deepEqual(duplicate, [ {alt:"bcd",index: [ 2, 5, 6 ]}, {alt:"acd",index: [ 0, 4 ]}, {alt:"bce",index: [ 3, 7 ]} ]); t.is(dico.length,8); t.is(dico.duplicateTotal,4); }); @@ -116,14 +122,20 @@ test('dictionary called with speed option try to activate variante accent, caps 
t.is(dico.duplicateRatio.toPrecision(3),'0.0588'); t.is(dico.uniqueRatio.toPrecision(3),'0.941'); }); +test('auto activate leetSpeak for tiny case', t => { + const dictionaryString = 'a'; + const dico = new app.Dictionary(dictionaryString, {speed:30}); + t.is(dico.serialize(),'(4|A|a)@@(4|A|a)'); + t.is(dico.length,9); +}); test('dico should not crash', t=>{ const v8CrashStringButFirefoxWork = '(((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)@@((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)|(((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)@@((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)|(ǧ|g)(1|i|l|I)(ǧ|g)(1|i|l|I)@@(ǧ|g)(1|i|l|I)(ǧ|g)(1|i|l|I)))'; const dico = new app.Dictionary(v8CrashStringButFirefoxWork,{speed:30}) - t.is(dico.length,70928); - t.is(dico.duplicateTotal,5392); - t.is(dico.duplicateRatio.toFixed(3),'0.076'); - t.is(dico.uniqueRatio.toFixed(3),'0.924'); - t.is(dico.estimateDuration().toFixed(0),'2364'); + t.is(dico.length,73986); + t.is(dico.duplicateTotal,8448); + t.is(dico.duplicateRatio.toFixed(3),'0.114'); + t.is(dico.uniqueRatio.toFixed(3),'0.886'); + t.is(dico.estimateDuration().toFixed(0),'2466'); }); test('just under 1h : try to find variant', t => { const dictionaryString = 'onlyOne@@[a-z0-8]'; @@ -160,3 +172,27 @@ test('escaped special characters still escaped when re-serialized', t => { t.is(serialized, dictionaryString); t.is(dico.get(6), 'c)cde'); }); +test('escapeAll:0 -> regex parse strings', t => { + const dico = new app.Dictionary('[0-1]{3}@@=autre>\nautre::ça',{escapeAll:0}); + t.is(dico.length, 8); +}); +test('escapeAll:1 -> everything is escaped', t => { + const dico = new app.Dictionary('[0-1]{3}@@=autre>\nautre::ça',{escapeAll:1}); + t.is(dico.length, 4); +}); +test('escapeAll:2 -> everything is escaped', t => { + const dico = new app.Dictionary('[0-1]{3}@@=autre>\nautre::ça',{escapeAll:2}); + t.is(dico.length, 12); +}); +test('escapeAll:auto -> try 2 if too slow 0 if previous fail 1', t => { + const dico = new 
app.Dictionary('[0-1]{3}@@=autre>\nautre::ça',{escapeAll:'auto',speed:30}); + t.is(dico.length, 77); +}); +test('escapeAll:undefined -> like auto', t => { + const dico = new app.Dictionary('[0-1]{3}@@=autre>\nautre::ça',{speed:30}); + t.is(dico.length, 77); +}); +test('invalid string -> autofallback to escapeAll:1', t => { + const dico = new app.Dictionary('[0-1]{z}@@=autre>\nautre::ça'); + t.is(dico.length, 4); +});