diff --git a/CHANGELOG.fr.md b/CHANGELOG.fr.md index 6336d72f8af334b76cc6d890548280650270a901..2038eecbf7d5b4aca627ff349a5ff5f2d476e196 100644 --- a/CHANGELOG.fr.md +++ b/CHANGELOG.fr.md @@ -16,7 +16,7 @@ et ce projet adhère au [versionnage sémantique](https://semver.org/spec/v2.0.0 ## [Version 3.5.0] - 2022-11-27 (par [1000i100]) ### Ajouté -- dictionary intègre un mécanisme de cache pour détecter les doublons. Il est désactivable pour éviter les crashs par saturation de la mémoire. +- dictionary intègre un mécanisme de cache pour détecter les doublons. Il est désactivable, et automatiquement désactivé à partir de 1_000_000 alternatives (option `cacheMax`), pour éviter les crashs par saturation de la mémoire. - dictionary-parser gère toutes les syntaxes d'expression régulières qu'utilisaient gsper v2 + des situations plus complexes - dictionary-parser permet de distinguer identifiant secret et mot de passe via le séparateur `@@` - dictionary-parser permet plusieurs types de déclinaisons : sans accents, accents optionnels, sans majuscule, majuscule optionnelle, tout en majuscule, et des déclinaison type leetSpeak. 
diff --git a/package.json b/package.json index 86c92fc032954722197c2c821a47fbfeee22842f..323ff43e6883012bcfba083741ba0fa277c1f7c6 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ "build:npm:min": "node CI/minify.mjs", "build:npm:zeroDep": "node CI/zeroDependency.mjs", "test": "run-s test:dev", - "test:dev": "run-s test:dev:**", + "test:dev": "run-p test:dev:**", "xtest:dev:qualityCheck": "xo", "test:dev:duplication": "jscpd ./ -s", "test:dev:runTests": "ava src/**.test.mjs", diff --git a/src/dictionary.mjs b/src/dictionary.mjs index d3aa4f207d8aa27ba57d32330535877587a6e2c6..c8d82846415fbfa0e1a3b1f13d760f625fb3ff94 100644 --- a/src/dictionary.mjs +++ b/src/dictionary.mjs @@ -7,21 +7,22 @@ export class Dictionary { this.originalConfig.dictionaryString = dictionaryString; this.config = JSON.parse(JSON.stringify(this.originalConfig)); if(typeof this.config.cache === 'undefined') this.config.cache = true; + if(typeof this.config.cacheMax === 'undefined') this.config.cacheMax = 1_000_000; if(typeof this.config.idSecPwd === 'undefined') this.config.idSecPwd = true; this.tree = buildTreeStruct(parse(dictionaryString,this.config.idSecPwd)); - this.estimateDuration = ()=>this.tree.altCount/(this.config.speed || 1) - this.estimateRemaining = ()=>(this.tree.altCount-this.tried)/(this.config.speed || 1) - if(this.config.speed) adjustVariant(this); this.length = this.tree.altCount; + this.estimateDuration = ()=>this.length/(this.config.speed || 1) + this.estimateRemaining = ()=>(this.length-this.tried)/(this.config.speed || 1) + if(this.config.speed) adjustVariant(this); this.tried = 0; this.cache = []; - this.duplicatedCount = ()=> this.tried - Object.keys(this.cache).length; + this.duplicatedCount = ()=> Object.keys(this.cache).length? 
this.tried - Object.keys(this.cache).length : 0; this.dryGet = index => getAlternative(index,this.tree); this.get = index => { if(typeof this.startTime === "undefined") this.startTime = Date.now(); this.tried++; const alt = getAlternative(index,this.tree); - if(this.config.cache) { + if(this.config.cache && this.length < this.config.cacheMax) { if(typeof this.cache[alt] === "undefined") this.cache[alt] = []; this.cache[alt].push(index); if(this.cache[alt].length>1) return `§duplicate§${alt}`; @@ -41,11 +42,13 @@ export class Dictionary { function adjustVariant(self){ const durationGoal = 3_600; if (self.estimateDuration() >= durationGoal) return; + if (self.length >= self.config.cacheMax) return; function rebuildTree(self){ self.tree = buildTreeStruct(parse(self.originalConfig.dictionaryString,self.config.idSecPwd,self.config.accent,self.config.lowerCase,self.config.leetSpeak)); + self.length = self.tree.altCount; } let lastLess1hConfig; - while (self.estimateDuration() < durationGoal){ + while (self.estimateDuration() < durationGoal && self.length < self.config.cacheMax){ lastLess1hConfig = JSON.parse(JSON.stringify(self.config)); if(self.config.accent !== 2 && (typeof self.originalConfig.accent === 'undefined' || self.originalConfig.accent === "auto")) { if(!self.config.accent) self.config.accent = 1; @@ -68,7 +71,7 @@ function adjustVariant(self){ console.log('strange, not an hour yet with all variants ?'); break; } - self.config = lastLess1hConfig; + self.config = lastLess1hConfig || self.config; rebuildTree(self); dryRunDedup(self); } @@ -76,11 +79,8 @@ function dryRunDedup(self){ const dry = new Dictionary(serialize(self.tree),{cache:true,idSecPwd:false}); for(let i = 0; i < dry.length;i++) dry.get(i); const duplicate = dry.duplicatedFound(); - self.lengthBeforeDedup = dry.length; - //TODO: Factorize dry.cache keys - const dedupAsString = `(${Object.keys(dry.cache).join('|')})`; - self.tree = buildTreeStruct(dedupAsString,false); - self.config.cache = false; 
- self.length = self.tree.altCount; + self.duplicateTotal = dry.duplicatedCount(); + self.duplicateRatio = self.duplicateTotal/self.length; + self.uniqueRatio = 1 - self.duplicateRatio; return duplicate; } diff --git a/src/dictionary.test-e2e.mjs b/src/dictionary.test-e2e.mjs new file mode 100644 index 0000000000000000000000000000000000000000..db6491d87ca83eecfbeb2c181018ff18366d09ab --- /dev/null +++ b/src/dictionary.test-e2e.mjs @@ -0,0 +1,8 @@ +import test from 'ava'; +import * as app from './dictionary.mjs'; + +test("alt < 1_000_000 & estimateDuration < 1h search for variant and use cache to fine duplicate", t => { + const dico = new app.Dictionary(`[0-9]{4}@@[a-z0-8][01]`, {speed:200}); + t.is(dico.length,700_000); + t.is(dico.duplicateTotal,0); +}); diff --git a/src/dictionary.test.mjs b/src/dictionary.test.mjs index 8e09e3ba2d6686d1fcfd2da193755d6bed34afa3..2d846b358b6cc707e5051fa04be93df0fcce3f70 100644 --- a/src/dictionary.test.mjs +++ b/src/dictionary.test.mjs @@ -37,15 +37,15 @@ test('dictionary can dryRun to find all duplicate', t => { const dico = new app.Dictionary(dictionaryString, {idSecPwd:false}); const duplicate = dico.dryRunDedup(); t.deepEqual(duplicate, [ {alt:"bcd",index: [ 1, 4, 6 ]}, {alt:"acd",index: [ 0, 2 ]}, {alt:"bce",index: [ 5, 7 ]} ]); - t.is(dico.lengthBeforeDedup,8); - t.is(dico.length,4); + t.is(dico.length,8); + t.is(dico.duplicateTotal,4); }); test('dictionary can run with cache disabled', t => { const dictionaryString = '(a|b|c)d(e|f|g)'; const dico = new app.Dictionary(dictionaryString,{cache:false}); dico.get(0); - t.is(dico.duplicatedCount(), dico.tried); + t.is(Object.keys(dico.cache).length, 0); }); test('dictionary can run with cache on', t => { const dictionaryString = 'a'; @@ -62,15 +62,43 @@ test('duplicate match §duplicate§ pattern', t => { t.is(first, 'a@@a'); t.is(second, '§duplicate§a@@a'); }); -test.skip('dictionary called with speed option try to activate variante accent, caps and leetSpeak option to reach 1h of 
compute estimated time', t => { +test('dictionary called with speed option try to activate variante accent, caps and leetSpeak option to reach 1h of compute estimated time', t => { const dictionaryString = 'Ǧ1Ǧ1'; - const dico = new app.Dictionary(dictionaryString, {speed:30, leetSpeak:1}); - t.is(dico.lengthBeforeDedup,2); - t.is(dico.length,2); + const dico = new app.Dictionary(dictionaryString, {speed:30}); + t.is(dico.length,272); + t.is(dico.duplicateTotal,16); + t.is(dico.duplicateRatio.toPrecision(3),'0.0588'); + t.is(dico.uniqueRatio.toPrecision(3),'0.941'); }); -test.skip('dico should not crash', t=>{ +test('dico should not crash', t=>{ const v8CrashStringButFirefoxWork = '(((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)@@((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)|(((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)@@((Ǧ|ǧ)|(G|g))(1|i|l|I)((Ǧ|ǧ)|(G|g))(1|i|l|I)|(ǧ|g)(1|i|l|I)(ǧ|g)(1|i|l|I)@@(ǧ|g)(1|i|l|I)(ǧ|g)(1|i|l|I)))'; const dico = new app.Dictionary(v8CrashStringButFirefoxWork,{speed:30}) - //t.is(dico.lengthBeforeDedup,2); - t.is(dico.length,2); + t.is(dico.length,70928); + t.is(dico.duplicateTotal,5392); + t.is(dico.duplicateRatio.toFixed(3),'0.076'); + t.is(dico.uniqueRatio.toFixed(3),'0.924'); + t.is(dico.estimateDuration().toFixed(0),'2364'); +}); +test('just under 1h : try to find variant', t => { + const dictionaryString = 'onlyOne@@[a-z0-8]'; + const dico = new app.Dictionary(dictionaryString, {speed:0.01}); + t.is(dico.length,35); + t.is(dico.duplicateTotal,0); +}); +test("just over 1h : don't search for variants", t => { + const dictionaryString = 'onlyOne@@[a-z0-9 ]'; + const dico = new app.Dictionary(dictionaryString, {speed:0.01}); + t.is(dico.length,37); + t.is(typeof dico.duplicateTotal, 'undefined'); +}); +test("alt number >= 1_000_000 disable cache and variants attempt", t => { + const dico = new app.Dictionary(`[0-9]{6}@@onlyOne`, {speed:1_000_000}); + t.is(dico.length,1_000_000); + t.is(typeof dico.duplicateTotal, 'undefined'); +}); 
+test("huge alt number work fine", t => { + const x = 12; + const dico = new app.Dictionary(`[0-9]{${2+x}}@@[a-z0-9 ]`, {speed:Math.pow(10,x)}); + t.is(dico.length,3_700 * Math.pow(10,x)); + t.is(typeof dico.duplicateTotal, 'undefined'); }); diff --git a/src/multi-node-layer.test.mjs b/src/multi-node-layer.test.mjs index 511de0d39f536f8610dc2f6a18fdae79009d92ce..a6703b257e0e6522c207de07018981d2a9ae70c3 100644 --- a/src/multi-node-layer.test.mjs +++ b/src/multi-node-layer.test.mjs @@ -5,7 +5,7 @@ function sleep(ms){ return new Promise((resolve)=>setTimeout(resolve,ms)); } -test('pingTracker give time elapsed', async t => { +test.serial('pingTracker give time elapsed', async t => { await sleep(1); // in some test environnement this fix latency bug. const mockedFunc = async (first,second)=>{await sleep(second);return first}; const firstParameter = 'finalResult';