digitalmars.D - Suffix tree benchmark
- Basile B. (14/14) May 18 2016 Hey, los douchbagos...
- Andrei Alexandrescu (2/16) May 18 2016 Could you explain the numbers a bit? -- Andrei
- Basile B. (111/130) May 18 2016 the benchmark is:
Hey, los douchbagos... I've recently worked on a suffix array (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555) Test for presence are excelent (O1) canFind % of canFind : 100 SuffixArr1 % of canFind : 2.78019 SuffixArr2 % of canFind : 3.15439 runtime AA % of runtime AA: 100 EMSI hashs % of runtime AA: 167.434 SuffixArr1 % of runtime AA: 97.8582 SuffixArr2 % of runtime AA: 111.029 EMSI hashs % of EMSI hashs: 100 SuffixArr1 % of EMSI hashs: 58.4458 SuffixArr2 % of EMSI hashs: 66.3123
May 18 2016
On 5/18/16 3:14 PM, Basile B. wrote:Hey, los douchbagos... I've recently worked on a suffix array (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555) Test for presence are excelent (O1) canFind % of canFind : 100 SuffixArr1 % of canFind : 2.78019 SuffixArr2 % of canFind : 3.15439 runtime AA % of runtime AA: 100 EMSI hashs % of runtime AA: 167.434 SuffixArr1 % of runtime AA: 97.8582 SuffixArr2 % of runtime AA: 111.029 EMSI hashs % of EMSI hashs: 100 SuffixArr1 % of EMSI hashs: 58.4458 SuffixArr2 % of EMSI hashs: 66.3123Could you explain the numbers a bit? -- Andrei
May 18 2016
On Wednesday, 18 May 2016 at 19:55:40 UTC, Andrei Alexandrescu wrote:On 5/18/16 3:14 PM, Basile B. wrote:the benchmark is: ---- module runnable; import std.stdio; import containers.hashset, containers.internal.hash; import std.experimental.allocator.mallocator; void main(string[] args) { import std.datetime, iz.strings, std.random, std.range; import core.memory: GC; GC.disable; string[] wordsA, wordsB; ubyte[] source = cast(ubyte[])"ABCDEFGHIJKL".dup; foreach(i; 0..64) { wordsA ~= cast(string) randomCover(source, rndGen).array; rndGen.popFront; if ((i & 1) == 0) wordsB ~= wordsA[i]; else { wordsB ~= cast(string) randomCover(source, rndGen).array; rndGen.popFront; } } auto arrOld = SuffixArray!string(wordsA); auto arrNew = SuffixArrayTemp!string(wordsA); enum repeat = 2^^10; size_t test(T)(ref T t) { StopWatch sw; sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = wordsB[i] in t; } return sw.peek.hnsecs; } size_t refTest() { import std.algorithm.searching: canFind; import std.algorithm.sorting; StopWatch sw; auto sorted = sort(wordsA).array; sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = sorted.canFind(wordsB[i]); } return sw.peek.hnsecs; } size_t aaTest() { StopWatch sw; bool[string] aa; foreach(w; wordsA) aa[w] = true; sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = wordsB[i] in aa; } return sw.peek.hnsecs; } size_t aa2Test() { static size_t hasher(string value) pure { size_t result; foreach(i; 1 .. value.length) result += value[i-1] ^ value[i] << (i-1); return result; } StopWatch sw; alias HS = HashSet!(string, Mallocator, generateHash!string, false); HS aa; foreach(w; wordsA) aa.insert(w); sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = wordsB[i] in aa; } return sw.peek.hnsecs; } auto tA = refTest(); auto tB = aaTest(); auto tC = aa2Test(); auto t1 = test(arrOld); auto t2 = test(arrNew); writeln("canFind % of canFind : ", tA / (tA * 0.01)); writeln("SuffixArr1 % of canFind : ", t1 / (tA * 0.01)); writeln("SuffixArr2 % of canFind : ", t2 / (tA * 0.01)); writeln("runtime AA % of runtime AA: ", tB / (tB * 0.01)); writeln("EMSI hashs % of runtime AA: ", tC / (tB * 0.01)); writeln("SuffixArr1 % of runtime AA: ", t1 / (tB * 0.01)); writeln("SuffixArr2 % of runtime AA: ", t2 / (tB * 0.01)); writeln("EMSI hashs % of EMSI hashs: ", tC / (tC * 0.01)); writeln("SuffixArr1 % of EMSI hashs: ", t1 / (tC * 0.01)); writeln("SuffixArr2 % of EMSI hashs: ", t2 / (tC * 0.01)); } ----Hey, los douchbagos... I've recently worked on a suffix array (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555) Test for presence are excelent (O1) canFind % of canFind : 100 SuffixArr1 % of canFind : 2.78019 SuffixArr2 % of canFind : 3.15439 runtime AA % of runtime AA: 100 EMSI hashs % of runtime AA: 167.434 SuffixArr1 % of runtime AA: 97.8582 SuffixArr2 % of runtime AA: 111.029 EMSI hashs % of EMSI hashs: 100 SuffixArr1 % of EMSI hashs: 58.4458 SuffixArr2 % of EMSI hashs: 66.3123Could you explain the numbers a bit? -- Andrei
May 18 2016