www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - Suffix tree benchmark

reply Basile B. <b2.temp gmx.com> writes:
Hey, los douchbagos...

I've recently worked on a suffix array 
(https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555)

Test for presence are excelent (O1)

canFind    % of canFind   : 100
SuffixArr1 % of canFind   : 2.78019
SuffixArr2 % of canFind   : 3.15439
runtime AA % of runtime AA: 100
EMSI hashs % of runtime AA: 167.434
SuffixArr1 % of runtime AA: 97.8582
SuffixArr2 % of runtime AA: 111.029
EMSI hashs % of EMSI hashs: 100
SuffixArr1 % of EMSI hashs: 58.4458
SuffixArr2 % of EMSI hashs: 66.3123
May 18 2016
parent reply Andrei Alexandrescu <SeeWebsiteForEmail erdani.org> writes:
On 5/18/16 3:14 PM, Basile B. wrote:
 Hey, los douchbagos...

 I've recently worked on a suffix array
 (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555)

 Test for presence are excelent (O1)

 canFind    % of canFind   : 100
 SuffixArr1 % of canFind   : 2.78019
 SuffixArr2 % of canFind   : 3.15439
 runtime AA % of runtime AA: 100
 EMSI hashs % of runtime AA: 167.434
 SuffixArr1 % of runtime AA: 97.8582
 SuffixArr2 % of runtime AA: 111.029
 EMSI hashs % of EMSI hashs: 100
 SuffixArr1 % of EMSI hashs: 58.4458
 SuffixArr2 % of EMSI hashs: 66.3123
Could you explain the numbers a bit? -- Andrei
May 18 2016
parent Basile B. <b2.temp gmx.com> writes:
On Wednesday, 18 May 2016 at 19:55:40 UTC, Andrei Alexandrescu 
wrote:
 On 5/18/16 3:14 PM, Basile B. wrote:
 Hey, los douchbagos...

 I've recently worked on a suffix array
 (https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555)

 Test for presence are excelent (O1)

 canFind    % of canFind   : 100
 SuffixArr1 % of canFind   : 2.78019
 SuffixArr2 % of canFind   : 3.15439
 runtime AA % of runtime AA: 100
 EMSI hashs % of runtime AA: 167.434
 SuffixArr1 % of runtime AA: 97.8582
 SuffixArr2 % of runtime AA: 111.029
 EMSI hashs % of EMSI hashs: 100
 SuffixArr1 % of EMSI hashs: 58.4458
 SuffixArr2 % of EMSI hashs: 66.3123
Could you explain the numbers a bit? -- Andrei
the benchmark is: ---- module runnable; import std.stdio; import containers.hashset, containers.internal.hash; import std.experimental.allocator.mallocator; void main(string[] args) { import std.datetime, iz.strings, std.random, std.range; import core.memory: GC; GC.disable; string[] wordsA, wordsB; ubyte[] source = cast(ubyte[])"ABCDEFGHIJKL".dup; foreach(i; 0..64) { wordsA ~= cast(string) randomCover(source, rndGen).array; rndGen.popFront; if ((i & 1) == 0) wordsB ~= wordsA[i]; else { wordsB ~= cast(string) randomCover(source, rndGen).array; rndGen.popFront; } } auto arrOld = SuffixArray!string(wordsA); auto arrNew = SuffixArrayTemp!string(wordsA); enum repeat = 2^^10; size_t test(T)(ref T t) { StopWatch sw; sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = wordsB[i] in t; } return sw.peek.hnsecs; } size_t refTest() { import std.algorithm.searching: canFind; import std.algorithm.sorting; StopWatch sw; auto sorted = sort(wordsA).array; sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = sorted.canFind(wordsB[i]); } return sw.peek.hnsecs; } size_t aaTest() { StopWatch sw; bool[string] aa; foreach(w; wordsA) aa[w] = true; sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = wordsB[i] in aa; } return sw.peek.hnsecs; } size_t aa2Test() { static size_t hasher(string value) pure { size_t result; foreach(i; 1 .. value.length) result += value[i-1] ^ value[i] << (i-1); return result; } StopWatch sw; alias HS = HashSet!(string, Mallocator, generateHash!string, false); HS aa; foreach(w; wordsA) aa.insert(w); sw.start; foreach(i; 0..64) foreach(j; 0..repeat) { auto n = wordsB[i] in aa; } return sw.peek.hnsecs; } auto tA = refTest(); auto tB = aaTest(); auto tC = aa2Test(); auto t1 = test(arrOld); auto t2 = test(arrNew); writeln("canFind % of canFind : ", tA / (tA * 0.01)); writeln("SuffixArr1 % of canFind : ", t1 / (tA * 0.01)); writeln("SuffixArr2 % of canFind : ", t2 / (tA * 0.01)); writeln("runtime AA % of runtime AA: ", tB / (tB * 0.01)); writeln("EMSI hashs % of runtime AA: ", tC / (tB * 0.01)); writeln("SuffixArr1 % of runtime AA: ", t1 / (tB * 0.01)); writeln("SuffixArr2 % of runtime AA: ", t2 / (tB * 0.01)); writeln("EMSI hashs % of EMSI hashs: ", tC / (tC * 0.01)); writeln("SuffixArr1 % of EMSI hashs: ", t1 / (tC * 0.01)); writeln("SuffixArr2 % of EMSI hashs: ", t2 / (tC * 0.01)); } ----
May 18 2016