www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - D hash table comparison benchmark

reply Nathan S. <no.public.email example.com> writes:
The below benchmarks come from writing 100 int-to-int mappings to 
a new hashtable then reading them back, repeated 10_000 times. 
The built-in AA doesn't deallocate memory when it falls out of 
scope but the other maps do. Benchmark code in next post.

== Speed Ranking using LDC2 (optimized) ==
   21 msecs vibe.utils.hashmap
   37 msecs memutils.hashmap
   57 msecs built-in AA
  102 msecs jive.map
  185 msecs containers.hashmap w/GCAllocator
  240 msecs containers.hashmap w/Mallocator

== Speed Ranking using DMD (optimized) ==
  55 msecs memutils.hashmap
  64 msecs vibe.utils.hashmap
  80 msecs built-in AA
131 msecs jive.map
315 msecs containers.hashmap w/GCAllocator
361 msecs containers.hashmap w/Mallocator

** What if the array size is smaller or larger? The ordering 
didn't change so I won't post the results. **

** What if we reuse the hashtable? **

== Speed Ranking using LDC2 (optimized) ==
10.45 msecs vibe.utils.hashmap
11.85 msecs memutils.hashmap
12.61 msecs containers.hashmap w/GCAllocator
12.91 msecs containers.hashmap w/Mallocator
14.30 msecs built-in AA
19.21 msecs jive.map

== Speed Ranking using DMD (optimized) ==
18.05 msecs memutils.hashmap
21.03 msecs jive.map
24.99 msecs built-in AA
25.22 msecs containers.hashmap w/Mallocator
25.75 msecs containers.hashmap w/GCAllocator
29.93 msecs vibe.utils.hashmap

== Not benchmarked ==
stdx.collections.hashtable (dlang-stdx/collections): compilation 
error
kontainer.orderedAssocArray (alphaKAI/kontainer): doesn't accept 
int keys
tanya.container.hashtable (caraus-ecms/tanya): either has a bug 
or is very slow
Jun 25 2018
parent reply Nathan S. <no.public.email example.com> writes:
Benchmark code:

dub.sdl
```
name "hashbench"
description "D hashtable comparison."
dependency "emsi_containers" version="~>0.7.0"
dependency "memutils" version="~>0.4.11"
dependency "vibe-d:utils" version="~>0.8.4"
dependency "jive" version="~>0.2.0"
//dependency "collections" version="~>0.1.0"
//dependency "tanya" version="~>0.10.0"
//dependency "kontainer" version="~>0.0.2"
```

app.d
```d
int nthKey(in uint n)  nogc nothrow pure  safe
{
     // Can be any invertible function.
     // The goal is to map [0 .. N] to a sequence not in ascending 
order.
     int h = cast(int) (n + 1);
     h = (h ^ (h >>> 16)) * 0x85ebca6b;
     h = (h ^ (n >>> 13)) * 0xc2b2ae35;
     return h ^ (h >>> 16);
}

pragma(inline, false)
uint hashBench(HashTable, Args...)(in uint N, in uint seed, Args 
initArgs)
{
     static if (initArgs.length)
         HashTable hashtable = HashTable(initArgs);
     else // Separate branch needed for builtin AA.
         HashTable hashtable;

     foreach (uint n; 0 .. N)
         hashtable[nthKey(n)] = n + seed;

     uint sum;
     foreach_reverse (uint n; 0 .. N/2)
         sum += hashtable[nthKey(n)];
     foreach_reverse(uint n; N/2 .. N)
         sum += hashtable[nthKey(n)];
     return sum;
}

pragma(inline, false)
uint hashBenchReuse(HashTable)(in uint N, in uint seed, ref 
HashTable hashtable)
{
     foreach (uint n; 0 .. N)
         hashtable[nthKey(n)] = n + seed;

     uint sum;
     foreach_reverse (uint n; 0 .. N/2)
         sum += hashtable[nthKey(n)];
     foreach_reverse(uint n; N/2 .. N)
         sum += hashtable[nthKey(n)];
     return sum;
}

enum benchmarkCode(string name, string signature = name) =
     `
         {
             sw.reset();
             result = 0;
             sw.start();
             foreach (_; 0 .. M)
             {
                 result += hashBench!(`~signature~`)(N, result);
             }
             sw.stop();
             string s = "`~name~`";
             printf("[checksum %d] %3d msecs %s\n",
                 result, sw.peek.total!"msecs", &s[0]);
         }
     `;

enum benchmarkCodeReuse(string name, string signature = name) =
     `
         {
             sw.reset();
             result = 0;
             sw.start();
             `~signature~` hashtable;
             foreach (_; 0 .. M)
             {
                 result += hashBenchReuse!(`~signature~`)(N, 
result, hashtable);
             }
             sw.stop();
             string s = "`~name~`";
             printf("(checksum %d) %3.2f msecs %s\n",
                 result, sw.peek.total!"usecs" / 1000.0, &s[0]);
         }
     `;

void main(string[] args)
{
     import std.datetime.stopwatch : AutoStart, StopWatch;
     import core.stdc.stdio : printf, puts;
     import std.experimental.allocator.gc_allocator : GCAllocator;
     import std.experimental.allocator.mallocator : Mallocator;

     alias BuiltinAA(K,V) = V[K];
     import containers.hashmap : EMSI_HashMap = HashMap;
     import memutils.hashmap : Memutils_HashMap = HashMap;
     import vibe.utils.hashmap : Vibe_HashMap = HashMap;
     import jive.map : Jive_Map = Map;
     //import stdx.collections.hashtable : Stdx_Hashtable = 
Hashtable;
     //import tanya.container.hashtable : Tanya_HashTable = 
HashTable;
     //import kontainer.orderedAssocArray.orderedAssocArray : 
Kontainer_OrderedAssocArray = OrderedAssocArray;

     immutable uint N = args.length < 2 ? 100 :
         () {
             import std.conv : to;
             auto result = to!uint(args[1]);
             return (result == 0 ? 100 : result);
         }();
     immutable M = N <= 500_000 ? (1000_000 / N) : 2;
     enum topLevelRepetitions = 3;

     printf("Hashtable benchmark N (size) = %d (repetitions) = 
%d\n", N, M);

     StopWatch sw = StopWatch(AutoStart.no);
     uint result;

     version(all)
     {
         puts("\n=Results (new hashtables)=");
         foreach (_repetition; 0 .. topLevelRepetitions)
         {


             mixin(benchmarkCode!("built-in AA", "BuiltinAA!(int, 
int)"));
             mixin(benchmarkCode!("containers.hashmap 
w/Mallocator", "EMSI_HashMap!(int, int, Mallocator)"));
             mixin(benchmarkCode!("containers.hashmap 
w/GCAllocator", "EMSI_HashMap!(int, int, GCAllocator)"));
             mixin(benchmarkCode!("memutils.hashmap", 
"Memutils_HashMap!(int,int)"));
             mixin(benchmarkCode!("vibe.utils.hashmap", 
"Vibe_HashMap!(int,int)"));
             mixin(benchmarkCode!("jive.map", 
"Jive_Map!(int,int)"));
             //mixin(benchmarkCode!("stdx.collections.hashtable", 
"Stdx_Hashtable!(int,int)"));
             //mixin(benchmarkCode!("tanya.container.hashtable", 
"Tanya_HashTable!(int,int)"));
             
//mixin(benchmarkCode!("kontainer.orderedAssocArray.orderedAssocArray",
"Kontainer_OrderedAssocArray!(int,int)"));
         }
     }

     version(all)
     {
         puts("\n=Results (reusing hashtables)=\n");
         foreach (_repetition; 0 .. topLevelRepetitions)
         {


             mixin(benchmarkCodeReuse!("built-in AA", 
"BuiltinAA!(int, int)"));
             mixin(benchmarkCodeReuse!("containers.hashmap 
w/Mallocator", "EMSI_HashMap!(int, int, Mallocator)"));
             mixin(benchmarkCodeReuse!("containers.hashmap 
w/GCAllocator", "EMSI_HashMap!(int, int, GCAllocator)"));
             mixin(benchmarkCodeReuse!("memutils.hashmap", 
"Memutils_HashMap!(int,int)"));
             mixin(benchmarkCodeReuse!("vibe.utils.hashmap", 
"Vibe_HashMap!(int,int)"));
             mixin(benchmarkCodeReuse!("jive.map", 
"Jive_Map!(int,int)"));
             
//mixin(benchmarkCodeReuse!("stdx.collections.hashtable", 
"Stdx_Hashtable!(int,int)"));
             
//mixin(benchmarkCodeReuse!("tanya.container.hashtable", 
"Tanya_HashTable!(int,int)"));
             
//mixin(benchmarkCodeReuse!("kontainer.orderedAssocArray.orderedAssocArray",
"Kontainer_OrderedAssocArray!(int,int)"));
         }
     }
}
```
Jun 25 2018
parent reply Nathan S. <no.public.email example.com> writes:
With LDC2 the times for vibe.utils.hashmap and memutils.hashmap 
are suspiciously low, leading me to suspect that the optimizer 
might be omitting most of the work. Here are the figures without 
optimizations enabled.

== Speed Ranking using DMD (no optimizations) ==
  95 msecs built-in AA
168 msecs vibe.utils.hashmap
182 msecs jive.map
224 msecs memutils.hashmap
663 msecs containers.hashmap w/GCAllocator
686 msecs containers.hashmap w/Mallocator

== Speed Ranking using LDC2 (no optimizations) ==
  68 msecs built-in AA
143 msecs vibe.utils.hashmap
155 msecs jive.map
164 msecs memutils.hashmap
515 msecs containers.hashmap w/GCAllocator
537 msecs containers.hashmap w/Mallocator
Jun 25 2018
parent reply Seb <seb wilzba.ch> writes:
On Tuesday, 26 June 2018 at 02:53:22 UTC, Nathan S. wrote:
 With LDC2 the times for vibe.utils.hashmap and memutils.hashmap 
 are suspiciously low, leading me to suspect that the optimizer 
 might be omitting most of the work. Here are the figures 
 without optimizations enabled.

 == Speed Ranking using DMD (no optimizations) ==
  95 msecs built-in AA
 168 msecs vibe.utils.hashmap
 182 msecs jive.map
 224 msecs memutils.hashmap
 663 msecs containers.hashmap w/GCAllocator
 686 msecs containers.hashmap w/Mallocator

 == Speed Ranking using LDC2 (no optimizations) ==
  68 msecs built-in AA
 143 msecs vibe.utils.hashmap
 155 msecs jive.map
 164 msecs memutils.hashmap
 515 msecs containers.hashmap w/GCAllocator
 537 msecs containers.hashmap w/Mallocator
Did you by chance also benchmark it with other languages like C++, Go or Rust? BTW I'm not sure what your plans are, but are you aware of this recent article? https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-table There were also plans to lower the AA implementation entirely into D runtime/user space, s.t. specialization can be done easier, but sadly these plans stagnated so far: https://github.com/dlang/druntime/pull/1282 https://github.com/dlang/druntime/pull/1985
Jun 25 2018
next sibling parent reply Nathan S. <no.public.email example.com> writes:
On Tuesday, 26 June 2018 at 03:45:27 UTC, Seb wrote:
 Did you by chance also benchmark it with other languages like 
 C++, Go or Rust?
I didn't since I was evaluating hashtable implementations for use in a D application.
 BTW I'm not sure what your plans are, but are you aware of this 
 recent article?

 https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-table
I wasn't, thanks.
Jun 25 2018
parent reply Eugene Wissner <belka caraus.de> writes:
On Tuesday, 26 June 2018 at 04:17:44 UTC, Nathan S. wrote:
 On Tuesday, 26 June 2018 at 03:45:27 UTC, Seb wrote:
 Did you by chance also benchmark it with other languages like 
 C++, Go or Rust?
I didn't since I was evaluating hashtable implementations for use in a D application.
 BTW I'm not sure what your plans are, but are you aware of 
 this recent article?

 https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-table
I wasn't, thanks.
It isn't fair to post the benchmarks without giving some time to fix the bug :). Just kidding. Thanks a lot for taking time to report the bug. tanya's HashTable implementation is very young. I fixed the bug in the "bugfix/hashtable-endless" branch (still need to fix another rehash() before merging into master). Just put in dub.sdl: dependency "tanya" version="~bugfix/hashtable-endless" Here are the results from my machine with tanya included. I'm posting only optimized version for dmd and ldc. I just say that tanya's HashTable sucks in debug mode, I'm using an underlying structure to combine common functionality for the hash table and hash set. With optimizations a lot of function calls can be inlined. So dmd: Hashtable benchmark N (size) = 100 (repetitions) = 10000 =Results (new hashtables)= [checksum 1526449824] 139 msecs built-in AA [checksum 1526449824] 368 msecs containers.hashmap w/Mallocator [checksum 1526449824] 422 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 97 msecs memutils.hashmap [checksum 1526449824] 101 msecs vibe.utils.hashmap [checksum 1526449824] 181 msecs jive.map [checksum 1526449824] 242 msecs tanya.container.hashtable [checksum 1526449824] 128 msecs built-in AA [checksum 1526449824] 361 msecs containers.hashmap w/Mallocator [checksum 1526449824] 416 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 95 msecs memutils.hashmap [checksum 1526449824] 109 msecs vibe.utils.hashmap [checksum 1526449824] 179 msecs jive.map [checksum 1526449824] 239 msecs tanya.container.hashtable [checksum 1526449824] 131 msecs built-in AA [checksum 1526449824] 360 msecs containers.hashmap w/Mallocator [checksum 1526449824] 421 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 89 msecs memutils.hashmap [checksum 1526449824] 105 msecs vibe.utils.hashmap [checksum 1526449824] 180 msecs jive.map [checksum 1526449824] 237 msecs tanya.container.hashtable =Results (reusing hashtables)= (checksum 1526449824) 57.66 msecs built-in AA (checksum 1526449824) 52.76 msecs containers.hashmap w/Mallocator (checksum 1526449824) 48.49 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 31.16 msecs memutils.hashmap (checksum 1526449824) 45.19 msecs vibe.utils.hashmap (checksum 1526449824) 47.52 msecs jive.map (checksum 1526449824) 114.41 msecs tanya.container.hashtable (checksum 1526449824) 54.42 msecs built-in AA (checksum 1526449824) 52.37 msecs containers.hashmap w/Mallocator (checksum 1526449824) 53.10 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 32.39 msecs memutils.hashmap (checksum 1526449824) 46.94 msecs vibe.utils.hashmap (checksum 1526449824) 48.90 msecs jive.map (checksum 1526449824) 113.73 msecs tanya.container.hashtable (checksum 1526449824) 58.06 msecs built-in AA (checksum 1526449824) 53.29 msecs containers.hashmap w/Mallocator (checksum 1526449824) 55.08 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 30.94 msecs memutils.hashmap (checksum 1526449824) 44.89 msecs vibe.utils.hashmap (checksum 1526449824) 47.69 msecs jive.map (checksum 1526449824) 112.62 msecs tanya.container.hashtable LDC: Hashtable benchmark N (size) = 100 (repetitions) = 10000 =Results (new hashtables)= [checksum 1526449824] 103 msecs built-in AA [checksum 1526449824] 261 msecs containers.hashmap w/Mallocator [checksum 1526449824] 274 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 64 msecs memutils.hashmap [checksum 1526449824] 52 msecs vibe.utils.hashmap [checksum 1526449824] 131 msecs jive.map [checksum 1526449824] 102 msecs tanya.container.hashtable [checksum 1526449824] 97 msecs built-in AA [checksum 1526449824] 257 msecs containers.hashmap w/Mallocator [checksum 1526449824] 274 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 59 msecs memutils.hashmap [checksum 1526449824] 50 msecs vibe.utils.hashmap [checksum 1526449824] 131 msecs jive.map [checksum 1526449824] 102 msecs tanya.container.hashtable [checksum 1526449824] 96 msecs built-in AA [checksum 1526449824] 258 msecs containers.hashmap w/Mallocator [checksum 1526449824] 271 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 60 msecs memutils.hashmap [checksum 1526449824] 50 msecs vibe.utils.hashmap [checksum 1526449824] 131 msecs jive.map [checksum 1526449824] 102 msecs tanya.container.hashtable =Results (reusing hashtables)= (checksum 1526449824) 31.89 msecs built-in AA (checksum 1526449824) 26.17 msecs containers.hashmap w/Mallocator (checksum 1526449824) 26.55 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 16.08 msecs memutils.hashmap (checksum 1526449824) 20.43 msecs vibe.utils.hashmap (checksum 1526449824) 30.75 msecs jive.map (checksum 1526449824) 54.34 msecs tanya.container.hashtable (checksum 1526449824) 31.96 msecs built-in AA (checksum 1526449824) 25.99 msecs containers.hashmap w/Mallocator (checksum 1526449824) 26.02 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 16.05 msecs memutils.hashmap (checksum 1526449824) 20.23 msecs vibe.utils.hashmap (checksum 1526449824) 30.76 msecs jive.map (checksum 1526449824) 52.78 msecs tanya.container.hashtable (checksum 1526449824) 36.33 msecs built-in AA (checksum 1526449824) 29.36 msecs containers.hashmap w/Mallocator (checksum 1526449824) 27.83 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 15.97 msecs memutils.hashmap (checksum 1526449824) 20.28 msecs vibe.utils.hashmap (checksum 1526449824) 30.19 msecs jive.map (checksum 1526449824) 53.06 msecs tanya.container.hashtable
Jun 26 2018
parent reply Eugene Wissner <belka caraus.de> writes:
It seems it doesn't work with a branch in dub.sdl. I just 
replaced the files in ~/.dub/packages.
Jun 26 2018
next sibling parent Nathan S. <no.public.email example.com> writes:
BTW the output is formatted so you can get a sorted list of times 
across all trials by piping the output through `sort -n`. That's 
also why the tests reusing maps start with ( instead of [, so 
they will be grouped separately.
Jun 26 2018
prev sibling parent reply Eugene Wissner <belka caraus.de> writes:
On Tuesday, 26 June 2018 at 09:03:10 UTC, Eugene Wissner wrote:
 It seems it doesn't work with a branch in dub.sdl. I just 
 replaced the files in ~/.dub/packages.
And to make tanya perform better than built-in AAs in the first test, define a hash function: size_t hasher(int e) { return e; } and then: mixin(benchmarkCode!("tanya.container.hashtable", "Tanya_HashTable!(int,int, hasher)")); or mixin(benchmarkCodeReuse!("tanya.container.hashtable", "Tanya_HashTable!(int,int,hasher)")) Tanya hashes any value, also integral types; other hashtables probably not. It should theoretically provide better key distribution.
Jun 26 2018
parent Nathan S. <no.public.email example.com> writes:
On Tuesday, 26 June 2018 at 14:33:25 UTC, Eugene Wissner wrote:
 Tanya hashes any value, also integral types; other hashtables 
 probably not.
Your intuition is correct here. Most of the tables use `typeid(key).getHash(&key)`, which for `int` just returns `key`. = Built-in AA = General case: `typeid.getHash` with additional scrambling. Customizable: no. =memutils.hashmap= General case: `typeid.getHash`. Customizable: no. Other notes: * Special case for `toHash` member (bypasses `typeid`). * Interesting special cases for ref-counted data types, with further special cases for ref-counted strings and arrays. =vibe.utils.hashmap= General case: `typeid.getHash`. Customizable: yes, through optional `Traits` template parameter. Other notes: * Special case for `toHash` member (bypasses `typeid`). * Tries to implement a special case for objects with the default `Object.toHash` function, but it seems like it can never work. =jive.map= General case: `typeid.getHash`. Customizable: no. =containers.hashmap= General case: `typeid.getHash`. Customizable: yes, through optional `hashFunction` template parameter. Other notes: * Special case for strings on 64-bit builds. Uses FNV-1a instead of default hash function.
Jun 26 2018
prev sibling parent Nathan S. <no.public.email example.com> writes:
On Tuesday, 26 June 2018 at 03:45:27 UTC, Seb wrote:
 Did you by chance also benchmark it with other languages like 
 C++, Go or Rust?
= Not reusing hashtables, optimizations enabled = 79 msecs Rust std::collections::HashMap 90 msecs Go built-in map 177 msecs C++ std::unordered_map (whichever implementation comes with Xcode) = Reusing hashtables, optimizations enabled = 24 msecs C++ std::unordered_map (whichever implementation comes with Xcode) 26 msecs Go built-in map 36 msecs Rust std::collections::HashMap
Jun 27 2018