digitalmars.D - D hash table comparison benchmark
- Nathan S. (42/42) Jun 25 2018 The below benchmarks come from writing 100 int-to-int mappings to
- Nathan S. (174/174) Jun 25 2018 Benchmark code:
- Nathan S. (18/18) Jun 25 2018 With LDC2 the times for vibe.utils.hashmap and memutils.hashmap
- Seb (11/29) Jun 25 2018 Did you by chance also benchmark it with other languages like
- Nathan S. (4/9) Jun 25 2018 I didn't since I was evaluating hashtable implementations for use
- Eugene Wissner (118/128) Jun 26 2018 It isn't fair to post the benchmarks without giving some time to
- Eugene Wissner (2/2) Jun 26 2018 It seems it doesn't work with a branch in dub.sdl. I just
- Nathan S. (4/4) Jun 26 2018 BTW the output is formatted so you can get a sorted list of times
- Eugene Wissner (16/18) Jun 26 2018 And to make tanya perform better than built-in AAs in the first
- Nathan S. (30/32) Jun 26 2018 Your intuition is correct here. Most of the tables use
- Nathan S. (11/13) Jun 27 2018 = Not reusing hashtables, optimizations enabled =
The below benchmarks come from writing 100 int-to-int mappings to a new hashtable then reading them back, repeated 10_000 times. The built-in AA doesn't deallocate memory when it falls out of scope but the other maps do. Benchmark code in next post. == Speed Ranking using LDC2 (optimized) == 21 msecs vibe.utils.hashmap 37 msecs memutils.hashmap 57 msecs built-in AA 102 msecs jive.map 185 msecs containers.hashmap w/GCAllocator 240 msecs containers.hashmap w/Mallocator == Speed Ranking using DMD (optimized) == 55 msecs memutils.hashmap 64 msecs vibe.utils.hashmap 80 msecs built-in AA 131 msecs jive.map 315 msecs containers.hashmap w/GCAllocator 361 msecs containers.hashmap w/Mallocator ** What if the array size is smaller or larger? The ordering didn't change so I won't post the results. ** ** What if we reuse the hashtable? ** == Speed Ranking using LDC2 (optimized) == 10.45 msecs vibe.utils.hashmap 11.85 msecs memutils.hashmap 12.61 msecs containers.hashmap w/GCAllocator 12.91 msecs containers.hashmap w/Mallocator 14.30 msecs built-in AA 19.21 msecs jive.map == Speed Ranking using DMD (optimized) == 18.05 msecs memutils.hashmap 21.03 msecs jive.map 24.99 msecs built-in AA 25.22 msecs containers.hashmap w/Mallocator 25.75 msecs containers.hashmap w/GCAllocator 29.93 msecs vibe.utils.hashmap == Not benchmarked == stdx.collections.hashtable (dlang-stdx/collections): compilation error kontainer.orderedAssocArray (alphaKAI/kontainer): doesn't accept int keys tanya.container.hashtable (caraus-ecms/tanya): either has a bug or is very slow
Jun 25 2018
Benchmark code: dub.sdl ``` name "hashbench" description "D hashtable comparison." dependency "emsi_containers" version="~>0.7.0" dependency "memutils" version="~>0.4.11" dependency "vibe-d:utils" version="~>0.8.4" dependency "jive" version="~>0.2.0" //dependency "collections" version="~>0.1.0" //dependency "tanya" version="~>0.10.0" //dependency "kontainer" version="~>0.0.2" ``` app.d ```d int nthKey(in uint n) nogc nothrow pure safe { // Can be any invertible function. // The goal is to map [0 .. N] to a sequence not in ascending order. int h = cast(int) (n + 1); h = (h ^ (h >>> 16)) * 0x85ebca6b; h = (h ^ (n >>> 13)) * 0xc2b2ae35; return h ^ (h >>> 16); } pragma(inline, false) uint hashBench(HashTable, Args...)(in uint N, in uint seed, Args initArgs) { static if (initArgs.length) HashTable hashtable = HashTable(initArgs); else // Separate branch needed for builtin AA. HashTable hashtable; foreach (uint n; 0 .. N) hashtable[nthKey(n)] = n + seed; uint sum; foreach_reverse (uint n; 0 .. N/2) sum += hashtable[nthKey(n)]; foreach_reverse(uint n; N/2 .. N) sum += hashtable[nthKey(n)]; return sum; } pragma(inline, false) uint hashBenchReuse(HashTable)(in uint N, in uint seed, ref HashTable hashtable) { foreach (uint n; 0 .. N) hashtable[nthKey(n)] = n + seed; uint sum; foreach_reverse (uint n; 0 .. N/2) sum += hashtable[nthKey(n)]; foreach_reverse(uint n; N/2 .. N) sum += hashtable[nthKey(n)]; return sum; } enum benchmarkCode(string name, string signature = name) = ` { sw.reset(); result = 0; sw.start(); foreach (_; 0 .. M) { result += hashBench!(`~signature~`)(N, result); } sw.stop(); string s = "`~name~`"; printf("[checksum %d] %3d msecs %s\n", result, sw.peek.total!"msecs", &s[0]); } `; enum benchmarkCodeReuse(string name, string signature = name) = ` { sw.reset(); result = 0; sw.start(); `~signature~` hashtable; foreach (_; 0 .. M) { result += hashBenchReuse!(`~signature~`)(N, result, hashtable); } sw.stop(); string s = "`~name~`"; printf("(checksum %d) %3.2f msecs %s\n", result, sw.peek.total!"usecs" / 1000.0, &s[0]); } `; void main(string[] args) { import std.datetime.stopwatch : AutoStart, StopWatch; import core.stdc.stdio : printf, puts; import std.experimental.allocator.gc_allocator : GCAllocator; import std.experimental.allocator.mallocator : Mallocator; alias BuiltinAA(K,V) = V[K]; import containers.hashmap : EMSI_HashMap = HashMap; import memutils.hashmap : Memutils_HashMap = HashMap; import vibe.utils.hashmap : Vibe_HashMap = HashMap; import jive.map : Jive_Map = Map; //import stdx.collections.hashtable : Stdx_Hashtable = Hashtable; //import tanya.container.hashtable : Tanya_HashTable = HashTable; //import kontainer.orderedAssocArray.orderedAssocArray : Kontainer_OrderedAssocArray = OrderedAssocArray; immutable uint N = args.length < 2 ? 100 : () { import std.conv : to; auto result = to!uint(args[1]); return (result == 0 ? 100 : result); }(); immutable M = N <= 500_000 ? (1000_000 / N) : 2; enum topLevelRepetitions = 3; printf("Hashtable benchmark N (size) = %d (repetitions) = %d\n", N, M); StopWatch sw = StopWatch(AutoStart.no); uint result; version(all) { puts("\n=Results (new hashtables)="); foreach (_repetition; 0 .. topLevelRepetitions) { mixin(benchmarkCode!("built-in AA", "BuiltinAA!(int, int)")); mixin(benchmarkCode!("containers.hashmap w/Mallocator", "EMSI_HashMap!(int, int, Mallocator)")); mixin(benchmarkCode!("containers.hashmap w/GCAllocator", "EMSI_HashMap!(int, int, GCAllocator)")); mixin(benchmarkCode!("memutils.hashmap", "Memutils_HashMap!(int,int)")); mixin(benchmarkCode!("vibe.utils.hashmap", "Vibe_HashMap!(int,int)")); mixin(benchmarkCode!("jive.map", "Jive_Map!(int,int)")); //mixin(benchmarkCode!("stdx.collections.hashtable", "Stdx_Hashtable!(int,int)")); //mixin(benchmarkCode!("tanya.container.hashtable", "Tanya_HashTable!(int,int)")); //mixin(benchmarkCode!("kontainer.orderedAssocArray.orderedAssocArray", "Kontainer_OrderedAssocArray!(int,int)")); } } version(all) { puts("\n=Results (reusing hashtables)=\n"); foreach (_repetition; 0 .. topLevelRepetitions) { mixin(benchmarkCodeReuse!("built-in AA", "BuiltinAA!(int, int)")); mixin(benchmarkCodeReuse!("containers.hashmap w/Mallocator", "EMSI_HashMap!(int, int, Mallocator)")); mixin(benchmarkCodeReuse!("containers.hashmap w/GCAllocator", "EMSI_HashMap!(int, int, GCAllocator)")); mixin(benchmarkCodeReuse!("memutils.hashmap", "Memutils_HashMap!(int,int)")); mixin(benchmarkCodeReuse!("vibe.utils.hashmap", "Vibe_HashMap!(int,int)")); mixin(benchmarkCodeReuse!("jive.map", "Jive_Map!(int,int)")); //mixin(benchmarkCodeReuse!("stdx.collections.hashtable", "Stdx_Hashtable!(int,int)")); //mixin(benchmarkCodeReuse!("tanya.container.hashtable", "Tanya_HashTable!(int,int)")); //mixin(benchmarkCodeReuse!("kontainer.orderedAssocArray.orderedAssocArray", "Kontainer_OrderedAssocArray!(int,int)")); } } } ```
Jun 25 2018
With LDC2 the times for vibe.utils.hashmap and memutils.hashmap are suspiciously low, leading me to suspect that the optimizer might be omitting most of the work. Here are the figures without optimizations enabled. == Speed Ranking using DMD (no optimizations) == 95 msecs built-in AA 168 msecs vibe.utils.hashmap 182 msecs jive.map 224 msecs memutils.hashmap 663 msecs containers.hashmap w/GCAllocator 686 msecs containers.hashmap w/Mallocator == Speed Ranking using LDC2 (no optimizations) == 68 msecs built-in AA 143 msecs vibe.utils.hashmap 155 msecs jive.map 164 msecs memutils.hashmap 515 msecs containers.hashmap w/GCAllocator 537 msecs containers.hashmap w/Mallocator
Jun 25 2018
On Tuesday, 26 June 2018 at 02:53:22 UTC, Nathan S. wrote:With LDC2 the times for vibe.utils.hashmap and memutils.hashmap are suspiciously low, leading me to suspect that the optimizer might be omitting most of the work. Here are the figures without optimizations enabled. == Speed Ranking using DMD (no optimizations) == 95 msecs built-in AA 168 msecs vibe.utils.hashmap 182 msecs jive.map 224 msecs memutils.hashmap 663 msecs containers.hashmap w/GCAllocator 686 msecs containers.hashmap w/Mallocator == Speed Ranking using LDC2 (no optimizations) == 68 msecs built-in AA 143 msecs vibe.utils.hashmap 155 msecs jive.map 164 msecs memutils.hashmap 515 msecs containers.hashmap w/GCAllocator 537 msecs containers.hashmap w/MallocatorDid you by chance also benchmark it with other languages like C++, Go or Rust? BTW I'm not sure what your plans are, but are you aware of this recent article? https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-table There were also plans to lower the AA implementation entirely into D runtime/user space, s.t. specialization can be done easier, but sadly these plans stagnated so far: https://github.com/dlang/druntime/pull/1282 https://github.com/dlang/druntime/pull/1985
Jun 25 2018
On Tuesday, 26 June 2018 at 03:45:27 UTC, Seb wrote:Did you by chance also benchmark it with other languages like C++, Go or Rust?I didn't since I was evaluating hashtable implementations for use in a D application.BTW I'm not sure what your plans are, but are you aware of this recent article? https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-tableI wasn't, thanks.
Jun 25 2018
On Tuesday, 26 June 2018 at 04:17:44 UTC, Nathan S. wrote:On Tuesday, 26 June 2018 at 03:45:27 UTC, Seb wrote:It isn't fair to post the benchmarks without giving some time to fix the bug :). Just kidding. Thanks a lot for taking time to report the bug. tanya's HashTable implementation is very young. I fixed the bug in the "bugfix/hashtable-endless" branch (still need to fix another rehash() before merging into master). Just put in dub.sdl: dependency "tanya" version="~bugfix/hashtable-endless" Here are the results from my machine with tanya included. I'm posting only optimized version for dmd and ldc. I just say that tanya's HashTable sucks in debug mode, I'm using an underlying structure to combine common functionality for the hash table and hash set. With optimizations a lot of function calls can be inlined. So dmd: Hashtable benchmark N (size) = 100 (repetitions) = 10000 =Results (new hashtables)= [checksum 1526449824] 139 msecs built-in AA [checksum 1526449824] 368 msecs containers.hashmap w/Mallocator [checksum 1526449824] 422 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 97 msecs memutils.hashmap [checksum 1526449824] 101 msecs vibe.utils.hashmap [checksum 1526449824] 181 msecs jive.map [checksum 1526449824] 242 msecs tanya.container.hashtable [checksum 1526449824] 128 msecs built-in AA [checksum 1526449824] 361 msecs containers.hashmap w/Mallocator [checksum 1526449824] 416 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 95 msecs memutils.hashmap [checksum 1526449824] 109 msecs vibe.utils.hashmap [checksum 1526449824] 179 msecs jive.map [checksum 1526449824] 239 msecs tanya.container.hashtable [checksum 1526449824] 131 msecs built-in AA [checksum 1526449824] 360 msecs containers.hashmap w/Mallocator [checksum 1526449824] 421 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 89 msecs memutils.hashmap [checksum 1526449824] 105 msecs vibe.utils.hashmap [checksum 1526449824] 180 msecs jive.map [checksum 1526449824] 237 msecs tanya.container.hashtable =Results (reusing hashtables)= (checksum 1526449824) 57.66 msecs built-in AA (checksum 1526449824) 52.76 msecs containers.hashmap w/Mallocator (checksum 1526449824) 48.49 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 31.16 msecs memutils.hashmap (checksum 1526449824) 45.19 msecs vibe.utils.hashmap (checksum 1526449824) 47.52 msecs jive.map (checksum 1526449824) 114.41 msecs tanya.container.hashtable (checksum 1526449824) 54.42 msecs built-in AA (checksum 1526449824) 52.37 msecs containers.hashmap w/Mallocator (checksum 1526449824) 53.10 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 32.39 msecs memutils.hashmap (checksum 1526449824) 46.94 msecs vibe.utils.hashmap (checksum 1526449824) 48.90 msecs jive.map (checksum 1526449824) 113.73 msecs tanya.container.hashtable (checksum 1526449824) 58.06 msecs built-in AA (checksum 1526449824) 53.29 msecs containers.hashmap w/Mallocator (checksum 1526449824) 55.08 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 30.94 msecs memutils.hashmap (checksum 1526449824) 44.89 msecs vibe.utils.hashmap (checksum 1526449824) 47.69 msecs jive.map (checksum 1526449824) 112.62 msecs tanya.container.hashtable LDC: Hashtable benchmark N (size) = 100 (repetitions) = 10000 =Results (new hashtables)= [checksum 1526449824] 103 msecs built-in AA [checksum 1526449824] 261 msecs containers.hashmap w/Mallocator [checksum 1526449824] 274 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 64 msecs memutils.hashmap [checksum 1526449824] 52 msecs vibe.utils.hashmap [checksum 1526449824] 131 msecs jive.map [checksum 1526449824] 102 msecs tanya.container.hashtable [checksum 1526449824] 97 msecs built-in AA [checksum 1526449824] 257 msecs containers.hashmap w/Mallocator [checksum 1526449824] 274 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 59 msecs memutils.hashmap [checksum 1526449824] 50 msecs vibe.utils.hashmap [checksum 1526449824] 131 msecs jive.map [checksum 1526449824] 102 msecs tanya.container.hashtable [checksum 1526449824] 96 msecs built-in AA [checksum 1526449824] 258 msecs containers.hashmap w/Mallocator [checksum 1526449824] 271 msecs containers.hashmap w/GCAllocator [checksum 1526449824] 60 msecs memutils.hashmap [checksum 1526449824] 50 msecs vibe.utils.hashmap [checksum 1526449824] 131 msecs jive.map [checksum 1526449824] 102 msecs tanya.container.hashtable =Results (reusing hashtables)= (checksum 1526449824) 31.89 msecs built-in AA (checksum 1526449824) 26.17 msecs containers.hashmap w/Mallocator (checksum 1526449824) 26.55 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 16.08 msecs memutils.hashmap (checksum 1526449824) 20.43 msecs vibe.utils.hashmap (checksum 1526449824) 30.75 msecs jive.map (checksum 1526449824) 54.34 msecs tanya.container.hashtable (checksum 1526449824) 31.96 msecs built-in AA (checksum 1526449824) 25.99 msecs containers.hashmap w/Mallocator (checksum 1526449824) 26.02 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 16.05 msecs memutils.hashmap (checksum 1526449824) 20.23 msecs vibe.utils.hashmap (checksum 1526449824) 30.76 msecs jive.map (checksum 1526449824) 52.78 msecs tanya.container.hashtable (checksum 1526449824) 36.33 msecs built-in AA (checksum 1526449824) 29.36 msecs containers.hashmap w/Mallocator (checksum 1526449824) 27.83 msecs containers.hashmap w/GCAllocator (checksum 1526449824) 15.97 msecs memutils.hashmap (checksum 1526449824) 20.28 msecs vibe.utils.hashmap (checksum 1526449824) 30.19 msecs jive.map (checksum 1526449824) 53.06 msecs tanya.container.hashtableDid you by chance also benchmark it with other languages like C++, Go or Rust?I didn't since I was evaluating hashtable implementations for use in a D application.BTW I'm not sure what your plans are, but are you aware of this recent article? https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-tableI wasn't, thanks.
Jun 26 2018
It seems it doesn't work with a branch in dub.sdl. I just replaced the files in ~/.dub/packages.
Jun 26 2018
BTW the output is formatted so you can get a sorted list of times across all trials by piping the output through `sort -n`. That's also why the tests reusing maps start with ( instead of [, so they will be grouped separately.
Jun 26 2018
On Tuesday, 26 June 2018 at 09:03:10 UTC, Eugene Wissner wrote:It seems it doesn't work with a branch in dub.sdl. I just replaced the files in ~/.dub/packages.And to make tanya perform better than built-in AAs in the first test, define a hash function: size_t hasher(int e) { return e; } and then: mixin(benchmarkCode!("tanya.container.hashtable", "Tanya_HashTable!(int,int, hasher)")); or mixin(benchmarkCodeReuse!("tanya.container.hashtable", "Tanya_HashTable!(int,int,hasher)")) Tanya hashes any value, also integral types; other hashtables probably not. It should theoretically provide better key distribution.
Jun 26 2018
On Tuesday, 26 June 2018 at 14:33:25 UTC, Eugene Wissner wrote:Tanya hashes any value, also integral types; other hashtables probably not.Your intuition is correct here. Most of the tables use `typeid(key).getHash(&key)`, which for `int` just returns `key`. = Built-in AA = General case: `typeid.getHash` with additional scrambling. Customizable: no. =memutils.hashmap= General case: `typeid.getHash`. Customizable: no. Other notes: * Special case for `toHash` member (bypasses `typeid`). * Interesting special cases for ref-counted data types, with further special cases for ref-counted strings and arrays. =vibe.utils.hashmap= General case: `typeid.getHash`. Customizable: yes, through optional `Traits` template parameter. Other notes: * Special case for `toHash` member (bypasses `typeid`). * Tries to implement a special case for objects with the default `Object.toHash` function, but it seems like it can never work. =jive.map= General case: `typeid.getHash`. Customizable: no. =containers.hashmap= General case: `typeid.getHash`. Customizable: yes, through optional `hashFunction` template parameter. Other notes: * Special case for strings on 64-bit builds. Uses FNV-1a instead of default hash function.
Jun 26 2018
On Tuesday, 26 June 2018 at 03:45:27 UTC, Seb wrote:Did you by chance also benchmark it with other languages like C++, Go or Rust?= Not reusing hashtables, optimizations enabled = 79 msecs Rust std::collections::HashMap 90 msecs Go built-in map 177 msecs C++ std::unordered_map (whichever implementation comes with Xcode) = Reusing hashtables, optimizations enabled = 24 msecs C++ std::unordered_map (whichever implementation comes with Xcode) 26 msecs Go built-in map 36 msecs Rust std::collections::HashMap
Jun 27 2018