Skip to content

Commit 760a040

Browse files
Merge pull request #129 from Hackerpilot/hash-nonsense
Improve hash code to array index translation
2 parents c692722 + f105d54 commit 760a040

File tree

5 files changed

+80
-88
lines changed

5 files changed

+80
-88
lines changed

src/containers/hashmap.d

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
module containers.hashmap;
99

10-
private import containers.internal.hash : generateHash;
10+
private import containers.internal.hash;
1111
private import containers.internal.node : shouldAddGCRange;
1212
private import stdx.allocator.mallocator : Mallocator;
1313
private import std.traits : isBasicType, Unqual;
@@ -370,7 +370,7 @@ private:
370370

371371
static struct MapRange(MapType, IterType Type)
372372
{
373-
static if (Type == IterType.both)
373+
static if (Type == IterType.both)
374374
{
375375
struct FrontType
376376
{
@@ -452,7 +452,7 @@ private:
452452
bool _empty;
453453
}
454454

455-
void initialize(size_t bucketCount = 4)
455+
void initialize(size_t bucketCount = DEFAULT_BUCKET_COUNT)
456456
{
457457
import std.conv : emplace;
458458
assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
@@ -478,7 +478,7 @@ private:
478478
{
479479
if (buckets.length == 0)
480480
initialize();
481-
immutable size_t index = hashToIndex(hash);
481+
immutable size_t index = hashToIndex(hash, buckets.length);
482482
foreach (ref item; buckets[index])
483483
{
484484
if (item.hash == hash && item.key == key)
@@ -543,20 +543,6 @@ private:
543543
allocator.deallocate(cast(void[]) oldBuckets);
544544
}
545545

546-
size_t hashToIndex(Hash hash) const pure nothrow @safe @nogc
547-
in
548-
{
549-
assert (buckets.length > 0);
550-
}
551-
out (result)
552-
{
553-
assert (result < buckets.length);
554-
}
555-
body
556-
{
557-
return cast(size_t)hash & (buckets.length - 1);
558-
}
559-
560546
inout(Node)* find(const K key, ref size_t index) inout
561547
{
562548
return find(key, index, hashFunction(key));
@@ -568,7 +554,7 @@ private:
568554

569555
if (buckets.empty)
570556
return null;
571-
index = hashToIndex(hash);
557+
index = hashToIndex(hash, buckets.length);
572558
foreach (ref r; buckets[index])
573559
{
574560
if (r.hash == hash && r == key)

src/containers/hashset.d

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
module containers.hashset;
99

10-
private import containers.internal.hash : generateHash;
10+
private import containers.internal.hash : generateHash, hashToIndex;
1111
private import containers.internal.node : shouldAddGCRange;
1212
private import stdx.allocator.mallocator : Mallocator;
1313
private import std.traits : isBasicType;
@@ -108,8 +108,8 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
108108
*/
109109
bool remove(T value)
110110
{
111-
Hash hash = hashFunction(value);
112-
size_t index = hashToIndex(hash);
111+
immutable Hash hash = hashFunction(value);
112+
immutable size_t index = hashToIndex(hash, buckets.length);
113113
static if (storeHash)
114114
immutable bool removed = buckets[index].remove(ItemNode(hash, value));
115115
else
@@ -134,8 +134,8 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
134134
{
135135
if (buckets.length == 0 || _length == 0)
136136
return null;
137-
Hash hash = hashFunction(value);
138-
immutable size_t index = hashToIndex(hash);
137+
immutable Hash hash = hashFunction(value);
138+
immutable index = hashToIndex(hash, buckets.length);
139139
return buckets[index].get(value, hash);
140140
}
141141

@@ -150,7 +150,7 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
150150
if (buckets.length == 0)
151151
initialize(4);
152152
Hash hash = hashFunction(value);
153-
immutable size_t index = hashToIndex(hash);
153+
immutable size_t index = hashToIndex(hash, buckets.length);
154154
static if (storeHash)
155155
auto r = buckets[index].insert(ItemNode(hash, value));
156156
else
@@ -200,12 +200,12 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
200200

201201
private:
202202

203-
import containers.internal.node : shouldAddGCRange, FatNodeInfo;
204-
import containers.internal.storage_type : ContainerStorageType;
205203
import containers.internal.element_type : ContainerElementType;
206204
import containers.internal.mixins : AllocatorState;
207-
import containers.unrolledlist : UnrolledList;
205+
import containers.internal.node : shouldAddGCRange, FatNodeInfo;
206+
import containers.internal.storage_type : ContainerStorageType;
208207
import std.traits : isPointer;
208+
import core.bitop : bsf;
209209

210210
alias LengthType = ubyte;
211211
alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof);
@@ -216,8 +216,8 @@ private:
216216

217217
void initialize(size_t bucketCount)
218218
{
219-
import stdx.allocator : makeArray;
220219
import core.memory : GC;
220+
import stdx.allocator : makeArray;
221221

222222
makeBuckets(bucketCount);
223223
static if (useGC)
@@ -329,13 +329,13 @@ private:
329329
static if (storeHash)
330330
{
331331
immutable Hash hash = node.items[i].hash;
332-
immutable size_t index = hashToIndex(hash);
332+
size_t index = hashToIndex(hash, buckets.length);
333333
buckets[index].insert(ItemNode(hash, node.items[i].value));
334334
}
335335
else
336336
{
337337
immutable Hash hash = hashFunction(node.items[i].value);
338-
immutable size_t index = hashToIndex(hash);
338+
size_t index = hashToIndex(hash, buckets.length);
339339
buckets[index].insert(ItemNode(node.items[i].value));
340340
}
341341
}
@@ -346,21 +346,6 @@ private:
346346
allocator.dispose(oldBuckets);
347347
}
348348

349-
size_t hashToIndex(Hash hash) const pure nothrow @safe
350-
in
351-
{
352-
assert (buckets.length > 0);
353-
}
354-
out (result)
355-
{
356-
import std.string : format;
357-
assert (result < buckets.length, "%d, %d".format(result, buckets.length));
358-
}
359-
body
360-
{
361-
return hash & (buckets.length - 1);
362-
}
363-
364349
static struct Bucket
365350
{
366351
this(this) @disable;

src/containers/internal/hash.d

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,27 @@ else
3434
return h;
3535
}
3636
}
37+
38+
/**
39+
* Convert a hash code into a valid array index.
40+
*
41+
* Params:
42+
* hash = the hash code to be mapped
43+
* len = the length of the array that backs the hash container.
44+
*/
45+
size_t hashToIndex(const size_t hash, const size_t len) pure nothrow @nogc @safe
46+
{
47+
import core.bitop : bsr;
48+
49+
// This magic number taken from
50+
// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
51+
//
52+
// It's amazing how much faster this makes the hash data structures
53+
// when faced with low quality hash functions.
54+
static if (size_t.sizeof == 8)
55+
return (hash * 11_400_714_819_323_198_485UL) >>> (64 - bsr(len));
56+
else
57+
return (hash * 2_654_435_769U) >>> (32 - bsr(len));
58+
}
59+
60+
enum size_t DEFAULT_BUCKET_COUNT = 8;

src/containers/openhashset.d

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,13 @@
66
*/
77
module containers.openhashset;
88

9-
private import containers.internal.hash : generateHash;
9+
private import containers.internal.hash;
1010
private import containers.internal.node : shouldAddGCRange;
11-
private import stdx.allocator.mallocator : Mallocator;
1211
private import stdx.allocator.common : stateSize;
12+
private import stdx.allocator.mallocator : Mallocator;
1313

1414
/**
15-
* Simple open-addressed hash set. Use this instead of HashSet when the size and
16-
* quantity of the data to be inserted is small.
15+
* Simple open-addressed hash set that uses linear probing to resolve collisions.
1716
*
1817
* Params:
1918
* T = the element type of the hash set
@@ -149,7 +148,7 @@ struct OpenHashSet(T, Allocator = Mallocator,
149148
bool insert(T item)
150149
{
151150
if (nodes.length == 0)
152-
initialize(DEFAULT_INITIAL_CAPACITY);
151+
initialize(DEFAULT_BUCKET_COUNT);
153152
immutable size_t hash = hashFunction(item);
154153
size_t index = toIndex(nodes, item, hash);
155154
if (index == size_t.max)
@@ -211,12 +210,11 @@ struct OpenHashSet(T, Allocator = Mallocator,
211210

212211
private:
213212

214-
import containers.internal.storage_type : ContainerStorageType;
215213
import containers.internal.element_type : ContainerElementType;
216214
import containers.internal.mixins : AllocatorState;
215+
import containers.internal.storage_type : ContainerStorageType;
217216
import core.memory : GC;
218217

219-
enum DEFAULT_INITIAL_CAPACITY = 8;
220218
enum bool useGC = supportGC && shouldAddGCRange!T;
221219

222220
static struct Range(ThisT)
@@ -286,9 +284,10 @@ private:
286284
// Returns: size_t.max if the item was not found
287285
static size_t toIndex(const Node[] n, T item, size_t hash)
288286
{
289-
immutable size_t bucketMask = (n.length - 1);
290-
immutable size_t index = hash & bucketMask;
287+
assert (n.length > 0);
288+
immutable size_t index = hashToIndex(hash, n.length);
291289
size_t i = index;
290+
immutable bucketMask = n.length - 1;
292291
while (n[i].used && n[i].data != item)
293292
{
294293
i = (i + 1) & bucketMask;

test/hashmap_gc_test.d

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,46 @@
1-
21
import containers : HashMap;
32
import std.stdio : writefln;
43
import core.memory : GC;
54

6-
75
/**
86
* Generate a random alphanumeric string.
97
*/
10-
@trusted
11-
string randomString (uint len)
8+
@trusted string randomString(uint len)
129
{
13-
import std.ascii : letters, digits;
14-
import std.conv : to;
15-
import std.random : randomSample;
16-
import std.range : chain;
10+
import std.ascii : letters, digits;
11+
import std.conv : to;
12+
import std.random : randomSample;
13+
import std.range : chain;
1714

18-
auto asciiLetters = to! (dchar[]) (letters);
19-
auto asciiDigits = to! (dchar[]) (digits);
15+
auto asciiLetters = to!(dchar[])(letters);
16+
auto asciiDigits = to!(dchar[])(digits);
2017

21-
if (len == 0)
22-
len = 1;
18+
if (len == 0)
19+
len = 1;
2320

24-
auto res = to!string (randomSample (chain (asciiLetters, asciiDigits), len));
25-
return res;
21+
auto res = to!string(randomSample(chain(asciiLetters, asciiDigits), len));
22+
return res;
2623
}
2724

28-
void main ()
25+
void main()
2926
{
30-
immutable iterationCount = 4;
31-
HashMap!(string, string) hmap;
32-
33-
for (uint n = 1; n <= iterationCount; n++) {
34-
foreach (i; 0 .. 1_000_000)
35-
hmap[randomString (4)] = randomString (16);
36-
GC.collect ();
37-
hmap = HashMap!(string, string) (16);
38-
GC.collect ();
39-
40-
foreach (i; 0 .. 1_000_000)
41-
hmap[randomString (4)] = randomString (16);
42-
GC.collect ();
43-
hmap.clear ();
44-
GC.collect ();
45-
46-
writefln ("iteration %s/%s finished", n, iterationCount);
47-
}
27+
immutable iterationCount = 4;
28+
HashMap!(string, string) hmap;
29+
30+
for (uint n = 1; n <= iterationCount; n++)
31+
{
32+
foreach (i; 0 .. 1_000_000)
33+
hmap[randomString(4)] = randomString(16);
34+
GC.collect();
35+
hmap = HashMap!(string, string)(16);
36+
GC.collect();
37+
38+
foreach (i; 0 .. 1_000_000)
39+
hmap[randomString(4)] = randomString(16);
40+
GC.collect();
41+
hmap.clear();
42+
GC.collect();
43+
44+
writefln("iteration %s/%s finished", n, iterationCount);
45+
}
4846
}

0 commit comments

Comments
 (0)