Skip to content

Commit 96be8bc

Browse files
committed
Use LDC intrinsics to speed up hashmap and unrolledlist
1 parent 117c1d9 commit 96be8bc

File tree

3 files changed

+69
-41
lines changed

3 files changed

+69
-41
lines changed

src/containers/hashset.d

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,6 @@ private:
207207
import containers.internal.node : shouldAddGCRange, FatNodeInfo;
208208
import containers.internal.storage_type : ContainerStorageType;
209209
import std.traits : isPointer;
210-
import core.bitop : bsf;
211210

212211
alias LengthType = ubyte;
213212
alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof);

src/containers/internal/hash.d

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -44,40 +44,27 @@ else
4444
*/
4545
size_t hashToIndex(const size_t hash, const size_t len) pure nothrow @nogc @safe
4646
{
47-
import core.bitop : bsr;
48-
4947
// This magic number taken from
5048
// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
5149
//
5250
// It's amazing how much faster this makes the hash data structures
5351
// when faced with low quality hash functions.
54-
version (D_InlineAsm_X86_64)
52+
static if (size_t.sizeof == 8)
53+
enum ulong magic = 11_400_714_819_323_198_485UL;
54+
else
55+
enum uint magic = 2_654_435_769U;
56+
57+
if (len <= 1)
58+
return 0;
59+
version(LDC)
5560
{
56-
asm @nogc
57-
{
58-
naked;
59-
cmp RDI, 1;
60-
je one;
61-
mov RAX, 11_400_714_819_323_198_485UL;
62-
mul RSI;
63-
tzcnt R9, RDI; // We assume here that the length is a power of two
64-
mov RCX, 64;
65-
sub RCX, R9;
66-
shr RAX, CL;
67-
ret;
68-
one:
69-
xor RAX, RAX;
70-
ret;
71-
}
61+
import ldc.intrinsics : llvm_cttz;
62+
return (hash * magic) >>> ((size_t.sizeof * 8) - llvm_cttz(len, true));
7263
}
7364
else
7465
{
75-
if (len <= 1)
76-
return 0;
77-
static if (size_t.sizeof == 8)
78-
return (hash * 11_400_714_819_323_198_485UL) >>> (64 - bsr(len));
79-
else
80-
return (hash * 2_654_435_769U) >>> (32 - bsr(len));
66+
import core.bitop : bsr;
67+
return (hash * magic) >>> ((size_t.sizeof * 8) - bsr(len));
8168
}
8269
}
8370

src/containers/unrolledlist.d

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,16 @@ struct UnrolledList(T, Allocator = Mallocator,
252252
}
253253
body
254254
{
255-
import containers.internal.backwards : bsf;
256-
size_t index = bsf(_front.registry);
255+
version (LDC)
256+
{
257+
import ldc.intrinsics : llvm_cttz;
258+
size_t index = llvm_cttz(_front.registry, true);
259+
}
260+
else
261+
{
262+
import containers.internal.backwards : bsf;
263+
size_t index = bsf(_front.registry);
264+
}
257265
T r = _front.items[index];
258266
_front.markUnused(index);
259267
_length--;
@@ -302,9 +310,16 @@ struct UnrolledList(T, Allocator = Mallocator,
302310
}
303311
body
304312
{
305-
import containers.internal.backwards : bsf;
306-
307-
immutable size_t index = bsf(_front.registry);
313+
version (LDC)
314+
{
315+
import ldc.intrinsics : llvm_cttz;
316+
immutable index = llvm_cttz(_front.registry, true);
317+
}
318+
else
319+
{
320+
import containers.internal.backwards : bsf;
321+
immutable index = bsf(_front.registry);
322+
}
308323
return *(cast(typeof(return)*) &_front.items[index]);
309324
}
310325

@@ -377,13 +392,22 @@ struct UnrolledList(T, Allocator = Mallocator,
377392

378393
this(inout(Node)* current)
379394
{
380-
import containers.internal.backwards : bsf;
381395
import std.format:format;
382396

383397
this.current = current;
384398
if (current !is null)
385399
{
386-
index = bsf(current.registry);
400+
version (LDC)
401+
{
402+
import ldc.intrinsics : llvm_cttz;
403+
index = llvm_cttz(current.registry, true);
404+
}
405+
else
406+
{
407+
import containers.internal.backwards : bsf;
408+
index = bsf(current.registry);
409+
}
410+
387411
assert (index < nodeCapacity);
388412
}
389413
else
@@ -490,12 +514,22 @@ private:
490514

491515
static bool shouldMerge(const Node* first, const Node* second)
492516
{
493-
import containers.internal.backwards : popcnt;
494-
495517
if (first is null || second is null)
496518
return false;
497-
immutable f = popcnt(first.registry);
498-
immutable s = popcnt(second.registry);
519+
version (LDC)
520+
{
521+
import ldc.intrinsics : llvm_ctpop;
522+
523+
immutable f = llvm_ctpop(first.registry);
524+
immutable s = llvm_ctpop(second.registry);
525+
}
526+
else
527+
{
528+
import containers.internal.backwards : popcnt;
529+
530+
immutable f = popcnt(first.registry);
531+
immutable s = popcnt(second.registry);
532+
}
499533
return f + s <= nodeCapacity;
500534
}
501535

@@ -508,7 +542,6 @@ private:
508542
}
509543
body
510544
{
511-
import containers.internal.backwards : bsf;
512545
size_t i;
513546
ContainerStorageType!T[nodeCapacity] temp;
514547
foreach (j; 0 .. nodeCapacity)
@@ -529,11 +562,20 @@ private:
529562
{
530563
size_t nextAvailableIndex() const nothrow pure @safe @nogc
531564
{
532-
import containers.internal.backwards : bsf;
533565
static if (BookkeepingType.sizeof < uint.sizeof)
534-
return bsf(~(cast(uint) registry));
566+
immutable uint notReg = ~(cast(uint) registry);
567+
else
568+
immutable uint notReg = ~registry;
569+
version (LDC)
570+
{
571+
import ldc.intrinsics : llvm_cttz;
572+
return llvm_cttz(notReg, true);
573+
}
535574
else
536-
return bsf(~registry);
575+
{
576+
import containers.internal.backwards : bsf;
577+
return bsf(notReg);
578+
}
537579
}
538580

539581
void markUsed(size_t index) nothrow pure @safe @nogc

0 commit comments

Comments
 (0)