Skip to content

Commit b4ed7f5

Browse files
committed
improve performance by ~25% in heavy GC situations
1 parent 1652fc7 commit b4ed7f5

File tree

3 files changed

+18
-8
lines changed

3 files changed

+18
-8
lines changed

src/hash_table.c

+14-6
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,32 @@
77
* The implementation is one round of the xorshift64* algorithm.
88
* Code Source: Wikipedia
99
*/
10-
uint64_t hashFn(uint64_t x) {
10+
inline uint64_t hashFn(uint64_t x) {
1111
x ^= x >> 12; // a
1212
x ^= x << 25; // b
1313
x ^= x >> 27; // c
1414
return x * UINT64_C(2685821657736338717);
1515
}
1616

17-
uint64_t computeBucketIndex(statepoint_table_t* table, uint64_t key) {
17+
inline uint64_t computeBucketIndex(statepoint_table_t* table, uint64_t key) {
1818
// Using modulo may introduce a little bias in the table.
1919
// If you care, use the unbiased version that's floating around the internet.
20-
return hashFn(key) % table->size;
20+
21+
// NOTE: we use bitwise AND instead of modulo because the size
22+
// is a power-of-two. This is very important for performance.
23+
return hashFn(key) & (table->size - 1);
2124
}
2225

23-
size_t size_of_frame(uint16_t numSlots) {
26+
inline size_t size_of_frame(uint16_t numSlots) {
2427
return sizeof(frame_info_t) + numSlots * sizeof(pointer_slot_t);
2528
}
2629

27-
size_t frame_size(frame_info_t* frame) {
30+
inline size_t frame_size(frame_info_t* frame) {
2831
return size_of_frame(frame->numSlots);
2932
}
3033

3134
// returns the next frame relative to the current frame
32-
frame_info_t* next_frame(frame_info_t* cur) {
35+
inline frame_info_t* next_frame(frame_info_t* cur) {
3336
uint8_t* next = ((uint8_t*)cur) + frame_size(cur);
3437
return (frame_info_t*)next;
3538
}
@@ -41,6 +44,11 @@ statepoint_table_t* new_table(float loadFactor, uint64_t expectedElms) {
4144

4245
uint64_t numBuckets = (expectedElms / loadFactor) + 1;
4346

47+
// Round up to the nearest power of two. The bitwise-AND indexing in
48+
// computeBucketIndex requires it (needed for the performance of lookup_return_address).
49+
uint64_t factor = ceil(log2((double) numBuckets));
50+
numBuckets = 1ULL << factor;
51+
4452
table_bucket_t* buckets = calloc(numBuckets, sizeof(table_bucket_t));
4553
assert(buckets && "bad alloc");
4654

src/include/hash_table.h

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <assert.h>
1010
#include <stdlib.h>
1111
#include <string.h>
12+
#include <math.h>
1213

1314
/** Functions **/
1415

test/Makefile

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
CC := cc
22
OPT_CC := -O3 -fPIC
33
OPT_LLC := -O3 -relocation-model=pic
4+
LD_FLAGS := -lm
45

56
all: statepoints a.out
67

78
a.out: fib.o driver.o shim.S ../dist/llvm-statepoint-tablegen.a
8-
$(CC) $(OPT_CC) $^
9+
$(CC) $(OPT_CC) $^ $(LD_FLAGS)
910

1011
fib.o: fib.ll
1112
llc $(OPT_LLC) fib.ll -o fib.s
@@ -19,4 +20,4 @@ statepoints:
1920
cd .. && make
2021

2122
clean:
22-
rm -f fib.s fib.o driver.o a.out
23+
rm -f fib.s fib.o driver.o a.out output.txt

0 commit comments

Comments
 (0)