Skip to content

Commit c94c7db

Browse files
committed
pack buckets into uint64
1 parent a4ac97c commit c94c7db

File tree

4 files changed

+96
-49
lines changed

4 files changed

+96
-49
lines changed

bucket.go

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ package cuckoo
33
import (
44
"bytes"
55
"fmt"
6+
"math/bits"
67
)
78

89
// fingerprint represents a single entry in a bucket.
910
type fingerprint uint16
1011

1112
// bucket keeps track of fingerprints hashing to the same index.
12-
type bucket [bucketSize]fingerprint
13+
type bucket uint64
1314

1415
const (
1516
nullFp = 0
@@ -21,8 +22,8 @@ const (
2122
// insert a fingerprint into a bucket. Returns true if there was enough space and insertion succeeded.
2223
// Note it allows inserting the same fingerprint multiple times.
2324
func (b *bucket) insert(fp fingerprint) bool {
24-
if i := b.index(nullFp); i != 4 {
25-
b[i] = fp
25+
if i := findZeros(uint64(*b)); i != 0 {
26+
*b |= bucket(fp) << ((bits.Len64(i)/fingerprintSizeBits - 1) * fingerprintSizeBits)
2627
return true
2728
}
2829
return false
@@ -31,43 +32,37 @@ func (b *bucket) insert(fp fingerprint) bool {
3132
// delete a fingerprint from a bucket.
3233
// Returns true if the fingerprint was present and successfully removed.
3334
func (b *bucket) delete(fp fingerprint) bool {
34-
if i := b.index(fp); i != 4 {
35-
b[i] = nullFp
35+
if i := findValue(uint64(*b), uint16(fp)); i != 0 {
36+
*b &= ^(maxFingerprint << ((bits.Len64(i)/fingerprintSizeBits - 1) * fingerprintSizeBits))
3637
return true
3738
}
3839
return false
3940
}
4041

42+
func (b *bucket) swap(i uint64, fp fingerprint) fingerprint {
43+
p := (*b) >> (i * fingerprintSizeBits) & maxFingerprint
44+
*b = (*b) & ^(maxFingerprint<<(i*fingerprintSizeBits)) | (bucket(fp) << (i * fingerprintSizeBits))
45+
return fingerprint(p)
46+
}
47+
4148
func (b *bucket) contains(needle fingerprint) bool {
42-
return b.index(needle) != 4
49+
return findValue(uint64(*b), uint16(needle)) != 0
4350
}
4451

45-
func (b *bucket) index(needle fingerprint) uint8 {
46-
if b[0] == needle {
47-
return 0
48-
}
49-
if b[1] == needle {
50-
return 1
51-
}
52-
if b[2] == needle {
53-
return 2
54-
}
55-
if b[3] == needle {
56-
return 3
57-
}
58-
return 4
52+
func (b *bucket) nullsCount() uint {
53+
return uint(bits.OnesCount64(findValue(uint64(*b), nullFp)))
5954
}
6055

6156
// reset deletes all fingerprints in the bucket.
6257
func (b *bucket) reset() {
63-
*b = [bucketSize]fingerprint{nullFp, nullFp, nullFp, nullFp}
58+
*b = 0
6459
}
6560

6661
func (b *bucket) String() string {
6762
var buf bytes.Buffer
6863
buf.WriteString("[")
69-
for _, by := range b {
70-
buf.WriteString(fmt.Sprintf("%5d ", by))
64+
for i := 3; i >= 0; i-- {
65+
buf.WriteString(fmt.Sprintf("%5d ", ((*b)>>(i*fingerprintSizeBits))&maxFingerprint))
7166
}
7267
buf.WriteString("]")
7368
return buf.String()

bucket_test.go

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,65 @@ import (
88
func TestBucket_Reset(t *testing.T) {
99
var bkt bucket
1010
for i := fingerprint(0); i < bucketSize; i++ {
11-
bkt[i] = i
11+
bkt.insert(i + 1)
1212
}
13+
1314
bkt.reset()
1415

1516
var want bucket
1617
if !reflect.DeepEqual(bkt, want) {
1718
t.Errorf("bucket.reset() got %v, want %v", bkt, want)
1819
}
1920
}
21+
22+
func TestBucket_Insert(t *testing.T) {
23+
var bkt bucket
24+
for i := fingerprint(0); i < bucketSize; i++ {
25+
if !bkt.insert(i + 1) {
26+
t.Error("bucket insert failed")
27+
}
28+
}
29+
if bkt.insert(5) {
30+
t.Error("expected bucket insert to fail after overflow")
31+
}
32+
}
33+
34+
func TestBucket_Delete(t *testing.T) {
35+
var bkt bucket
36+
for i := fingerprint(0); i < bucketSize; i++ {
37+
bkt.insert(i + 1)
38+
}
39+
40+
for i := fingerprint(0); i < bucketSize; i++ {
41+
if !bkt.delete(i + 1) {
42+
t.Error("bucket delete failed")
43+
}
44+
if !bkt.insert(i + 1) {
45+
t.Error("bucket insert after delete failed")
46+
}
47+
}
48+
}
49+
50+
func TestBucket_Swap(t *testing.T) {
51+
var bkt bucket
52+
bkt.insert(123)
53+
if prev := bkt.swap(3, 321); prev != 123 {
54+
t.Errorf("swap returned unexpected value %d", prev)
55+
}
56+
if !bkt.contains(321) {
57+
t.Errorf("contains after swap failed")
58+
}
59+
}
60+
61+
func TestBucket_Contains(t *testing.T) {
62+
var bkt bucket
63+
for i := fingerprint(0); i < bucketSize; i++ {
64+
bkt.insert(i + 1)
65+
}
66+
67+
for i := fingerprint(0); i < bucketSize; i++ {
68+
if !bkt.contains(i + 1) {
69+
t.Error("bucket contains failed")
70+
}
71+
}
72+
}

cuckoofilter.go

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func NewFilter(numElements uint) *Filter {
3737
return &Filter{
3838
buckets: buckets,
3939
count: 0,
40-
bucketIndexMask: uint(len(buckets) - 1),
40+
bucketIndexMask: numBuckets - 1,
4141
}
4242
}
4343

@@ -73,7 +73,11 @@ func (cf *Filter) Insert(data []byte) bool {
7373
if cf.insert(fp, i2) {
7474
return true
7575
}
76-
return cf.reinsert(fp, randi(&cf.rng, i1, i2))
76+
if cf.rng.Uint64()&1 == 0 {
77+
return cf.reinsert(fp, i1)
78+
} else {
79+
return cf.reinsert(fp, i2)
80+
}
7781
}
7882

7983
func (cf *Filter) insert(fp fingerprint, i uint) bool {
@@ -86,9 +90,9 @@ func (cf *Filter) insert(fp fingerprint, i uint) bool {
8690

8791
func (cf *Filter) reinsert(fp fingerprint, i uint) bool {
8892
for k := 0; k < maxCuckooKickouts; k++ {
89-
j := cf.rng.Intn(bucketSize)
93+
j := cf.rng.Uint64() & (bucketSize - 1)
9094
// Swap fingerprint with bucket entry.
91-
cf.buckets[i][j], fp = fp, cf.buckets[i][j]
95+
fp = cf.buckets[i].swap(j, fp)
9296

9397
// Move kicked out fingerprint to alternate location.
9498
i = getAltIndex(fp, i, cf.bucketIndexMask)
@@ -130,9 +134,7 @@ const bytesPerBucket = bucketSize * fingerprintSizeBits / 8
130134
func (cf *Filter) Encode() []byte {
131135
buf := make([]byte, 0, len(cf.buckets)*bytesPerBucket)
132136
for _, b := range cf.buckets {
133-
for _, fp := range b {
134-
buf = binary.LittleEndian.AppendUint16(buf, uint16(fp))
135-
}
137+
buf = binary.LittleEndian.AppendUint64(buf, uint64(b))
136138
}
137139
return buf
138140
}
@@ -152,14 +154,10 @@ func Decode(data []byte) (*Filter, error) {
152154

153155
var count, pos uint
154156
buckets := make([]bucket, numBuckets)
155-
for i, b := range buckets {
156-
for j := range b {
157-
buckets[i][j] = fingerprint(binary.LittleEndian.Uint16(data[pos : pos+2]))
158-
pos += 2
159-
if buckets[i][j] != nullFp {
160-
count++
161-
}
162-
}
157+
for i := range buckets {
158+
buckets[i] = bucket(binary.LittleEndian.Uint64(data[pos : pos+8]))
159+
pos += 8
160+
count += bucketSize - buckets[i].nullsCount()
163161
}
164162
return &Filter{
165163
buckets: buckets,

util.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"encoding/binary"
55
"math/bits"
66

7-
"github.com/zeebo/wyhash"
87
"github.com/zeebo/xxh3"
98
)
109

@@ -14,18 +13,10 @@ func init() {
1413
b := make([]byte, 2)
1514
for i := 0; i < maxFingerprint+1; i++ {
1615
binary.LittleEndian.PutUint16(b, uint16(i))
17-
altHash[i] = (uint(xxh3.Hash(b)))
16+
altHash[i] = uint(xxh3.Hash(b))
1817
}
1918
}
2019

21-
// randi returns either i1 or i2 randomly.
22-
func randi(rng *wyhash.RNG, i1, i2 uint) uint {
23-
if rng.Uint64()&1 == 0 {
24-
return i1
25-
}
26-
return i2
27-
}
28-
2920
// getAltIndex returns the alternate bucket index for fingerprint fp, given
// one of its bucket indices i. It XORs i with the precomputed hash of fp from
// the altHash table and masks the result with bucketIndexMask.
//
// XOR is its own inverse, so for an i already within the mask, applying
// getAltIndex to the result yields i again.
func getAltIndex(fp fingerprint, i uint, bucketIndexMask uint) uint {
	alt := i ^ altHash[fp]
	return alt & bucketIndexMask
}
@@ -50,3 +41,13 @@ func getIndexAndFingerprint(data []byte, bucketIndexMask uint) (uint, fingerprin
5041
// getNextPow2 returns the smallest power of two that is greater than or equal
// to n, for 1 <= n <= 1<<63 (provided the result fits in a uint).
//
// For n == 0 the decrement wraps around to the maximum uint64 and the shift
// count reaches the word size, so the result is 0; the same holds for
// n > 1<<63. Callers must handle a zero result.
func getNextPow2(n uint64) uint {
	// bits.Len64(n-1) is the number of bits needed to represent n-1, which is
	// exactly the exponent of the next power of two at or above n.
	exp := bits.Len64(n - 1)
	return uint(1) << exp
}
44+
45+
// findZeros treats v as four packed 16-bit lanes and returns a mask in which
// the most significant bit of a lane is set if and only if that lane of v is
// zero. All other bits of the result are clear, so a result of 0 means no
// lane is zero.
//
// This is the 16-bit-lane form of the zero-byte test described at
// https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
func findZeros(v uint64) uint64 {
	// low15 selects every bit of each lane except the lane's top bit.
	const low15 = 0x7FFF7FFF7FFF7FFF
	// Adding low15 to a lane's low 15 bits carries into the lane's top bit
	// exactly when one of those bits is set (and never beyond the lane);
	// OR-ing v back in accounts for the top bit itself. The top bit of each
	// lane of nonZero is therefore set iff the lane is non-zero.
	nonZero := ((v & low15) + low15) | v
	// Force the low 15 bits on and invert: only the top bits of zero lanes remain.
	return ^(nonZero | low15)
}
49+
50+
// SEE: https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
51+
func findValue(x uint64, n uint16) uint64 {
52+
return findZeros(x ^ (^uint64(0) / (1<<16 - 1) * uint64(n)))
53+
}

0 commit comments

Comments
 (0)