@@ -11,37 +11,56 @@ import (
// maxCuckooKickouts caps how many times the kick-out loop in reinsert runs
// before it stops.
const maxCuckooKickouts = 500
1212
1313// Filter is a probabilistic counter.
14- type Filter struct {
15- buckets []bucket
16- count uint
14+ type Filter [T fingerprintsize ] struct {
15+ buckets []bucket [T ]
16+ getFingerprint func (hash uint64 ) T
17+ count uint
1718 // Bit mask set to len(buckets) - 1. As len(buckets) is always a power of 2,
1819 // applying this mask mimics the operation x % len(buckets).
1920 bucketIndexMask uint
2021}
2122
22- // NewFilter returns a new cuckoofilter suitable for the given number of elements.
23- // When inserting more elements, insertion speed will drop significantly and insertions might fail altogether.
24- // A capacity of 1000000 is a normal default, which allocates
25- // about ~2MB on 64-bit machines.
26- func NewFilter (numElements uint ) * Filter {
23+ func numBuckets (numElements uint ) uint {
2724 numBuckets := getNextPow2 (uint64 (numElements / bucketSize ))
2825 if float64 (numElements )/ float64 (numBuckets * bucketSize ) > 0.96 {
2926 numBuckets <<= 1
3027 }
3128 if numBuckets == 0 {
3229 numBuckets = 1
3330 }
34- buckets := make ([]bucket , numBuckets )
35- return & Filter {
31+ return numBuckets
32+ }
33+
34+ // NewFilter returns a new cuckoofilter suitable for the given number of elements.
35+ // When inserting more elements, insertion speed will drop significantly and insertions might fail altogether.
36+ // A capacity of 1000000 is a normal default, which allocates
37+ // about ~2MB on 64-bit machines.
38+ func NewFilter (numElements uint ) * Filter [uint16 ] {
39+ buckets := make ([]bucket [uint16 ], numBuckets (numElements ))
40+ return & Filter [uint16 ]{
41+ buckets : buckets ,
42+ count : 0 ,
43+ bucketIndexMask : uint (len (buckets ) - 1 ),
44+ getFingerprint : getFinterprintUint16 ,
45+ }
46+ }
47+
48+ // NewFilterLowPrecision is the same as NewFilter, but returns a filter that uses
49+ // half the memory but has lower precision.
50+ func NewFilterLowPrecision (numElements uint ) * Filter [uint8 ] {
51+ buckets := make ([]bucket [uint8 ], numBuckets (numElements ))
52+ return & Filter [uint8 ]{
3653 buckets : buckets ,
3754 count : 0 ,
3855 bucketIndexMask : uint (len (buckets ) - 1 ),
56+ getFingerprint : getFinterprintUint8 ,
3957 }
4058}
4159
60+
4261// Lookup returns true if data is in the filter.
43- func (cf * Filter ) Lookup (data []byte ) bool {
44- i1 , fp := getIndexAndFingerprint (data , cf .bucketIndexMask )
62+ func (cf * Filter [ T ] ) Lookup (data []byte ) bool {
63+ i1 , fp := getIndexAndFingerprint (data , cf .bucketIndexMask , cf . getFingerprint )
4564 if b := cf .buckets [i1 ]; b .contains (fp ) {
4665 return true
4766 }
@@ -51,7 +70,7 @@ func (cf *Filter) Lookup(data []byte) bool {
5170}
5271
5372// Reset removes all items from the filter, setting count to 0.
54- func (cf * Filter ) Reset () {
73+ func (cf * Filter [ T ] ) Reset () {
5574 for i := range cf .buckets {
5675 cf .buckets [i ].reset ()
5776 }
@@ -62,8 +81,8 @@ func (cf *Filter) Reset() {
6281// * Might return false negatives
6382// * Deletes are not guaranteed to work
6483// To increase success rate of inserts, create a larger filter.
65- func (cf * Filter ) Insert (data []byte ) bool {
66- i1 , fp := getIndexAndFingerprint (data , cf .bucketIndexMask )
84+ func (cf * Filter [ T ] ) Insert (data []byte ) bool {
85+ i1 , fp := getIndexAndFingerprint (data , cf .bucketIndexMask , cf . getFingerprint )
6786 if cf .insert (fp , i1 ) {
6887 return true
6988 }
@@ -74,15 +93,15 @@ func (cf *Filter) Insert(data []byte) bool {
7493 return cf .reinsert (fp , randi (i1 , i2 ))
7594}
7695
77- func (cf * Filter ) insert (fp fingerprint , i uint ) bool {
96+ func (cf * Filter [ T ] ) insert (fp T , i uint ) bool {
7897 if cf .buckets [i ].insert (fp ) {
7998 cf .count ++
8099 return true
81100 }
82101 return false
83102}
84103
85- func (cf * Filter ) reinsert (fp fingerprint , i uint ) bool {
104+ func (cf * Filter [ T ] ) reinsert (fp T , i uint ) bool {
86105 for k := 0 ; k < maxCuckooKickouts ; k ++ {
87106 j := rand .Intn (bucketSize )
88107 // Swap fingerprint with bucket entry.
@@ -98,13 +117,13 @@ func (cf *Filter) reinsert(fp fingerprint, i uint) bool {
98117}
99118
100119// Delete data from the filter. Returns true if the data was found and deleted.
101- func (cf * Filter ) Delete (data []byte ) bool {
102- i1 , fp := getIndexAndFingerprint (data , cf .bucketIndexMask )
120+ func (cf * Filter [ T ] ) Delete (data []byte ) bool {
121+ i1 , fp := getIndexAndFingerprint (data , cf .bucketIndexMask , cf . getFingerprint )
103122 i2 := getAltIndex (fp , i1 , cf .bucketIndexMask )
104123 return cf .delete (fp , i1 ) || cf .delete (fp , i2 )
105124}
106125
107- func (cf * Filter ) delete (fp fingerprint , i uint ) bool {
126+ func (cf * Filter [ T ] ) delete (fp T , i uint ) bool {
108127 if cf .buckets [i ].delete (fp ) {
109128 cf .count --
110129 return true
@@ -113,19 +132,20 @@ func (cf *Filter) delete(fp fingerprint, i uint) bool {
113132}
114133
115134// Count returns the number of items in the filter.
116- func (cf * Filter ) Count () uint {
135+ func (cf * Filter [ T ] ) Count () uint {
117136 return cf .count
118137}
119138
120139// LoadFactor returns the fraction slots that are occupied.
121- func (cf * Filter ) LoadFactor () float64 {
140+ func (cf * Filter [ T ] ) LoadFactor () float64 {
122141 return float64 (cf .count ) / float64 (len (cf .buckets )* bucketSize )
123142}
124143
125- const bytesPerBucket = bucketSize * fingerprintSizeBits / 8
144+ // TODO(panmari): Size of fingerprint needs to be derived from type. Currently hardcoded to 16 for uint16.
145+ const bytesPerBucket = bucketSize * 16 / 8
126146
127147// Encode returns a byte slice representing a Cuckoofilter.
128- func (cf * Filter ) Encode () []byte {
148+ func (cf * Filter [ T ] ) Encode () []byte {
129149 bytes := make ([]byte , 0 , len (cf .buckets )* bytesPerBucket )
130150 for _ , b := range cf .buckets {
131151 for _ , f := range b {
@@ -138,7 +158,7 @@ func (cf *Filter) Encode() []byte {
138158}
139159
140160// Decode returns a Cuckoofilter from a byte slice created using Encode.
141- func Decode (bytes []byte ) (* Filter , error ) {
161+ func Decode (bytes []byte ) (* Filter [ uint16 ] , error ) {
142162 if len (bytes )% bucketSize != 0 {
143163 return nil , fmt .Errorf ("bytes must to be multiple of %d, got %d" , bucketSize , len (bytes ))
144164 }
@@ -150,21 +170,22 @@ func Decode(bytes []byte) (*Filter, error) {
150170 return nil , fmt .Errorf ("numBuckets must to be a power of 2, got %d" , numBuckets )
151171 }
152172 var count uint
153- buckets := make ([]bucket , numBuckets )
173+ buckets := make ([]bucket [ uint16 ] , numBuckets )
154174 for i , b := range buckets {
155175 for j := range b {
156176 var next []byte
157177 next , bytes = bytes [:2 ], bytes [2 :]
158178
159- if fp := fingerprint ( binary .LittleEndian .Uint16 (next ) ); fp != 0 {
179+ if fp := binary .LittleEndian .Uint16 (next ); fp != 0 {
160180 buckets [i ][j ] = fp
161181 count ++
162182 }
163183 }
164184 }
165- return & Filter {
185+ return & Filter [ uint16 ] {
166186 buckets : buckets ,
167187 count : count ,
168188 bucketIndexMask : uint (len (buckets ) - 1 ),
189+ getFingerprint : getFinterprintUint16 ,
169190 }, nil
170191}
0 commit comments