Skip to content

Commit ad51fe6

Browse files
Merge pull request #75 from apache/bloomfilter
(feat) BloomFilter Sketch
2 parents b6a455b + eec0039 commit ad51fe6

30 files changed

Lines changed: 2645 additions & 2 deletions

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ If you are interested in making contributions to this site please see our [Commu
5959
| | ReservoirItemsSketch<T> ||
6060
| | VarOptItemsSketch<T> ||
6161
| Membership | | |
62-
| | BloomFilterSketch | |
62+
| | BloomFilter | 🚧 |
6363

6464

6565
## Specialty Sketches

filters/bit_array.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package filters
19+
20+
import "math/bits"
21+
22+
// getBit reports whether the bit at position index is 1.
//
// Bits are packed 64 per element: element index/64 holds the bit, at
// offset index%64 counted from the least-significant end.
func getBit(array []uint64, index uint64) bool {
	word := array[index/64]
	return (word>>(index%64))&1 == 1
}
28+
29+
// setBit turns on the bit at position index, leaving every other bit as is.
//
// The bit lives in element index/64 at offset index%64.
func setBit(array []uint64, index uint64) {
	word, offset := index/64, index%64
	array[word] = array[word] | uint64(1)<<offset
}
35+
36+
// clearBit turns off the bit at position index, leaving every other bit as is.
//
// The bit lives in element index/64 at offset index%64.
func clearBit(array []uint64, index uint64) {
	mask := uint64(1) << (index % 64)
	word := index / 64
	array[word] &= ^mask
}
42+
43+
// assignBit sets the bit at the specified index to the given value.
44+
func assignBit(array []uint64, index uint64, value bool) {
45+
if value {
46+
setBit(array, index)
47+
} else {
48+
clearBit(array, index)
49+
}
50+
}
51+
52+
// getAndSetBit turns on the bit at position index and reports its prior state.
//
// It returns true when the bit was already 1 before the call, and false when
// this call is what set it.
func getAndSetBit(array []uint64, index uint64) bool {
	word := &array[index>>6]
	bit := uint64(1) << (index & 0x3F)
	previous := *word
	*word = previous | bit
	return previous&bit != 0
}
62+
63+
// countBitsSet returns how many bits across all elements of array are 1.
// An empty or nil slice yields 0.
func countBitsSet(array []uint64) uint64 {
	var total uint64
	for i := 0; i < len(array); i++ {
		total += uint64(bits.OnesCount64(array[i]))
	}
	return total
}
71+
72+
// unionWith ORs source into target element by element, storing the result in
// target, and returns the number of bits set in target afterwards.
//
// The two slices are expected to have the same length. When they differ,
// only the overlapping prefix is combined: a shorter source is treated as if
// it were padded with zero words, so the tail of target is left unchanged
// (but still counted), and any elements of source beyond len(target) are
// ignored. This avoids an index-out-of-range panic after target has already
// been partially modified.
func unionWith(target, source []uint64) uint64 {
	overlap := len(target)
	if len(source) < overlap {
		overlap = len(source)
	}

	count := uint64(0)
	for i := 0; i < overlap; i++ {
		target[i] |= source[i]
		count += uint64(bits.OnesCount64(target[i]))
	}
	// Words with no counterpart in source are unchanged by an OR with zero,
	// but they still contribute to the returned population count.
	for _, word := range target[overlap:] {
		count += uint64(bits.OnesCount64(word))
	}
	return count
}
82+
83+
// intersect ANDs source into target element by element, storing the result
// in target, and returns the number of bits set in target afterwards.
//
// The two slices are expected to have the same length. When they differ,
// a shorter source is treated as if it were padded with zero words, so every
// element of target beyond len(source) is cleared; elements of source beyond
// len(target) are ignored. This avoids an index-out-of-range panic after
// target has already been partially modified.
func intersect(target, source []uint64) uint64 {
	overlap := len(target)
	if len(source) < overlap {
		overlap = len(source)
	}

	count := uint64(0)
	for i := 0; i < overlap; i++ {
		target[i] &= source[i]
		count += uint64(bits.OnesCount64(target[i]))
	}
	// Words with no counterpart in source intersect with zero.
	for i := overlap; i < len(target); i++ {
		target[i] = 0
	}
	return count
}
93+
94+
// invert flips every bit of every element of array in place and returns the
// number of bits that are set once the flip is done.
func invert(array []uint64) uint64 {
	var total uint64
	for i, word := range array {
		flipped := ^word
		array[i] = flipped
		total += uint64(bits.OnesCount64(flipped))
	}
	return total
}

filters/bit_array_test.go

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package filters
19+
20+
import (
21+
"testing"
22+
23+
"github.com/stretchr/testify/assert"
24+
)
25+
26+
func TestBitArrayBasicOperations(t *testing.T) {
27+
// Create array with 128 bits (2 longs)
28+
array := make([]uint64, 2)
29+
30+
// Test initial state - all bits should be 0
31+
assert.Equal(t, uint64(0), countBitsSet(array))
32+
assert.False(t, getBit(array, 0))
33+
assert.False(t, getBit(array, 63))
34+
assert.False(t, getBit(array, 64))
35+
assert.False(t, getBit(array, 127))
36+
37+
// Test setBit
38+
setBit(array, 5)
39+
assert.True(t, getBit(array, 5))
40+
assert.Equal(t, uint64(1), countBitsSet(array))
41+
42+
setBit(array, 65)
43+
assert.True(t, getBit(array, 65))
44+
assert.Equal(t, uint64(2), countBitsSet(array))
45+
46+
// Test clearBit
47+
clearBit(array, 5)
48+
assert.False(t, getBit(array, 5))
49+
assert.Equal(t, uint64(1), countBitsSet(array))
50+
51+
// Test assignBit
52+
assignBit(array, 10, true)
53+
assert.True(t, getBit(array, 10))
54+
assert.Equal(t, uint64(2), countBitsSet(array))
55+
56+
assignBit(array, 10, false)
57+
assert.False(t, getBit(array, 10))
58+
assert.Equal(t, uint64(1), countBitsSet(array))
59+
60+
// Test getAndSetBit
61+
wasSet := getAndSetBit(array, 20)
62+
assert.False(t, wasSet) // Was not set
63+
assert.True(t, getBit(array, 20))
64+
assert.Equal(t, uint64(2), countBitsSet(array))
65+
66+
wasSet = getAndSetBit(array, 20)
67+
assert.True(t, wasSet) // Was already set
68+
assert.True(t, getBit(array, 20))
69+
assert.Equal(t, uint64(2), countBitsSet(array))
70+
}
71+
72+
func TestBitArrayInversion(t *testing.T) {
73+
// Create array with 128 bits
74+
array := make([]uint64, 2)
75+
76+
// Set some bits
77+
setBit(array, 0)
78+
setBit(array, 10)
79+
setBit(array, 63)
80+
setBit(array, 100)
81+
assert.Equal(t, uint64(4), countBitsSet(array))
82+
83+
// Invert
84+
count := invert(array)
85+
assert.Equal(t, uint64(128-4), count)
86+
assert.Equal(t, uint64(128-4), countBitsSet(array))
87+
88+
// Previously set bits should now be clear
89+
assert.False(t, getBit(array, 0))
90+
assert.False(t, getBit(array, 10))
91+
assert.False(t, getBit(array, 63))
92+
assert.False(t, getBit(array, 100))
93+
94+
// Previously clear bits should now be set
95+
assert.True(t, getBit(array, 1))
96+
assert.True(t, getBit(array, 50))
97+
assert.True(t, getBit(array, 64))
98+
assert.True(t, getBit(array, 127))
99+
}
100+
101+
func TestBitArrayUnion(t *testing.T) {
102+
// Create two arrays with 192 bits (3 longs)
103+
array1 := make([]uint64, 3)
104+
array2 := make([]uint64, 3)
105+
array3 := make([]uint64, 3)
106+
107+
// Array1: bits 0-9
108+
for i := uint64(0); i < 10; i++ {
109+
setBit(array1, i)
110+
}
111+
112+
// Array2: bits 5-14
113+
for i := uint64(5); i < 15; i++ {
114+
setBit(array2, i)
115+
}
116+
117+
// Array3: even bits 0-18
118+
for i := uint64(0); i < 19; i += 2 {
119+
setBit(array3, i)
120+
}
121+
122+
// Union of array2 and array3
123+
count := unionWith(array2, array3)
124+
// Array2 had bits 5-14 (10 bits)
125+
// Array3 had even bits 0-18 (10 bits: 0,2,4,6,8,10,12,14,16,18)
126+
// Union should have: 5,6,7,8,9,10,11,12,13,14 + 0,2,4,16,18 = 15 bits
127+
assert.Equal(t, uint64(15), count)
128+
assert.Equal(t, uint64(15), countBitsSet(array2))
129+
}
130+
131+
func TestBitArrayIntersection(t *testing.T) {
132+
// Create two arrays
133+
array1 := make([]uint64, 3)
134+
array2 := make([]uint64, 3)
135+
136+
// Array1: bits 0-9
137+
for i := uint64(0); i < 10; i++ {
138+
setBit(array1, i)
139+
}
140+
141+
// Array2: bits 5-14
142+
for i := uint64(5); i < 15; i++ {
143+
setBit(array2, i)
144+
}
145+
146+
// Intersection
147+
count := intersect(array1, array2)
148+
// Overlap is bits 5-9 = 5 bits
149+
assert.Equal(t, uint64(5), count)
150+
assert.Equal(t, uint64(5), countBitsSet(array1))
151+
152+
// Verify the overlap bits
153+
for i := uint64(5); i < 10; i++ {
154+
assert.True(t, getBit(array1, i))
155+
}
156+
// Verify non-overlap bits are cleared
157+
for i := uint64(0); i < 5; i++ {
158+
assert.False(t, getBit(array1, i))
159+
}
160+
}
161+
162+
func TestBitArrayBoundaries(t *testing.T) {
163+
// Test bit operations at long boundaries
164+
array := make([]uint64, 3) // 192 bits
165+
166+
// Test at boundary of first and second long (bit 63-64)
167+
setBit(array, 63)
168+
setBit(array, 64)
169+
assert.True(t, getBit(array, 63))
170+
assert.True(t, getBit(array, 64))
171+
172+
clearBit(array, 63)
173+
assert.False(t, getBit(array, 63))
174+
assert.True(t, getBit(array, 64))
175+
176+
// Test at boundary of second and third long (bit 127-128)
177+
setBit(array, 127)
178+
setBit(array, 128)
179+
assert.True(t, getBit(array, 127))
180+
assert.True(t, getBit(array, 128))
181+
182+
// Test last bit
183+
setBit(array, 191)
184+
assert.True(t, getBit(array, 191))
185+
assert.Equal(t, uint64(4), countBitsSet(array))
186+
}

0 commit comments

Comments
 (0)