Skip to content

Commit f88feb0

Browse files
committed
Merge remote-tracking branch 'origin/master' into feat/persist-elements
# Conflicts: # bbloom.go # bbloom_test.go
2 parents a57d2a6 + e882fc3 commit f88feb0

File tree

3 files changed

+198
-19
lines changed

3 files changed

+198
-19
lines changed

bbloom.go

Lines changed: 40 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -86,13 +86,14 @@ func New(params ...float64) (bloomfilter *Bloom, err error) {
8686
}
8787
size, exponent := getSize(uint64(entries))
8888
bloomfilter = &Bloom{
89-
sizeExp: exponent,
90-
size: size - 1,
91-
setLocs: locs,
92-
shift: 64 - exponent,
93-
bitset: make([]uint64, size>>6),
94-
k0: defaultK0,
95-
k1: defaultK1,
89+
sizeExp: exponent,
90+
size: size - 1,
91+
setLocs: locs,
92+
shift: 64 - exponent,
93+
bitset: make([]uint64, size>>6),
94+
k0: defaultK0,
95+
k1: defaultK1,
96+
hashVersion: 1,
9697
}
9798
return bloomfilter, nil
9899
}
@@ -112,6 +113,8 @@ func New(params ...float64) (bloomfilter *Bloom, err error) {
112113
//
113114
// The params are interpreted the same way as in [New]. Custom keys are
114115
// preserved across [Bloom.JSONMarshal] / [JSONUnmarshal] round-trips.
116+
// Note: custom keys are included in plaintext in the [Bloom.JSONMarshal]
117+
// output, so treat serialized filters as sensitive if the keys are secret.
115118
func NewWithKeys(k0, k1 uint64, params ...float64) (*Bloom, error) {
116119
bf, err := New(params...)
117120
if err != nil {
@@ -136,12 +139,23 @@ func NewWithBoolset(bs []byte, locs uint64) (bloomfilter *Bloom) {
136139
return bloomfilter
137140
}
138141

142+
// NewWithBoolsetAndKeys creates a bloom filter from a pre-existing bitset
143+
// with caller-provided SipHash keys. See [NewWithKeys] for why custom keys
144+
// matter and [NewWithBoolset] for how the bitset is interpreted.
145+
func NewWithBoolsetAndKeys(bs []byte, locs, k0, k1 uint64) (bloomfilter *Bloom) {
146+
bloomfilter = NewWithBoolset(bs, locs)
147+
bloomfilter.k0 = k0
148+
bloomfilter.k1 = k1
149+
return bloomfilter
150+
}
151+
139152
// bloomJSONImExport
140153
// Im/Export structure used by JSONMarshal / JSONUnmarshal
141154
type bloomJSONImExport struct {
142155
FilterSet []byte
143156
SetLocs uint64
144157
Elements *uint64 `json:"Elements,omitempty"`
158+
Version uint8 `json:"Version,omitempty"`
145159
K0 *uint64 `json:"K0,omitempty"`
146160
K1 *uint64 `json:"K1,omitempty"`
147161
}
@@ -157,8 +171,9 @@ type Bloom struct {
157171
setLocs uint64
158172
shift uint64
159173

160-
content uint64
161-
k0, k1 uint64 // SipHash keys
174+
content uint64
175+
k0, k1 uint64 // SipHash keys
176+
hashVersion uint8 // 0 = legacy, 1 = l|=1 fix (issue #11)
162177
}
163178

164179
// ElementsAdded returns the element counter. The counter is incremented by
@@ -292,10 +307,13 @@ func (bl *Bloom) isSet(idx uint64) bool {
292307
func (bl *Bloom) marshal() bloomJSONImExport {
293308
bloomImEx := bloomJSONImExport{}
294309
bloomImEx.SetLocs = uint64(bl.setLocs)
295-
bloomImEx.Elements = &bl.content
310+
elements := bl.content
311+
bloomImEx.Elements = &elements
312+
bloomImEx.Version = bl.hashVersion
296313
if bl.k0 != defaultK0 || bl.k1 != defaultK1 {
297-
bloomImEx.K0 = &bl.k0
298-
bloomImEx.K1 = &bl.k1
314+
k0, k1 := bl.k0, bl.k1
315+
bloomImEx.K0 = &k0
316+
bloomImEx.K1 = &k1
299317
}
300318
bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
301319
for i, w := range bl.bitset {
@@ -335,15 +353,18 @@ func JSONUnmarshal(dbData []byte) (*Bloom, error) {
335353
if err != nil {
336354
return nil, err
337355
}
338-
bf := NewWithBoolset(bloomImEx.FilterSet, bloomImEx.SetLocs)
339-
if bloomImEx.Elements != nil {
340-
bf.content = *bloomImEx.Elements
356+
if (bloomImEx.K0 == nil) != (bloomImEx.K1 == nil) {
357+
return nil, errors.New("both K0 and K1 must be present or both absent")
341358
}
342-
if bloomImEx.K0 != nil {
343-
bf.k0 = *bloomImEx.K0
359+
var bf *Bloom
360+
if bloomImEx.K0 != nil && bloomImEx.K1 != nil {
361+
bf = NewWithBoolsetAndKeys(bloomImEx.FilterSet, bloomImEx.SetLocs, *bloomImEx.K0, *bloomImEx.K1)
362+
} else {
363+
bf = NewWithBoolset(bloomImEx.FilterSet, bloomImEx.SetLocs)
344364
}
345-
if bloomImEx.K1 != nil {
346-
bf.k1 = *bloomImEx.K1
365+
bf.hashVersion = bloomImEx.Version
366+
if bloomImEx.Elements != nil {
367+
bf.content = *bloomImEx.Elements
347368
}
348369
return bf, nil
349370
}

bbloom_test.go

Lines changed: 155 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,22 @@ func TestM_JSON(t *testing.T) {
8383
t.Errorf("FAILED !AddIfNotHas = %v; want %v", cnt2, shallBe)
8484
}
8585
}
86+
87+
func TestSipHashLowAlwaysOdd(t *testing.T) {
88+
bf, err := New(float64(1<<20), float64(7))
89+
if err != nil {
90+
t.Fatal(err)
91+
}
92+
93+
for i := range 10000 {
94+
entry := []byte("entry-" + strconv.Itoa(i))
95+
l, _ := bf.sipHash(entry)
96+
if l%2 == 0 {
97+
t.Fatalf("l is even for entry %q: l=%d", entry, l)
98+
}
99+
}
100+
}
101+
86102
func TestJSON_ElementsRoundTrip(t *testing.T) {
87103
bf, err := New(float64(n*10), float64(7))
88104
if err != nil {
@@ -176,6 +192,39 @@ func TestNewWithKeys(t *testing.T) {
176192
}
177193
}
178194

195+
func TestJSONBackwardCompatV0(t *testing.T) {
196+
// simulate a filter created with the legacy hash (version 0)
197+
bf, err := New(float64(n*10), float64(7))
198+
if err != nil {
199+
t.Fatal(err)
200+
}
201+
bf.hashVersion = 0 // legacy
202+
203+
entries := wordlist1[:1000]
204+
for _, e := range entries {
205+
bf.Add(e)
206+
}
207+
208+
// serialize (will have Version:0 which is omitted from JSON)
209+
data := bf.JSONMarshal()
210+
211+
// deserialize -- should restore version 0 and use legacy hash
212+
bf2, err := JSONUnmarshal(data)
213+
if err != nil {
214+
t.Fatal(err)
215+
}
216+
217+
if bf2.hashVersion != 0 {
218+
t.Fatalf("expected hashVersion 0, got %d", bf2.hashVersion)
219+
}
220+
221+
for _, e := range entries {
222+
if !bf2.Has(e) {
223+
t.Fatalf("v0 filter lost entry %q after JSON round-trip", e)
224+
}
225+
}
226+
}
227+
179228
func TestNewWithKeysJSON(t *testing.T) {
180229
k0 := uint64(0x0123456789abcdef)
181230
k1 := uint64(0xfedcba9876543210)
@@ -209,6 +258,49 @@ func TestNewWithKeysJSON(t *testing.T) {
209258
}
210259
}
211260

261+
func TestJSONRoundTripV1(t *testing.T) {
262+
bf, err := New(float64(n*10), float64(7))
263+
if err != nil {
264+
t.Fatal(err)
265+
}
266+
267+
entries := wordlist1[:1000]
268+
for _, e := range entries {
269+
bf.Add(e)
270+
}
271+
272+
data := bf.JSONMarshal()
273+
274+
bf2, err := JSONUnmarshal(data)
275+
if err != nil {
276+
t.Fatal(err)
277+
}
278+
279+
if bf2.hashVersion != 1 {
280+
t.Fatalf("expected hashVersion 1, got %d", bf2.hashVersion)
281+
}
282+
283+
for _, e := range entries {
284+
if !bf2.Has(e) {
285+
t.Fatalf("v1 filter lost entry %q after JSON round-trip", e)
286+
}
287+
}
288+
}
289+
290+
func TestJSONUnmarshalPartialKeys(t *testing.T) {
291+
// Only K0 present, K1 absent -- should error, not silently fall back.
292+
jsonK0Only := []byte(`{"FilterSet":"AAAAAAAAAA==","SetLocs":3,"K0":42}`)
293+
if _, err := JSONUnmarshal(jsonK0Only); err == nil {
294+
t.Fatal("expected error for JSON with K0 but no K1")
295+
}
296+
297+
// Only K1 present, K0 absent.
298+
jsonK1Only := []byte(`{"FilterSet":"AAAAAAAAAA==","SetLocs":3,"K1":99}`)
299+
if _, err := JSONUnmarshal(jsonK1Only); err == nil {
300+
t.Fatal("expected error for JSON with K1 but no K0")
301+
}
302+
}
303+
212304
func TestDefaultKeysOmittedFromJSON(t *testing.T) {
213305
bf, err := New(float64(512), float64(3))
214306
if err != nil {
@@ -236,6 +328,69 @@ func TestDefaultKeysOmittedFromJSON(t *testing.T) {
236328
}
237329
}
238330

331+
func TestNewWithBoolsetAndKeys(t *testing.T) {
332+
k0 := uint64(0x0123456789abcdef)
333+
k1 := uint64(0xfedcba9876543210)
334+
entries := wordlist1[:1000]
335+
336+
// Build a reference filter with custom keys and populate it.
337+
ref, err := NewWithKeys(k0, k1, float64(n*10), float64(7))
338+
if err != nil {
339+
t.Fatal(err)
340+
}
341+
for _, e := range entries {
342+
ref.Add(e)
343+
}
344+
345+
// Round-trip through JSON (rawBitset holds the JSON bytes); JSONUnmarshal rebuilds the filter via NewWithBoolsetAndKeys when both keys are present.
346+
rawBitset := ref.JSONMarshal()
347+
refImport, err := JSONUnmarshal(rawBitset)
348+
if err != nil {
349+
t.Fatal(err)
350+
}
351+
352+
t.Run("keys are stored", func(t *testing.T) {
353+
// NewWithBoolsetAndKeys must propagate k0/k1 into the Bloom struct,
354+
// otherwise all lookups will use the wrong hash positions.
355+
got := NewWithBoolsetAndKeys(make([]byte, 64), 7, k0, k1)
356+
if got.k0 != k0 || got.k1 != k1 {
357+
t.Fatalf("keys not set: got k0=%x k1=%x, want k0=%x k1=%x",
358+
got.k0, got.k1, k0, k1)
359+
}
360+
})
361+
362+
t.Run("entries survive bitset round-trip", func(t *testing.T) {
363+
// A filter rebuilt from the same bitset and keys must recognize
364+
// every entry that was added to the original.
365+
for _, e := range entries {
366+
if !refImport.Has(e) {
367+
t.Fatalf("entry %q lost after round-trip", e)
368+
}
369+
}
370+
})
371+
372+
t.Run("wrong keys miss entries", func(t *testing.T) {
373+
// Unmarshal the custom-key filter, then force default keys.
374+
// At least one lookup must miss, proving the keys actually affect hashing.
375+
wrong, err := JSONUnmarshal(ref.JSONMarshal())
376+
if err != nil {
377+
t.Fatal(err)
378+
}
379+
wrong.k0 = defaultK0
380+
wrong.k1 = defaultK1
381+
382+
misses := 0
383+
for _, e := range entries {
384+
if !wrong.Has(e) {
385+
misses++
386+
}
387+
}
388+
if misses == 0 {
389+
t.Fatal("default keys matched every entry; custom keys had no effect")
390+
}
391+
})
392+
}
393+
239394
func TestFillRatio(t *testing.T) {
240395
bf, err := New(float64(512), float64(7))
241396
if err != nil {

sipHash.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -234,6 +234,9 @@ func (bl *Bloom) sipHash(p []byte) (l, h uint64) {
234234
hash := v0 ^ v1 ^ v2 ^ v3
235235
h = hash >> bl.shift
236236
l = hash << bl.shift >> bl.shift
237+
if bl.hashVersion >= 1 {
238+
l |= 1 // force odd so l is coprime to 2^sizeExp (issue #11)
239+
}
237240
return l, h
238241

239242
}

0 commit comments

Comments
 (0)