Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion bbloom.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ func NewWithBoolsetAndKeys(bs []byte, locs, k0, k1 uint64) (bloomfilter *Bloom)
type bloomJSONImExport struct {
FilterSet []byte
SetLocs uint64
Elements *uint64 `json:"Elements,omitempty"`
Version uint8 `json:"Version,omitempty"`
K0 *uint64 `json:"K0,omitempty"`
K1 *uint64 `json:"K1,omitempty"`
Expand All @@ -175,7 +176,11 @@ type Bloom struct {
hashVersion uint8 // 0 = legacy, 1 = l|=1 fix (issue #11)
}

// ElementsAdded reports the current value of the filter's element counter.
//
// [Bloom.Add] bumps the counter on every call, duplicates included, whereas
// [Bloom.AddIfNotHas] bumps it only for entries that were not yet present.
// Prefer [Bloom.AddIfNotHas] when an accurate distinct-element count matters,
// for example when sizing a replacement filter.
//
// The counter is carried through a [Bloom.JSONMarshal] / [JSONUnmarshal]
// round trip.
func (bl *Bloom) ElementsAdded() uint64 {
	added := bl.content
	return added
}
Expand Down Expand Up @@ -302,6 +307,8 @@ func (bl *Bloom) isSet(idx uint64) bool {
func (bl *Bloom) marshal() bloomJSONImExport {
bloomImEx := bloomJSONImExport{}
bloomImEx.SetLocs = uint64(bl.setLocs)
elements := bl.content
bloomImEx.Elements = &elements
bloomImEx.Version = bl.hashVersion
if bl.k0 != defaultK0 || bl.k1 != defaultK1 {
k0, k1 := bl.k0, bl.k1
Expand Down Expand Up @@ -356,6 +363,9 @@ func JSONUnmarshal(dbData []byte) (*Bloom, error) {
bf = NewWithBoolset(bloomImEx.FilterSet, bloomImEx.SetLocs)
}
bf.hashVersion = bloomImEx.Version
if bloomImEx.Elements != nil {
bf.content = *bloomImEx.Elements
}
return bf, nil
}

Expand Down
62 changes: 62 additions & 0 deletions bbloom_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package bbloom

import (
"encoding/json"
"fmt"
"math"
"os"
Expand Down Expand Up @@ -98,6 +99,67 @@ func TestSipHashLowAlwaysOdd(t *testing.T) {
}
}

// TestJSON_ElementsRoundTrip checks that the element counter reported by
// ElementsAdded is written by JSONMarshal and restored by JSONUnmarshal.
func TestJSON_ElementsRoundTrip(t *testing.T) {
	original, err := New(float64(n*10), float64(7))
	if err != nil {
		t.Fatal(err)
	}

	// One Add call per word; the test expects this to amount to exactly n
	// counted elements.
	for _, word := range wordlist1 {
		original.Add(word)
	}
	if got := original.ElementsAdded(); got != uint64(n) {
		t.Fatalf("ElementsAdded = %d, want %d", got, n)
	}

	// Serialize and immediately rebuild a second filter from the payload.
	restored, err := JSONUnmarshal(original.JSONMarshal())
	if err != nil {
		t.Fatal(err)
	}
	if got := restored.ElementsAdded(); got != uint64(n) {
		t.Fatalf("ElementsAdded after round-trip = %d, want %d", got, n)
	}
}

// TestJSON_ElementsBackwardCompat checks that JSON lacking the "Elements"
// field (as older payloads would) still unmarshals: the element counter
// falls back to 0 while the stored bitset keeps answering membership queries.
func TestJSON_ElementsBackwardCompat(t *testing.T) {
	source, err := New(float64(512), float64(3))
	if err != nil {
		t.Fatal(err)
	}
	entries := [][]byte{[]byte("hello"), []byte("world")}
	for _, entry := range entries {
		source.Add(entry)
	}

	// Emulate the old format: decode the current output into a generic map,
	// drop the "Elements" key, and re-encode.
	var fields map[string]any
	if err := json.Unmarshal(source.JSONMarshal(), &fields); err != nil {
		t.Fatal(err)
	}
	delete(fields, "Elements")
	legacyJSON, err := json.Marshal(fields)
	if err != nil {
		t.Fatal(err)
	}

	restored, err := JSONUnmarshal(legacyJSON)
	if err != nil {
		t.Fatal(err)
	}

	// With no Elements field present, the counter is expected to read 0.
	if got := restored.ElementsAdded(); got != 0 {
		t.Fatalf("ElementsAdded from old JSON = %d, want 0", got)
	}

	// The bitset itself must be unaffected by the missing counter.
	for _, entry := range entries {
		if !restored.Has(entry) {
			t.Fatal("filter lost entries after old-format round-trip")
		}
	}
}

func TestNewWithKeys(t *testing.T) {
k0 := uint64(0x0123456789abcdef)
k1 := uint64(0xfedcba9876543210)
Expand Down
Loading