Skip to content

Commit 816cbc0

Browse files
Copilotdrawks
authored andcommitted
Replace fnv32 with xxh3 for shard hashing
* refactors duplicated hashing function out into `helper.HashString` * adds benchmark test coverage for `helper.HashString` * replaces `fnv32` with `xxh3` from `github.com/zeebo/xxh3` * replaces modulo division in `cache.GetShard` and `persister.Whisper.store` with a bitmask operation * updates vendored modules * updates carbonserver tests to force explicit ordering to avoid issues from arbitrary insert order changes with hash/shard assignment
1 parent e3432bd commit 816cbc0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+6433
-31
lines changed

cache/cache.go

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ const (
2727
Noop
2828
)
2929

30-
const shardCount = 1024
30+
const shardCount = 1 << 10 // 1024 - an arbitrary sized power of 2
3131

3232
type cacheSettings struct {
3333
maxSize int64
@@ -187,22 +187,9 @@ func (c *Cache) Stat(send helper.StatCallback) {
187187
helper.SendAndSubstractUint32("droppedRealtimeIndex", &c.stat.droppedRealtimeIndex, send)
188188
}
189189

190-
// hash function
191-
// @TODO: try crc32 or something else?
192-
func fnv32(key string) uint32 {
193-
hash := uint32(2166136261)
194-
const prime32 = uint32(16777619)
195-
for i := 0; i < len(key); i++ {
196-
hash *= prime32
197-
hash ^= uint32(key[i])
198-
}
199-
return hash
200-
}
201-
202190
// GetShard returns shard under given key
203191
func (c *Cache) GetShard(key string) *Shard {
204-
// @TODO: remove type casts?
205-
return c.data[uint(fnv32(key))%uint(shardCount)]
192+
return c.data[helper.HashString(key)&(shardCount-1)]
206193
}
207194

208195
func (c *Cache) Get(key string) []points.Point {

carbonserver/cache_index_test.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ func (f *testInfo) checkExpandGlobs(t *testing.T, query string, shdExist bool) {
137137
func (f *testInfo) commonCacheIdxTestHelper(t *testing.T) {
138138
defer os.RemoveAll(f.whisperDir)
139139

140+
// force "sorted" strategy to ensure determinism in tests
141+
f.testCache.SetWriteStrategy("sorted")
142+
140143
for _, filePath := range addFiles {
141144
if err := addFileToSys(filePath, f.whisperDir); err != nil {
142145
t.Fatal(err)
@@ -148,9 +151,11 @@ func (f *testInfo) commonCacheIdxTestHelper(t *testing.T) {
148151
f.forceChan <- struct{}{}
149152
time.Sleep(2 * time.Second)
150153

151-
// add metrics to cache
154+
// add metrics to cache with distinct timestamps
155+
timestamps := []int64{20, 30, 40, 50, 10}
156+
152157
for i, metricName := range addMetrics {
153-
f.testCache.Add(points.OnePoint(metricName, float64(i), 10))
158+
f.testCache.Add(points.OnePoint(metricName, float64(i), timestamps[i]))
154159
}
155160
// check expandblobs for new metrics
156161
f.checkExpandGlobs(t, addMetrics[2], false)
@@ -161,7 +166,8 @@ func (f *testInfo) commonCacheIdxTestHelper(t *testing.T) {
161166
p1, _ := f.testCache.PopNotConfirmed(m1)
162167
f.testCache.Confirm(p1)
163168

164-
if !p1.Eq(points.OnePoint(addMetrics[4], 4, 10)) {
169+
// assert [4] comes first, it has the smallest timestamp
170+
if !p1.Eq(points.OnePoint(addMetrics[4], 4, timestamps[4])) {
165171
fmt.Printf("error - recived wrong point - %v\n", p1)
166172
t.FailNow()
167173
}
@@ -183,8 +189,8 @@ func (f *testInfo) commonCacheIdxTestHelper(t *testing.T) {
183189
p2, _ := f.testCache.PopNotConfirmed(m2)
184190
f.testCache.Confirm(p2)
185191

186-
// queue within cache is sorted by length of metric name
187-
if !p2.Eq(points.OnePoint(addMetrics[0], 0, 10)) {
192+
// assert [0] comes second, it has the second smallest timestamp
193+
if !p2.Eq(points.OnePoint(addMetrics[0], 0, timestamps[0])) {
188194
fmt.Printf("error - recived wrong point - %v\n", p2)
189195
t.FailNow()
190196
}

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ require (
3838
require (
3939
github.com/cespare/xxhash/v2 v2.3.0
4040
github.com/greatroar/blobloom v0.8.1
41+
github.com/zeebo/xxh3 v1.0.2
4142
golang.org/x/net v0.43.0
4243
google.golang.org/protobuf v1.36.11
4344
)
@@ -67,6 +68,7 @@ require (
6768
github.com/jcmturner/gofork v1.7.6 // indirect
6869
github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect
6970
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
71+
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
7072
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
7173
github.com/onsi/ginkgo v1.14.0 // indirect
7274
github.com/onsi/gomega v1.10.1 // indirect

go.sum

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
165165
github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
166166
github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
167167
github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
168+
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
169+
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
168170
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
169171
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
170172
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
@@ -243,6 +245,10 @@ github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0
243245
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
244246
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
245247
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
248+
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
249+
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
250+
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
251+
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
246252
go.einride.tech/aip v0.73.0 h1:bPo4oqBo2ZQeBKo4ZzLb1kxYXTY1ysJhpvQyfuGzvps=
247253
go.einride.tech/aip v0.73.0/go.mod h1:Mj7rFbmXEgw0dq1dqJ7JGMvYCZZVxmGOR3S4ZcV5LvQ=
248254
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=

helper/hash.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package helper
2+
3+
import (
4+
"github.com/zeebo/xxh3"
5+
)
6+
7+
// HashString is a wrapper for a fast 64-bit non-cryptographic hash function,
8+
// typically used for sharding, distribution, and other non-security-critical purposes.
9+
func HashString(s string) uint64 {
10+
return xxh3.HashString(s)
11+
}

helper/hash_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package helper
2+
3+
import (
4+
"fmt"
5+
"runtime"
6+
"testing"
7+
)
8+
9+
func BenchmarkHashString(b *testing.B) {
10+
for _, n := range []int{16, 17, 32, 33, 64, 65, 96, 97, 128, 129, 240, 241} {
11+
b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
12+
var acc uint64
13+
d := string(make([]byte, n))
14+
15+
b.SetBytes(int64(n))
16+
b.ResetTimer()
17+
for i := 0; i < b.N; i++ {
18+
acc = HashString(d)
19+
}
20+
runtime.KeepAlive(acc)
21+
})
22+
}
23+
}

persister/whisper.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525
"github.com/lomik/zapwriter"
2626
)
2727

28-
const storeMutexCount = 32768
28+
const storeMutexCount = 1 << 15 // 32768
2929
const maxPathLength = 4095
3030
const maxFilenameLength = 255
3131

@@ -205,15 +205,6 @@ func (p *Whisper) SetTaggedFn(fn func(string, bool)) {
205205
p.taggedFn = fn
206206
}
207207

208-
func fnv32(key string) uint32 {
209-
hash := uint32(2166136261)
210-
const prime32 = uint32(16777619)
211-
for i := 0; i < len(key); i++ {
212-
hash *= prime32
213-
hash ^= uint32(key[i])
214-
}
215-
return hash
216-
}
217208
func (p *Whisper) registerOutOfOrderWriteLags(points []*whisper.TimeSeriesPoint) {
218209
if !p.prometheus.enabled {
219210
return
@@ -262,7 +253,7 @@ func (p *Whisper) store(metric string) {
262253
// avoid concurrent store same metric
263254
// @TODO: may be flock?
264255
// start := time.Now()
265-
mutexIndex := fnv32(metric) % storeMutexCount
256+
mutexIndex := helper.HashString(metric) & (storeMutexCount - 1)
266257
p.storeMutex[mutexIndex].Lock()
267258
// atomic.AddUint64(&p.blockAvoidConcurrentNs, uint64(time.Since(start).Nanoseconds()))
268259
defer p.storeMutex[mutexIndex].Unlock()

vendor/github.com/klauspost/cpuid/v2/.gitignore

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml

Lines changed: 74 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt

Lines changed: 35 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)