Skip to content

Commit 484e506

Browse files
authored
Merge pull request #154 from proost/feat-varopt-items-union-serialization
feat: var opt items union serialization
2 parents 04edefe + 19cee51 commit 484e506

9 files changed

Lines changed: 571 additions & 43 deletions

internal/family.go

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,19 @@ type family struct {
2323
}
2424

2525
type families struct {
26-
HLL family
27-
Frequency family
28-
Kll family
29-
CPC family
30-
CountMinSketch family
31-
BloomFilter family
32-
Tuple family
33-
Theta family
34-
TDigest family
35-
ReservoirItems family
36-
VarOptItems family
37-
ReservoirUnion family
26+
HLL family
27+
Frequency family
28+
Kll family
29+
CPC family
30+
CountMinSketch family
31+
BloomFilter family
32+
Tuple family
33+
Theta family
34+
TDigest family
35+
ReservoirItems family
36+
VarOptItems family
37+
ReservoirUnion family
38+
VarOptItemsUnion family
3839
}
3940

4041
var FamilyEnum = &families{
@@ -86,4 +87,8 @@ var FamilyEnum = &families{
8687
Id: 12,
8788
MaxPreLongs: 1,
8889
},
90+
VarOptItemsUnion: family{
91+
Id: 14,
92+
MaxPreLongs: 4,
93+
},
8994
}

sampling/varopt_items_sketch_decoder.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ import (
3030

3131
// TODO: Support Stream I/O.
3232

33-
// Decode reconstructs a VarOptItemsSketch from a byte slice using the provided ItemsSerDe implementation for deserialization.
33+
// DecodeVarOptItemsSketch reconstructs a VarOptItemsSketch from a byte slice, using the provided ItemSketchSerde implementation to deserialize the items.
3434
// Returns the reconstructed VarOptItemsSketch or an error if deserialization fails.
35-
func Decode[T any](buffer []byte, serde common.ItemSketchSerde[T]) (*VarOptItemsSketch[T], error) {
35+
func DecodeVarOptItemsSketch[T any](buffer []byte, serde common.ItemSketchSerde[T]) (*VarOptItemsSketch[T], error) {
3636
if len(buffer) < 8 {
3737
return nil, errors.New("data too short")
3838
}

sampling/varopt_items_sketch_serialization_test.go

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ func TestVarOptItemsSketchJavaCompat(t *testing.T) {
102102
}
103103
require.NoError(t, err)
104104

105-
sketch, err := Decode[int64](data, common.ItemSketchLongSerDe{})
105+
sketch, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
106106
require.NoError(t, err)
107107
assert.Equal(t, n == 0, sketch.IsEmpty())
108108
assert.Equal(t, 32, sketch.K())
@@ -129,7 +129,7 @@ func TestVarOptItemsSketchJavaCompat(t *testing.T) {
129129
}
130130
require.NoError(t, err)
131131

132-
sketch, err := Decode[string](data, common.ItemSketchStringSerDe{})
132+
sketch, err := DecodeVarOptItemsSketch[string](data, common.ItemSketchStringSerDe{})
133133
require.NoError(t, err)
134134
assert.False(t, sketch.IsEmpty())
135135
assert.Equal(t, 1024, sketch.K())
@@ -155,7 +155,7 @@ func TestVarOptItemsSketchJavaCompat(t *testing.T) {
155155
}
156156
require.NoError(t, err)
157157

158-
sketch, err := Decode[int64](data, common.ItemSketchLongSerDe{})
158+
sketch, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
159159
require.NoError(t, err)
160160
assert.False(t, sketch.IsEmpty())
161161
assert.Equal(t, 1024, sketch.K())
@@ -189,7 +189,7 @@ func TestVarOptItemsSketchCppCompat(t *testing.T) {
189189
}
190190
require.NoError(t, err)
191191

192-
sketch, err := Decode[int64](data, common.ItemSketchLongSerDe{})
192+
sketch, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
193193
require.NoError(t, err)
194194
assert.Equal(t, n == 0, sketch.IsEmpty())
195195
assert.Equal(t, 32, sketch.K())
@@ -216,7 +216,7 @@ func TestVarOptItemsSketchCppCompat(t *testing.T) {
216216
}
217217
require.NoError(t, err)
218218

219-
sketch, err := Decode[string](data, common.ItemSketchStringSerDe{})
219+
sketch, err := DecodeVarOptItemsSketch[string](data, common.ItemSketchStringSerDe{})
220220
require.NoError(t, err)
221221
assert.False(t, sketch.IsEmpty())
222222
assert.Equal(t, 1024, sketch.K())
@@ -242,7 +242,7 @@ func TestVarOptItemsSketchCppCompat(t *testing.T) {
242242
}
243243
require.NoError(t, err)
244244

245-
sketch, err := Decode[int64](data, common.ItemSketchLongSerDe{})
245+
sketch, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
246246
require.NoError(t, err)
247247
assert.False(t, sketch.IsEmpty())
248248
assert.Equal(t, 1024, sketch.K())
@@ -278,7 +278,7 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
278278
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchLongSerDe{})
279279
data[1] = 0
280280

281-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
281+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
282282
require.ErrorContains(t, err, "invalid serialization version: expected 2, got 0")
283283
})
284284

@@ -287,7 +287,7 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
287287
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchLongSerDe{})
288288
data[2] = 0
289289

290-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
290+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
291291
require.ErrorContains(t, err, "invalid family ID: expected 13, got 0")
292292
})
293293

@@ -299,7 +299,7 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
299299
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchLongSerDe{})
300300
data[0] = preLongs
301301

302-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
302+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
303303
require.ErrorContains(t, err, fmt.Sprintf("invalid preamble longs: expected warmup or full, got %d", preLongs))
304304
})
305305
}
@@ -312,39 +312,39 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
312312
data := cloneBytes(source)
313313
data[0] = preambleLongsFull
314314

315-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
315+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
316316
require.ErrorContains(t, err, "invalid preamble longs: expected warmup because n<=k, got 4")
317317
})
318318

319319
t.Run("zero k", func(t *testing.T) {
320320
data := cloneBytes(source)
321321
binary.LittleEndian.PutUint32(data[4:], 0)
322322

323-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
323+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
324324
require.ErrorContains(t, err, "k must be at least 1 and less than 2^31 - 1")
325325
})
326326

327327
t.Run("negative H count", func(t *testing.T) {
328328
data := cloneBytes(source)
329329
binary.LittleEndian.PutUint32(data[16:], math.MaxUint32)
330330

331-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
331+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
332332
require.ErrorContains(t, err, "invalid state in warmup mode: expected n==h, got n=50, h=4294967295")
333333
})
334334

335335
t.Run("negative R count", func(t *testing.T) {
336336
data := cloneBytes(source)
337337
binary.LittleEndian.PutUint32(data[20:], uint32(0xffffff80))
338338

339-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
339+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
340340
require.ErrorContains(t, err, "invalid state in warmup mode: expected r==0, got r=4294967168")
341341
})
342342

343343
t.Run("warmup preamble in full mode", func(t *testing.T) {
344344
data := encodeVarOptItemsSketch(t, createUnweightedVarOptItemsSketch(t, 32, 33), common.ItemSketchLongSerDe{})
345345
data[0] = (data[0] & 0xc0) | preambleLongsWarmup
346346

347-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
347+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
348348
require.ErrorContains(t, err, "invalid preamble longs: expected full because n>k, got 3")
349349
})
350350
})
@@ -356,7 +356,7 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
356356
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchStringSerDe{})
357357
require.Len(t, data, int(preambleLongsEmpty<<3))
358358

359-
loaded, err := Decode[string](data, common.ItemSketchStringSerDe{})
359+
loaded, err := DecodeVarOptItemsSketch[string](data, common.ItemSketchStringSerDe{})
360360
require.NoError(t, err)
361361
assert.Equal(t, int64(0), loaded.N())
362362
assert.Equal(t, 0, loaded.NumSamples())
@@ -373,15 +373,15 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
373373
}
374374
data[3] = 0
375375

376-
_, err = Decode[string](data, common.ItemSketchStringSerDe{})
376+
_, err = DecodeVarOptItemsSketch[string](data, common.ItemSketchStringSerDe{})
377377
require.ErrorContains(t, err, "invalid preamble longs: expected warmup or full, got 1")
378378
})
379379

380380
t.Run("invalid full mode H plus R count", func(t *testing.T) {
381381
data := encodeVarOptItemsSketch(t, createUnweightedVarOptItemsSketch(t, 32, 33), common.ItemSketchLongSerDe{})
382382
binary.LittleEndian.PutUint32(data[20:], 0)
383383

384-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
384+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
385385
require.ErrorContains(t, err, "invalid state in full mode: expected h+r==k")
386386
})
387387

@@ -390,23 +390,23 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
390390
data := encodeVarOptItemsSketch(t, createUnweightedVarOptItemsSketch(t, 32, 33), common.ItemSketchLongSerDe{})
391391
binary.LittleEndian.PutUint64(data[24:], math.Float64bits(0))
392392

393-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
393+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
394394
require.ErrorContains(t, err, "data is corrupt in full mode: invalid R region weight")
395395
})
396396

397397
t.Run("negative", func(t *testing.T) {
398398
data := encodeVarOptItemsSketch(t, createUnweightedVarOptItemsSketch(t, 32, 33), common.ItemSketchLongSerDe{})
399399
binary.LittleEndian.PutUint64(data[24:], math.Float64bits(-1.5))
400400

401-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
401+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
402402
require.ErrorContains(t, err, "data is corrupt in full mode: invalid R region weight")
403403
})
404404

405405
t.Run("nan", func(t *testing.T) {
406406
data := encodeVarOptItemsSketch(t, createUnweightedVarOptItemsSketch(t, 32, 33), common.ItemSketchLongSerDe{})
407407
binary.LittleEndian.PutUint64(data[24:], math.Float64bits(math.NaN()))
408408

409-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
409+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
410410
require.ErrorContains(t, err, "data is corrupt in full mode: invalid R region weight")
411411
})
412412
})
@@ -417,7 +417,7 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
417417
preambleBytes := int(data[0]&0x3f) << 3
418418
binary.LittleEndian.PutUint64(data[preambleBytes:], math.Float64bits(-1.5))
419419

420-
_, err := Decode[int64](data, common.ItemSketchLongSerDe{})
420+
_, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
421421
require.ErrorContains(t, err, "non-positive weight: -1.500000")
422422
})
423423

@@ -426,11 +426,11 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
426426
sketch := createUnweightedVarOptItemsSketch(t, 100, 10)
427427
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchLongSerDe{})
428428

429-
loaded, err := Decode[int64](data, common.ItemSketchLongSerDe{})
429+
loaded, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
430430
require.NoError(t, err)
431431
assertVarOptItemsSketchEqual(t, sketch, loaded)
432432

433-
_, err = Decode[int64](data[:len(data)-1], common.ItemSketchLongSerDe{})
433+
_, err = DecodeVarOptItemsSketch[int64](data[:len(data)-1], common.ItemSketchLongSerDe{})
434434
require.ErrorContains(t, err, "unexpected EOF")
435435
})
436436

@@ -439,11 +439,11 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
439439
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchLongSerDe{})
440440
require.Equal(t, preambleLongsWarmup, data[0]&0x3f)
441441

442-
loaded, err := Decode[int64](data, common.ItemSketchLongSerDe{})
442+
loaded, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
443443
require.NoError(t, err)
444444
assertVarOptItemsSketchEqual(t, sketch, loaded)
445445

446-
_, err = Decode[int64](data[:len(data)-1000], common.ItemSketchLongSerDe{})
446+
_, err = DecodeVarOptItemsSketch[int64](data[:len(data)-1000], common.ItemSketchLongSerDe{})
447447
require.ErrorContains(t, err, "unexpected EOF")
448448
})
449449

@@ -470,11 +470,11 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
470470
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchLongSerDe{})
471471
require.Equal(t, preambleLongsFull, data[0]&0x3f)
472472

473-
loaded, err := Decode[int64](data, common.ItemSketchLongSerDe{})
473+
loaded, err := DecodeVarOptItemsSketch[int64](data, common.ItemSketchLongSerDe{})
474474
require.NoError(t, err)
475475
assertVarOptItemsSketchEqual(t, sketch, loaded)
476476

477-
_, err = Decode[int64](data[:len(data)-100], common.ItemSketchLongSerDe{})
477+
_, err = DecodeVarOptItemsSketch[int64](data[:len(data)-100], common.ItemSketchLongSerDe{})
478478
require.ErrorContains(t, err, "unexpected EOF")
479479
})
480480

@@ -487,11 +487,11 @@ func TestVarOptItemsSketchSerialization(t *testing.T) {
487487
require.NoError(t, sketch.Update("heavy item", 100.0))
488488

489489
data := encodeVarOptItemsSketch(t, sketch, common.ItemSketchStringSerDe{})
490-
loaded, err := Decode[string](data, common.ItemSketchStringSerDe{})
490+
loaded, err := DecodeVarOptItemsSketch[string](data, common.ItemSketchStringSerDe{})
491491
require.NoError(t, err)
492492
assertVarOptItemsSketchEqual(t, sketch, loaded)
493493

494-
_, err = Decode[string](data[:len(data)-12], common.ItemSketchStringSerDe{})
494+
_, err = DecodeVarOptItemsSketch[string](data[:len(data)-12], common.ItemSketchStringSerDe{})
495495
require.ErrorContains(t, err, "offset out of bounds")
496496
})
497497
})

0 commit comments

Comments
 (0)