From 486115c9fa56ef0f8f3d84102293a6ea9fbf363b Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Sun, 1 Mar 2026 18:52:17 -0500 Subject: [PATCH 01/12] feat(sampling): implement varopt union core and serde --- internal/family.go | 5 + sampling/varopt_items_sketch_serde.go | 306 ++++++++++++++++ sampling/varopt_items_union.go | 460 ++++++++++++++++++++++++ sampling/varopt_items_union_preamble.go | 27 ++ 4 files changed, 798 insertions(+) create mode 100644 sampling/varopt_items_sketch_serde.go create mode 100644 sampling/varopt_items_union.go create mode 100644 sampling/varopt_items_union_preamble.go diff --git a/internal/family.go b/internal/family.go index 45fb4df..bd3f392 100644 --- a/internal/family.go +++ b/internal/family.go @@ -34,6 +34,7 @@ type families struct { TDigest family ReservoirItems family VarOptItems family + VarOptUnion family ReservoirUnion family } @@ -82,6 +83,10 @@ var FamilyEnum = &families{ Id: 13, MaxPreLongs: 4, }, + VarOptUnion: family{ + Id: 14, + MaxPreLongs: 4, + }, ReservoirUnion: family{ Id: 12, MaxPreLongs: 1, diff --git a/sampling/varopt_items_sketch_serde.go b/sampling/varopt_items_sketch_serde.go new file mode 100644 index 0000000..9af2378 --- /dev/null +++ b/sampling/varopt_items_sketch_serde.go @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "encoding/binary" + "errors" + "math" + + "github.com/apache/datasketches-go/internal" +) + +const ( + varOptPreambleLongsEmpty = 1 + varOptPreambleLongsWarmup = 3 + varOptPreambleLongsFull = 4 + + varOptSerVer = 2 + varOptFlagEmpty = 0x04 + varOptFlagGadget = 0x80 +) + +// ToSlice serializes the sketch to a byte slice using Java/C++ compatible preamble layout. +func (s *VarOptItemsSketch[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { + rfBits, err := resizeFactorBitsFor(s.rf) + if err != nil { + return nil, err + } + + flags := byte(0) + if s.marks != nil { + flags |= varOptFlagGadget + } + + preLongs := varOptPreambleLongsEmpty + totalItems := 0 + if s.IsEmpty() { + flags |= varOptFlagEmpty + } else { + totalItems = s.h + s.r + if s.r == 0 { + preLongs = varOptPreambleLongsWarmup + } else { + preLongs = varOptPreambleLongsFull + } + } + + weightsBytes := s.h * 8 + markBytes := 0 + if s.marks != nil { + markBytes = packedBoolBytes(s.h) + } + + var items []T + if totalItems > 0 { + items = make([]T, 0, totalItems) + for i := 0; i < s.h; i++ { + items = append(items, s.data[i]) + } + for i := s.h + 1; i <= s.k && s.r > 0; i++ { + items = append(items, s.data[i]) + } + } + itemsBytes, err := serde.SerializeToBytes(items) + if err != nil { + return nil, err + } + + preambleBytes := preLongs * 8 + out := make([]byte, preambleBytes+weightsBytes+markBytes+len(itemsBytes)) + + out[0] = rfBits | byte(preLongs) + out[1] = varOptSerVer + out[2] = byte(internal.FamilyEnum.VarOptItems.Id) + out[3] = flags + binary.LittleEndian.PutUint32(out[4:], uint32(s.k)) + + if !s.IsEmpty() { + binary.LittleEndian.PutUint64(out[8:], uint64(s.n)) + binary.LittleEndian.PutUint32(out[16:], uint32(s.h)) + binary.LittleEndian.PutUint32(out[20:], uint32(s.r)) + if s.r > 0 { + binary.LittleEndian.PutUint64(out[24:], math.Float64bits(s.totalWeightR)) + } + } + + weightOffset := preambleBytes + if !s.IsEmpty() { + weightOffset = 24 + if s.r > 0 { + weightOffset += 8 + } + } + for i := 0; i < s.h; i++ { + binary.LittleEndian.PutUint64(out[weightOffset+i*8:], math.Float64bits(s.weights[i])) + } + + markOffset := weightOffset + weightsBytes + if s.marks != nil && s.h > 0 { + packBoolsInto(out[markOffset:markOffset+markBytes], s.marks[:s.h]) + } + + copy(out[markOffset+markBytes:], itemsBytes) + return out, nil +} + +// NewVarOptItemsSketchFromSlice deserializes a sketch from bytes. +func NewVarOptItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*VarOptItemsSketch[T], error) { + if len(data) < 8 { + return nil, errors.New("data too short") + } + + preLongs := int(data[0] & 0x3F) + rf, err := resizeFactorFromHeaderByte(data[0]) + if err != nil { + return nil, err + } + ver := data[1] + family := data[2] + flags := data[3] + k := int(binary.LittleEndian.Uint32(data[4:])) + + if ver != varOptSerVer { + return nil, errors.New("unsupported serialization version") + } + if family != byte(internal.FamilyEnum.VarOptItems.Id) { + return nil, errors.New("wrong sketch family") + } + if k < 1 || k > varOptMaxK { + return nil, errors.New("invalid k in serialized varopt sketch") + } + + hasEmptyFlag := (flags & varOptFlagEmpty) != 0 + if preLongs == varOptPreambleLongsEmpty && !hasEmptyFlag { + return nil, errors.New("invalid varopt sketch header: empty preLongs without empty flag") + } + if preLongs != varOptPreambleLongsEmpty && hasEmptyFlag { + return nil, errors.New("invalid varopt sketch header: non-empty preLongs with empty flag") + } + + isEmpty := hasEmptyFlag + isGadget := (flags & varOptFlagGadget) != 0 + if isEmpty { + out, err := NewVarOptItemsSketch[T](uint(k), WithResizeFactor(rf)) + if err != nil { + return nil, err + } + if isGadget { + out.marks = make([]bool, 0, cap(out.data)) + } + return out, nil + } + + if preLongs != varOptPreambleLongsWarmup && preLongs != varOptPreambleLongsFull { + return nil, errors.New("invalid preLongs for non-empty varopt sketch") + } + if len(data) < preLongs*8 { + return nil, errors.New("data too short for varopt preamble") + } + + n := int64(binary.LittleEndian.Uint64(data[8:])) + h := int(binary.LittleEndian.Uint32(data[16:])) + r := int(binary.LittleEndian.Uint32(data[20:])) + if h < 0 || r < 0 { + return nil, errors.New("invalid h/r in serialized varopt sketch") + } + if r > 0 && h+r != k { + return nil, errors.New("invalid varopt sketch state: h + r must equal k in sampling mode") + } + if r == 0 && h > k { + return nil, errors.New("invalid varopt sketch state: h exceeds k in warmup mode") + } + + totalWeightR := 0.0 + if r > 0 { + totalWeightR = math.Float64frombits(binary.LittleEndian.Uint64(data[24:])) + } + + weightOffset := 24 + if r > 0 { + weightOffset += 8 + } + weightsBytes := h * 8 + if len(data) < weightOffset+weightsBytes { + return nil, errors.New("data too short for varopt weights") + } + + hWeights := make([]float64, h) + for i := 0; i < h; i++ { + w := math.Float64frombits(binary.LittleEndian.Uint64(data[weightOffset+i*8:])) + if w <= 0 || math.IsNaN(w) || math.IsInf(w, 0) { + return nil, errors.New("invalid non-positive or non-finite weight in serialized varopt sketch") + } + hWeights[i] = w + } + + markOffset := weightOffset + weightsBytes + hMarks := make([]bool, h) + numMarksInH := uint32(0) + if isGadget && h > 0 { + markBytes := packedBoolBytes(h) + if len(data) < markOffset+markBytes { + return nil, errors.New("data too short for varopt marks") + } + unpackBoolsFrom(data[markOffset:markOffset+markBytes], hMarks) + for _, m := range hMarks { + if m { + numMarksInH++ + } + } + markOffset += markBytes + } + + totalItems := h + r + items, err := serde.DeserializeFromBytes(data[markOffset:], totalItems) + if err != nil { + return nil, err + } + + if r == 0 { + out := &VarOptItemsSketch[T]{ + data: append([]T(nil), items...), + weights: append([]float64(nil), hWeights...), + k: k, + n: n, + h: h, + m: 0, + r: 0, + totalWeightR: 0, + rf: rf, + numMarksInH: numMarksInH, + } + if isGadget { + out.marks = append([]bool(nil), hMarks...) + } + return out, nil + } + + // Sampling mode layout uses an explicit gap slot at index h. + dataOut := make([]T, k+1) + weightsOut := make([]float64, k+1) + marksOut := make([]bool, k+1) + + copy(dataOut[:h], items[:h]) + copy(dataOut[h+1:h+1+r], items[h:]) + copy(weightsOut[:h], hWeights) + for i := h; i <= k; i++ { + weightsOut[i] = -1.0 + } + if isGadget { + copy(marksOut[:h], hMarks) + } + + out := &VarOptItemsSketch[T]{ + data: dataOut, + weights: weightsOut, + k: k, + n: n, + h: h, + m: 0, + r: r, + totalWeightR: totalWeightR, + rf: rf, + numMarksInH: numMarksInH, + } + if isGadget { + out.marks = marksOut + } + return out, nil +} + +func packedBoolBytes(n int) int { + if n <= 0 { + return 0 + } + return (n + 7) / 8 +} + +func packBoolsInto(dst []byte, src []bool) { + for i, b := range src { + if b { + dst[i/8] |= 1 << uint(i%8) + } + } +} + +func unpackBoolsFrom(src []byte, dst []bool) { + for i := range dst { + dst[i] = (src[i/8] & (1 << uint(i%8))) != 0 + } +} diff --git a/sampling/varopt_items_union.go b/sampling/varopt_items_union.go new file mode 100644 index 0000000..236e05c --- /dev/null +++ b/sampling/varopt_items_union.go @@ -0,0 +1,460 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "encoding/binary" + "errors" + "math" + "math/rand" + + "github.com/apache/datasketches-go/internal" +) + +// VarOptItemsUnion provides union operations over VarOpt sketches. +// +// This file implements the phase-1 core API used by callers to merge sketches. +// Full gadget-resolution logic for marked items in H is handled in a follow-up step. +type VarOptItemsUnion[T any] struct { + gadget *VarOptItemsSketch[T] + maxK int + + n int64 + + // outer tau is the largest tau of any input sketch in estimation mode + outerTauNumer float64 + outerTauDenom int64 +} + +func NewVarOptItemsUnion[T any](maxK int) (*VarOptItemsUnion[T], error) { + if maxK < 1 || maxK > varOptMaxK { + return nil, errors.New("maxK must be at least 1 and less than 2^31 - 1") + } + + gadget, err := newVarOptItemsSketchAsGadget[T](maxK) + if err != nil { + return nil, err + } + + return &VarOptItemsUnion[T]{ + gadget: gadget, + maxK: maxK, + }, nil +} + +func (u *VarOptItemsUnion[T]) MaxK() int { + return u.maxK +} + +func (u *VarOptItemsUnion[T]) Reset() error { + gadget, err := newVarOptItemsSketchAsGadget[T](u.maxK) + if err != nil { + return err + } + u.gadget = gadget + u.n = 0 + u.outerTauNumer = 0 + u.outerTauDenom = 0 + return nil +} + +// UpdateSketch merges a VarOpt sketch into this union. +func (u *VarOptItemsUnion[T]) UpdateSketch(sketch *VarOptItemsSketch[T]) error { + if sketch == nil || sketch.N() == 0 { + return nil + } + + u.n += sketch.N() + + // Insert H-region items as unmarked. + for i := 0; i < sketch.h; i++ { + if err := u.gadget.update(sketch.data[i], sketch.weights[i], false); err != nil { + return err + } + } + + // Insert R-region items as marked with corrected weight tau. + if sketch.r > 0 { + tau := sketch.tau() + cumWeight := 0.0 + rSeen := 0 + for i := sketch.h + 1; i <= sketch.k; i++ { + w := tau + // Match Java/C++ weight-correcting iterator semantics: + // correct the last R item to absorb floating-point residual. + if rSeen == sketch.r-1 { + w = sketch.totalWeightR - cumWeight + } else { + cumWeight += tau + } + rSeen++ + if err := u.gadget.update(sketch.data[i], w, true); err != nil { + return err + } + } + u.resolveOuterTau(sketch) + } + + return nil +} + +// Result returns the current union result sketch. +// +// If marked items remain in H, full resolution logic is required and is implemented +// in the next step. For now we fail fast with a clear error. +func (u *VarOptItemsUnion[T]) Result() (*VarOptItemsSketch[T], error) { + if u.gadget == nil || u.gadget.N() == 0 { + return NewVarOptItemsSketch[T](uint(u.maxK)) + } + + if u.gadget.numMarksInH == 0 { + out := copyVarOptItemsSketch(u.gadget, true) + out.n = u.n + return out, nil + } + + // Marked items in H require the full resolution path. + if out, ok, err := u.detectAndHandleSubcaseOfPseudoExact(); err != nil { + return nil, err + } else if ok { + return out, nil + } + return u.migrateMarkedItemsByDecreasingK() +} + +func (u *VarOptItemsUnion[T]) resolveOuterTau(sketch *VarOptItemsSketch[T]) { + if sketch.r == 0 { + return + } + + sketchTau := sketch.tau() + if u.outerTauDenom == 0 { + u.outerTauNumer = sketch.totalWeightR + u.outerTauDenom = int64(sketch.r) + return + } + + outerTau := u.outerTauNumer / float64(u.outerTauDenom) + if sketchTau > outerTau { + u.outerTauNumer = sketch.totalWeightR + u.outerTauDenom = int64(sketch.r) + return + } + if sketchTau == outerTau { + u.outerTauNumer += sketch.totalWeightR + u.outerTauDenom += int64(sketch.r) + } +} + +func newVarOptItemsSketchAsGadget[T any](k int) (*VarOptItemsSketch[T], error) { + sketch, err := NewVarOptItemsSketch[T](uint(k)) + if err != nil { + return nil, err + } + sketch.marks = make([]bool, 0, cap(sketch.data)) + return sketch, nil +} + +func (u *VarOptItemsUnion[T]) detectAndHandleSubcaseOfPseudoExact() (*VarOptItemsSketch[T], bool, error) { + condition1 := u.gadget.r == 0 + condition2 := u.gadget.numMarksInH > 0 + condition3 := int64(u.gadget.numMarksInH) == u.outerTauDenom + + if !(condition1 && condition2 && condition3) { + return nil, false, nil + } + + if u.thereExistUnmarkedHItemsLighterThanTarget(u.gadget.tau()) { + return nil, false, nil + } + + out, err := u.markMovingGadgetCoercer() + if err != nil { + return nil, false, err + } + return out, true, nil +} + +func (u *VarOptItemsUnion[T]) thereExistUnmarkedHItemsLighterThanTarget(threshold float64) bool { + for i := 0; i < u.gadget.h; i++ { + if u.gadget.weights[i] < threshold && !u.gadget.marks[i] { + return true + } + } + return false +} + +func (u *VarOptItemsUnion[T]) markMovingGadgetCoercer() (*VarOptItemsSketch[T], error) { + resultK := u.gadget.h + u.gadget.r + resultH := 0 + resultR := 0 + nextRPos := resultK + + data := make([]T, resultK+1) + weights := make([]float64, resultK+1) + + // Move existing R region items first (weight remains implicit via totalWeightR). + for i := u.gadget.h + 1; i <= u.gadget.k && i < len(u.gadget.data); i++ { + data[nextRPos] = u.gadget.data[i] + weights[nextRPos] = -1.0 + resultR++ + nextRPos-- + } + + transferredWeight := 0.0 + for i := 0; i < u.gadget.h; i++ { + if u.gadget.marks[i] { + data[nextRPos] = u.gadget.data[i] + weights[nextRPos] = -1.0 + transferredWeight += u.gadget.weights[i] + resultR++ + nextRPos-- + } else { + data[resultH] = u.gadget.data[i] + weights[resultH] = u.gadget.weights[i] + resultH++ + } + } + + if resultH+resultR != resultK { + return nil, errors.New("invalid state resolving pseudo-exact union gadget") + } + + // Gap slot. + weights[resultH] = -1.0 + + out := &VarOptItemsSketch[T]{ + data: data, + weights: weights, + k: resultK, + n: u.n, + h: resultH, + m: 0, + r: resultR, + totalWeightR: u.gadget.totalWeightR + transferredWeight, + rf: varOptDefaultResizeFactor, + numMarksInH: 0, + } + + if err := out.heapify(); err != nil { + return nil, err + } + return out, nil +} + +func (u *VarOptItemsUnion[T]) migrateMarkedItemsByDecreasingK() (*VarOptItemsSketch[T], error) { + gcopy := copyVarOptItemsSketch(u.gadget, false) + gcopy.n = u.n + + rCount := gcopy.r + hCount := gcopy.h + k := gcopy.k + + // If non-full and pseudo-exact, set k to sample count so reductions increase tau. + if rCount == 0 && hCount < k { + gcopy.k = hCount + } + + if gcopy.k < 2 { + return nil, errors.New("cannot resolve marked items with k < 2") + } + if err := decreaseKBy1(gcopy); err != nil { + return nil, err + } + + for gcopy.numMarksInH > 0 { + if gcopy.k < 2 { + return nil, errors.New("cannot continue resolving marked items with k < 2") + } + if err := decreaseKBy1(gcopy); err != nil { + return nil, err + } + } + + gcopy.numMarksInH = 0 + gcopy.marks = nil + return gcopy, nil +} + +func decreaseKBy1[T any](s *VarOptItemsSketch[T]) error { + if s.k <= 1 { + return errors.New("cannot decrease k below 1 in union") + } + + switch { + case s.h == 0 && s.r == 0: + s.k-- + return nil + case s.h > 0 && s.r == 0: + s.k-- + if s.h > s.k { + return s.transitionFromWarmup() + } + return nil + case s.h > 0 && s.r > 0: + oldGapIdx := s.h + oldFinalRIdx := (s.h + 1 + s.r) - 1 + s.swap(oldFinalRIdx, oldGapIdx) + + pulledIdx := s.h - 1 + pulledItem := s.data[pulledIdx] + pulledWeight := s.weights[pulledIdx] + pulledMark := s.marks[pulledIdx] + + if pulledMark { + s.numMarksInH-- + } + s.weights[pulledIdx] = -1.0 + + s.h-- + s.k-- + s.n-- + return s.update(pulledItem, pulledWeight, pulledMark) + case s.h == 0 && s.r > 0: + if s.r < 2 { + return errors.New("invalid pure-reservoir state while decreasing k") + } + rIdxToDelete := 1 + rand.Intn(s.r) + rightmostRIdx := (1 + s.r) - 1 + s.swap(rIdxToDelete, rightmostRIdx) + s.weights[rightmostRIdx] = -1.0 + + s.k-- + s.r-- + return nil + default: + return errors.New("invalid sketch state while decreasing k") + } +} + +func copyVarOptItemsSketch[T any](in *VarOptItemsSketch[T], asSketch bool) *VarOptItemsSketch[T] { + dataCopy := make([]T, len(in.data)) + copy(dataCopy, in.data) + + weightsCopy := make([]float64, len(in.weights)) + copy(weightsCopy, in.weights) + + var marksCopy []bool + numMarksInH := in.numMarksInH + if !asSketch && in.marks != nil { + marksCopy = make([]bool, len(in.marks)) + copy(marksCopy, in.marks) + } else { + numMarksInH = 0 + } + + return &VarOptItemsSketch[T]{ + data: dataCopy, + weights: weightsCopy, + marks: marksCopy, + k: in.k, + n: in.n, + h: in.h, + m: in.m, + r: in.r, + totalWeightR: in.totalWeightR, + rf: in.rf, + numMarksInH: numMarksInH, + } +} + +// ToSlice serializes the union state to bytes. +func (u *VarOptItemsUnion[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { + empty := u.gadget == nil || u.gadget.N() == 0 + if empty { + out := make([]byte, varOptUnionPreambleLongsEmpty*8) + out[0] = byte(varOptUnionPreambleLongsEmpty) + out[1] = varOptUnionSerVer + out[2] = byte(internal.FamilyEnum.VarOptUnion.Id) + out[3] = varOptUnionFlagEmpty + binary.LittleEndian.PutUint32(out[4:], uint32(u.maxK)) + return out, nil + } + + gadgetBytes, err := u.gadget.ToSlice(serde) + if err != nil { + return nil, err + } + + preBytes := varOptUnionPreambleLongsNonEmpty * 8 + out := make([]byte, preBytes+len(gadgetBytes)) + out[0] = byte(varOptUnionPreambleLongsNonEmpty) + out[1] = varOptUnionSerVer + out[2] = byte(internal.FamilyEnum.VarOptUnion.Id) + out[3] = 0 + binary.LittleEndian.PutUint32(out[4:], uint32(u.maxK)) + binary.LittleEndian.PutUint64(out[8:], uint64(u.n)) + binary.LittleEndian.PutUint64(out[16:], math.Float64bits(u.outerTauNumer)) + binary.LittleEndian.PutUint64(out[24:], uint64(u.outerTauDenom)) + copy(out[preBytes:], gadgetBytes) + return out, nil +} + +// NewVarOptItemsUnionFromSlice deserializes union state from bytes. +func NewVarOptItemsUnionFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*VarOptItemsUnion[T], error) { + if len(data) < 8 { + return nil, errors.New("data too short") + } + preLongs := int(data[0] & 0x3F) + ver := data[1] + family := data[2] + flags := data[3] + maxK := int(binary.LittleEndian.Uint32(data[4:])) + + if ver != varOptUnionSerVer { + return nil, errors.New("unsupported serialization version") + } + if family != byte(internal.FamilyEnum.VarOptUnion.Id) { + return nil, errors.New("wrong sketch family") + } + if preLongs != varOptUnionPreambleLongsEmpty && preLongs != varOptUnionPreambleLongsNonEmpty { + return nil, errors.New("invalid preLongs for varopt union") + } + + union, err := NewVarOptItemsUnion[T](maxK) + if err != nil { + return nil, err + } + + hasEmptyFlag := (flags & varOptUnionFlagEmpty) != 0 + if preLongs == varOptUnionPreambleLongsEmpty && !hasEmptyFlag { + return nil, errors.New("invalid varopt union header: empty preLongs without empty flag") + } + if preLongs != varOptUnionPreambleLongsEmpty && hasEmptyFlag { + return nil, errors.New("invalid varopt union header: non-empty preLongs with empty flag") + } + + if hasEmptyFlag { + return union, nil + } + + if len(data) < varOptUnionPreambleLongsNonEmpty*8 { + return nil, errors.New("data too short for non-empty varopt union") + } + + union.n = int64(binary.LittleEndian.Uint64(data[8:])) + union.outerTauNumer = math.Float64frombits(binary.LittleEndian.Uint64(data[16:])) + union.outerTauDenom = int64(binary.LittleEndian.Uint64(data[24:])) + + gadget, err := NewVarOptItemsSketchFromSlice[T](data[varOptUnionPreambleLongsNonEmpty*8:], serde) + if err != nil { + return nil, err + } + union.gadget = gadget + return union, nil +} diff --git a/sampling/varopt_items_union_preamble.go b/sampling/varopt_items_union_preamble.go new file mode 100644 index 0000000..ae2f1eb --- /dev/null +++ b/sampling/varopt_items_union_preamble.go @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +// VarOpt union serialization constants aligned with Java/C++ preamble definitions. +const ( + varOptUnionPreambleLongsEmpty = 1 + varOptUnionPreambleLongsNonEmpty = 4 + varOptUnionSerVer = 2 + varOptUnionFlagEmpty = 0x04 +) + From 18ed869c0202c0986c33dcce94a9de871be7de93 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Sun, 1 Mar 2026 18:52:24 -0500 Subject: [PATCH 02/12] test(sampling): add varopt union and serde unit tests --- sampling/varopt_items_serde_test.go | 143 +++++++++++++++++++++++++++ sampling/varopt_items_union_test.go | 145 ++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+) create mode 100644 sampling/varopt_items_serde_test.go create mode 100644 sampling/varopt_items_union_test.go diff --git a/sampling/varopt_items_serde_test.go b/sampling/varopt_items_serde_test.go new file mode 100644 index 0000000..b3ca707 --- /dev/null +++ b/sampling/varopt_items_serde_test.go @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "encoding/binary" + "testing" + + "github.com/apache/datasketches-go/internal" + "github.com/stretchr/testify/assert" +) + +func TestVarOptItemsSketchSerde_EmptyRoundTrip(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + restored, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.True(t, restored.IsEmpty()) + assert.Equal(t, 16, restored.K()) +} + +func TestVarOptItemsSketchSerde_WarmupRoundTrip(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 10; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + restored, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.H(), restored.H()) + assert.Equal(t, sketch.R(), restored.R()) +} + +func TestVarOptItemsSketchSerde_SamplingRoundTrip(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 80; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.Greater(t, sketch.R(), 0) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + restored, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, sketch.K(), restored.K()) + assert.Equal(t, sketch.N(), restored.N()) + assert.Equal(t, sketch.H(), restored.H()) + assert.Equal(t, sketch.R(), restored.R()) + assert.InDelta(t, sketch.totalWeightR, restored.totalWeightR, 1e-9) +} + +func TestVarOptItemsUnionSerde_EmptyRoundTrip(t *testing.T) { + union, err := NewVarOptItemsUnion[int64](16) + assert.NoError(t, err) + + data, err := union.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + restored, err := NewVarOptItemsUnionFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, union.MaxK(), restored.MaxK()) + + result, err := restored.Result() + assert.NoError(t, err) + assert.True(t, result.IsEmpty()) +} + +func TestVarOptItemsUnionSerde_NonEmptyRoundTrip(t *testing.T) { + union, err := NewVarOptItemsUnion[int64](16) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 10; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.NoError(t, union.UpdateSketch(sketch)) + + data, err := union.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + restored, err := NewVarOptItemsUnionFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, union.MaxK(), restored.MaxK()) + + result, err := restored.Result() + assert.NoError(t, err) + assert.Equal(t, sketch.N(), result.N()) + assert.Equal(t, sketch.NumSamples(), result.NumSamples()) +} + +func TestVarOptItemsSketchSerde_HeaderConsistency(t *testing.T) { + // preLongs says empty, but empty flag is not set. + data := make([]byte, 8) + data[0] = byte(varOptPreambleLongsEmpty) + data[1] = varOptSerVer + data[2] = byte(internal.FamilyEnum.VarOptItems.Id) + data[3] = 0 + binary.LittleEndian.PutUint32(data[4:], 8) + + _, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "empty preLongs without empty flag") +} + +func TestVarOptItemsUnionSerde_HeaderConsistency(t *testing.T) { + // preLongs says empty, but empty flag is not set. + data := make([]byte, 8) + data[0] = byte(varOptUnionPreambleLongsEmpty) + data[1] = varOptUnionSerVer + data[2] = byte(internal.FamilyEnum.VarOptUnion.Id) + data[3] = 0 + binary.LittleEndian.PutUint32(data[4:], 8) + + _, err := NewVarOptItemsUnionFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "empty preLongs without empty flag") +} diff --git a/sampling/varopt_items_union_test.go b/sampling/varopt_items_union_test.go new file mode 100644 index 0000000..f066daf --- /dev/null +++ b/sampling/varopt_items_union_test.go @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sampling + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewVarOptItemsUnion(t *testing.T) { + _, err := NewVarOptItemsUnion[int](0) + assert.ErrorContains(t, err, "maxK must be at least 1") + + union, err := NewVarOptItemsUnion[int](16) + assert.NoError(t, err) + assert.Equal(t, 16, union.MaxK()) +} + +func TestVarOptItemsUnion_ResultEmpty(t *testing.T) { + union, err := NewVarOptItemsUnion[int](8) + assert.NoError(t, err) + + result, err := union.Result() + assert.NoError(t, err) + assert.Equal(t, 8, result.K()) + assert.Equal(t, int64(0), result.N()) + assert.True(t, result.IsEmpty()) +} + +func TestVarOptItemsUnion_UpdateSketchExactMode(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int](16) + assert.NoError(t, err) + + for i := 0; i < 8; i++ { + assert.NoError(t, sketch.Update(i, float64(i+1))) + } + + union, err := NewVarOptItemsUnion[int](16) + assert.NoError(t, err) + assert.NoError(t, union.UpdateSketch(sketch)) + + result, err := union.Result() + assert.NoError(t, err) + assert.Equal(t, int64(8), result.N()) + assert.Equal(t, 8, result.NumSamples()) + assert.Equal(t, 8, result.H()) + assert.Equal(t, 0, result.R()) +} + +func TestVarOptItemsUnion_UpdateSketchNilNoop(t *testing.T) { + union, err := NewVarOptItemsUnion[int](8) + assert.NoError(t, err) + + assert.NoError(t, union.UpdateSketch(nil)) + + result, err := union.Result() + assert.NoError(t, err) + assert.Equal(t, int64(0), result.N()) +} + +func TestVarOptItemsUnion_Reset(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int](8) + assert.NoError(t, err) + for i := 0; i < 4; i++ { + assert.NoError(t, sketch.Update(i, float64(i+1))) + } + + union, err := NewVarOptItemsUnion[int](8) + assert.NoError(t, err) + assert.NoError(t, union.UpdateSketch(sketch)) + + assert.NoError(t, union.Reset()) + + result, err := union.Result() + assert.NoError(t, err) + assert.True(t, result.IsEmpty()) + assert.Equal(t, 8, result.K()) +} + +func TestVarOptItemsUnion_ResultPseudoExactMarkedResolution(t *testing.T) { + union, err := NewVarOptItemsUnion[int](8) + assert.NoError(t, err) + + // Construct a pseudo-exact gadget: r=0 with marked items in H. + for i := 1; i <= 4; i++ { + assert.NoError(t, union.gadget.update(i, float64(i), true)) + } + union.n = 4 + union.outerTauDenom = int64(union.gadget.numMarksInH) + + out, err := union.Result() + assert.NoError(t, err) + assert.NotNil(t, out) + assert.Equal(t, int64(4), out.N()) + assert.Equal(t, 4, out.K()) + assert.Equal(t, 0, out.H()) + assert.Equal(t, 4, out.R()) + assert.Nil(t, out.marks) + assert.Equal(t, uint32(0), out.numMarksInH) +} + +func TestVarOptItemsUnion_ResultMigrateMarkedItemsByDecreasingK(t *testing.T) { + union, err := NewVarOptItemsUnion[int](8) + assert.NoError(t, err) + + // Construct a compact, valid estimation-mode gadget with one marked item in H. + // Layout: [H=0] [gap=1] [R=2], with k=2, h=1, r=1. + union.gadget = &VarOptItemsSketch[int]{ + data: []int{100, 0, 1}, + weights: []float64{10.0, -1.0, -1.0}, + marks: []bool{true, false, false}, + k: 2, + n: 10, + h: 1, + m: 0, + r: 1, + totalWeightR: 5.0, + rf: varOptDefaultResizeFactor, + numMarksInH: 1, + } + union.n = 10 + + out, err := union.Result() + assert.NoError(t, err) + assert.NotNil(t, out) + assert.Equal(t, int64(10), out.N()) + assert.Nil(t, out.marks) + assert.Equal(t, uint32(0), out.numMarksInH) +} From c959cc68702a0e2a0d9b3263026fcf28144013e3 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Sun, 1 Mar 2026 18:52:30 -0500 Subject: [PATCH 03/12] test(sampling): add varopt cross-language compatibility coverage --- sampling/compatibility_test.go | 296 ++++++++++++++++++ .../varopt_sketch_long_n0_cpp.sk | Bin 0 -> 8 bytes .../varopt_sketch_long_n1000000_cpp.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n100000_cpp.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n10000_cpp.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n1000_cpp.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n100_cpp.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n10_cpp.sk | Bin 0 -> 184 bytes .../varopt_sketch_long_n1_cpp.sk | Bin 0 -> 40 bytes .../varopt_sketch_long_sampling_cpp.sk | Bin 0 -> 8248 bytes .../varopt_sketch_string_exact_cpp.sk | Bin 0 -> 2916 bytes .../varopt_union_double_sampling_cpp.sk | Bin 0 -> 572 bytes .../varopt_sketch_long_n0_java.sk | Bin 0 -> 8 bytes .../varopt_sketch_long_n1000000_java.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n100000_java.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n10000_java.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n1000_java.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n100_java.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n10_java.sk | Bin 0 -> 184 bytes .../varopt_sketch_long_n1_java.sk | Bin 0 -> 40 bytes .../varopt_sketch_long_sampling_java.sk | Bin 0 -> 8248 bytes .../varopt_sketch_string_exact_java.sk | Bin 0 -> 2916 bytes .../varopt_union_double_sampling_java.sk | Bin 0 -> 572 bytes 23 files changed, 296 insertions(+) create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n0_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n1000000_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n100000_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n10000_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n1000_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n100_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n10_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_n1_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_long_sampling_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_sketch_string_exact_cpp.sk create mode 100644 serialization_test_data/cpp_generated_files/varopt_union_double_sampling_cpp.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n0_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n1000000_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n100000_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n10000_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n1000_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n100_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n10_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_n1_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_long_sampling_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_sketch_string_exact_java.sk create mode 100644 serialization_test_data/java_generated_files/varopt_union_double_sampling_java.sk diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index d51b7c2..2065891 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -247,8 +247,304 @@ func TestGenerateGoBinariesForCompatibilityTesting(t *testing.T) { }) } }) + + // ========== VarOptItemsSketch (8 files) ========== + // Matches Java/C++ VarOptCrossLanguageTest / serialize_for_java scenarios. + t.Run("varopt_sketch_long", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + for _, n := range nArr { + n := n + t.Run(fmt.Sprintf("n%d", n), func(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](32) + assert.NoError(t, err) + for i := 1; i <= n; i++ { + assert.NoError(t, sketch.Update(int64(i), 1.0)) + } + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + assert.NoError(t, os.WriteFile( + fmt.Sprintf("%s/varopt_sketch_long_n%d_go.sk", internal.GoPath, n), + data, + 0644, + )) + }) + } + }) + + // ========== VarOptItemsSketch exact (1 file) ========== + t.Run("varopt_sketch_string_exact", func(t *testing.T) { + sketch, err := NewVarOptItemsSketch[string](1024) + assert.NoError(t, err) + for i := 1; i <= 200; i++ { + assert.NoError(t, sketch.Update(fmt.Sprintf("%d", i), 1000.0/float64(i))) + } + data, err := sketch.ToSlice(StringSerDe{}) + assert.NoError(t, err) + assert.NoError(t, os.WriteFile( + fmt.Sprintf("%s/varopt_sketch_string_exact_go.sk", internal.GoPath), + data, + 0644, + )) + }) + + // ========== VarOptItemsSketch sampling (1 file) ========== + t.Run("varopt_sketch_long_sampling", func(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](1024) + assert.NoError(t, err) + for i := int64(0); i < 2000; i++ { + assert.NoError(t, sketch.Update(i, 1.0)) + } + // Negative heavy items to allow predicate filtering, aligned with Java/C++. + assert.NoError(t, sketch.Update(-1, 100000.0)) + assert.NoError(t, sketch.Update(-2, 110000.0)) + assert.NoError(t, sketch.Update(-3, 120000.0)) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + assert.NoError(t, os.WriteFile( + fmt.Sprintf("%s/varopt_sketch_long_sampling_go.sk", internal.GoPath), + data, + 0644, + )) + }) + + // ========== VarOptItemsUnion sampling (1 file) ========== + t.Run("varopt_union_double_sampling", func(t *testing.T) { + const ( + kSmall = 16 + kMax = 128 + n1 = 32 + n2 = 64 + ) + + // Small-k sketch in sampling mode. + sketch1, err := NewVarOptItemsSketch[float64](kSmall) + assert.NoError(t, err) + for i := 0; i < n1; i++ { + assert.NoError(t, sketch1.Update(float64(i), 1.0)) + } + assert.NoError(t, sketch1.Update(-1.0, float64(n1*n1))) // negative heavy item + + // Another sketch with different n to yield a different implicit per-item weight. + sketch2, err := NewVarOptItemsSketch[float64](kSmall) + assert.NoError(t, err) + for i := 0; i < n2; i++ { + assert.NoError(t, sketch2.Update(float64(i), 1.0)) + } + + union, err := NewVarOptItemsUnion[float64](kMax) + assert.NoError(t, err) + assert.NoError(t, union.UpdateSketch(sketch1)) + assert.NoError(t, union.UpdateSketch(sketch2)) + + data, err := union.ToSlice(Float64SerDe{}) + assert.NoError(t, err) + assert.NoError(t, os.WriteFile( + fmt.Sprintf("%s/varopt_union_double_sampling_go.sk", internal.GoPath), + data, + 0644, + )) + }) +} + +func TestVarOptItemsSketch_JavaCompat(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + for _, n := range nArr { + t.Run(fmt.Sprintf("long_n%d", n), func(t *testing.T) { + path := filepath.Join(internal.JavaPath, fmt.Sprintf("varopt_sketch_long_n%d_java.sk", n)) + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 32, sketch.K()) + assert.Equal(t, int64(n), sketch.N()) + assert.Equal(t, min(n, 32), sketch.NumSamples()) + }) + } + + t.Run("string_exact", func(t *testing.T) { + path := filepath.Join(internal.JavaPath, "varopt_sketch_string_exact_java.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[string](data, StringSerDe{}) + assert.NoError(t, err) + assert.Equal(t, 1024, sketch.K()) + assert.Equal(t, int64(200), sketch.N()) + assert.Equal(t, 200, sketch.NumSamples()) + + ss, err := sketch.EstimateSubsetSum(func(_ string) bool { return true }) + assert.NoError(t, err) + weight := 0.0 + for i := 1; i <= 200; i++ { + weight += 1000.0 / float64(i) + } + assert.InDelta(t, weight, ss.TotalSketchWeight, 1e-9) + }) + + t.Run("long_sampling", func(t *testing.T) { + path := filepath.Join(internal.JavaPath, "varopt_sketch_long_sampling_java.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 1024, sketch.K()) + assert.Equal(t, int64(2003), sketch.N()) + assert.Equal(t, 1024, sketch.NumSamples()) + + ssAll, err := sketch.EstimateSubsetSum(func(_ int64) bool { return true }) + assert.NoError(t, err) + assert.InDelta(t, 332000.0, ssAll.TotalSketchWeight, 1e-9) + + ssNeg, err := sketch.EstimateSubsetSum(func(v int64) bool { return v < 0 }) + assert.NoError(t, err) + assert.InDelta(t, 330000.0, ssNeg.Estimate, 1e-9) + + ssNonNeg, err := sketch.EstimateSubsetSum(func(v int64) bool { return v >= 0 }) + assert.NoError(t, err) + assert.InDelta(t, 2000.0, ssNonNeg.Estimate, 1e-9) + }) } +func TestVarOptItemsSketch_CppCompat(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + for _, n := range nArr { + t.Run(fmt.Sprintf("long_n%d", n), func(t *testing.T) { + path := filepath.Join(internal.CppPath, fmt.Sprintf("varopt_sketch_long_n%d_cpp.sk", n)) + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("CPP file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 32, sketch.K()) + assert.Equal(t, int64(n), sketch.N()) + assert.Equal(t, min(n, 32), sketch.NumSamples()) + }) + } + + t.Run("string_exact", func(t *testing.T) { + path := filepath.Join(internal.CppPath, "varopt_sketch_string_exact_cpp.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("CPP file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[string](data, StringSerDe{}) + assert.NoError(t, err) + assert.Equal(t, 1024, sketch.K()) + assert.Equal(t, int64(200), sketch.N()) + assert.Equal(t, 200, sketch.NumSamples()) + + ss, err := sketch.EstimateSubsetSum(func(_ string) bool { return true }) + assert.NoError(t, err) + weight := 0.0 + for i := 1; i <= 200; i++ { + weight += 1000.0 / float64(i) + } + assert.InDelta(t, weight, ss.TotalSketchWeight, 1e-9) + }) + + t.Run("long_sampling", func(t *testing.T) { + path := filepath.Join(internal.CppPath, "varopt_sketch_long_sampling_cpp.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("CPP file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 1024, sketch.K()) + assert.Equal(t, int64(2003), sketch.N()) + assert.Equal(t, 1024, sketch.NumSamples()) + + ssAll, err := sketch.EstimateSubsetSum(func(_ int64) bool { return true }) + assert.NoError(t, err) + assert.InDelta(t, 332000.0, ssAll.TotalSketchWeight, 1e-9) + + ssNeg, err := sketch.EstimateSubsetSum(func(v int64) bool { return v < 0 }) + assert.NoError(t, err) + assert.InDelta(t, 330000.0, ssNeg.Estimate, 1e-9) + + ssNonNeg, err := sketch.EstimateSubsetSum(func(v int64) bool { return v >= 0 }) + assert.NoError(t, err) + assert.InDelta(t, 2000.0, ssNonNeg.Estimate, 1e-9) + }) +} + +func TestVarOptItemsUnion_JavaCompat(t *testing.T) { + path := filepath.Join(internal.JavaPath, "varopt_union_double_sampling_java.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", path) + return + } + data, err := os.ReadFile(path) + assert.NoError(t, err) + + union, err := NewVarOptItemsUnionFromSlice[float64](data, Float64SerDe{}) + assert.NoError(t, err) + result, err := union.Result() + assert.NoError(t, err) + assert.True(t, result.K() < 128) + assert.Equal(t, int64(97), result.N()) + + ss, err := result.EstimateSubsetSum(func(v float64) bool { return v >= 0 }) + assert.NoError(t, err) + assert.InDelta(t, 96.0, ss.Estimate, 1e-9) + assert.InDelta(t, 96.0+1024.0, ss.TotalSketchWeight, 1e-9) +} + +func TestVarOptItemsUnion_CppCompat(t *testing.T) { + path := filepath.Join(internal.CppPath, "varopt_union_double_sampling_cpp.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("CPP file not found: %s", path) + return + } + data, err := os.ReadFile(path) + assert.NoError(t, err) + + union, err := NewVarOptItemsUnionFromSlice[float64](data, Float64SerDe{}) + assert.NoError(t, err) + result, err := union.Result() + assert.NoError(t, err) + assert.True(t, result.K() < 128) + assert.True(t, result.K() >= 16) + assert.Equal(t, int64(97), result.N()) + + ss, err := result.EstimateSubsetSum(func(v float64) bool { return v >= 0 }) + assert.NoError(t, err) + assert.InDelta(t, 96.0, ss.Estimate, 1e-9) + assert.InDelta(t, 96.0+1024.0, ss.TotalSketchWeight, 1e-9) +} + + // TestSerializationCompatibilityEmpty tests deserialization of an empty sketch. func TestSerializationCompatibilityEmpty(t *testing.T) { filename := filepath.Join(internal.GoPath, "reservoir_items_long_empty_k128_go.sk") diff --git a/serialization_test_data/cpp_generated_files/varopt_sketch_long_n0_cpp.sk b/serialization_test_data/cpp_generated_files/varopt_sketch_long_n0_cpp.sk new file mode 100644 index 0000000000000000000000000000000000000000..e4505fe12adbd93fea1b01467102f5f2af82bf2b GIT binary patch literal 8 PcmX@e#LJ?|oldkPA#} zXF~Y#Y&>9o_Fjni9%Vi-f0HkSfA11En7?-wM0|rA8<>A78A4yNVgmCQ&twGCj?57E zB_HAd^OxH|^xx)!&?+Y&=I%QPvEODggwFj5areWg5OXE;A?jDfLilg>n8D_i+e6Hq N_5fnvpXZ!laR9@;CO-fG literal 0 HcmV?d00001 diff --git a/serialization_test_data/cpp_generated_files/varopt_sketch_long_n100000_cpp.sk b/serialization_test_data/cpp_generated_files/varopt_sketch_long_n100000_cpp.sk new file mode 100644 index 0000000000000000000000000000000000000000..7f496fd28cf96437ab772e95c48b2c09ecb28bda GIT binary patch literal 288 zcmX@Y#LJ+-z`(GejS)yffC7w>^~1rS55k%A6GET!fT)|}2BBryAmUTXApFCg5Ppn1 zgntA=F+BJU5f^?6q5ps3n_*G#LdfsUW?FhB+cs+!#mIqP)`VEBNUIgLm#6kFq snGk+f7=#XGhtM;ZL*!%XApCRE5PE(yM7#v*4>3lF_-03lxF$3_0EoCECIA2c literal 0 HcmV?d00001 diff --git a/serialization_test_data/cpp_generated_files/varopt_sketch_long_n10000_cpp.sk b/serialization_test_data/cpp_generated_files/varopt_sketch_long_n10000_cpp.sk new file mode 100644 index 0000000000000000000000000000000000000000..1337d0fc9f11564310c898d19687b10a5dd4963f GIT binary patch literal 288 zcmX@Y#LJ+-z`!7&4kVyJ0mkS!?2s=8;T#o&&@K`Xx`+!xmr6nCDm4gwk{d$LmVwZJ zlp*v)5eUu7455`&AhfVNgjQ6A(4xW+nwJGa7brkzRwW315vuM2D}-;s3!z;VAhbBt j{%cV4eWCpAauE4IsJI){-q)fK@p7m-0e%Rd3n~r(g3l6_ literal 0 HcmV?d00001 diff --git a/serialization_test_data/cpp_generated_files/varopt_sketch_long_n1000_cpp.sk b/serialization_test_data/cpp_generated_files/varopt_sketch_long_n1000_cpp.sk new file mode 100644 index 0000000000000000000000000000000000000000..1ad81c9b7aaa8dde14bb542637c4183fe95ad3e7 GIT binary patch literal 288 zcmX@Y#LJ+-z`*c=8Aw2Z0*v9%?_kaZ;oO7LAy9P>7$M?2pmZ@*Tmj0z2&E4}&B}uE zuR+au0u?WZ^6x<9mqNuiKRQw&3&WDO$g{s>J6%T>Zu~7LEs5@1m Q;_^^h2TFf{>Yop#0axJ_(*OVf literal 0 HcmV?d00001 diff --git a/serialization_test_data/cpp_generated_files/varopt_sketch_long_n100_cpp.sk b/serialization_test_data/cpp_generated_files/varopt_sketch_long_n100_cpp.sk new file mode 100644 index 0000000000000000000000000000000000000000..f779a78086829f0ccc9373fc2a7f4319bb80df96 GIT binary patch literal 288 zcmXxWJqtik7>41ulfgm^!gdUl(SX6C@mO{LyR!S4O8qe!xI&*nBa&XeL)CQ literal 0 HcmV?d00001 diff --git a/serialization_test_data/cpp_generated_files/varopt_sketch_long_n10_cpp.sk b/serialization_test_data/cpp_generated_files/varopt_sketch_long_n10_cpp.sk new file mode 100644 index 0000000000000000000000000000000000000000..f1ac8c0721d95f007b71540e929cda603a58fde6 GIT binary patch literal 184 zcmX@i#LJ+-z`(!-#2^5pA-oUv)S?+71~D)}X=W(R0;O4@G#iv=hteESniERH+y?+O C?*YnZzi2m`S-~2_sQ6wuUrJ$P(EZI}yru?201GWGG|_ckJsmGd0=w zxUyyrmF<=#`!d3`C?z*Me)EUtdA;7R_v`)n&T)=E>aqBi!D9KxAC3b_ z{CK$lw$PVfu7q4cda9m6bYU!l;TVD6=_^VEz-EQxF7^4c%WFN0+< z3d`X&-}eGO$IsL=!#f9?*LEjbuABK?`GeSBuBu!b|3$ikd3Cyt`Ac+7Jgq;Du7$O6 zt9NbISBL*DKfB&n`StN9y>o)^lz+{b-M9G3bRRmuTt7O_d;opQy*~Uw{G0R; z`ih=B^27OG(oyo|eaA@iQTQH)$yKs9O7E-mSh;aH9w%TTwsvPCorIHcGJb>~V|(|f z(rxv8LQls^!FQQgwAao%XUi2epG(&Xe>r~zuEhVE zucA{h74zyHfuHl!aiaa1=4<$+-Oq*j&FA1oxlQ=D?=GlsGri4xJErq@&^s~Aon7=- zxK7_5`V+YtdMcV9FfZtRL(DVyP58s?ALcKoljLvEN6e%7$LKrs2|8T9FShWWljf&z z4gU=NBZlZZN1w+FI9u-}dY7In^i^Ex`!eb4*j)ccJvaHcFbjXiJbHemf5Y3@!~Fz( zcldweUwFpdk@8jK1KA!1GWa2MDE{ewb~=I1LFdFU%!R%6O?D?YKM$7FlaEf6D?k^- zC$JC}#v&Mw5tzf>qV!YvG;Z;GC@%MycN}-Wgj@z)imocx+x}7WvgT32e*Pf+x9JI3 zL9QY`%0G?K{1~i+m9Yv|#cJ5fyJG3;SOaf){{eSi=Eq?ztc~$FO>Z4~g#CJS4ZUB< zHQ-;O8_}=fLi=0sb^cEIX7oApcx=gUg{`p-cCep7ulKI@^dPz;-3dG6Wb=D^y7Aw@ z?$`rM$@QdrVN*SydUt}q2W|Ot=?uERy#aV#??8GG4#puk6nD5YoF0Mi;JX;9_Zj!! z<2Ry5(_`=h9E;;{p`HnJB7TSyF$p`$FL5WQ_ZH@VhNJyHrrEnLH=Uk=GjSH~m)omn z4*w&5sJpZ1b?)4xC)rzQZxJrW8Rm86&)ILlUm>>=|AVVA1yeB%Kf^QnChGr!UyWWv zuf=t^&wK;D5jWvb-#u6EYwvyBy%c^atbsf2pOs%MpN@aX?Frs5FtD`O9>6 zdspeT@|pB?e8+ok&^PfGW?>V#R#@4+0q)$E`yFe$cZW`*|DxZt|1|#|KM?Zp-~aA< zL+IeRzP;@Hwe&?y^S&_iHhLbTN0{fP^I%>)B%hxyfCVweej&Or7Qt}Lt>;O)DE=D! zZu(2#|1Z6f{A~OZbV)3Qv-OvzC+aOrM`1bKpuaplK)wRqhQ6kEH%6Ps;45;K=_*)2 ze>M6=jKxfQHR#p)YtqHc>FU-ga%x-zzvPr!EA9xK~hCEtmk#qUCQ#cmib-<@tJzk=VBzs~!6({EwA z+;sE4{C-&8_pQfm@^#%ABzM975IWp^7@cfyfu8N=@0v%_qv+!LcG%lyZ;W|odMrH- z$KwfoiF67*kxs%%xX}C~`eU4eQ*jy=cmEadTj9M`^mM>Iarg zh>NhCdrRmD_m1aUC7@5_$mBUY~npJa!Y+*y7?El8YjrNlv~H>D}~d`+MnqxF3t^sic33{Zaf2cu0PzzHjJn@rqoO z{iFOi`RV)<{Ce`+%zxnT^Nv&WX?&SKME)%Q9Hs={OJBrGco|#kDdjya^<|o0HUEiD zHD88HFw6XNcYmQ9(k=De=4a9y?BC%(PhZ6_?|M}J0}S{-fovFpp_u6IP>j}-!#pR3 zVJ_ULzaAE|_bj~>^U0^uoiN?I>)<%tuICmVZa)Ik?0=1A<)1b$hMVk1(#3JHd^cQx z&zQe$uM9okygioVKZ_ORL%kVpjycRLn785&r2Be* zTYduOv)2~C$ByRB?XR`ph2IrxdFLDSVYxE=H~E+K_M+>{mE^z8?}PR1_oMq`jQke+ z1NmY6gWfyRJSTsc+;AL$MdY7yZzO*-z0&^sK_AE95xJUrX5*9mDc+GN|8Mix`H#@U z-T9T@i~iW&QhJ>HG=44l<~V~t6KCP4LE4`~&&31!lj$bjKc8L@q&v;b7xM?xOX+3U zS$+k5NbWiQDt-#4Vj6yiPuM$w>v6UDciw+g?>hc^+<+T#6UOSl1(Iz0V!E&Wjb<9m@Lyw zT{9y^XIkPP-iE1ZZMtkZz^R#tL!FsAb>6TPaO<4sd3yKm`R%~_`Mkg1^L*bE14mgb zgDsZdEOh<9;xG6DXk7uXMXo=V@b5*mTB`Z)Avjdi)mqb4&i^li528=KAAM>LKUWGz zZf#$kd1Mwp|0?W@ni#%cVQvPr?2cU1nV$yLconm!2u9ry8~>bM$iAu26XT_526OoT=L{NSCT6M0_~AlJPk$GyYk_nHuTFF5pOg~9vO;f$ka zNNtYJ`_*9Wbt8Ohyl(+dzFpJXK1J~URdC^C)m{aY3=aTjq1g zkcADF`Fs)_+|>Togw+zCQ-Xvl9N`@M5K_hUSqNqwjk_GfTkGN8Jlt!{F92KOf&=Cm&C+&@2^`;^Y_ z9mo9|2bDKcPL@7PR6i0eNKj*Wxd~(^(Jw@lb~*hefGS%B<_0>T%0~P z?~_qU-2X(#-&uaNbW0-7ArZRU*KInaB=Y_e;Y#Dc4JB;}JeLF*S^V+dEAJ%md=lV< ze`NSh;5o&^oYC+8Gh$yn&nq4_Che`xE{*58#Y3St^L*3bc%GjRHe`9!XZw9TM<3Yt zuWQH%`FNf_s2l4H-#(tJ7s?yj=QXr@xzAqsOD$`uT;=6Cdm*nmZSdw?FVEY{=kW5} z-8H0ryFBA53!(L>!{C;isf9yK-8t8@D~=t z`G|pv1I?Ef%Q2jjXc*UZbo8xXqd70p@G#}YlIpq9oSSGU_&#{4H8pl$6N_5H%bdGf$4d*#997u|ebH=KWRyLU;goAc#{WgUs(+s!$1LA!agX>q%Y z^X3Am**2hPnTvDhfYL)X+BoMn2#MwE z?^oD3?>1uMpi~vMpi~vMpi~vMpi~vMpi*qK~_OlK~_OlK~_OlK~_OlK~_ap zMOH;tMOH;tMOH;tMOH;tMOGjykQK-ZWCgMUS%IuTRv;^o)sWSY)sWSY)sWSY)sWSY z)sWSY)sfYa)sfYa)sfYa)sfYa)sfYaHIOxsHIOxsHIOxsHIOxsHIOxsHIX%uHIX%u zHIX%uHIX%uHIX&VL6{9cEfOsiDVgv|q5P)(HGx0Vw0NDyqK8%KNCpZX#0VCaL38;M#`vn{ZivIWi z|9^PHG)$c@ln=8XYF~p3RNT`60K&j2_5c6? literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/varopt_sketch_long_n0_java.sk b/serialization_test_data/java_generated_files/varopt_sketch_long_n0_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..e4505fe12adbd93fea1b01467102f5f2af82bf2b GIT binary patch literal 8 PcmX@e#LJ?ON2z^D66)gVx zJA`jl$O7hfaB+j_7yBXlb+{n(Vt$DHksKbde9S|Les@)fde5B@zOn~|me7Z&J7)=z z|Jn#K=V>~Gp8^%Xq{|65&+rl#m^MGp3#R85Lfmcd&H?7HJIfBH9ZMndkw+l%iMN=* K;`IU$e*gfD-y>@P literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/varopt_sketch_long_n100000_java.sk b/serialization_test_data/java_generated_files/varopt_sketch_long_n100000_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..7df4970d84b39680f5fc21bf1de7acbf1ef02a67 GIT binary patch literal 288 zcmX@Y#LJ+-z`(GejS)yffC7w>^~2#;GDN)c9z?J*4Z`o}f$;ZVfzZ#pAaqO!M1J;6 z2tQOGLMO*U#5pw}^l5g8czFnfj^c%gueOEoH-CrFUyC3#?-B^z$qJ#H;vnkhLfx%u r2;sL_LTJ0~5Oq0F`;DRQYlZqlUKFD4Cl7=cfturW8=~**a|jIp?gt^9 literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/varopt_sketch_long_n10000_java.sk b/serialization_test_data/java_generated_files/varopt_sketch_long_n10000_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..d120f49b8b31b975a9f0d9a6324cf9d3570ccc50 GIT binary patch literal 288 zcmX@Y#LJ+-z`!7&4kVyJ0mkS!?9jvn;h2j+Xg*E|9Sv18TNJ{$2<3b5Ko|uq5ZZtr zLYJsP=o``y+78N(;)C%2DMRQsC_hyZ!cP=}&<9i?bS0Fp%njiuK*fXDApBAY#ju?b f!sv&(qX){L0`<>Bs5`zu-9H)17lWE}8cG8Igu)b} literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/varopt_sketch_long_n1000_java.sk b/serialization_test_data/java_generated_files/varopt_sketch_long_n1000_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..3a936e3b9ecb2c056cb7d3a716955b718956a302 GIT binary patch literal 288 zcmX@Y#LJ+-z`*c=8Aw2Z0*v9%?{FR}zK9Vbn92m9>!AEDDE}&y-UL-|2Q}jwRGmMR zE`y3Mf%3(n`UIfjf1%<>pmaS{ybEfsD3pH_DxVMK8$jiaq5PFl+6+oJK-F)7@()63 PZm2j5l->uWv!OHq{<9L! literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/varopt_sketch_long_n100_java.sk b/serialization_test_data/java_generated_files/varopt_sketch_long_n100_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..eee954c855e0e6bafaa9b55b26fd3ac698446f22 GIT binary patch literal 288 zcmXxcF$+Lo6ouj2hgeKzx+!E+in5?YR%MVu$xrpe7~b#J={aY)uL(s(7-5G!&Uj&gHBPu?(?xgo7I$OQxe=|*zYMoGsMt{`_vm(nHdQX(?CL`Eowy0p?ga+kiB z24N^Ib&;6BuE1magQw5eyFTCj!#Nm}G;JV|JPIHC(ydb~lPAHbJH=jL4_3h)u zc?nD~SBjU$GFTSNVR;Poj*7gr{grrST%vCbR+Cr95IpDI&9RoeHg=U?@ZKF+AZCrqw?1K)_5-A`w3i+n3q@Q&>~O5Y3lF8MZl_V5_&*W!MM*m-{Prpmpm;`hv_i`X2iGg9n6Hu-Os|aVjAzu&U0W+ z%!TFj<>r6GJeU{n`u?HV8PA%3U*7=VRfvas-*Ejqu&8=5ERHwKm*l0eG_G^6ET6~A z^G4?H$Ul_NlBdBV-c?oouy0FmwtFM7fxH+0i0|i(`83{? zH}pN@3@9(?;Z^zqX2YCnH%f1NS5j*Sal<3$6SDNd_ zuXs-nKE-@5zQT7UHMg2SH#bsV#oSiDS$&{AgK%o%T;d-6!_*tw^H24x>N|NU^Ec$9 z%$<;r;bU=tz7cpt-#6+Lu!#C%_dnr9czrw|pK9(rpU!9COl)RuHlKsZ5k|ImL#eiV=4aXgVoznA~; zLi`jzjc2gF?>NWL<4JRMyyKEQ)_azkyCRRntN5$FI`(|dQ`>h#-%Y%Q74@|?_q+Tq z-oyKNOaDW@&;CcesQP361V?!PSL*TdH0Ecjw^4sro}e#~^v&P@->WC(?etB+6!N>i zCx{1QDom$;hJBsfzbsFuFFlr1@2tO|zCG?_(l<~2rM!Zz<={Cn7ru+Rv5`F` z?WvczPyU|1O}qdv;7&n)TfH#gCc-T)h7GVg1|8)FZ9GGH@#b1Y!4 zC2xhTF&v+`Ul+?_TlIlF&i7S!uf6(b_)^~5-3WO{%xk_A?~E(txy*HwcgF$pp1c?K z#y%MC{?FdqSKbeExif&zw|5X9jG^`n<->3|K2-nOd`TZ;LlKqqU6r76FaGm)Xe3Ly{8z4GOJKR(|6SyoN*Uzrj~|Pj{@Wen)-1J9qg#?CZ`0ensB} zd?fE^-(x<8_ja!e#;aGb_c?!oFELg<;2%(uUr{?7e^L&mqr=AL$>jFSk6v^YFZw58uQ5SO5pwSCFsu zU7g(jKwbok;!^c8Sj7EW=1b{|kuP+oth^kS#|k*qzW42^Bu~z(@KN@*!@=qy>QC(1 zD6b{2jdd^-!?28f7tAg8?gr|a)bq$2$s1!6Y>LgWIbO84C2xhTF&q!LGeLhFc_aBF z-}k9}l=sZi|CxNI{2x35yO|%Oo{TrRWT=bInSzrvmRH|md+mzIy>qw!<+67U#qvwyt4KlxyM$G(Z`2jv;e zPnPHQjv?}C@``*0pNX?@vHi34jl#L=-{W|F(R_(}3wSH_MSQzGKk~)6S00Q@*J0 zai4rY9>9ax)4dMv@8yTpuk*I*gY6x{=io{GF}^cO-zoWNJcDPkhxs6L-|~y{{b|7CvpG) literal 0 HcmV?d00001 diff --git a/serialization_test_data/java_generated_files/varopt_sketch_string_exact_java.sk b/serialization_test_data/java_generated_files/varopt_sketch_string_exact_java.sk new file mode 100644 index 0000000000000000000000000000000000000000..2da7e4ed24dcb8dd19a57c0596d540b71f14a439 GIT binary patch literal 2916 zcmXw&drXye7{`xyH*Xo|ez=^&9YHwn<@a9ncz6IYguqK>l1(Iz0V!E&Wjb<9m@Lyw zT{9y^XIkPP-iE1ZZMtkZz^R#tL!FsAb>6TPaO<4sd3yKm`R%~_`Mkg1^L*bE14mgb zgDsZdEOh<9;xG6DXk7uXMXo=V@b5*mTB`Z)Avjdi)mqb4&i^li528=KAAM>LKUWGz zZf#$kd1Mwp|0?W@ni#%cVQvPr?2cU1nV$yLconm!2u9ry8~>bM$iAu26XT_526OoT=L{NSCT6M0_~AlJPk$GyYk_nHuTFF5pOg~9vO;f$ka zNNtYJ`_*9Wbt8Ohyl(+dzFpJXK1J~URdC^C)m{aY3=aTjq1g zkcADF`Fs)_+|>Togw+zCQ-Xvl9N`@M5K_hUSqNqwjk_GfTkGN8Jlt!{F92KOf&=Cm&C+&@2^`;^Y_ z9mo9|2bDKcPL@7PR6i0eNKj*Wxd~(^(Jw@lb~*hefGS%B<_0>T%0~P z?~_qU-2X(#-&uaNbW0-7ArZRU*KInaB=Y_e;Y#Dc4JB;}JeLF*S^V+dEAJ%md=lV< ze`NSh;5o&^oYC+8Gh$yn&nq4_Che`xE{*58#Y3St^L*3bc%GjRHe`9!XZw9TM<3Yt zuWQH%`FNf_s2l4H-#(tJ7s?yj=QXr@xzAqsOD$`uT;=6Cdm*nmZSdw?FVEY{=kW5} z-8H0ryFBA53!(L>!{C;isf9yK-8t8@D~=t z`G|pv1I?Ef%Q2jjXc*UZbo8xXqd70p@G#}YlIpq9oSSGU_&#{4H8pl$6N_5H%bdGf$4d*#997u|ebH=KWRyLU;goAc#{WgUs(+s!$1LA!agX>q%Y z^X3Am**2hPnTvDhfYL)X+BoMn2#MwE z?^oD3?>1uMpi~vMpi~vMpi~vMpi~vMpi*qK~_OlK~_OlK~_OlK~_OlK~_ap zMOH;tMOH;tMOH;tMOH;tMOGjykQK-ZWCgMUS%IuTRv;^o)sWSY)sWSY)sWSY)sWSY z)sWSY)sfYa)sfYa)sfYa)sfYa)sfYaHIOxsHIOxsHIOxsHIOxsHIOxsHIX%uHIX%u zHIX%uHIX%uHIX&VL6{9cEfOsiDVgv|q5P)(HGx0Vw0NDyqK8%KNCpZX#0VCaL38;M#`vn{ZivIWi z|9^o@LHQC;S_VohKxs=TtpcUhptK2;wt>=yP+AX48$f9WDDZ;vU7$2H pOd1@Ze3<<{5PpL{R9p#4TR>@;dtmlyLBt!Jp|m?x9n5}n2LK97C_DfF literal 0 HcmV?d00001 From 3c0eaea8c01a674750de3c5622c6f4e840bf0bc9 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 11:53:54 -0500 Subject: [PATCH 04/12] sampling: add varopt go compatibility sketches and tests --- sampling/compatibility_test.go | 99 ++++++++++++++++++ .../varopt_sketch_long_n0_go.sk | Bin 0 -> 8 bytes .../varopt_sketch_long_n1000000_go.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n100000_go.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n10000_go.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n1000_go.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n100_go.sk | Bin 0 -> 288 bytes .../varopt_sketch_long_n10_go.sk | Bin 0 -> 184 bytes .../varopt_sketch_long_n1_go.sk | Bin 0 -> 40 bytes .../varopt_sketch_long_sampling_go.sk | Bin 0 -> 8248 bytes .../varopt_sketch_string_exact_go.sk | Bin 0 -> 2916 bytes .../varopt_union_double_sampling_go.sk | Bin 0 -> 572 bytes 12 files changed, 99 insertions(+) create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n0_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n1000000_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n100000_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n10000_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n1000_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n100_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n10_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_n1_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_long_sampling_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_sketch_string_exact_go.sk create mode 100644 serialization_test_data/go_generated_files/varopt_union_double_sampling_go.sk diff --git a/sampling/compatibility_test.go b/sampling/compatibility_test.go index 2065891..87a5725 100644 --- a/sampling/compatibility_test.go +++ b/sampling/compatibility_test.go @@ -499,6 +499,82 @@ func TestVarOptItemsSketch_CppCompat(t *testing.T) { }) } +func TestVarOptItemsSketch_GoCompat(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + for _, n := range nArr { + t.Run(fmt.Sprintf("long_n%d", n), func(t *testing.T) { + path := filepath.Join(internal.GoPath, fmt.Sprintf("varopt_sketch_long_n%d_go.sk", n)) + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Go file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 32, sketch.K()) + assert.Equal(t, int64(n), sketch.N()) + assert.Equal(t, min(n, 32), sketch.NumSamples()) + }) + } + + t.Run("string_exact", func(t *testing.T) { + path := filepath.Join(internal.GoPath, "varopt_sketch_string_exact_go.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Go file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[string](data, StringSerDe{}) + assert.NoError(t, err) + assert.Equal(t, 1024, sketch.K()) + assert.Equal(t, int64(200), sketch.N()) + assert.Equal(t, 200, sketch.NumSamples()) + + ss, err := sketch.EstimateSubsetSum(func(_ string) bool { return true }) + assert.NoError(t, err) + weight := 0.0 + for i := 1; i <= 200; i++ { + weight += 1000.0 / float64(i) + } + assert.InDelta(t, weight, ss.TotalSketchWeight, 1e-9) + }) + + t.Run("long_sampling", func(t *testing.T) { + path := filepath.Join(internal.GoPath, "varopt_sketch_long_sampling_go.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Go file not found: %s", path) + return + } + + data, err := os.ReadFile(path) + assert.NoError(t, err) + + sketch, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 1024, sketch.K()) + assert.Equal(t, int64(2003), sketch.N()) + assert.Equal(t, 1024, sketch.NumSamples()) + + ssAll, err := sketch.EstimateSubsetSum(func(_ int64) bool { return true }) + assert.NoError(t, err) + assert.InDelta(t, 332000.0, ssAll.TotalSketchWeight, 1e-9) + + ssNeg, err := sketch.EstimateSubsetSum(func(v int64) bool { return v < 0 }) + assert.NoError(t, err) + assert.InDelta(t, 330000.0, ssNeg.Estimate, 1e-9) + + ssNonNeg, err := sketch.EstimateSubsetSum(func(v int64) bool { return v >= 0 }) + assert.NoError(t, err) + assert.InDelta(t, 2000.0, ssNonNeg.Estimate, 1e-9) + }) +} + func TestVarOptItemsUnion_JavaCompat(t *testing.T) { path := filepath.Join(internal.JavaPath, "varopt_union_double_sampling_java.sk") if _, err := os.Stat(path); os.IsNotExist(err) { @@ -544,6 +620,29 @@ func TestVarOptItemsUnion_CppCompat(t *testing.T) { assert.InDelta(t, 96.0+1024.0, ss.TotalSketchWeight, 1e-9) } +func TestVarOptItemsUnion_GoCompat(t *testing.T) { + path := filepath.Join(internal.GoPath, "varopt_union_double_sampling_go.sk") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skipf("Go file not found: %s", path) + return + } + data, err := os.ReadFile(path) + assert.NoError(t, err) + + union, err := NewVarOptItemsUnionFromSlice[float64](data, Float64SerDe{}) + assert.NoError(t, err) + result, err := union.Result() + assert.NoError(t, err) + assert.True(t, result.K() < 128) + assert.True(t, result.K() >= 16) + assert.Equal(t, int64(97), result.N()) + + ss, err := result.EstimateSubsetSum(func(v float64) bool { return v >= 0 }) + assert.NoError(t, err) + assert.InDelta(t, 96.0, ss.Estimate, 1e-9) + assert.InDelta(t, 96.0+1024.0, ss.TotalSketchWeight, 1e-9) +} + // TestSerializationCompatibilityEmpty tests deserialization of an empty sketch. func TestSerializationCompatibilityEmpty(t *testing.T) { diff --git a/serialization_test_data/go_generated_files/varopt_sketch_long_n0_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_long_n0_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..e4505fe12adbd93fea1b01467102f5f2af82bf2b GIT binary patch literal 8 PcmX@e#LJ?N<~sxeSpETwsRhEk-acd4d^C&u?W1(}D*f`sy}A=!d2d zdOn0=2w4izzjZbbSp92G2!FOUgnrWmQOIfs@z literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/varopt_sketch_long_n100000_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_long_n100000_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..356b8cef1c6f51e47b310e5da46985b71cce3fdf GIT binary patch literal 288 zcmX@Y#LJ+-z`(GejS)yffC7w>^}~VVK7_M*BZS`R15xL=6T*L$58Lj1EJUUK*U!|LFi(rIU4gJe7{DBI$se84FC^=A+7)b literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/varopt_sketch_long_n10000_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_long_n10000_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..e2276a2a3e7a568099abba823309d63812391baf GIT binary patch literal 288 zcmX@Y#LJ+-z`!7&4kVyJ0mkS!?9j{u;hcuj(ozuqbXEv$BoCqe_#m`|G=yeggwXSN zA@piV2%Rnhq5Ih&vq968A*lPH g6oWZbT@TbgC8&Eu)F2wP6d-g5RKqr?{moE$0JKICegFUf literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/varopt_sketch_long_n1000_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_long_n1000_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..fdf0fbb8b520d0b9f27aec39c809cbdb7c257a5e GIT binary patch literal 288 zcmX@Y#LJ+-z`*c=8Aw2Z0*v9%?;yei;V>~mXbz~l=TJ4Lq4Hax{1Z_5Wl;5xq2k$4 z+5$>{gX)_ERbK{GF8~#v2jx$N($Apu6{z}fsJi1&z8#d#f|`F0Dt`+~dqHVgsQfLc Rc@v=e%Aw-xq3W`sGynpN6oCK$ literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/varopt_sketch_long_n100_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_long_n100_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..00bcb597719b6276e098f3ccccfb9625df7b8fd6 GIT binary patch literal 288 zcmXxWF$+LI9ES0KR}4xvWpWHG7!)$OlEG+EC#B@0`d|#tz4i8cUKNTKk>|ggb|Kar r^o<_+XmQ3K18i}?33rSz#|;%4EU?5FYaB7b6IX2T!U|JdFvAW%iYW+i literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/varopt_sketch_long_n10_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_long_n10_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..f1ac8c0721d95f007b71540e929cda603a58fde6 GIT binary patch literal 184 zcmX@i#LJ+-z`(!-#2^5pA-oUv)S?+71~D)}X=W(R0;O4@G#iv=hteESniERH+y?+O Cw_ft1nS+V$VYLP#ms4M!l)IX4qT31rOpW zy`#0f4YtLO`j*DrlXt-T<~#9k@LT=yv5UMbhT(qo?z}F)<2$~S_re|S_2GRnmUs5& z1NHT==O`bno{JCV!*Dnjvu`9Hg=4($dtSi)2RKeX9y7@&@<}+s{waJa4$(iICw8xm z_s)`6(|1=Mrf;tLJe-fM)pP0(mrvn8@_)zt9_35qOYyq9Kk@eb0!GLe@&op)lCQ>S z`C7gXyP4a-_wYU5wMpLC_iW)?F%lEozh2)+o(Mm*?}_?ub9-7(cmbQ+U(Vjk@;Uq}zh>X>e5iUJyeVJ9 z3){QRyZ7_E`nt(W;(d7)eOd9L{7-y@kMRlSaQ7*HhR^W@zQnBhe{koGJP_;sU*^j% z`tHg4yxpVQaVd`0t#)ho;YkIBs+;>q+?<2TJGm&f5Lcv~K!Keaqc9_>A! z%G2RB{Xskf=9XvTnX#`upYg2tIrj7&*?A7EBj4aXx#gqm%ggg&e*6Lp;5Ge)cwxL` z?_Mk>FODT}fp?VRrLhc_#bCVUUbwjs`7*zsB=Yn;6<_NemGqafrwX5=Z;`p`@)}qZ z|An=%Hip<2hV|t2ajN~>)f>vIx>rEGvAjQT%A4T`{eyi^174LM!4K}_;^mWBBT%z6scevXGFUfo9?~Q%1FZRQ{-jNiKn;)cp&ijV&9DEob zjw5g+j>7!*ejn4vFJck*?%O*--$a~*-|B0JU-{l?>eF!smRHa1&JXtgVLs+RT;}S} zsPB=!1@e;m!}&totG|}|OLI%qm*O&v*I+8~86+ z-P~^dLvf3GZ*!4+8$LES1vi@8slE$$<5=I*TVIs?q5b>$0qiF~6f@8Bn?E8yioe)@ zj337nm`y#q`P1^H@{xE>{vV#m_g$3dmS5&q@G4%z+1`7dFSq9=zlG6w8}HyND;-~X}jf%hf!-DP<^_4ruC zzJxpx&hV}m=90*p*moSe>r1A-MIMfu?7wACX`T;LnM;iy*_W1oio?vO=X-euo)I%) zX3T=E?Y-x2ZEP;@ggNvt;tPC73(RFMuf7-Z{QL_nfCaG-?z6WDpK9MeEG~b_%jjQ^ zrPcGfTbAGBU-GXo1k2$+u{>75;eMZ$cqR9%@TwT)9W~u+VXmfndigT*wdHlNF4n{P z_%$}bl*F*PFU8S&iqRrYVS1P zAEw?7yJHWG_TJ6r1LWPJKL1!0GN!;*)W)JyZEKY+&yU-a+3io`lcgv-n)Tk8gkcPo#?{OvJOY%s4S9mY=Yy5Y-jy>GJjRW*Yt5rofb#3e%cP!!z0YvpY4tJH2`_-b-Ibc`crqFE)SK{;cwWJR8r+bMPB{ z5I%En2+ynUDUYjO!@UCP1rY*;{iO&N#bQ_-FWA!zhq+T)y$qJcqUzc77v>@A<#3gE zmgg1lmAooemRE_Hv$wY2Z#g{W&UfYp=&NO}Hm;M`<@IomJRSbeo`&j;FqeCcd42tp z-ESsujxDeyw!%E-X1LQhFXDy)Ucxbk!Gz-LN~(HrJB}yR%c? zTiyrHtM}un`GAm_)Ia&NhO1xDaX z{25o_YFvXQylWj_kF&fZzxqb`Cftl$@QQau@@?3~{I7fmejp!#adD6OUW~$hSXtiz zeh?2~Z0~4;N8~m5Gv8GYkE@@+lbBY234LeeXYq;r6kd>D#KH2*d@aAq56673{JQ)G z)^+~`M$2#G9sJGwxR~Dyj+Q^rmtOvm|B1`}bM%;ZHa9{3RQ?Q~sD$xJ{ literal 0 HcmV?d00001 diff --git a/serialization_test_data/go_generated_files/varopt_sketch_string_exact_go.sk b/serialization_test_data/go_generated_files/varopt_sketch_string_exact_go.sk new file mode 100644 index 0000000000000000000000000000000000000000..2da7e4ed24dcb8dd19a57c0596d540b71f14a439 GIT binary patch literal 2916 zcmXw&drXye7{`xyH*Xo|ez=^&9YHwn<@a9ncz6IYguqK>l1(Iz0V!E&Wjb<9m@Lyw zT{9y^XIkPP-iE1ZZMtkZz^R#tL!FsAb>6TPaO<4sd3yKm`R%~_`Mkg1^L*bE14mgb zgDsZdEOh<9;xG6DXk7uXMXo=V@b5*mTB`Z)Avjdi)mqb4&i^li528=KAAM>LKUWGz zZf#$kd1Mwp|0?W@ni#%cVQvPr?2cU1nV$yLconm!2u9ry8~>bM$iAu26XT_526OoT=L{NSCT6M0_~AlJPk$GyYk_nHuTFF5pOg~9vO;f$ka zNNtYJ`_*9Wbt8Ohyl(+dzFpJXK1J~URdC^C)m{aY3=aTjq1g zkcADF`Fs)_+|>Togw+zCQ-Xvl9N`@M5K_hUSqNqwjk_GfTkGN8Jlt!{F92KOf&=Cm&C+&@2^`;^Y_ z9mo9|2bDKcPL@7PR6i0eNKj*Wxd~(^(Jw@lb~*hefGS%B<_0>T%0~P z?~_qU-2X(#-&uaNbW0-7ArZRU*KInaB=Y_e;Y#Dc4JB;}JeLF*S^V+dEAJ%md=lV< ze`NSh;5o&^oYC+8Gh$yn&nq4_Che`xE{*58#Y3St^L*3bc%GjRHe`9!XZw9TM<3Yt zuWQH%`FNf_s2l4H-#(tJ7s?yj=QXr@xzAqsOD$`uT;=6Cdm*nmZSdw?FVEY{=kW5} z-8H0ryFBA53!(L>!{C;isf9yK-8t8@D~=t z`G|pv1I?Ef%Q2jjXc*UZbo8xXqd70p@G#}YlIpq9oSSGU_&#{4H8pl$6N_5H%bdGf$4d*#997u|ebH=KWRyLU;goAc#{WgUs(+s!$1LA!agX>q%Y z^X3Am**2hPnTvDhfYL)X+BoMn2#MwE z?^oD3?>1uMpi~vMpi~vMpi~vMpi~vMpi*qK~_OlK~_OlK~_OlK~_OlK~_ap zMOH;tMOH;tMOH;tMOH;tMOGjykQK-ZWCgMUS%IuTRv;^o)sWSY)sWSY)sWSY)sWSY z)sWSY)sfYa)sfYa)sfYa)sfYa)sfYaHIOxsHIOxsHIOxsHIOxsHIOxsHIX%uHIX%u zHIX%uHIX%uHIX&VL6{9cEfOs Date: Tue, 3 Mar 2026 12:22:29 -0500 Subject: [PATCH 05/12] sampling: harden varopt deserialization validation --- sampling/varopt_items_serde_test.go | 67 +++++++++++++++++++++++++++ sampling/varopt_items_sketch_serde.go | 20 ++++++-- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/sampling/varopt_items_serde_test.go b/sampling/varopt_items_serde_test.go index b3ca707..715c2fc 100644 --- a/sampling/varopt_items_serde_test.go +++ b/sampling/varopt_items_serde_test.go @@ -19,12 +19,30 @@ package sampling import ( "encoding/binary" + "math" "testing" "github.com/apache/datasketches-go/internal" "github.com/stretchr/testify/assert" ) +type emptyCorruptingInt64SerDe struct{} + +func (emptyCorruptingInt64SerDe) SerializeToBytes(items []int64) ([]byte, error) { + if len(items) == 0 { + return []byte{0xCA, 0xFE, 0xBA, 0xBE}, nil + } + return Int64SerDe{}.SerializeToBytes(items) +} + +func (emptyCorruptingInt64SerDe) DeserializeFromBytes(data []byte, numItems int) ([]int64, error) { + return Int64SerDe{}.DeserializeFromBytes(data, numItems) +} + +func (emptyCorruptingInt64SerDe) SizeOfItem() int { + return Int64SerDe{}.SizeOfItem() +} + func TestVarOptItemsSketchSerde_EmptyRoundTrip(t *testing.T) { sketch, err := NewVarOptItemsSketch[int64](16) assert.NoError(t, err) @@ -38,6 +56,21 @@ func TestVarOptItemsSketchSerde_EmptyRoundTrip(t *testing.T) { assert.Equal(t, 16, restored.K()) } +func TestVarOptItemsSketchSerde_EmptySketchIgnoresCustomEmptyItemsBytes(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + + data, err := sketch.ToSlice(emptyCorruptingInt64SerDe{}) + assert.NoError(t, err) + assert.Equal(t, 8, len(data)) + + restored, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + assert.True(t, restored.IsEmpty()) + assert.Equal(t, 16, restored.K()) + assert.Equal(t, int64(0), restored.N()) +} + func TestVarOptItemsSketchSerde_WarmupRoundTrip(t *testing.T) { sketch, err := NewVarOptItemsSketch[int64](16) assert.NoError(t, err) @@ -129,6 +162,40 @@ func TestVarOptItemsSketchSerde_HeaderConsistency(t *testing.T) { assert.ErrorContains(t, err, "empty preLongs without empty flag") } +func TestVarOptItemsSketchSerde_FullPreLongsWithZeroRIsInvalid(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 10; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.Equal(t, 0, sketch.R()) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + data[0] = (data[0] & 0xC0) | byte(varOptPreambleLongsFull) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "full preLongs with empty r region") +} + +func TestVarOptItemsSketchSerde_NaNTotalWeightRIsInvalid(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 80; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.Greater(t, sketch.R(), 0) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + binary.LittleEndian.PutUint64(data[24:], math.Float64bits(math.NaN())) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "invalid totalWeightR") +} + func TestVarOptItemsUnionSerde_HeaderConsistency(t *testing.T) { // preLongs says empty, but empty flag is not set. data := make([]byte, 8) diff --git a/sampling/varopt_items_sketch_serde.go b/sampling/varopt_items_sketch_serde.go index 9af2378..f3c1dc4 100644 --- a/sampling/varopt_items_sketch_serde.go +++ b/sampling/varopt_items_sketch_serde.go @@ -76,9 +76,13 @@ func (s *VarOptItemsSketch[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { items = append(items, s.data[i]) } } - itemsBytes, err := serde.SerializeToBytes(items) - if err != nil { - return nil, err + + itemsBytes := []byte(nil) + if totalItems > 0 { + itemsBytes, err = serde.SerializeToBytes(items) + if err != nil { + return nil, err + } } preambleBytes := preLongs * 8 @@ -115,7 +119,9 @@ func (s *VarOptItemsSketch[T]) ToSlice(serde ItemsSerDe[T]) ([]byte, error) { packBoolsInto(out[markOffset:markOffset+markBytes], s.marks[:s.h]) } - copy(out[markOffset+markBytes:], itemsBytes) + if totalItems > 0 { + copy(out[markOffset+markBytes:], itemsBytes) + } return out, nil } @@ -179,6 +185,9 @@ func NewVarOptItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*Va if h < 0 || r < 0 { return nil, errors.New("invalid h/r in serialized varopt sketch") } + if preLongs == varOptPreambleLongsFull && r == 0 { + return nil, errors.New("invalid varopt sketch header: full preLongs with empty r region") + } if r > 0 && h+r != k { return nil, errors.New("invalid varopt sketch state: h + r must equal k in sampling mode") } @@ -189,6 +198,9 @@ func NewVarOptItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*Va totalWeightR := 0.0 if r > 0 { totalWeightR = math.Float64frombits(binary.LittleEndian.Uint64(data[24:])) + if math.IsNaN(totalWeightR) { + return nil, errors.New("invalid totalWeightR in serialized varopt sketch: NaN") + } } weightOffset := 24 From 2da780cef2e4612d152e9ddffa7e0f265424c188 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 12:30:28 -0500 Subject: [PATCH 06/12] sampling: align varopt serde validation and warmup capacity --- sampling/varopt_items_serde_test.go | 2 ++ sampling/varopt_items_sketch_serde.go | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sampling/varopt_items_serde_test.go b/sampling/varopt_items_serde_test.go index 715c2fc..ec78f2c 100644 --- a/sampling/varopt_items_serde_test.go +++ b/sampling/varopt_items_serde_test.go @@ -87,6 +87,8 @@ func TestVarOptItemsSketchSerde_WarmupRoundTrip(t *testing.T) { assert.Equal(t, sketch.N(), restored.N()) assert.Equal(t, sketch.H(), restored.H()) assert.Equal(t, sketch.R(), restored.R()) + assert.Greater(t, cap(restored.data), restored.H()) + assert.Equal(t, cap(restored.data), cap(restored.weights)) } func TestVarOptItemsSketchSerde_SamplingRoundTrip(t *testing.T) { diff --git a/sampling/varopt_items_sketch_serde.go b/sampling/varopt_items_sketch_serde.go index f3c1dc4..f8cba35 100644 --- a/sampling/varopt_items_sketch_serde.go +++ b/sampling/varopt_items_sketch_serde.go @@ -245,9 +245,24 @@ func NewVarOptItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*Va } if r == 0 { + ceilingLgK := math.Log2(float64(internal.CeilPowerOf2(k))) + initialLgSize := startingSubMultiple(int(ceilingLgK), int(rf), minLgArrItems) + warmupCap := adjustedSamplingAllocationSize(k, 1< Date: Tue, 3 Mar 2026 12:44:17 -0500 Subject: [PATCH 07/12] sampling: tighten varopt preamble and state validation --- sampling/varopt_items_serde_test.go | 89 ++++++++++++++++++++++++++- sampling/varopt_items_sketch_serde.go | 34 +++++++--- 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/sampling/varopt_items_serde_test.go b/sampling/varopt_items_serde_test.go index ec78f2c..0f77661 100644 --- a/sampling/varopt_items_serde_test.go +++ b/sampling/varopt_items_serde_test.go @@ -164,7 +164,7 @@ func TestVarOptItemsSketchSerde_HeaderConsistency(t *testing.T) { assert.ErrorContains(t, err, "empty preLongs without empty flag") } -func TestVarOptItemsSketchSerde_FullPreLongsWithZeroRIsInvalid(t *testing.T) { +func TestVarOptItemsSketchSerde_WarmupDataWithFullPreLongsIsInvalid(t *testing.T) { sketch, err := NewVarOptItemsSketch[int64](16) assert.NoError(t, err) for i := int64(1); i <= 10; i++ { @@ -178,7 +178,92 @@ func TestVarOptItemsSketchSerde_FullPreLongsWithZeroRIsInvalid(t *testing.T) { data[0] = (data[0] & 0xC0) | byte(varOptPreambleLongsFull) _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) - assert.ErrorContains(t, err, "full preLongs with empty r region") + assert.ErrorContains(t, err, "n <= k but not in warmup mode") +} + +func TestVarOptItemsSketchSerde_WarmupModeRequiresNEqualsH(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 10; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + binary.LittleEndian.PutUint64(data[8:], uint64(9)) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "warmup mode but n != h") +} + +func TestVarOptItemsSketchSerde_WarmupModeRequiresRZero(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 10; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + binary.LittleEndian.PutUint32(data[20:], uint32(1)) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "warmup mode but r > 0") +} + +func TestVarOptItemsSketchSerde_FullModeRequiresHSumREqualsK(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 80; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.Greater(t, sketch.R(), 0) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + h := binary.LittleEndian.Uint32(data[16:]) + binary.LittleEndian.PutUint32(data[16:], h-1) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "full mode but h + r != k") +} + +func TestVarOptItemsSketchSerde_NGreaterThanKRequiresFullMode(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 80; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.Greater(t, sketch.N(), int64(sketch.K())) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + data[0] = (data[0] & 0xC0) | byte(varOptPreambleLongsWarmup) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "n > k but not in full mode") +} + +func TestVarOptItemsSketchSerde_FullModeRequiresRPositive(t *testing.T) { + sketch, err := NewVarOptItemsSketch[int64](16) + assert.NoError(t, err) + for i := int64(1); i <= 80; i++ { + assert.NoError(t, sketch.Update(i, float64(i))) + } + assert.Greater(t, sketch.R(), 0) + + data, err := sketch.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + + binary.LittleEndian.PutUint32(data[16:], uint32(sketch.K())) + binary.LittleEndian.PutUint32(data[20:], uint32(0)) + + _, err = NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.ErrorContains(t, err, "full mode but r == 0") } func TestVarOptItemsSketchSerde_NaNTotalWeightRIsInvalid(t *testing.T) { diff --git a/sampling/varopt_items_sketch_serde.go b/sampling/varopt_items_sketch_serde.go index f8cba35..6e1d222 100644 --- a/sampling/varopt_items_sketch_serde.go +++ b/sampling/varopt_items_sketch_serde.go @@ -185,21 +185,37 @@ func NewVarOptItemsSketchFromSlice[T any](data []byte, serde ItemsSerDe[T]) (*Va if h < 0 || r < 0 { return nil, errors.New("invalid h/r in serialized varopt sketch") } - if preLongs == varOptPreambleLongsFull && r == 0 { - return nil, errors.New("invalid varopt sketch header: full preLongs with empty r region") + if n < 0 { + return nil, errors.New("invalid n in serialized varopt sketch: negative") } - if r > 0 && h+r != k { - return nil, errors.New("invalid varopt sketch state: h + r must equal k in sampling mode") - } - if r == 0 && h > k { - return nil, errors.New("invalid varopt sketch state: h exceeds k in warmup mode") + + if n <= int64(k) { + if preLongs != varOptPreambleLongsWarmup { + return nil, errors.New("invalid varopt sketch state: n <= k but not in warmup mode") + } + if int64(h) != n { + return nil, errors.New("invalid varopt sketch state: warmup mode but n != h") + } + if r > 0 { + return nil, errors.New("invalid varopt sketch state: warmup mode but r > 0") + } + } else { + if preLongs != varOptPreambleLongsFull { + return nil, errors.New("invalid varopt sketch state: n > k but not in full mode") + } + if h+r != k { + return nil, errors.New("invalid varopt sketch state: full mode but h + r != k") + } + if r == 0 { + return nil, errors.New("invalid varopt sketch state: full mode but r == 0") + } } totalWeightR := 0.0 if r > 0 { totalWeightR = math.Float64frombits(binary.LittleEndian.Uint64(data[24:])) - if math.IsNaN(totalWeightR) { - return nil, errors.New("invalid totalWeightR in serialized varopt sketch: NaN") + if math.IsNaN(totalWeightR) || math.IsInf(totalWeightR, 0) || totalWeightR <= 0 { + return nil, errors.New("invalid totalWeightR in serialized varopt sketch: non-positive or non-finite") } } From 257bcac9d548a8335988f91e90d0b92c01fac1bc Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 12:48:58 -0500 Subject: [PATCH 08/12] sampling: remove duplicate maxK validation in varopt union --- sampling/varopt_items_union.go | 4 ---- sampling/varopt_items_union_test.go | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/sampling/varopt_items_union.go b/sampling/varopt_items_union.go index 236e05c..56217cd 100644 --- a/sampling/varopt_items_union.go +++ b/sampling/varopt_items_union.go @@ -42,10 +42,6 @@ type VarOptItemsUnion[T any] struct { } func NewVarOptItemsUnion[T any](maxK int) (*VarOptItemsUnion[T], error) { - if maxK < 1 || maxK > varOptMaxK { - return nil, errors.New("maxK must be at least 1 and less than 2^31 - 1") - } - gadget, err := newVarOptItemsSketchAsGadget[T](maxK) if err != nil { return nil, err diff --git a/sampling/varopt_items_union_test.go b/sampling/varopt_items_union_test.go index f066daf..7d8c164 100644 --- a/sampling/varopt_items_union_test.go +++ b/sampling/varopt_items_union_test.go @@ -25,7 +25,7 @@ import ( func TestNewVarOptItemsUnion(t *testing.T) { _, err := NewVarOptItemsUnion[int](0) - assert.ErrorContains(t, err, "maxK must be at least 1") + assert.ErrorContains(t, err, "k must be at least 1") union, err := NewVarOptItemsUnion[int](16) assert.NoError(t, err) From 9e1045deabc09c34f8c7fc01a038204b5e88427b Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 12:55:59 -0500 Subject: [PATCH 09/12] sampling: reuse gadget reset in varopt union --- sampling/varopt_items_union.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sampling/varopt_items_union.go b/sampling/varopt_items_union.go index 56217cd..eeecdeb 100644 --- a/sampling/varopt_items_union.go +++ b/sampling/varopt_items_union.go @@ -58,11 +58,16 @@ func (u *VarOptItemsUnion[T]) MaxK() int { } func (u *VarOptItemsUnion[T]) Reset() error { - gadget, err := newVarOptItemsSketchAsGadget[T](u.maxK) - if err != nil { - return err + if u.gadget == nil { + gadget, err := newVarOptItemsSketchAsGadget[T](u.maxK) + if err != nil { + return err + } + u.gadget = gadget + } else { + u.gadget.Reset() } - u.gadget = gadget + u.n = 0 u.outerTauNumer = 0 u.outerTauDenom = 0 From 6accda0babc291b04ef0ad2d8ca0d4d3a3d8d79d Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 13:00:17 -0500 Subject: [PATCH 10/12] sampling: drop varopt union maxK accessor --- sampling/varopt_items_serde_test.go | 4 ++-- sampling/varopt_items_union.go | 4 ---- sampling/varopt_items_union_test.go | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/sampling/varopt_items_serde_test.go b/sampling/varopt_items_serde_test.go index 0f77661..12ce6cd 100644 --- a/sampling/varopt_items_serde_test.go +++ b/sampling/varopt_items_serde_test.go @@ -120,7 +120,7 @@ func TestVarOptItemsUnionSerde_EmptyRoundTrip(t *testing.T) { restored, err := NewVarOptItemsUnionFromSlice[int64](data, Int64SerDe{}) assert.NoError(t, err) - assert.Equal(t, union.MaxK(), restored.MaxK()) + assert.Equal(t, union.maxK, restored.maxK) result, err := restored.Result() assert.NoError(t, err) @@ -143,7 +143,7 @@ func TestVarOptItemsUnionSerde_NonEmptyRoundTrip(t *testing.T) { restored, err := NewVarOptItemsUnionFromSlice[int64](data, Int64SerDe{}) assert.NoError(t, err) - assert.Equal(t, union.MaxK(), restored.MaxK()) + assert.Equal(t, union.maxK, restored.maxK) result, err := restored.Result() assert.NoError(t, err) diff --git a/sampling/varopt_items_union.go b/sampling/varopt_items_union.go index eeecdeb..71c1a37 100644 --- a/sampling/varopt_items_union.go +++ b/sampling/varopt_items_union.go @@ -53,10 +53,6 @@ func NewVarOptItemsUnion[T any](maxK int) (*VarOptItemsUnion[T], error) { }, nil } -func (u *VarOptItemsUnion[T]) MaxK() int { - return u.maxK -} - func (u *VarOptItemsUnion[T]) Reset() error { if u.gadget == nil { gadget, err := newVarOptItemsSketchAsGadget[T](u.maxK) diff --git a/sampling/varopt_items_union_test.go b/sampling/varopt_items_union_test.go index 7d8c164..ddfc898 100644 --- a/sampling/varopt_items_union_test.go +++ b/sampling/varopt_items_union_test.go @@ -29,7 +29,7 @@ func TestNewVarOptItemsUnion(t *testing.T) { union, err := NewVarOptItemsUnion[int](16) assert.NoError(t, err) - assert.Equal(t, 16, union.MaxK()) + assert.Equal(t, 16, union.maxK) } func TestVarOptItemsUnion_ResultEmpty(t *testing.T) { From cb9f7cf80c01c4ba9e6fa6940856b7632b6069eb Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 13:12:42 -0500 Subject: [PATCH 11/12] sampling: add varopt union update scenario tests --- sampling/varopt_items_union_test.go | 111 ++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/sampling/varopt_items_union_test.go b/sampling/varopt_items_union_test.go index ddfc898..196da6c 100644 --- a/sampling/varopt_items_union_test.go +++ b/sampling/varopt_items_union_test.go @@ -63,6 +63,117 @@ func TestVarOptItemsUnion_UpdateSketchExactMode(t *testing.T) { assert.Equal(t, 0, result.R()) } +func TestVarOptItemsUnion_UpdateSketchSamplingWithExtremeHeavyItem(t *testing.T) { + const k = 16 + + sketch1, err := NewVarOptItemsSketch[int](uint(k)) + assert.NoError(t, err) + for i := 0; i < 500; i++ { + assert.NoError(t, sketch1.Update(i, 1.0)) + } + assert.NoError(t, sketch1.Update(-1, 1e12)) + assert.Greater(t, sketch1.R(), 0) + + sketch2, err := NewVarOptItemsSketch[int](uint(k)) + assert.NoError(t, err) + for i := 1000; i < 1500; i++ { + assert.NoError(t, sketch2.Update(i, 1.0)) + } + assert.Greater(t, sketch2.R(), 0) + + union, err := NewVarOptItemsUnion[int](k) + assert.NoError(t, err) + assert.NoError(t, union.UpdateSketch(sketch1)) + assert.NoError(t, union.UpdateSketch(sketch2)) + + result, err := union.Result() + assert.NoError(t, err) + assert.Equal(t, int64(1001), result.N()) + assert.Equal(t, k, result.K()) + assert.LessOrEqual(t, result.NumSamples(), k) + + foundHeavy := false + for sample := range result.All() { + if sample.Item == -1 { + foundHeavy = true + break + } + } + assert.True(t, foundHeavy, "extreme heavy item should be retained in union result") +} + +func TestVarOptItemsUnion_UpdateSketchIdenticalSamplingSketches(t *testing.T) { + const k = 16 + const n = 1000 + + base, err := NewVarOptItemsSketch[int64](uint(k)) + assert.NoError(t, err) + for i := 0; i < n; i++ { + assert.NoError(t, base.Update(int64(i), 1.0)) + } + assert.Greater(t, base.R(), 0) + + data, err := base.ToSlice(Int64SerDe{}) + assert.NoError(t, err) + clone, err := NewVarOptItemsSketchFromSlice[int64](data, Int64SerDe{}) + assert.NoError(t, err) + + union, err := NewVarOptItemsUnion[int64](k) + assert.NoError(t, err) + assert.NoError(t, union.UpdateSketch(base)) + assert.NoError(t, union.UpdateSketch(clone)) + + result, err := union.Result() + assert.NoError(t, err) + assert.Equal(t, int64(2*n), result.N()) + assert.Equal(t, k, result.K()) + assert.LessOrEqual(t, result.NumSamples(), k) + + ss, err := result.EstimateSubsetSum(func(_ int64) bool { return true }) + assert.NoError(t, err) + assert.InDelta(t, float64(2*n), ss.TotalSketchWeight, 1e-9) +} + +func TestVarOptItemsUnion_UpdateSketchDifferentKWeightedItems(t *testing.T) { + smallK := 8 + largeK := 32 + + small, err := NewVarOptItemsSketch[int](uint(smallK)) + assert.NoError(t, err) + totalWeight := 0.0 + for i := 1; i <= 200; i++ { + w := float64(i) + totalWeight += w + assert.NoError(t, small.Update(i, w)) + } + assert.Greater(t, small.R(), 0) + + large, err := NewVarOptItemsSketch[int](uint(largeK)) + assert.NoError(t, err) + for i := 1; i <= 400; i++ { + w := float64(i) * 0.5 + totalWeight += w + assert.NoError(t, large.Update(10000+i, w)) + } + assert.Greater(t, large.R(), 0) + + union, err := NewVarOptItemsUnion[int](largeK) + assert.NoError(t, err) + assert.NoError(t, union.UpdateSketch(small)) + assert.NoError(t, union.UpdateSketch(large)) + + result, err := union.Result() + assert.NoError(t, err) + assert.Equal(t, int64(600), result.N()) + assert.GreaterOrEqual(t, result.K(), 1) + assert.LessOrEqual(t, result.K(), largeK) + assert.LessOrEqual(t, result.NumSamples(), largeK) + + ss, err := result.EstimateSubsetSum(func(_ int) bool { return true }) + assert.NoError(t, err) + assert.InDelta(t, totalWeight, ss.TotalSketchWeight, 1e-9) +} + func TestVarOptItemsUnion_UpdateSketchNilNoop(t *testing.T) { union, err := NewVarOptItemsUnion[int](8) assert.NoError(t, err) From 8eff9c726ef9336cbcaf8b2035d610ef6ec2ee12 Mon Sep 17 00:00:00 2001 From: Fengzdadi Date: Tue, 3 Mar 2026 13:44:23 -0500 Subject: [PATCH 12/12] fix(sampling): align varopt union pseudo-exact weight consistency check --- sampling/varopt_items_union.go | 3 +++ sampling/varopt_items_union_test.go | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/sampling/varopt_items_union.go b/sampling/varopt_items_union.go index 71c1a37..5b175c1 100644 --- a/sampling/varopt_items_union.go +++ b/sampling/varopt_items_union.go @@ -231,6 +231,9 @@ func (u *VarOptItemsUnion[T]) markMovingGadgetCoercer() (*VarOptItemsSketch[T], if resultH+resultR != resultK { return nil, errors.New("invalid state resolving pseudo-exact union gadget") } + if math.Abs(transferredWeight-u.outerTauNumer) > 1e-10 { + return nil, errors.New("unexpected mismatch in transferred weight") + } // Gap slot. weights[resultH] = -1.0 diff --git a/sampling/varopt_items_union_test.go b/sampling/varopt_items_union_test.go index 196da6c..190be8c 100644 --- a/sampling/varopt_items_union_test.go +++ b/sampling/varopt_items_union_test.go @@ -214,6 +214,7 @@ func TestVarOptItemsUnion_ResultPseudoExactMarkedResolution(t *testing.T) { } union.n = 4 union.outerTauDenom = int64(union.gadget.numMarksInH) + union.outerTauNumer = 10.0 out, err := union.Result() assert.NoError(t, err) @@ -226,6 +227,25 @@ func TestVarOptItemsUnion_ResultPseudoExactMarkedResolution(t *testing.T) { assert.Equal(t, uint32(0), out.numMarksInH) } +func TestVarOptItemsUnion_ResultPseudoExactTransferredWeightMismatch(t *testing.T) { + union, err := NewVarOptItemsUnion[int](8) + assert.NoError(t, err) + + // Construct a pseudo-exact gadget where transferred marked-H weight is known. + assert.NoError(t, union.gadget.update(1, 1.0, true)) + assert.NoError(t, union.gadget.update(2, 2.0, true)) + assert.NoError(t, union.gadget.update(3, 3.0, true)) + union.n = 3 + union.outerTauDenom = int64(union.gadget.numMarksInH) + + // Intentionally break bookkeeping to ensure we fail fast like Java/C++ checks. + union.outerTauNumer = 123.456 + + out, err := union.Result() + assert.Nil(t, out) + assert.ErrorContains(t, err, "unexpected mismatch in transferred weight") +} + func TestVarOptItemsUnion_ResultMigrateMarkedItemsByDecreasingK(t *testing.T) { union, err := NewVarOptItemsUnion[int](8) assert.NoError(t, err)