@@ -105,6 +105,10 @@ func NewFrequencyItemsSketchWithMaxMapSize[C comparable](maxMapSize int, hasher
105105// sketch and must be a power of 2. The maximum capacity of this internal hash map is
106106// 0.75 * maxMapSize. Both the ultimate accuracy and size of this sketch are a
107107// function of maxMapSize.
108+ //
109+ // If the sketch contains string values and the caller cares about
110+ // cross-language compatibility, it is the caller's responsibility to ensure
111+ // that the serialized string data is encoded as valid UTF-8.
108112func NewFrequencyItemsSketchFromSlice [C comparable ](slc []byte , hasher common.ItemSketchHasher [C ], serde common.ItemSketchSerde [C ]) (* ItemsSketch [C ], error ) {
109113 if serde == nil {
110114 return nil , errors .New ("no SerDe provided" )
@@ -327,6 +331,10 @@ func (i *ItemsSketch[C]) IsEmpty() bool {
327331// Update this sketch with an item and a frequency count of one.
328332//
329333// item for which the frequency should be increased.
334+ //
335+ // If the sketch contains string values and the caller cares about
336+ // cross-language compatibility, it is the caller's responsibility to ensure
337+ // that the input string is encoded as valid UTF-8.
330338func (i * ItemsSketch [C ]) Update (item C ) error {
331339 return i .UpdateMany (item , 1 )
332340}
@@ -337,6 +345,10 @@ func (i *ItemsSketch[C]) Update(item C) error {
337345// and is only used by the sketch to determine uniqueness.
338346// count the amount by which the frequency of the item should be increased.
339347// A count of zero is a no-op, and a negative count is reported as an error.
348+ //
349+ // If the sketch contains string values and the caller cares about
350+ // cross-language compatibility, it is the caller's responsibility to ensure
351+ // that input strings are encoded as valid UTF-8.
340352func (i * ItemsSketch [C ]) UpdateMany (item C , count int64 ) error {
341353 if internal .IsNil (item ) || count == 0 {
342354 return nil
@@ -374,6 +386,10 @@ func (i *ItemsSketch[C]) UpdateMany(item C, count int64) error {
374386//
375387// return a sketch whose estimates are within the guarantees of the largest error tolerance
376388// of the two merged sketches.
389+ //
390+ // If the sketch contains string values and the caller cares about
391+ // cross-language compatibility, it is the caller's responsibility to ensure
392+ // that string values in both sketches are encoded as valid UTF-8.
377393func (i * ItemsSketch [C ]) Merge (other * ItemsSketch [C ]) (* ItemsSketch [C ], error ) {
378394 if other == nil || other .IsEmpty () {
379395 return i , nil
@@ -412,7 +428,11 @@ func (i *ItemsSketch[C]) ToString() (string, error) {
412428 return sb .String (), nil
413429}
414430
415- // ToSlice returns a slice representation of this sketch
431+ // ToSlice returns a slice representation of this sketch.
432+ //
433+ // If the sketch contains string values and the caller cares about
434+ // cross-language compatibility, it is the caller's responsibility to ensure
435+ // that the serialized string data is encoded as valid UTF-8.
416436func (i * ItemsSketch [C ]) ToSlice () ([]byte , error ) {
417437 if i .hashMap .serde == nil {
418438 return nil , errors .New ("no SerDe provided" )
0 commit comments