Skip to content

Commit b6a455b

Browse files
Merge pull request #78 from tsenart/master
kll: reduce allocations in ItemsSketch
2 parents 055aada + 15bcc91 commit b6a455b

1 file changed

Lines changed: 45 additions & 41 deletions

File tree

kll/items_sketch.go

Lines changed: 45 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,9 @@ type ItemsSketch[C comparable] struct {
4848
n uint64
4949
levels []uint32
5050
items []C
51-
minItem *C
52-
maxItem *C
51+
minItem C
52+
maxItem C
53+
hasMinMax bool
5354
sortedView *ItemsSketchSortedView[C]
5455
serde common.ItemSketchSerde[C]
5556
compareFn common.CompareFn[C]
@@ -128,40 +129,42 @@ func NewKllItemsSketchFromSlice[C comparable](sl []byte, compareFn common.Compar
128129
n = memVal.n
129130
minK = memVal.minK
130131
isLevelZeroSorted = memVal.level0SortedFlag
131-
minItem *C
132-
maxItem *C
132+
minItem C
133+
maxItem C
134+
hasMinMax bool
133135
items = make([]C, levelsArr[memVal.numLevels])
134136
)
135137

136138
switch memVal.sketchStructure {
137139
case _COMPACT_EMPTY:
138-
minItem = nil
139-
maxItem = nil
140+
hasMinMax = false
140141
items = make([]C, k)
141142
case _COMPACT_SINGLE:
142143
offset := _N_LONG_ADR
143144
deserItems, err := serde.DeserializeManyFromSlice(sl, offset, 1)
144145
if err != nil {
145146
return nil, err
146147
}
147-
minItem = &deserItems[0]
148-
maxItem = &deserItems[0]
148+
minItem = deserItems[0]
149+
maxItem = deserItems[0]
150+
hasMinMax = true
149151
items = make([]C, k)
150152
items[k-1] = deserItems[0]
151153
case _COMPACT_FULL:
152154
offset := int(_DATA_START_ADR + memVal.numLevels*4)
153155
deserMinItems, err := serde.DeserializeManyFromSlice(sl, offset, 1)
154-
minItem = &deserMinItems[0]
155156
if err != nil {
156157
return nil, err
157158
}
158-
offset += serde.SizeOf(*minItem)
159+
minItem = deserMinItems[0]
160+
offset += serde.SizeOf(minItem)
159161
deserMaxItems, err := serde.DeserializeManyFromSlice(sl, offset, 1)
160-
maxItem = &deserMaxItems[0]
161162
if err != nil {
162163
return nil, err
163164
}
164-
offset += serde.SizeOf(*maxItem)
165+
maxItem = deserMaxItems[0]
166+
hasMinMax = true
167+
offset += serde.SizeOf(maxItem)
165168
numRetained := levelsArr[memVal.numLevels] - levelsArr[0]
166169
deseRetItems, err := serde.DeserializeManyFromSlice(sl, offset, int(numRetained))
167170
if err != nil {
@@ -183,6 +186,7 @@ func NewKllItemsSketchFromSlice[C comparable](sl []byte, compareFn common.Compar
183186
items: items,
184187
minItem: minItem,
185188
maxItem: maxItem,
189+
hasMinMax: hasMinMax,
186190
serde: serde,
187191
compareFn: compareFn,
188192
}, nil
@@ -213,15 +217,15 @@ func (s *ItemsSketch[C]) GetMinItem() (C, error) {
213217
if s.IsEmpty() {
214218
return *new(C), fmt.Errorf("operation is undefined for an empty sketch")
215219
}
216-
return *s.minItem, nil
220+
return s.minItem, nil
217221
}
218222

219223
// GetMaxItem returns the maximum item of the stream. This may be distinct from the largest item retained by the sketch algorithm.
220224
func (s *ItemsSketch[C]) GetMaxItem() (C, error) {
221225
if s.IsEmpty() {
222226
return *new(C), fmt.Errorf("operation is undefined for an empty sketch")
223227
}
224-
return *s.maxItem, nil
228+
return s.maxItem, nil
225229
}
226230

227231
// IsEstimationMode returns true if the sketch is in estimation mode, otherwise false.
@@ -463,8 +467,10 @@ func (s *ItemsSketch[C]) Reset() {
463467
s.isLevelZeroSorted = false
464468
s.numLevels = 1
465469
s.levels = []uint32{uint32(s.k), uint32(s.k)}
466-
s.minItem = nil
467-
s.maxItem = nil
470+
s.hasMinMax = false
471+
var zero C
472+
s.minItem = zero
473+
s.maxItem = zero
468474
s.items = make([]C, s.k)
469475
s.sortedView = nil
470476
}
@@ -616,12 +622,12 @@ func (s *ItemsSketch[C]) getLevelsArrSizeBytes(structure sketchStructure) int {
616622
}
617623

618624
func (s *ItemsSketch[C]) getMinMaxSizeBytes() int {
619-
return s.serde.SizeOf(*s.minItem) + s.serde.SizeOf(*s.maxItem)
625+
return s.serde.SizeOf(s.minItem) + s.serde.SizeOf(s.maxItem)
620626
}
621627

622628
func (s *ItemsSketch[C]) getMinMaxByteArr() []byte {
623-
minBytes := s.serde.SerializeOneToSlice(*s.minItem)
624-
maxBytes := s.serde.SerializeOneToSlice(*s.maxItem)
629+
minBytes := s.serde.SerializeOneToSlice(s.minItem)
630+
maxBytes := s.serde.SerializeOneToSlice(s.maxItem)
625631
minMaxBytes := make([]byte, len(minBytes)+len(maxBytes))
626632
copy(minMaxBytes, minBytes)
627633
copy(minMaxBytes[len(minBytes):], maxBytes)
@@ -682,15 +688,16 @@ func (s *ItemsSketch[C]) updateItem(item C, compareFn common.CompareFn[C]) {
682688
if internal.IsNil(item) {
683689
return
684690
}
685-
if s.IsEmpty() {
686-
s.minItem = &item
687-
s.maxItem = &item
691+
if !s.hasMinMax {
692+
s.minItem = item
693+
s.maxItem = item
694+
s.hasMinMax = true
688695
} else {
689-
if compareFn(item, *s.minItem) {
690-
s.minItem = &item
696+
if compareFn(item, s.minItem) {
697+
s.minItem = item
691698
}
692-
if compareFn(*s.maxItem, item) {
693-
s.maxItem = &item
699+
if compareFn(s.maxItem, item) {
700+
s.maxItem = item
694701
}
695702
}
696703
level0space := s.levels[0]
@@ -818,18 +825,20 @@ func (s *ItemsSketch[C]) mergeItemsSketch(other *ItemsSketch[C]) {
818825
if myEmpty {
819826
s.minItem = other.minItem
820827
s.maxItem = other.maxItem
828+
s.hasMinMax = other.hasMinMax
821829
} else {
822-
if s.compareFn(myMin, *other.minItem) {
823-
s.minItem = &myMin
830+
if s.compareFn(myMin, other.minItem) {
831+
s.minItem = myMin
824832
} else {
825833
s.minItem = other.minItem
826834
}
827835

828-
if s.compareFn(*other.maxItem, myMax) {
829-
s.maxItem = &myMax
836+
if s.compareFn(other.maxItem, myMax) {
837+
s.maxItem = myMax
830838
} else {
831839
s.maxItem = other.maxItem
832840
}
841+
s.hasMinMax = true
833842
}
834843
}
835844

@@ -859,8 +868,8 @@ func (s *ItemsSketch[C]) compressWhileUpdatingSketch() {
859868
}
860869
halfAdjPop := adjPop / 2
861870

862-
//the following is specific to generic Items
863-
myItemsArr := s.GetTotalItemsArray()
871+
// Work directly on s.items to avoid allocation from GetTotalItemsArray.
872+
myItemsArr := s.items
864873
if level == 0 { // level zero might not be sorted, so we must sort it if we wish to compact it
865874
tmpSlice := myItemsArr[adjBeg : adjBeg+adjPop]
866875
slices.SortFunc(tmpSlice, func(a, b C) int {
@@ -904,17 +913,16 @@ func (s *ItemsSketch[C]) compressWhileUpdatingSketch() {
904913
newIndex = myLevelsArr[lvl] + halfAdjPop //adjust boundary
905914
s.levels[lvl] = newIndex
906915
}
907-
s.items = myItemsArr
908916
}
909917

910918
func (s *ItemsSketch[C]) addEmptyTopLevelToCompletelyFullSketch() {
911-
myCurLevelsArr := s.getLevelsArray()
919+
// Use s.levels directly to avoid copy from getLevelsArray.
920+
myCurLevelsArr := s.levels
912921
myCurNumLevels := s.numLevels
913922
myCurTotalItemsCapacity := myCurLevelsArr[myCurNumLevels]
914923

915-
myCurItemsArr := s.GetTotalItemsArray()
916-
minItem := s.minItem
917-
maxItem := s.maxItem
924+
// Use s.items directly to avoid copy from GetTotalItemsArray.
925+
myCurItemsArr := s.items
918926

919927
deltaItemsCap := levelCapacity(s.k, myCurNumLevels+1, 0, s.m)
920928
myNewTotalItemsCapacity := myCurTotalItemsCapacity + deltaItemsCap
@@ -928,7 +936,6 @@ func (s *ItemsSketch[C]) addEmptyTopLevelToCompletelyFullSketch() {
928936
myNewNumLevels uint8
929937
)
930938

931-
//myNewLevelsArr := make([]uint32, myCurNumLevels+2)
932939
// GROW LEVELS ARRAY
933940
if growLevelsArr {
934941
//grow levels arr by one and copy the old data to the new array, extra space at the top.
@@ -956,9 +963,6 @@ func (s *ItemsSketch[C]) addEmptyTopLevelToCompletelyFullSketch() {
956963
// update our sketch with new expanded spaces
957964
s.numLevels = myNewNumLevels
958965
s.levels = myNewLevelsArr
959-
960-
s.minItem = minItem
961-
s.maxItem = maxItem
962966
s.items = myNewItemsArr
963967
}
964968

0 commit comments

Comments
 (0)