@@ -110,16 +110,34 @@ func (s SubsettableHistogram) subsetTo(newScale int) SubsettableHistogram {
110110 tallyBuckets : slices .Clone (s .tallyBuckets ),
111111 scale : s .scale ,
112112 }
113+
113114 // compress every other bucket per -1 scale
114115 for dup .scale > newScale {
115- if (len (dup .tallyBuckets )- 1 )% 2 != 0 {
116- panic (fmt .Sprintf ("cannot subset from scale %v to %v, %v-buckets is not divisible by 2" , dup .scale , dup .scale - 1 , len (dup .tallyBuckets )- 1 ))
116+ if (len (dup .tallyBuckets )- 2 )% 2 != 0 { // -2 for 0 and last-2^N
117+ panic (fmt .Sprintf (
118+ "cannot subset from scale %v to %v, %v-buckets is not divisible by 2" ,
119+ dup .scale , dup .scale - 1 , len (dup .tallyBuckets )- 2 ))
120+ }
121+ if len (dup .tallyBuckets ) <= 3 {
122+ // at 3 buckets, there's just 0, start, end.
123+ //
124+ // this is well past the point of being useful,
125+ // and it means we might try to slice `[1:0]` or similar.
126+ panic (fmt .Sprintf (
127+ "not enough buckets to subset from scale %d to %d, only have %d: %v" ,
128+ dup .scale , dup .scale - 1 , len (dup .tallyBuckets ), dup .tallyBuckets ))
117129 }
118- half := make (tally.DurationBuckets , 0 , ((len (dup .tallyBuckets )- 1 )/ 2 )+ 1 )
130+
131+ // the first and last buckets will be kept, the rest will lose half per scale
132+ bucketsToCompress := dup .tallyBuckets [1 : len (dup .tallyBuckets )- 2 ] // trim
133+
134+ half := make (tally.DurationBuckets , 0 , (len (bucketsToCompress )/ 2 )+ 2 )
119135 half = append (half , dup .tallyBuckets [0 ]) // keep the zero value intact
120- for i := 1 ; i < len (dup . tallyBuckets ); i += 2 {
121- half = append (half , dup . tallyBuckets [i ]) // compress the first, third, etc
136+ for i := 0 ; i < len (bucketsToCompress ); i += 2 {
137+ half = append (half , bucketsToCompress [i ]) // keep the first, third, etc
122138 }
139+ half = append (half , dup .tallyBuckets [len (dup .tallyBuckets )- 1 ]) // keep the last 2^N intact
140+
123141 dup .tallyBuckets = half
124142 dup .scale --
125143 }
@@ -163,6 +181,14 @@ func (i IntSubsettableHistogram) tags() map[string]string {
163181// Choose scale/start/end values that match your *intent*, i.e. the range you want to capture, and then choose
164182// a length that EXCEEDS that by at least 2x, ideally 4x-10x, and ends at a highly-divisible-by-2 value.
165183//
184+ // Length should ideally be a highly-divisible-by-2 value, to keep subsetting "clean" as long as possible,
185+ // i.e. the ranges of values in each bucket stays the same rather than having the second-to-last one cover
186+ // a smaller range.
187+ //
188+ // This length will be padded internally to add both a zero value and a next-power-of-2 value, to help keep
189+ // negative values identifiable, and keep the upper limit unchanged across all scales (as it cannot cleanly
190+ // subset, because it holds an infinite range of values).
191+ //
166192// The exact length / exceeding / etc does not matter (just write a test so it's documented and can be reviewed),
167193// it just serves to document your intent and to make sure that we have some head-room so we can understand how
168194// wrongly we guessed at our needs if it's exceeded during an incident of some kind. We've failed at this
@@ -190,30 +216,33 @@ func makeSubsettableHistogram(scale int, start, end time.Duration, length int) S
190216 panic (fmt .Sprintf ("length must be between 12 < %d <=160" , length ))
191217 }
192218 // make sure the number of buckets completes a "full" row,
193- // i.e. the next bucket would be a power of 2 of the start value.
219+ // to which we will add one more to reach the next start*2^N value.
194220 //
195221 // for a more visual example of what this means, see the logged strings in tests:
196- // each "row" of values must be the same width.
222+ // each "row" of values must be the same width, and it must have a single value in the last row .
197223 //
198224 // adding a couple buckets to ensure this is met costs very little, and ensures
199225 // subsetting combines values more consistently (not crossing rows) for longer.
200226 powerOfTwoWidth := int (math .Pow (2 , float64 (scale ))) // num of buckets needed to double a value
201227 missing := length % powerOfTwoWidth
202228 if missing != 0 {
203- panic (fmt .Sprintf (`number of buckets must "fill" a power of 2 to end at a consistent row width . ` +
229+ panic (fmt .Sprintf (`number of buckets must end at a power of 2 of the starting value . ` +
204230 `got %d, probably raise to %d` ,
205- length , length + missing ))
231+ length , length + missing ,
232+ ))
206233 }
207234
208235 var buckets tally.DurationBuckets
209236 for i := 0 ; i < length ; i ++ {
210237 buckets = append (buckets , nextBucket (start , len (buckets ), scale ))
211238 }
239+ // the loop above has "filled" a full row, we need one more to reach the next power of 2.
240+ buckets = append (buckets , nextBucket (start , len (buckets ), scale ))
212241
213- if last (buckets ) <= end * 2 {
242+ if last (buckets ) < end * 2 {
214243 panic (fmt .Sprintf ("not enough buckets (%d) to exceed the end target (%v) by at least 2x. " +
215244 "you are STRONGLY encouraged to include ~2x-10x more range than your intended end value, " +
216- "because we almost always underestimate the ranges needed during incidents" ,
245+ "preferably 4x+, because we almost always underestimate the ranges needed during incidents" ,
217246 length , last (buckets )),
218247 )
219248 }
@@ -251,7 +280,7 @@ func (s SubsettableHistogram) buckets() tally.DurationBuckets { return s.tallyBu
251280func (s SubsettableHistogram ) print (to func (string , ... any )) {
252281 to ("%v\n " , s .tallyBuckets [0 :1 ]) // zero value on its own row
253282 for rowStart := 1 ; rowStart < s .len (); rowStart += s .width () {
254- to ("%v\n " , s .tallyBuckets [rowStart :rowStart + s .width ()])
283+ to ("%v\n " , s .tallyBuckets [rowStart :min ( rowStart + s .width (), len ( s . tallyBuckets ) )])
255284 }
256285}
257286
@@ -268,7 +297,7 @@ func (i IntSubsettableHistogram) print(to func(string, ...any)) {
268297 to ("[%d]\n " , int (i .tallyBuckets [0 ])) // zero value on its own row
269298 for rowStart := 1 ; rowStart < i .len (); rowStart += i .width () {
270299 ints := make ([]int , 0 , i .width ())
271- for _ , d := range i .tallyBuckets [rowStart : rowStart + i .width ()] {
300+ for _ , d := range i .tallyBuckets [rowStart : min ( rowStart + i .width (), len ( i . tallyBuckets ) )] {
272301 ints = append (ints , int (d ))
273302 }
274303 to ("%v\n " , ints )
0 commit comments