Skip to content

Commit a484e23

Browse files
authored
Minor histogram-bucket tweak: end on a power-of-2, not before (#7251)
While writing some query-related code and experimenting, I realized this all works better if I change the strategy slightly to: - always start with 0, so negatives are visible - always end at a power-of-2 - everything in the middle gets subset, but not the ends This way: - start/end are the same across all scales, which makes series trivial to correlate (all histograms that can be merged have the same histogram_{start,end} values, and all the same bucketid/bucket per histogram_scale) - end is on a nice "round" value - "end-inf" is never combined into any other bucket, which is good because it covers a different time range than any other bucket Subsetting logic is nearly the same, just slice to [1:-1], subset those values, and then add the first and last verbatim. --------- Signed-off-by: Steven L <[email protected]>
1 parent 91242c1 commit a484e23

File tree

2 files changed

+47
-13
lines changed

2 files changed

+47
-13
lines changed

common/metrics/histograms.go

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -110,16 +110,34 @@ func (s SubsettableHistogram) subsetTo(newScale int) SubsettableHistogram {
110110
tallyBuckets: slices.Clone(s.tallyBuckets),
111111
scale: s.scale,
112112
}
113+
113114
// compress every other bucket per -1 scale
114115
for dup.scale > newScale {
115-
if (len(dup.tallyBuckets)-1)%2 != 0 {
116-
panic(fmt.Sprintf("cannot subset from scale %v to %v, %v-buckets is not divisible by 2", dup.scale, dup.scale-1, len(dup.tallyBuckets)-1))
116+
if (len(dup.tallyBuckets)-2)%2 != 0 { // -2 for 0 and last-2^N
117+
panic(fmt.Sprintf(
118+
"cannot subset from scale %v to %v, %v-buckets is not divisible by 2",
119+
dup.scale, dup.scale-1, len(dup.tallyBuckets)-2))
120+
}
121+
if len(dup.tallyBuckets) <= 3 {
122+
// at 3 buckets, there's just 0, start, end.
123+
//
124+
// this is well past the point of being useful,
125+
// and it means we might try to slice `[1:0]` or similar.
126+
panic(fmt.Sprintf(
127+
"not enough buckets to subset from scale %d to %d, only have %d: %v",
128+
dup.scale, dup.scale-1, len(dup.tallyBuckets), dup.tallyBuckets))
117129
}
118-
half := make(tally.DurationBuckets, 0, ((len(dup.tallyBuckets)-1)/2)+1)
130+
131+
// the first and last buckets will be kept, the rest will lose half per scale
132+
bucketsToCompress := dup.tallyBuckets[1 : len(dup.tallyBuckets)-2] // trim
133+
134+
half := make(tally.DurationBuckets, 0, (len(bucketsToCompress)/2)+2)
119135
half = append(half, dup.tallyBuckets[0]) // keep the zero value intact
120-
for i := 1; i < len(dup.tallyBuckets); i += 2 {
121-
half = append(half, dup.tallyBuckets[i]) // compress the first, third, etc
136+
for i := 0; i < len(bucketsToCompress); i += 2 {
137+
half = append(half, bucketsToCompress[i]) // keep the first, third, etc
122138
}
139+
half = append(half, dup.tallyBuckets[len(dup.tallyBuckets)-1]) // keep the last 2^N intact
140+
123141
dup.tallyBuckets = half
124142
dup.scale--
125143
}
@@ -163,6 +181,14 @@ func (i IntSubsettableHistogram) tags() map[string]string {
163181
// Choose scale/start/end values that match your *intent*, i.e. the range you want to capture, and then choose
164182
// a length that EXCEEDS that by at least 2x, ideally 4x-10x, and ends at a highly-divisible-by-2 value.
165183
//
184+
// Length should ideally be a highly-divisible-by-2 value, to keep subsetting "clean" as long as possible,
185+
// i.e. the ranges of values in each bucket stays the same rather than having the second-to-last one cover
186+
// a smaller range.
187+
//
188+
// This length will be padded internally to add both a zero value and a next-power-of-2 value, to help keep
189+
// negative values identifiable, and keep the upper limit unchanged across all scales (as it cannot cleanly
190+
// subset, because it holds an infinite range of values).
191+
//
166192
// The exact length / exceeding / etc does not matter (just write a test so it's documented and can be reviewed),
167193
// it just serves to document your intent and to make sure that we have some head-room so we can understand how
168194
// wrongly we guessed at our needs if it's exceeded during an incident of some kind. We've failed at this
@@ -190,30 +216,33 @@ func makeSubsettableHistogram(scale int, start, end time.Duration, length int) S
190216
panic(fmt.Sprintf("length must be between 12 < %d <=160", length))
191217
}
192218
// make sure the number of buckets completes a "full" row,
193-
// i.e. the next bucket would be a power of 2 of the start value.
219+
// to which we will add one more to reach the next start*2^N value.
194220
//
195221
// for a more visual example of what this means, see the logged strings in tests:
196-
// each "row" of values must be the same width.
222+
// each "row" of values must be the same width, and it must have a single value in the last row.
197223
//
198224
// adding a couple buckets to ensure this is met costs very little, and ensures
199225
// subsetting combines values more consistently (not crossing rows) for longer.
200226
powerOfTwoWidth := int(math.Pow(2, float64(scale))) // num of buckets needed to double a value
201227
missing := length % powerOfTwoWidth
202228
if missing != 0 {
203-
panic(fmt.Sprintf(`number of buckets must "fill" a power of 2 to end at a consistent row width. `+
229+
panic(fmt.Sprintf(`number of buckets must end at a power of 2 of the starting value. `+
204230
`got %d, probably raise to %d`,
205-
length, length+missing))
231+
length, length+missing,
232+
))
206233
}
207234

208235
var buckets tally.DurationBuckets
209236
for i := 0; i < length; i++ {
210237
buckets = append(buckets, nextBucket(start, len(buckets), scale))
211238
}
239+
// the loop above has "filled" a full row, we need one more to reach the next power of 2.
240+
buckets = append(buckets, nextBucket(start, len(buckets), scale))
212241

213-
if last(buckets) <= end*2 {
242+
if last(buckets) < end*2 {
214243
panic(fmt.Sprintf("not enough buckets (%d) to exceed the end target (%v) by at least 2x. "+
215244
"you are STRONGLY encouraged to include ~2x-10x more range than your intended end value, "+
216-
"because we almost always underestimate the ranges needed during incidents",
245+
"preferably 4x+, because we almost always underestimate the ranges needed during incidents",
217246
length, last(buckets)),
218247
)
219248
}
@@ -251,7 +280,7 @@ func (s SubsettableHistogram) buckets() tally.DurationBuckets { return s.tallyBu
251280
func (s SubsettableHistogram) print(to func(string, ...any)) {
252281
to("%v\n", s.tallyBuckets[0:1]) // zero value on its own row
253282
for rowStart := 1; rowStart < s.len(); rowStart += s.width() {
254-
to("%v\n", s.tallyBuckets[rowStart:rowStart+s.width()])
283+
to("%v\n", s.tallyBuckets[rowStart:min(rowStart+s.width(), len(s.tallyBuckets))])
255284
}
256285
}
257286

@@ -268,7 +297,7 @@ func (i IntSubsettableHistogram) print(to func(string, ...any)) {
268297
to("[%d]\n", int(i.tallyBuckets[0])) // zero value on its own row
269298
for rowStart := 1; rowStart < i.len(); rowStart += i.width() {
270299
ints := make([]int, 0, i.width())
271-
for _, d := range i.tallyBuckets[rowStart : rowStart+i.width()] {
300+
for _, d := range i.tallyBuckets[rowStart:min(rowStart+i.width(), len(i.tallyBuckets))] {
272301
ints = append(ints, int(d))
273302
}
274303
to("%v\n", ints)

common/metrics/histograms_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ func TestHistogramValues(t *testing.T) {
3232
[2m11.072s 2m35.871754977s 3m5.363800047s 3m40.43594988s]
3333
[4m22.144s 5m11.743509955s 6m10.727600094s 7m20.87189976s]
3434
[8m44.288s 10m23.48701991s 12m21.455200189s 14m41.743799521s]
35+
[17m28.576s]
3536
`)
3637
})
3738
t.Run("low_1ms_to_100s", func(t *testing.T) {
@@ -57,6 +58,7 @@ func TestHistogramValues(t *testing.T) {
5758
[2m11.072s 3m5.363800047s]
5859
[4m22.144s 6m10.727600094s]
5960
[8m44.288s 12m21.455200189s]
61+
[17m28.576s]
6062
`)
6163
})
6264
t.Run("high_1ms_to_24h", func(t *testing.T) {
@@ -90,6 +92,7 @@ func TestHistogramValues(t *testing.T) {
9092
[9h19m14.432s 11h5m3.169274274s 13h10m53.132812125s 15h40m31.603169349s]
9193
[18h38m28.864s 22h10m6.338548549s 26h21m46.265624251s 31h21m3.206338698s]
9294
[37h16m57.728s 44h20m12.677097099s 52h43m32.531248503s 62h42m6.412677396s]
95+
[74h33m55.456s]
9396
`)
9497
})
9598
t.Run("mid_1ms_24h", func(t *testing.T) {
@@ -123,6 +126,7 @@ func TestHistogramValues(t *testing.T) {
123126
[9h19m14.432s 13h10m53.132812125s]
124127
[18h38m28.864s 26h21m46.265624251s]
125128
[37h16m57.728s 52h43m32.531248503s]
129+
[74h33m55.456s]
126130
`)
127131
})
128132
t.Run("mid_to_16k_ints", func(t *testing.T) {
@@ -153,6 +157,7 @@ func TestHistogramValues(t *testing.T) {
153157
[8192 9741 11585 13777]
154158
[16384 19483 23170 27554]
155159
[32768 38967 46340 55108]
160+
[65536]
156161
`)
157162
})
158163
}

0 commit comments

Comments
 (0)