Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .chloggen/49020-optimize-limit-buckets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
change_type: enhancement

component: processor/transform

note: Improve `merge_histogram_buckets` with `method="limit_buckets"` to compact buckets closer to the configured limit.

issues: [49020]

subtext:

change_logs: [user]
2 changes: 1 addition & 1 deletion processor/transformprocessor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ The `merge_histogram_buckets` function merges explicit histogram buckets. The `m

`target_value` is interpreted according to `method`:
- `remove_explicit_bound`: `target_value` is the explicit boundary to remove. The function merges the bucket ending at this boundary with the next bucket. This method uses floating-point tolerance (epsilon = 1e-12) when matching the boundary.
- `limit_buckets`: `target_value` is the maximum number of buckets to keep. It must be a positive integer. The function reduces resolution in uniform compaction passes until the histogram has no more than `target_value` buckets. In each pass, it merges adjacent bucket pairs from lower to higher bucket order, combines their counts, and keeps an unpaired final bucket unchanged. Bucket count values and boundary widths do not affect which buckets are merged. Because each pass roughly halves the number of buckets, the resulting histogram may have fewer than `target_value` buckets.
- `limit_buckets`: `target_value` is the maximum number of buckets to keep. It must be a positive integer. The function reduces resolution with a single uniform compaction pass. It chooses the smallest group size (divisor) that keeps the resulting bucket count at or below `target_value`, merges adjacent buckets in groups of that size from lower to higher bucket order, combines their counts, and keeps any smaller final group as its own bucket. Bucket count values and boundary widths do not affect which buckets are merged. The resulting histogram may have fewer than `target_value` buckets when no smaller group size stays within the limit; for example, 10 buckets with a `target_value` of 6 are merged in pairs into 5 buckets.

The function:
- Preserves the total count and sum of the histogram.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,29 @@ func limitHistogramBucketsFromDataPoint(dp pmetric.HistogramDataPoint, maxBucket
return
}

for int64(bucketCounts.Len()) > maxBuckets {
compactHistogramBuckets(explicitBounds, bucketCounts)
}
divisor := ceilDiv(bucketCounts.Len(), int(maxBuckets))
compactHistogramBuckets(explicitBounds, bucketCounts, divisor)
}

// ceilDiv returns dividend / divisor rounded up toward positive infinity.
//
// A zero dividend yields 0. A zero divisor triggers Go's integer
// division-by-zero run-time panic, so callers must pass a non-zero divisor.
func ceilDiv(dividend, divisor int) int {
	quotient := dividend / divisor
	remainder := dividend % divisor
	// Go truncates integer division toward zero, which already equals the
	// ceiling when the exact quotient is negative. Round up only when there is
	// a remainder and the exact quotient is positive, i.e. the remainder
	// (which carries the dividend's sign) has the same sign as the divisor.
	if remainder != 0 && (remainder < 0) == (divisor < 0) {
		quotient++
	}
	return quotient
}

func compactHistogramBuckets(bounds pcommon.Float64Slice, counts pcommon.UInt64Slice) {
func compactHistogramBuckets(bounds pcommon.Float64Slice, counts pcommon.UInt64Slice, divisor int) {
compactCounts := pcommon.NewUInt64Slice()
compactCounts.EnsureCapacity((counts.Len() + 1) / 2)
for i := 0; i < counts.Len(); i += 2 {
if i+1 == counts.Len() {
compactCounts.Append(counts.At(i))
continue
compactCounts.EnsureCapacity(ceilDiv(counts.Len(), divisor))
for i := 0; i < counts.Len(); i += divisor {
end := min(i+divisor, counts.Len())
var count uint64
for j := i; j < end; j++ {
count += counts.At(j)
}
compactCounts.Append(counts.At(i) + counts.At(i+1))
compactCounts.Append(count)
}

compactBounds := pcommon.NewFloat64Slice()
compactBounds.EnsureCapacity(compactCounts.Len() - 1)
for i := 1; i < bounds.Len(); i += 2 {
for i := divisor - 1; i < bounds.Len(); i += divisor {
compactBounds.Append(bounds.At(i))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,17 @@ func TestMergeHistogramBuckets(t *testing.T) {
expectedBounds: []float64{0.2, 1.0, 5.0, 30.0},
expectedCounts: []uint64{84, 126, 5, 50, 1},
},
{
name: "limit buckets uses smallest divisor that stays within limit",
inputBounds: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9},
inputCounts: []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
inputCount: 55,
inputSum: 385,
targetValue: int64(4),
method: ottl.NewTestingOptional(mergeHistogramBucketsMethodLimitBuckets),
expectedBounds: []float64{3, 6, 9},
expectedCounts: []uint64{6, 15, 24, 10},
},
{
name: "limit buckets single compaction pass may reduce below limit",
inputBounds: []float64{0.1, 0.5, 1.0},
Expand Down
Loading