From c302df8800c2c74cc430f0d6a8ca1f2a0538bb7f Mon Sep 17 00:00:00 2001 From: Dmitry Anoshin Date: Thu, 11 Jun 2026 15:42:19 +0000 Subject: [PATCH] [processor/transform] Optimize limit_buckets compaction divisor Use the ceiling bucket-count-to-limit divisor to compact limit_buckets histograms in one pass, keeping the result within the configured limit while avoiding an extra halving pass. Update the docs and add regression coverage for the 10 buckets to 4 buckets case. Assisted-by: OpenAI Codex --- .chloggen/49020-optimize-limit-buckets.yaml | 11 ++++++++ processor/transformprocessor/README.md | 2 +- .../metrics/func_merge_histogram_buckets.go | 26 +++++++++++-------- .../func_merge_histogram_buckets_test.go | 11 ++++++++ 4 files changed, 38 insertions(+), 12 deletions(-) create mode 100644 .chloggen/49020-optimize-limit-buckets.yaml diff --git a/.chloggen/49020-optimize-limit-buckets.yaml b/.chloggen/49020-optimize-limit-buckets.yaml new file mode 100644 index 0000000000000..7f1335510cee9 --- /dev/null +++ b/.chloggen/49020-optimize-limit-buckets.yaml @@ -0,0 +1,11 @@ +change_type: enhancement + +component: processor/transform + +note: Improve `merge_histogram_buckets` with `method="limit_buckets"` to compact buckets closer to the configured limit. + +issues: [49020] + +subtext: + +change_logs: [user] diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 0a2bba027f206..0215840de6069 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -667,7 +667,7 @@ The `merge_histogram_buckets` function merges explicit histogram buckets. The `m `target_value` is interpreted according to `method`: - `remove_explicit_bound`: `target_value` is the explicit boundary to remove. The function merges the bucket ending at this boundary with the next bucket. This method uses floating-point tolerance (epsilon = 1e-12) when matching the boundary. 
-- `limit_buckets`: `target_value` is the maximum number of buckets to keep. It must be a positive integer. The function reduces resolution in uniform compaction passes until the histogram has no more than `target_value` buckets. In each pass, it merges adjacent bucket pairs from lower to higher bucket order, combines their counts, and keeps an unpaired final bucket unchanged. Bucket count values and boundary widths do not affect which buckets are merged. Because each pass roughly halves the number of buckets, the resulting histogram may have fewer than `target_value` buckets. +- `limit_buckets`: `target_value` is the maximum number of buckets to keep. It must be a positive integer. The function reduces resolution with a single uniform compaction pass. It chooses the smallest group size that keeps the resulting bucket count at or below `target_value` (the current bucket count divided by `target_value`, rounded up), merges adjacent buckets in groups of that size from lower to higher bucket order, combines their counts, and keeps any smaller final group as its own bucket. Bucket count values and boundary widths do not affect which buckets are merged. The resulting histogram may have fewer than `target_value` buckets because all groups except the last have the same size; for example, limiting 10 buckets to 6 merges pairs and produces 5 buckets. The function: - Preserves the total count and sum of the histogram. 
diff --git a/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets.go b/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets.go index 74a6ef3a29dec..7f2e707fdb2ac 100644 --- a/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets.go +++ b/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets.go @@ -166,25 +166,29 @@ func limitHistogramBucketsFromDataPoint(dp pmetric.HistogramDataPoint, maxBucket return } - for int64(bucketCounts.Len()) > maxBuckets { - compactHistogramBuckets(explicitBounds, bucketCounts) - } + divisor := ceilDiv(bucketCounts.Len(), int(maxBuckets)) + compactHistogramBuckets(explicitBounds, bucketCounts, divisor) +} + +func ceilDiv(dividend, divisor int) int { + return (dividend-1)/divisor + 1 } -func compactHistogramBuckets(bounds pcommon.Float64Slice, counts pcommon.UInt64Slice) { +func compactHistogramBuckets(bounds pcommon.Float64Slice, counts pcommon.UInt64Slice, divisor int) { compactCounts := pcommon.NewUInt64Slice() - compactCounts.EnsureCapacity((counts.Len() + 1) / 2) - for i := 0; i < counts.Len(); i += 2 { - if i+1 == counts.Len() { - compactCounts.Append(counts.At(i)) - continue + compactCounts.EnsureCapacity(ceilDiv(counts.Len(), divisor)) + for i := 0; i < counts.Len(); i += divisor { + end := min(i+divisor, counts.Len()) + var count uint64 + for j := i; j < end; j++ { + count += counts.At(j) } - compactCounts.Append(counts.At(i) + counts.At(i+1)) + compactCounts.Append(count) } compactBounds := pcommon.NewFloat64Slice() compactBounds.EnsureCapacity(compactCounts.Len() - 1) - for i := 1; i < bounds.Len(); i += 2 { + for i := divisor - 1; i < bounds.Len(); i += divisor { compactBounds.Append(bounds.At(i)) } diff --git a/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets_test.go b/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets_test.go index a5ab80b21dfd5..bcc40c770c553 100644 --- 
a/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets_test.go +++ b/processor/transformprocessor/internal/metrics/func_merge_histogram_buckets_test.go @@ -140,6 +140,17 @@ func TestMergeHistogramBuckets(t *testing.T) { expectedBounds: []float64{0.2, 1.0, 5.0, 30.0}, expectedCounts: []uint64{84, 126, 5, 50, 1}, }, + { + name: "limit buckets uses smallest divisor that stays within limit", + inputBounds: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}, + inputCounts: []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + inputCount: 55, + inputSum: 385, + targetValue: int64(4), + method: ottl.NewTestingOptional(mergeHistogramBucketsMethodLimitBuckets), + expectedBounds: []float64{3, 6, 9}, + expectedCounts: []uint64{6, 15, 24, 10}, + }, { name: "limit buckets single compaction pass may reduce below limit", inputBounds: []float64{0.1, 0.5, 1.0},