Skip to content

Commit 9bae771

Browse files
committed
fix avg_over_time panic with max series
1 parent f6b43c7 commit 9bae771

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

pkg/traceql/engine_metrics_average.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -273,14 +273,19 @@ func (b *averageOverTimeSeriesAggregator) Combine(in []*tempopb.TimeSeries) {
273273
// This is a counter series, we can skip it
274274
continue
275275
}
276+
countIndex, ok := countPosMapper[ts.PromLabels]
277+
if !ok {
278+
// The count series might have been truncated, skip this value
279+
continue
280+
}
276281
for i, sample := range ts.Samples {
277282
pos := IntervalOfMs(sample.TimestampMs, b.start, b.end, b.step)
278283
if pos < 0 || pos >= len(b.weightedAverageSeries[ts.PromLabels].values) {
279284
continue
280285
}
281286

282287
incomingMean := sample.Value
283-
incomingWeight := in[countPosMapper[ts.PromLabels]].Samples[i].Value
288+
incomingWeight := in[countIndex].Samples[i].Value
284289
existing.addWeigthedMean(pos, incomingMean, incomingWeight)
285290
b.aggregateExemplars(ts, b.weightedAverageSeries[ts.PromLabels])
286291
}

pkg/traceql/engine_metrics_test.go

+74
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"math"
66
"math/rand/v2"
7+
"strings"
78
"testing"
89
"time"
910

@@ -1022,6 +1023,79 @@ func TestObserveSeriesAverageOverTimeForSpanAttribute(t *testing.T) {
10221023
assert.Equal(t, 100.0, fooBar.Values[2])
10231024
}
10241025

1026+
func TestObserveSeriesAverageOverTimeForSpanAttributeWithTruncation(t *testing.T) {
1027+
req := &tempopb.QueryRangeRequest{
1028+
Start: uint64(1 * time.Second),
1029+
End: uint64(3 * time.Second),
1030+
Step: uint64(1 * time.Second),
1031+
Query: "{ } | avg_over_time(span.http.status_code) by (span.foo)",
1032+
}
1033+
1034+
// A variety of spans across times, durations, and series. All durations are powers of 2 for simplicity
1035+
in := []Span{
1036+
newMockSpan(nil).WithStartTime(uint64(1*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 200),
1037+
newMockSpan(nil).WithStartTime(uint64(1*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 300),
1038+
newMockSpan(nil).WithStartTime(uint64(1*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 400),
1039+
1040+
newMockSpan(nil).WithStartTime(uint64(2*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 200),
1041+
newMockSpan(nil).WithStartTime(uint64(2*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 200),
1042+
newMockSpan(nil).WithStartTime(uint64(2*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 100),
1043+
newMockSpan(nil).WithStartTime(uint64(2*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 100),
1044+
1045+
newMockSpan(nil).WithStartTime(uint64(3*time.Second)).WithSpanString("foo", "baz").WithSpanInt("http.status_code", 200),
1046+
newMockSpan(nil).WithStartTime(uint64(3*time.Second)).WithSpanString("foo", "baz").WithSpanInt("http.status_code", 400),
1047+
newMockSpan(nil).WithStartTime(uint64(3*time.Second)).WithSpanString("foo", "baz").WithSpanInt("http.status_code", 500),
1048+
}
1049+
1050+
in2 := []Span{
1051+
newMockSpan(nil).WithStartTime(uint64(1*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 100),
1052+
newMockSpan(nil).WithStartTime(uint64(1*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 300),
1053+
1054+
newMockSpan(nil).WithStartTime(uint64(2*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 400),
1055+
newMockSpan(nil).WithStartTime(uint64(2*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 200),
1056+
1057+
newMockSpan(nil).WithStartTime(uint64(3*time.Second)).WithSpanString("foo", "baz").WithSpanInt("http.status_code", 100),
1058+
newMockSpan(nil).WithStartTime(uint64(3*time.Second)).WithSpanString("foo", "bar").WithSpanInt("http.status_code", 100),
1059+
}
1060+
1061+
e := NewEngine()
1062+
layer1A, _ := e.CompileMetricsQueryRange(req, 0, 0, false)
1063+
layer1B, _ := e.CompileMetricsQueryRange(req, 0, 0, false)
1064+
layer2A, _ := e.CompileMetricsQueryRangeNonRaw(req, AggregateModeSum)
1065+
layer2B, _ := e.CompileMetricsQueryRangeNonRaw(req, AggregateModeSum)
1066+
layer3, _ := e.CompileMetricsQueryRangeNonRaw(req, AggregateModeFinal)
1067+
1068+
for _, s := range in {
1069+
layer1A.metricsPipeline.observe(s)
1070+
}
1071+
1072+
layer2A.ObserveSeries(layer1A.Results().ToProto(req))
1073+
1074+
for _, s := range in2 {
1075+
layer1B.metricsPipeline.observe(s)
1076+
}
1077+
1078+
layer2B.ObserveSeries(layer1B.Results().ToProto(req))
1079+
1080+
layer3.ObserveSeries(layer2A.Results().ToProto(req))
1081+
layer2bResults := layer2B.Results().ToProto(req)
1082+
truncated2bResults := make([]*tempopb.TimeSeries, 0, len(layer2bResults)-1)
1083+
for _, ts := range layer2bResults {
1084+
if !strings.Contains(ts.PromLabels, internalLabelMetaType) {
1085+
// add all values series
1086+
truncated2bResults = append(truncated2bResults, ts)
1087+
} else {
1088+
// the panic appears when the count series with 3 samples is missing
1089+
if len(ts.Samples) != 3 {
1090+
truncated2bResults = append(truncated2bResults, ts)
1091+
}
1092+
}
1093+
}
1094+
assert.NotPanics(t, func() {
1095+
layer3.ObserveSeries(truncated2bResults)
1096+
}, "should not panic on truncation")
1097+
}
1098+
10251099
func TestMaxOverTimeForDuration(t *testing.T) {
10261100
req := &tempopb.QueryRangeRequest{
10271101
Start: uint64(1 * time.Second),

0 commit comments

Comments
 (0)