Skip to content

Commit cb659d5

Browse files
[r352] Revert otlp: Stick to OTLP vocabulary on invalid label value length error (#12262)
Commit: 9bf900e <!-- Thanks for sending a pull request! Before submitting: 1. Read our CONTRIBUTING.md guide 2. Rebase your PR if it gets out of sync with main --> #### What this PR does #### Which issue(s) this PR fixes or relates to Fixes #<issue number> #### Checklist - [ ] Tests updated. - [ ] Documentation added. - [ ] `CHANGELOG.md` updated - the order of entries should be `[CHANGE]`, `[FEATURE]`, `[ENHANCEMENT]`, `[BUGFIX]`. If changelog entry is not needed, please add the `changelog-not-needed` label to the PR. - [ ] [`about-versioning.md`](https://github.com/grafana/mimir/blob/main/docs/sources/mimir/configure/about-versioning.md) updated with experimental features.
1 parent 9204ebd commit cb659d5

6 files changed

Lines changed: 29 additions & 106 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@
5959
* Renamed `cortex_ingest_storage_writer_produce_requests_total` to `cortex_ingest_storage_writer_produce_records_enqueued_total`
6060
* Renamed `cortex_ingest_storage_writer_produce_failures_total` to `cortex_ingest_storage_writer_produce_records_failed_total`
6161
* [CHANGE] Distributor: moved HA tracker timeout config to limits. #11774
62-
* Moved `distributor.ha_tracker.ha_tracker_update_timeout` to `limits.ha_tracker_update_timeout`.
63-
* Moved `distributor.ha_tracker.ha_tracker_update_timeout_jitter_max` to `limits.ha_tracker_update_timeout_jitter_max`.
64-
* Moved `distributor.ha_tracker.ha_tracker_failover_timeout` to `limits.ha_tracker_failover_timeout`.
62+
* Moved `distributor.ha_tracker.ha_tracker_update_timeout` to `limits.ha_tracker.ha_tracker_update_timeout`
63+
* Moved `distributor.ha_tracker.ha_tracker_update_timeout_jitter_max` to `limits.ha_tracker.ha_tracker_update_timeout_jitter_max`
64+
* Moved `distributor.ha_tracker.ha_tracker_failover_timeout` to `limits.ha_tracker.ha_tracker_failover_timeout`
6565
* [CHANGE] Distributor: `Memberlist` marked as stable as an option for backend storage for the HA tracker. #11861
6666
* [CHANGE] Distributor: `etcd` deprecated as an option for backend storage for the HA tracker. #12047
6767
* [CHANGE] Memberlist: Apply new default configuration values for MemberlistKV. This unlocks using it as backend storage for the HA Tracker. We have observed better performance with these defaults across different production loads. #11874
@@ -146,7 +146,6 @@
146146
* [ENHANCEMENT] Query-frontend: Accurate tracking of samples processed from cache. #11719
147147
* [ENHANCEMENT] Store-gateway: Change level 0 blocks to be reported as 'unknown/old_block' in metrics instead of '0' to improve clarity. Level 0 indicates blocks with metadata from before compaction level tracking was added to the bucket index. #11891
148148
* [ENHANCEMENT] Compactor, distributor, ruler, scheduler and store-gateway: Makes `-<component-ring-config>.auto-forget-unhealthy-periods` configurable for each component. Deprecates the `-store-gateway.sharding-ring.auto-forget-enabled` flag. #11923
149-
* [ENHANCEMENT] otlp: Stick to OTLP vocabulary on invalid label value length error. #11889
150149
* [BUGFIX] OTLP: Fix response body and Content-Type header to align with spec. #10852
151150
* [BUGFIX] Compactor: fix issue where block becomes permanently stuck when the Compactor's block cleanup job partially deletes a block. #10888
152151
* [BUGFIX] Storage: fix intermittent failures in S3 upload retries. #10952

pkg/distributor/errors.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ func (e validationError) Cause() mimirpb.ErrorCause {
120120
return mimirpb.BAD_DATA
121121
}
122122

123-
func (e validationError) Unwrap() error {
124-
return e.error
125-
}
126-
127123
// Ensure that validationError implements Error.
128124
var _ Error = validationError{}
129125

pkg/distributor/otel.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,6 @@ func OTLPHandler(
135135
writeErrorToHTTPResponseBody(r, w, statusClientClosedRequest, codes.Canceled, "push request context canceled", logger)
136136
return
137137
}
138-
if labelValueTooLongErr := (LabelValueTooLongError{}); errors.As(pushErr, &labelValueTooLongErr) {
139-
// Translate from Mimir to OTel domain terminology
140-
pushErr = newValidationError(otelAttributeValueTooLongError{labelValueTooLongErr})
141-
}
142138
var (
143139
httpCode int
144140
grpcCode codes.Code
@@ -693,14 +689,3 @@ func translateBucketsLayout(spans []prompb.BucketSpan, deltas []int64) (int32, [
693689

694690
return firstSpan.Offset - 1, buckets
695691
}
696-
697-
type otelAttributeValueTooLongError struct {
698-
LabelValueTooLongError
699-
}
700-
701-
func (e otelAttributeValueTooLongError) Error() string {
702-
return fmt.Sprintf(
703-
"received a metric whose attribute value length exceeds the limit of %d, attribute: '%s', value: '%.200s' (truncated) metric: '%.200s'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits",
704-
e.Limit, e.Label.Name, e.Label.Value, mimirpb.FromLabelAdaptersToString(e.Series),
705-
)
706-
}

pkg/distributor/otel_test.go

Lines changed: 13 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818

1919
"github.com/go-kit/log"
2020
"github.com/grafana/dskit/concurrency"
21-
"github.com/grafana/dskit/flagext"
2221
"github.com/grafana/dskit/httpgrpc"
2322
"github.com/grafana/dskit/middleware"
2423
"github.com/grafana/dskit/user"
@@ -34,7 +33,6 @@ import (
3433
"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
3534
"google.golang.org/genproto/googleapis/rpc/status"
3635
"google.golang.org/grpc/codes"
37-
grpcstatus "google.golang.org/grpc/status"
3836
"google.golang.org/protobuf/proto"
3937

4038
"github.com/grafana/mimir/pkg/mimirpb"
@@ -881,7 +879,7 @@ func TestHandlerOTLPPush(t *testing.T) {
881879
metadata []mimirpb.MetricMetadata
882880
compression string
883881
maxMsgSize int
884-
verifyFunc func(*testing.T, context.Context, *Request, testCase) error
882+
verifyFunc func(*testing.T, *Request, testCase) error
885883
requestContentType string
886884
responseCode int
887885
responseContentType string
@@ -894,7 +892,7 @@ func TestHandlerOTLPPush(t *testing.T) {
894892
resourceAttributePromotionConfig OTelResourceAttributePromotionConfig
895893
}
896894

897-
samplesVerifierFunc := func(t *testing.T, _ context.Context, pushReq *Request, tc testCase) error {
895+
samplesVerifierFunc := func(t *testing.T, pushReq *Request, tc testCase) error {
898896
t.Helper()
899897

900898
request, err := pushReq.WriteRequest()
@@ -996,7 +994,7 @@ func TestHandlerOTLPPush(t *testing.T) {
996994
maxMsgSize: 30,
997995
series: sampleSeries,
998996
metadata: sampleMetadata,
999-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
997+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
1000998
_, err := pushReq.WriteRequest()
1001999
return err
10021000
},
@@ -1012,7 +1010,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10121010
maxMsgSize: 100000,
10131011
series: sampleSeries,
10141012
metadata: sampleMetadata,
1015-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
1013+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
10161014
_, err := pushReq.WriteRequest()
10171015
return err
10181016
},
@@ -1028,7 +1026,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10281026
maxMsgSize: 100000,
10291027
series: sampleSeries,
10301028
metadata: sampleMetadata,
1031-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
1029+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
10321030
_, err := pushReq.WriteRequest()
10331031
return err
10341032
},
@@ -1045,7 +1043,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10451043
maxMsgSize: 30,
10461044
series: sampleSeries,
10471045
metadata: sampleMetadata,
1048-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
1046+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
10491047
_, err := pushReq.WriteRequest()
10501048
return err
10511049
},
@@ -1061,7 +1059,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10611059
maxMsgSize: 30,
10621060
series: sampleSeries,
10631061
metadata: sampleMetadata,
1064-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
1062+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
10651063
_, err := pushReq.WriteRequest()
10661064
return err
10671065
},
@@ -1076,7 +1074,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10761074
maxMsgSize: 100000,
10771075
series: sampleSeries,
10781076
metadata: sampleMetadata,
1079-
verifyFunc: func(*testing.T, context.Context, *Request, testCase) error {
1077+
verifyFunc: func(*testing.T, *Request, testCase) error {
10801078
return httpgrpc.Errorf(http.StatusTooManyRequests, "go slower")
10811079
},
10821080
responseCode: http.StatusTooManyRequests,
@@ -1105,7 +1103,7 @@ func TestHandlerOTLPPush(t *testing.T) {
11051103
Unit: "metric_unit",
11061104
},
11071105
},
1108-
verifyFunc: func(t *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
1106+
verifyFunc: func(t *testing.T, pushReq *Request, _ testCase) error {
11091107
request, err := pushReq.WriteRequest()
11101108
require.NoError(t, err)
11111109

@@ -1148,7 +1146,7 @@ func TestHandlerOTLPPush(t *testing.T) {
11481146
Unit: "metric_unit",
11491147
},
11501148
},
1151-
verifyFunc: func(t *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
1149+
verifyFunc: func(t *testing.T, pushReq *Request, _ testCase) error {
11521150
request, err := pushReq.WriteRequest()
11531151
require.NoError(t, err)
11541152

@@ -1171,50 +1169,6 @@ func TestHandlerOTLPPush(t *testing.T) {
11711169
responseCode: http.StatusOK,
11721170
responseContentType: pbContentType,
11731171
},
1174-
{
1175-
name: "Attribute value too long",
1176-
maxMsgSize: 100000,
1177-
series: []prompb.TimeSeries{
1178-
{
1179-
Labels: []prompb.Label{
1180-
{Name: "__name__", Value: "foo"},
1181-
{Name: "too_long", Value: "huge value"},
1182-
},
1183-
Samples: []prompb.Sample{
1184-
{Value: 1, Timestamp: time.Date(2020, 4, 1, 0, 0, 0, 0, time.UTC).UnixNano()},
1185-
},
1186-
},
1187-
},
1188-
metadata: sampleMetadata,
1189-
verifyFunc: func(_ *testing.T, ctx context.Context, pushReq *Request, _ testCase) error {
1190-
var limitsCfg validation.Limits
1191-
flagext.DefaultValues(&limitsCfg)
1192-
limitsCfg.MaxLabelValueLength = len("huge value") - 1
1193-
distributors, _, _, _ := prepare(t, prepConfig{numDistributors: 1, limits: &limitsCfg})
1194-
distributor := distributors[0]
1195-
return distributor.prePushValidationMiddleware(func(context.Context, *Request) error { return nil })(ctx, pushReq)
1196-
},
1197-
responseCode: http.StatusBadRequest,
1198-
responseContentType: pbContentType,
1199-
errMessage: "received a metric whose attribute value length exceeds the limit of 9, attribute: 'too_long', value: 'huge value' (truncated) metric: 'foo{too_long=\"huge value\"}'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits",
1200-
expectedLogs: []string{`level=warn user=test msg="detected an error while ingesting OTLP metrics request (the request may have been partially ingested)" httpCode=400 err="received a metric whose attribute value length exceeds the limit of 9, attribute: 'too_long', value: 'huge value' (truncated) metric: 'foo{too_long=\"huge value\"}'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits" insight=true`},
1201-
expectedRetryHeader: false,
1202-
},
1203-
{
1204-
name: "Unexpected gRPC status error",
1205-
maxMsgSize: 100000,
1206-
series: sampleSeries,
1207-
metadata: sampleMetadata,
1208-
verifyFunc: func(*testing.T, context.Context, *Request, testCase) error {
1209-
return grpcstatus.New(codes.Unknown, "unexpected error calling some dependency").Err()
1210-
},
1211-
responseCode: http.StatusServiceUnavailable,
1212-
responseContentType: pbContentType,
1213-
responseContentLength: 44,
1214-
errMessage: "unexpected error calling some dependency",
1215-
expectedLogs: []string{`level=error user=test msg="detected an error while ingesting OTLP metrics request (the request may have been partially ingested)" httpCode=503 err="rpc error: code = Unknown desc = unexpected error calling some dependency"`},
1216-
expectedRetryHeader: true,
1217-
},
12181172
}
12191173
for _, tt := range tests {
12201174
t.Run(tt.name, func(t *testing.T) {
@@ -1235,11 +1189,10 @@ func TestHandlerOTLPPush(t *testing.T) {
12351189
"test": testLimits,
12361190
}),
12371191
)
1238-
1239-
pusher := func(ctx context.Context, pushReq *Request) error {
1192+
pusher := func(_ context.Context, pushReq *Request) error {
12401193
t.Helper()
12411194
t.Cleanup(pushReq.CleanUp)
1242-
return tt.verifyFunc(t, ctx, pushReq, tt)
1195+
return tt.verifyFunc(t, pushReq, tt)
12431196
}
12441197

12451198
logs := &concurrency.SyncBuffer{}
@@ -1251,9 +1204,7 @@ func TestHandlerOTLPPush(t *testing.T) {
12511204

12521205
assert.Equal(t, tt.responseCode, resp.Code)
12531206
assert.Equal(t, tt.responseContentType, resp.Header().Get("Content-Type"))
1254-
if tt.responseContentLength > 0 {
1255-
assert.Equal(t, strconv.Itoa(tt.responseContentLength), resp.Header().Get("Content-Length"))
1256-
}
1207+
assert.Equal(t, strconv.Itoa(tt.responseContentLength), resp.Header().Get("Content-Length"))
12571208
if tt.errMessage != "" {
12581209
body, err := io.ReadAll(resp.Body)
12591210
require.NoError(t, err)

pkg/distributor/validate.go

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ package distributor
88
import (
99
"errors"
1010
"fmt"
11-
"slices"
1211
"strings"
1312
"time"
1413
"unicode"
@@ -123,16 +122,6 @@ var (
123122
nativeHistogramCustomBucketsNotReducibleMsgFormat = globalerror.NativeHistogramCustomBucketsNotReducible.Message("received a native histogram sample with more custom buckets than the limit, timestamp: %d series: %s, buckets: %d, limit: %d")
124123
)
125124

126-
type LabelValueTooLongError struct {
127-
Label mimirpb.LabelAdapter
128-
Series []mimirpb.LabelAdapter
129-
Limit int
130-
}
131-
132-
func (e LabelValueTooLongError) Error() string {
133-
return fmt.Sprintf(labelValueTooLongMsgFormat, e.Label.Name, e.Label.Value, mimirpb.FromLabelAdaptersToString(e.Series))
134-
}
135-
136125
// sampleValidationConfig helps with getting required config to validate sample.
137126
type sampleValidationConfig interface {
138127
CreationGracePeriod(userID string) time.Duration
@@ -462,7 +451,7 @@ func validateLabels(m *sampleValidationMetrics, cfg labelValidationConfig, userI
462451
} else if len(l.Value) > maxLabelValueLength {
463452
cat.IncrementDiscardedSamples(ls, 1, reasonLabelValueTooLong, ts)
464453
m.labelValueTooLong.WithLabelValues(userID, group).Inc()
465-
return LabelValueTooLongError{Label: l, Series: slices.Clone(ls), Limit: maxLabelValueLength}
454+
return fmt.Errorf(labelValueTooLongMsgFormat, l.Name, l.Value, mimirpb.FromLabelAdaptersToString(ls))
466455
} else if lastLabelName == l.Name {
467456
cat.IncrementDiscardedSamples(ls, 1, reasonDuplicateLabelNames, ts)
468457
m.duplicateLabelNames.WithLabelValues(userID, group).Inc()

pkg/distributor/validate_test.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,18 @@ func TestValidateLabels(t *testing.T) {
152152
metric: map[model.LabelName]model.LabelValue{model.MetricNameLabel: "badLabelValue", "much_shorter_name": "test_value_please_ignore_no_really_nothing_to_see_here", "team": "biz"},
153153
skipLabelNameValidation: false,
154154
skipLabelCountValidation: false,
155-
err: LabelValueTooLongError{
156-
Label: mimirpb.LabelAdapter{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
157-
Limit: 25,
158-
Series: []mimirpb.LabelAdapter{
159-
{Name: model.MetricNameLabel, Value: "badLabelValue"},
160-
{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
161-
{Name: "team", Value: "biz"},
162-
},
163-
},
155+
err: fmt.Errorf(
156+
labelValueTooLongMsgFormat,
157+
"much_shorter_name",
158+
"test_value_please_ignore_no_really_nothing_to_see_here",
159+
mimirpb.FromLabelAdaptersToString(
160+
[]mimirpb.LabelAdapter{
161+
{Name: model.MetricNameLabel, Value: "badLabelValue"},
162+
{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
163+
{Name: "team", Value: "biz"},
164+
},
165+
),
166+
),
164167
},
165168
{
166169
metric: map[model.LabelName]model.LabelValue{model.MetricNameLabel: "foo", "bar": "baz", "blip": "blop", "team": "plof"},

0 commit comments

Comments
 (0)