Skip to content

Commit d0edfeb

Browse files
[r350] Revert "otlp: Stick to OTLP vocabulary on invalid label value length error (#11889)" (#12263)
This reverts commit 9bf900e. <!-- Thanks for sending a pull request! Before submitting: 1. Read our CONTRIBUTING.md guide 2. Rebase your PR if it gets out of sync with main --> #### What this PR does #### Which issue(s) this PR fixes or relates to Fixes #<issue number> #### Checklist - [ ] Tests updated. - [ ] Documentation added. - [ ] `CHANGELOG.md` updated - the order of entries should be `[CHANGE]`, `[FEATURE]`, `[ENHANCEMENT]`, `[BUGFIX]`. If changelog entry is not needed, please add the `changelog-not-needed` label to the PR. - [ ] [`about-versioning.md`](https://github.com/grafana/mimir/blob/main/docs/sources/mimir/configure/about-versioning.md) updated with experimental features.
1 parent d21159b commit d0edfeb

6 files changed

Lines changed: 29 additions & 90 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
* Renamed `cortex_ingest_storage_writer_produce_requests_total` to `cortex_ingest_storage_writer_produce_records_enqueued_total`
2121
* Renamed `cortex_ingest_storage_writer_produce_failures_total` to `cortex_ingest_storage_writer_produce_records_failed_total`
2222
* [CHANGE] Distributor: moved HA tracker timeout config to limits. #11774
23-
* Moved `distributor.ha_tracker.ha_tracker_update_timeout` to `limits.ha_tracker_update_timeout`.
24-
* Moved `distributor.ha_tracker.ha_tracker_update_timeout_jitter_max` to `limits.ha_tracker_update_timeout_jitter_max`.
25-
* Moved `distributor.ha_tracker.ha_tracker_failover_timeout` to `limits.ha_tracker_failover_timeout`.
23+
* Moved `distributor.ha_tracker.ha_tracker_update_timeout` to `limits.ha_tracker.ha_tracker_update_timeout`
24+
* Moved `distributor.ha_tracker.ha_tracker_update_timeout_jitter_max` to `limits.ha_tracker.ha_tracker_update_timeout_jitter_max`
25+
* Moved `distributor.ha_tracker.ha_tracker_failover_timeout` to `limits.ha_tracker.ha_tracker_failover_timeout`
2626
* [CHANGE] Distributor: `Memberlist` marked as stable as an option for backend storage for the HA tracker. #11861
2727
* [CHANGE] Memberlist: Apply new default configuration values for MemberlistKV. This unlocks using it as backend storage for the HA Tracker. We have observed better performance with these defaults across different production loads. #11874
2828
* `memberlist.packet-dial-timeout`: `500ms`
@@ -106,7 +106,6 @@
106106
* [ENHANCEMENT] Query-frontend: Accurate tracking of samples processed from cache. #11719
107107
* [ENHANCEMENT] Store-gateway: Change level 0 blocks to be reported as 'unknown/old_block' in metrics instead of '0' to improve clarity. Level 0 indicates blocks with metadata from before compaction level tracking was added to the bucket index. #11891
108108
* [ENHANCEMENT] Compactor, distributor, ruler, scheduler and store-gateway: Makes `-<component-ring-config>.auto-forget-unhealthy-periods` configurable for each component. Deprecates the `-store-gateway.sharding-ring.auto-forget-enabled` flag. #11923
109-
* [ENHANCEMENT] otlp: Stick to OTLP vocabulary on invalid label value length error. #11889
110109
* [BUGFIX] OTLP: Fix response body and Content-Type header to align with spec. #10852
111110
* [BUGFIX] Compactor: fix issue where block becomes permanently stuck when the Compactor's block cleanup job partially deletes a block. #10888
112111
* [BUGFIX] Storage: fix intermittent failures in S3 upload retries. #10952

pkg/distributor/errors.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ func (e validationError) Cause() mimirpb.ErrorCause {
120120
return mimirpb.BAD_DATA
121121
}
122122

123-
func (e validationError) Unwrap() error {
124-
return e.error
125-
}
126-
127123
// Ensure that validationError implements Error.
128124
var _ Error = validationError{}
129125

pkg/distributor/otel.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,6 @@ func OTLPHandler(
133133
writeErrorToHTTPResponseBody(r, w, statusClientClosedRequest, codes.Canceled, "push request context canceled", logger)
134134
return
135135
}
136-
if labelValueTooLongErr := (LabelValueTooLongError{}); errors.As(pushErr, &labelValueTooLongErr) {
137-
// Translate from Mimir to OTel domain terminology
138-
pushErr = newValidationError(otelAttributeValueTooLongError{labelValueTooLongErr})
139-
}
140136
var (
141137
httpCode int
142138
grpcCode codes.Code
@@ -679,14 +675,3 @@ func translateBucketsLayout(spans []prompb.BucketSpan, deltas []int64) (int32, [
679675

680676
return firstSpan.Offset - 1, buckets
681677
}
682-
683-
type otelAttributeValueTooLongError struct {
684-
LabelValueTooLongError
685-
}
686-
687-
func (e otelAttributeValueTooLongError) Error() string {
688-
return fmt.Sprintf(
689-
"received a metric whose attribute value length exceeds the limit of %d, attribute: '%s', value: '%.200s' (truncated) metric: '%.200s'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits",
690-
e.Limit, e.Label.Name, e.Label.Value, mimirpb.FromLabelAdaptersToString(e.Series),
691-
)
692-
}

pkg/distributor/otel_test.go

Lines changed: 13 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818

1919
"github.com/go-kit/log"
2020
"github.com/grafana/dskit/concurrency"
21-
"github.com/grafana/dskit/flagext"
2221
"github.com/grafana/dskit/httpgrpc"
2322
"github.com/grafana/dskit/middleware"
2423
"github.com/grafana/dskit/user"
@@ -684,7 +683,7 @@ func TestHandlerOTLPPush(t *testing.T) {
684683
metadata []mimirpb.MetricMetadata
685684
compression string
686685
maxMsgSize int
687-
verifyFunc func(*testing.T, context.Context, *Request, testCase) error
686+
verifyFunc func(*testing.T, *Request, testCase) error
688687
requestContentType string
689688
responseCode int
690689
responseContentType string
@@ -697,7 +696,7 @@ func TestHandlerOTLPPush(t *testing.T) {
697696
resourceAttributePromotionConfig OTelResourceAttributePromotionConfig
698697
}
699698

700-
samplesVerifierFunc := func(t *testing.T, _ context.Context, pushReq *Request, tc testCase) error {
699+
samplesVerifierFunc := func(t *testing.T, pushReq *Request, tc testCase) error {
701700
t.Helper()
702701

703702
request, err := pushReq.WriteRequest()
@@ -799,7 +798,7 @@ func TestHandlerOTLPPush(t *testing.T) {
799798
maxMsgSize: 30,
800799
series: sampleSeries,
801800
metadata: sampleMetadata,
802-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
801+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
803802
_, err := pushReq.WriteRequest()
804803
return err
805804
},
@@ -815,7 +814,7 @@ func TestHandlerOTLPPush(t *testing.T) {
815814
maxMsgSize: 100000,
816815
series: sampleSeries,
817816
metadata: sampleMetadata,
818-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
817+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
819818
_, err := pushReq.WriteRequest()
820819
return err
821820
},
@@ -831,7 +830,7 @@ func TestHandlerOTLPPush(t *testing.T) {
831830
maxMsgSize: 100000,
832831
series: sampleSeries,
833832
metadata: sampleMetadata,
834-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
833+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
835834
_, err := pushReq.WriteRequest()
836835
return err
837836
},
@@ -848,7 +847,7 @@ func TestHandlerOTLPPush(t *testing.T) {
848847
maxMsgSize: 30,
849848
series: sampleSeries,
850849
metadata: sampleMetadata,
851-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
850+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
852851
_, err := pushReq.WriteRequest()
853852
return err
854853
},
@@ -864,7 +863,7 @@ func TestHandlerOTLPPush(t *testing.T) {
864863
maxMsgSize: 30,
865864
series: sampleSeries,
866865
metadata: sampleMetadata,
867-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
866+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
868867
_, err := pushReq.WriteRequest()
869868
return err
870869
},
@@ -879,7 +878,7 @@ func TestHandlerOTLPPush(t *testing.T) {
879878
maxMsgSize: 100000,
880879
series: sampleSeries,
881880
metadata: sampleMetadata,
882-
verifyFunc: func(*testing.T, context.Context, *Request, testCase) error {
881+
verifyFunc: func(*testing.T, *Request, testCase) error {
883882
return httpgrpc.Errorf(http.StatusTooManyRequests, "go slower")
884883
},
885884
responseCode: http.StatusTooManyRequests,
@@ -908,7 +907,7 @@ func TestHandlerOTLPPush(t *testing.T) {
908907
Unit: "metric_unit",
909908
},
910909
},
911-
verifyFunc: func(t *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
910+
verifyFunc: func(t *testing.T, pushReq *Request, _ testCase) error {
912911
request, err := pushReq.WriteRequest()
913912
require.NoError(t, err)
914913

@@ -951,7 +950,7 @@ func TestHandlerOTLPPush(t *testing.T) {
951950
Unit: "metric_unit",
952951
},
953952
},
954-
verifyFunc: func(t *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
953+
verifyFunc: func(t *testing.T, pushReq *Request, _ testCase) error {
955954
request, err := pushReq.WriteRequest()
956955
require.NoError(t, err)
957956

@@ -974,35 +973,6 @@ func TestHandlerOTLPPush(t *testing.T) {
974973
responseCode: http.StatusOK,
975974
responseContentType: pbContentType,
976975
},
977-
{
978-
name: "Attribute value too long",
979-
maxMsgSize: 100000,
980-
series: []prompb.TimeSeries{
981-
{
982-
Labels: []prompb.Label{
983-
{Name: "__name__", Value: "foo"},
984-
{Name: "too_long", Value: "huge value"},
985-
},
986-
Samples: []prompb.Sample{
987-
{Value: 1, Timestamp: time.Date(2020, 4, 1, 0, 0, 0, 0, time.UTC).UnixNano()},
988-
},
989-
},
990-
},
991-
metadata: sampleMetadata,
992-
verifyFunc: func(_ *testing.T, ctx context.Context, pushReq *Request, _ testCase) error {
993-
var limitsCfg validation.Limits
994-
flagext.DefaultValues(&limitsCfg)
995-
limitsCfg.MaxLabelValueLength = len("huge value") - 1
996-
distributors, _, _, _ := prepare(t, prepConfig{numDistributors: 1, limits: &limitsCfg})
997-
distributor := distributors[0]
998-
return distributor.prePushValidationMiddleware(func(context.Context, *Request) error { return nil })(ctx, pushReq)
999-
},
1000-
responseCode: http.StatusBadRequest,
1001-
responseContentType: pbContentType,
1002-
errMessage: "received a metric whose attribute value length exceeds the limit of 9, attribute: 'too_long', value: 'huge value' (truncated) metric: 'foo{too_long=\"huge value\"}'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits",
1003-
expectedLogs: []string{`level=error user=test msg="detected an error while ingesting OTLP metrics request (the request may have been partially ingested)" httpCode=400 err="received a metric whose attribute value length exceeds the limit of 9, attribute: 'too_long', value: 'huge value' (truncated) metric: 'foo{too_long=\"huge value\"}'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits" insight=true`},
1004-
expectedRetryHeader: false,
1005-
},
1006976
}
1007977
for _, tt := range tests {
1008978
t.Run(tt.name, func(t *testing.T) {
@@ -1023,11 +993,10 @@ func TestHandlerOTLPPush(t *testing.T) {
1023993
"test": testLimits,
1024994
}),
1025995
)
1026-
1027-
pusher := func(ctx context.Context, pushReq *Request) error {
996+
pusher := func(_ context.Context, pushReq *Request) error {
1028997
t.Helper()
1029998
t.Cleanup(pushReq.CleanUp)
1030-
return tt.verifyFunc(t, ctx, pushReq, tt)
999+
return tt.verifyFunc(t, pushReq, tt)
10311000
}
10321001

10331002
logs := &concurrency.SyncBuffer{}
@@ -1039,9 +1008,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10391008

10401009
assert.Equal(t, tt.responseCode, resp.Code)
10411010
assert.Equal(t, tt.responseContentType, resp.Header().Get("Content-Type"))
1042-
if tt.responseContentLength > 0 {
1043-
assert.Equal(t, strconv.Itoa(tt.responseContentLength), resp.Header().Get("Content-Length"))
1044-
}
1011+
assert.Equal(t, strconv.Itoa(tt.responseContentLength), resp.Header().Get("Content-Length"))
10451012
if tt.errMessage != "" {
10461013
body, err := io.ReadAll(resp.Body)
10471014
require.NoError(t, err)

pkg/distributor/validate.go

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ package distributor
88
import (
99
"errors"
1010
"fmt"
11-
"slices"
1211
"strings"
1312
"time"
1413
"unicode"
@@ -123,16 +122,6 @@ var (
123122
nativeHistogramCustomBucketsNotReducibleMsgFormat = globalerror.NativeHistogramCustomBucketsNotReducible.Message("received a native histogram sample with more custom buckets than the limit, timestamp: %d series: %s, buckets: %d, limit: %d")
124123
)
125124

126-
type LabelValueTooLongError struct {
127-
Label mimirpb.LabelAdapter
128-
Series []mimirpb.LabelAdapter
129-
Limit int
130-
}
131-
132-
func (e LabelValueTooLongError) Error() string {
133-
return fmt.Sprintf(labelValueTooLongMsgFormat, e.Label.Name, e.Label.Value, mimirpb.FromLabelAdaptersToString(e.Series))
134-
}
135-
136125
// sampleValidationConfig helps with getting required config to validate sample.
137126
type sampleValidationConfig interface {
138127
CreationGracePeriod(userID string) time.Duration
@@ -462,7 +451,7 @@ func validateLabels(m *sampleValidationMetrics, cfg labelValidationConfig, userI
462451
} else if len(l.Value) > maxLabelValueLength {
463452
cat.IncrementDiscardedSamples(ls, 1, reasonLabelValueTooLong, ts)
464453
m.labelValueTooLong.WithLabelValues(userID, group).Inc()
465-
return LabelValueTooLongError{Label: l, Series: slices.Clone(ls), Limit: maxLabelValueLength}
454+
return fmt.Errorf(labelValueTooLongMsgFormat, l.Name, l.Value, mimirpb.FromLabelAdaptersToString(ls))
466455
} else if lastLabelName == l.Name {
467456
cat.IncrementDiscardedSamples(ls, 1, reasonDuplicateLabelNames, ts)
468457
m.duplicateLabelNames.WithLabelValues(userID, group).Inc()

pkg/distributor/validate_test.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,18 @@ func TestValidateLabels(t *testing.T) {
152152
metric: map[model.LabelName]model.LabelValue{model.MetricNameLabel: "badLabelValue", "much_shorter_name": "test_value_please_ignore_no_really_nothing_to_see_here", "team": "biz"},
153153
skipLabelNameValidation: false,
154154
skipLabelCountValidation: false,
155-
err: LabelValueTooLongError{
156-
Label: mimirpb.LabelAdapter{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
157-
Limit: 25,
158-
Series: []mimirpb.LabelAdapter{
159-
{Name: model.MetricNameLabel, Value: "badLabelValue"},
160-
{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
161-
{Name: "team", Value: "biz"},
162-
},
163-
},
155+
err: fmt.Errorf(
156+
labelValueTooLongMsgFormat,
157+
"much_shorter_name",
158+
"test_value_please_ignore_no_really_nothing_to_see_here",
159+
mimirpb.FromLabelAdaptersToString(
160+
[]mimirpb.LabelAdapter{
161+
{Name: model.MetricNameLabel, Value: "badLabelValue"},
162+
{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
163+
{Name: "team", Value: "biz"},
164+
},
165+
),
166+
),
164167
},
165168
{
166169
metric: map[model.LabelName]model.LabelValue{model.MetricNameLabel: "foo", "bar": "baz", "blip": "blop", "team": "plof"},

0 commit comments

Comments (0)