Skip to content

Commit d0edfeb

Browse files
[r350] Revert "otlp: Stick to OTLP vocabulary on invalid label value length error (#11889)" (#12263)
This reverts commit 9bf900e. <!-- Thanks for sending a pull request! Before submitting: 1. Read our CONTRIBUTING.md guide 2. Rebase your PR if it gets out of sync with main --> #### What this PR does #### Which issue(s) this PR fixes or relates to Fixes #<issue number> #### Checklist - [ ] Tests updated. - [ ] Documentation added. - [ ] `CHANGELOG.md` updated - the order of entries should be `[CHANGE]`, `[FEATURE]`, `[ENHANCEMENT]`, `[BUGFIX]`. If changelog entry is not needed, please add the `changelog-not-needed` label to the PR. - [ ] [`about-versioning.md`](https://github.com/grafana/mimir/blob/main/docs/sources/mimir/configure/about-versioning.md) updated with experimental features.
1 parent d21159b commit d0edfeb

6 files changed

Lines changed: 29 additions & 90 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
* Renamed `cortex_ingest_storage_writer_produce_requests_total` to `cortex_ingest_storage_writer_produce_records_enqueued_total`
2121
* Renamed `cortex_ingest_storage_writer_produce_failures_total` to `cortex_ingest_storage_writer_produce_records_failed_total`
2222
* [CHANGE] Distributor: moved HA tracker timeout config to limits. #11774
23-
* Moved `distributor.ha_tracker.ha_tracker_update_timeout` to `limits.ha_tracker_update_timeout`.
24-
* Moved `distributor.ha_tracker.ha_tracker_update_timeout_jitter_max` to `limits.ha_tracker_update_timeout_jitter_max`.
25-
* Moved `distributor.ha_tracker.ha_tracker_failover_timeout` to `limits.ha_tracker_failover_timeout`.
23+
* Moved `distributor.ha_tracker.ha_tracker_update_timeout` to `limits.ha_tracker.ha_tracker_update_timeout`
24+
* Moved `distributor.ha_tracker.ha_tracker_update_timeout_jitter_max` to `limits.ha_tracker.ha_tracker_update_timeout_jitter_max`
25+
* Moved `distributor.ha_tracker.ha_tracker_failover_timeout` to `limits.ha_tracker.ha_tracker_failover_timeout`
2626
* [CHANGE] Distributor: `Memberlist` marked as stable as an option for backend storage for the HA tracker. #11861
2727
* [CHANGE] Memberlist: Apply new default configuration values for MemberlistKV. This unlocks using it as backend storage for the HA Tracker. We have observed better performance with these defaults across different production loads. #11874
2828
* `memberlist.packet-dial-timeout`: `500ms`
@@ -106,7 +106,6 @@
106106
* [ENHANCEMENT] Query-frontend: Accurate tracking of samples processed from cache. #11719
107107
* [ENHANCEMENT] Store-gateway: Change level 0 blocks to be reported as 'unknown/old_block' in metrics instead of '0' to improve clarity. Level 0 indicates blocks with metadata from before compaction level tracking was added to the bucket index. #11891
108108
* [ENHANCEMENT] Compactor, distributor, ruler, scheduler and store-gateway: Makes `-<component-ring-config>.auto-forget-unhealthy-periods` configurable for each component. Deprecates the `-store-gateway.sharding-ring.auto-forget-enabled` flag. #11923
109-
* [ENHANCEMENT] otlp: Stick to OTLP vocabulary on invalid label value length error. #11889
110109
* [BUGFIX] OTLP: Fix response body and Content-Type header to align with spec. #10852
111110
* [BUGFIX] Compactor: fix issue where block becomes permanently stuck when the Compactor's block cleanup job partially deletes a block. #10888
112111
* [BUGFIX] Storage: fix intermittent failures in S3 upload retries. #10952

pkg/distributor/errors.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ func (e validationError) Cause() mimirpb.ErrorCause {
120120
return mimirpb.BAD_DATA
121121
}
122122

123-
func (e validationError) Unwrap() error {
124-
return e.error
125-
}
126-
127123
// Ensure that validationError implements Error.
128124
var _ Error = validationError{}
129125

pkg/distributor/otel.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,6 @@ func OTLPHandler(
133133
writeErrorToHTTPResponseBody(r, w, statusClientClosedRequest, codes.Canceled, "push request context canceled", logger)
134134
return
135135
}
136-
if labelValueTooLongErr := (LabelValueTooLongError{}); errors.As(pushErr, &labelValueTooLongErr) {
137-
// Translate from Mimir to OTel domain terminology
138-
pushErr = newValidationError(otelAttributeValueTooLongError{labelValueTooLongErr})
139-
}
140136
var (
141137
httpCode int
142138
grpcCode codes.Code
@@ -679,14 +675,3 @@ func translateBucketsLayout(spans []prompb.BucketSpan, deltas []int64) (int32, [
679675

680676
return firstSpan.Offset - 1, buckets
681677
}
682-
683-
type otelAttributeValueTooLongError struct {
684-
LabelValueTooLongError
685-
}
686-
687-
func (e otelAttributeValueTooLongError) Error() string {
688-
return fmt.Sprintf(
689-
"received a metric whose attribute value length exceeds the limit of %d, attribute: '%s', value: '%.200s' (truncated) metric: '%.200s'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits",
690-
e.Limit, e.Label.Name, e.Label.Value, mimirpb.FromLabelAdaptersToString(e.Series),
691-
)
692-
}

pkg/distributor/otel_test.go

Lines changed: 13 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818

1919
"github.com/go-kit/log"
2020
"github.com/grafana/dskit/concurrency"
21-
"github.com/grafana/dskit/flagext"
2221
"github.com/grafana/dskit/httpgrpc"
2322
"github.com/grafana/dskit/middleware"
2423
"github.com/grafana/dskit/user"
@@ -684,7 +683,7 @@ func TestHandlerOTLPPush(t *testing.T) {
684683
metadata []mimirpb.MetricMetadata
685684
compression string
686685
maxMsgSize int
687-
verifyFunc func(*testing.T, context.Context, *Request, testCase) error
686+
verifyFunc func(*testing.T, *Request, testCase) error
688687
requestContentType string
689688
responseCode int
690689
responseContentType string
@@ -697,7 +696,7 @@ func TestHandlerOTLPPush(t *testing.T) {
697696
resourceAttributePromotionConfig OTelResourceAttributePromotionConfig
698697
}
699698

700-
samplesVerifierFunc := func(t *testing.T, _ context.Context, pushReq *Request, tc testCase) error {
699+
samplesVerifierFunc := func(t *testing.T, pushReq *Request, tc testCase) error {
701700
t.Helper()
702701

703702
request, err := pushReq.WriteRequest()
@@ -799,7 +798,7 @@ func TestHandlerOTLPPush(t *testing.T) {
799798
maxMsgSize: 30,
800799
series: sampleSeries,
801800
metadata: sampleMetadata,
802-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
801+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
803802
_, err := pushReq.WriteRequest()
804803
return err
805804
},
@@ -815,7 +814,7 @@ func TestHandlerOTLPPush(t *testing.T) {
815814
maxMsgSize: 100000,
816815
series: sampleSeries,
817816
metadata: sampleMetadata,
818-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
817+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
819818
_, err := pushReq.WriteRequest()
820819
return err
821820
},
@@ -831,7 +830,7 @@ func TestHandlerOTLPPush(t *testing.T) {
831830
maxMsgSize: 100000,
832831
series: sampleSeries,
833832
metadata: sampleMetadata,
834-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
833+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
835834
_, err := pushReq.WriteRequest()
836835
return err
837836
},
@@ -848,7 +847,7 @@ func TestHandlerOTLPPush(t *testing.T) {
848847
maxMsgSize: 30,
849848
series: sampleSeries,
850849
metadata: sampleMetadata,
851-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
850+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
852851
_, err := pushReq.WriteRequest()
853852
return err
854853
},
@@ -864,7 +863,7 @@ func TestHandlerOTLPPush(t *testing.T) {
864863
maxMsgSize: 30,
865864
series: sampleSeries,
866865
metadata: sampleMetadata,
867-
verifyFunc: func(_ *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
866+
verifyFunc: func(_ *testing.T, pushReq *Request, _ testCase) error {
868867
_, err := pushReq.WriteRequest()
869868
return err
870869
},
@@ -879,7 +878,7 @@ func TestHandlerOTLPPush(t *testing.T) {
879878
maxMsgSize: 100000,
880879
series: sampleSeries,
881880
metadata: sampleMetadata,
882-
verifyFunc: func(*testing.T, context.Context, *Request, testCase) error {
881+
verifyFunc: func(*testing.T, *Request, testCase) error {
883882
return httpgrpc.Errorf(http.StatusTooManyRequests, "go slower")
884883
},
885884
responseCode: http.StatusTooManyRequests,
@@ -908,7 +907,7 @@ func TestHandlerOTLPPush(t *testing.T) {
908907
Unit: "metric_unit",
909908
},
910909
},
911-
verifyFunc: func(t *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
910+
verifyFunc: func(t *testing.T, pushReq *Request, _ testCase) error {
912911
request, err := pushReq.WriteRequest()
913912
require.NoError(t, err)
914913

@@ -951,7 +950,7 @@ func TestHandlerOTLPPush(t *testing.T) {
951950
Unit: "metric_unit",
952951
},
953952
},
954-
verifyFunc: func(t *testing.T, _ context.Context, pushReq *Request, _ testCase) error {
953+
verifyFunc: func(t *testing.T, pushReq *Request, _ testCase) error {
955954
request, err := pushReq.WriteRequest()
956955
require.NoError(t, err)
957956

@@ -974,35 +973,6 @@ func TestHandlerOTLPPush(t *testing.T) {
974973
responseCode: http.StatusOK,
975974
responseContentType: pbContentType,
976975
},
977-
{
978-
name: "Attribute value too long",
979-
maxMsgSize: 100000,
980-
series: []prompb.TimeSeries{
981-
{
982-
Labels: []prompb.Label{
983-
{Name: "__name__", Value: "foo"},
984-
{Name: "too_long", Value: "huge value"},
985-
},
986-
Samples: []prompb.Sample{
987-
{Value: 1, Timestamp: time.Date(2020, 4, 1, 0, 0, 0, 0, time.UTC).UnixNano()},
988-
},
989-
},
990-
},
991-
metadata: sampleMetadata,
992-
verifyFunc: func(_ *testing.T, ctx context.Context, pushReq *Request, _ testCase) error {
993-
var limitsCfg validation.Limits
994-
flagext.DefaultValues(&limitsCfg)
995-
limitsCfg.MaxLabelValueLength = len("huge value") - 1
996-
distributors, _, _, _ := prepare(t, prepConfig{numDistributors: 1, limits: &limitsCfg})
997-
distributor := distributors[0]
998-
return distributor.prePushValidationMiddleware(func(context.Context, *Request) error { return nil })(ctx, pushReq)
999-
},
1000-
responseCode: http.StatusBadRequest,
1001-
responseContentType: pbContentType,
1002-
errMessage: "received a metric whose attribute value length exceeds the limit of 9, attribute: 'too_long', value: 'huge value' (truncated) metric: 'foo{too_long=\"huge value\"}'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits",
1003-
expectedLogs: []string{`level=error user=test msg="detected an error while ingesting OTLP metrics request (the request may have been partially ingested)" httpCode=400 err="received a metric whose attribute value length exceeds the limit of 9, attribute: 'too_long', value: 'huge value' (truncated) metric: 'foo{too_long=\"huge value\"}'. See: https://grafana.com/docs/grafana-cloud/send-data/otlp/otlp-format-considerations/#metrics-ingestion-limits" insight=true`},
1004-
expectedRetryHeader: false,
1005-
},
1006976
}
1007977
for _, tt := range tests {
1008978
t.Run(tt.name, func(t *testing.T) {
@@ -1023,11 +993,10 @@ func TestHandlerOTLPPush(t *testing.T) {
1023993
"test": testLimits,
1024994
}),
1025995
)
1026-
1027-
pusher := func(ctx context.Context, pushReq *Request) error {
996+
pusher := func(_ context.Context, pushReq *Request) error {
1028997
t.Helper()
1029998
t.Cleanup(pushReq.CleanUp)
1030-
return tt.verifyFunc(t, ctx, pushReq, tt)
999+
return tt.verifyFunc(t, pushReq, tt)
10311000
}
10321001

10331002
logs := &concurrency.SyncBuffer{}
@@ -1039,9 +1008,7 @@ func TestHandlerOTLPPush(t *testing.T) {
10391008

10401009
assert.Equal(t, tt.responseCode, resp.Code)
10411010
assert.Equal(t, tt.responseContentType, resp.Header().Get("Content-Type"))
1042-
if tt.responseContentLength > 0 {
1043-
assert.Equal(t, strconv.Itoa(tt.responseContentLength), resp.Header().Get("Content-Length"))
1044-
}
1011+
assert.Equal(t, strconv.Itoa(tt.responseContentLength), resp.Header().Get("Content-Length"))
10451012
if tt.errMessage != "" {
10461013
body, err := io.ReadAll(resp.Body)
10471014
require.NoError(t, err)

pkg/distributor/validate.go

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ package distributor
88
import (
99
"errors"
1010
"fmt"
11-
"slices"
1211
"strings"
1312
"time"
1413
"unicode"
@@ -123,16 +122,6 @@ var (
123122
nativeHistogramCustomBucketsNotReducibleMsgFormat = globalerror.NativeHistogramCustomBucketsNotReducible.Message("received a native histogram sample with more custom buckets than the limit, timestamp: %d series: %s, buckets: %d, limit: %d")
124123
)
125124

126-
type LabelValueTooLongError struct {
127-
Label mimirpb.LabelAdapter
128-
Series []mimirpb.LabelAdapter
129-
Limit int
130-
}
131-
132-
func (e LabelValueTooLongError) Error() string {
133-
return fmt.Sprintf(labelValueTooLongMsgFormat, e.Label.Name, e.Label.Value, mimirpb.FromLabelAdaptersToString(e.Series))
134-
}
135-
136125
// sampleValidationConfig helps with getting required config to validate sample.
137126
type sampleValidationConfig interface {
138127
CreationGracePeriod(userID string) time.Duration
@@ -462,7 +451,7 @@ func validateLabels(m *sampleValidationMetrics, cfg labelValidationConfig, userI
462451
} else if len(l.Value) > maxLabelValueLength {
463452
cat.IncrementDiscardedSamples(ls, 1, reasonLabelValueTooLong, ts)
464453
m.labelValueTooLong.WithLabelValues(userID, group).Inc()
465-
return LabelValueTooLongError{Label: l, Series: slices.Clone(ls), Limit: maxLabelValueLength}
454+
return fmt.Errorf(labelValueTooLongMsgFormat, l.Name, l.Value, mimirpb.FromLabelAdaptersToString(ls))
466455
} else if lastLabelName == l.Name {
467456
cat.IncrementDiscardedSamples(ls, 1, reasonDuplicateLabelNames, ts)
468457
m.duplicateLabelNames.WithLabelValues(userID, group).Inc()

pkg/distributor/validate_test.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,18 @@ func TestValidateLabels(t *testing.T) {
152152
metric: map[model.LabelName]model.LabelValue{model.MetricNameLabel: "badLabelValue", "much_shorter_name": "test_value_please_ignore_no_really_nothing_to_see_here", "team": "biz"},
153153
skipLabelNameValidation: false,
154154
skipLabelCountValidation: false,
155-
err: LabelValueTooLongError{
156-
Label: mimirpb.LabelAdapter{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
157-
Limit: 25,
158-
Series: []mimirpb.LabelAdapter{
159-
{Name: model.MetricNameLabel, Value: "badLabelValue"},
160-
{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
161-
{Name: "team", Value: "biz"},
162-
},
163-
},
155+
err: fmt.Errorf(
156+
labelValueTooLongMsgFormat,
157+
"much_shorter_name",
158+
"test_value_please_ignore_no_really_nothing_to_see_here",
159+
mimirpb.FromLabelAdaptersToString(
160+
[]mimirpb.LabelAdapter{
161+
{Name: model.MetricNameLabel, Value: "badLabelValue"},
162+
{Name: "much_shorter_name", Value: "test_value_please_ignore_no_really_nothing_to_see_here"},
163+
{Name: "team", Value: "biz"},
164+
},
165+
),
166+
),
164167
},
165168
{
166169
metric: map[model.LabelName]model.LabelValue{model.MetricNameLabel: "foo", "bar": "baz", "blip": "blop", "team": "plof"},

0 commit comments

Comments (0)