Skip to content

Commit a89de0f

Browse files
committed
[exporter/otlphttp] Add non_retryable_status to skip retries for specific HTTP codes
Fixes #14228
1 parent a330ae2 commit a89de0f

File tree

8 files changed

+282
-16
lines changed

8 files changed

+282
-16
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'enhancement'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp)
7+
component: exporter/otlphttp
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add `retry_on_failure.non_retryable_status` configuration option to prevent internal retries for specific HTTP status codes.
11+
12+
# One or more tracking issues or pull requests related to the change
13+
issues: [14228]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
The new `retry_on_failure.non_retryable_status` field allows configuring HTTP status codes
20+
(e.g., 429, 502, 503, 504) that should NOT trigger internal retries. When configured, these
21+
status codes are treated as permanent errors and data is dropped immediately instead of being
22+
queued and retried. This is useful in gateway mode deployments to prevent queue buildup when
23+
backend services return rate limit or temporary failure responses.
24+
25+
# Optional: The change log or logs in which this entry should be included.
26+
# e.g. '[user]' or '[user, api]'
27+
# Include 'user' if the change is relevant to end users.
28+
# Include 'api' if there is a change to a library API.
29+
# Default: '[user]'
30+
change_logs: [user]

exporter/otlphttpexporter/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ The following settings can be optionally configured:
4242
- `write_buffer_size` (default = 512 * 1024): WriteBufferSize for HTTP client.
4343
- `encoding` (default = proto): The encoding to use for the messages (valid options: `proto`, `json`)
4444
- `retry_on_failure`: see [Retry on Failure](../exporterhelper/README.md#retry-on-failure) for the full set of available options.
45+
- `non_retryable_status` (default = []): List of HTTP status codes (each between 100 and 599), set under `retry_on_failure`, that should NOT trigger retries. By default, the exporter retries on 429, 502, 503, and 504. Listing one of these codes here marks the response as a permanent error, so the data is dropped instead of being retried internally. Useful in gateway mode to prevent queue buildup when backends return rate limit errors.
4546
- `sending_queue`: see [Sending Queue](../exporterhelper/README.md#sending-queue) for the full set of available options.
4647

4748
Example:
@@ -70,5 +71,16 @@ exporters:
7071
encoding: json
7172
```
7273

74+
To prevent internal retries for specific HTTP status codes (useful in gateway mode):
75+
76+
```yaml
77+
exporters:
78+
otlphttp:
79+
endpoint: https://backend:4318
80+
retry_on_failure:
81+
enabled: true
82+
non_retryable_status: [429, 503] # Don't retry rate limits and service unavailable
83+
```
84+
7385
The full list of settings exposed for this exporter are documented [here](./config.go)
7486
with detailed sample configurations [here](./testdata/config.yaml).

exporter/otlphttpexporter/config.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,21 @@ func (e *EncodingType) UnmarshalText(text []byte) error {
4444
return nil
4545
}
4646

47+
// RetryConfig extends the standard BackOffConfig with HTTP-specific retry options; it is decoded from the retry_on_failure section.
48+
type RetryConfig struct {
49+
configretry.BackOffConfig `mapstructure:",squash"`
50+
51+
// NonRetryableStatus lists the HTTP status codes (each within 100-599, enforced by Config.Validate) that must NOT be retried.
52+
// By default, the exporter retries on 429, 502, 503, and 504 per OTLP spec.
53+
// A response carrying one of these codes is reported as a permanent error, even if the code is retryable by default.
54+
NonRetryableStatus []int `mapstructure:"non_retryable_status"`
55+
}
56+
4757
// Config defines configuration for OTLP/HTTP exporter.
4858
type Config struct {
4959
ClientConfig confighttp.ClientConfig `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct.
5060
QueueConfig configoptional.Optional[exporterhelper.QueueBatchConfig] `mapstructure:"sending_queue"`
51-
RetryConfig configretry.BackOffConfig `mapstructure:"retry_on_failure"`
61+
RetryConfig RetryConfig `mapstructure:"retry_on_failure"`
5262

5363
// The URL to send traces to. If omitted the Endpoint + "/v1/traces" will be used.
5464
TracesEndpoint string `mapstructure:"traces_endpoint"`
@@ -73,5 +83,13 @@ func (cfg *Config) Validate() error {
7383
if cfg.ClientConfig.Endpoint == "" && cfg.TracesEndpoint == "" && cfg.MetricsEndpoint == "" && cfg.LogsEndpoint == "" && cfg.ProfilesEndpoint == "" {
7484
return errors.New("at least one endpoint must be specified")
7585
}
86+
87+
// Validate non-retryable status codes
88+
for _, code := range cfg.RetryConfig.NonRetryableStatus {
89+
if code < 100 || code > 599 {
90+
return fmt.Errorf("invalid HTTP status code in retry_on_failure.non_retryable_status: %d (must be between 100-599)", code)
91+
}
92+
}
93+
7694
return nil
7795
}

exporter/otlphttpexporter/config_test.go

Lines changed: 69 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,15 @@ func TestUnmarshalConfig(t *testing.T) {
4545
require.NoError(t, cm.Unmarshal(&cfg))
4646
assert.Equal(t,
4747
&Config{
48-
RetryConfig: configretry.BackOffConfig{
49-
Enabled: true,
50-
InitialInterval: 10 * time.Second,
51-
RandomizationFactor: 0.7,
52-
Multiplier: 1.3,
53-
MaxInterval: 1 * time.Minute,
54-
MaxElapsedTime: 10 * time.Minute,
48+
RetryConfig: RetryConfig{
49+
BackOffConfig: configretry.BackOffConfig{
50+
Enabled: true,
51+
InitialInterval: 10 * time.Second,
52+
RandomizationFactor: 0.7,
53+
Multiplier: 1.3,
54+
MaxInterval: 1 * time.Minute,
55+
MaxElapsedTime: 10 * time.Minute,
56+
},
5557
},
5658
QueueConfig: configoptional.Some(exporterhelper.QueueBatchConfig{
5759
Sizer: exporterhelper.RequestSizerTypeRequests,
@@ -196,14 +198,73 @@ func TestConfigValidate(t *testing.T) {
196198
},
197199
wantErr: false,
198200
},
201+
{
202+
name: "valid non-retryable status codes",
203+
cfg: &Config{
204+
ClientConfig: confighttp.ClientConfig{
205+
Endpoint: "http://localhost:4318",
206+
},
207+
RetryConfig: RetryConfig{
208+
NonRetryableStatus: []int{429, 503},
209+
},
210+
},
211+
wantErr: false,
212+
},
213+
{
214+
name: "empty non-retryable status list",
215+
cfg: &Config{
216+
ClientConfig: confighttp.ClientConfig{
217+
Endpoint: "http://localhost:4318",
218+
},
219+
RetryConfig: RetryConfig{
220+
NonRetryableStatus: []int{},
221+
},
222+
},
223+
wantErr: false,
224+
},
225+
{
226+
name: "invalid status code too low",
227+
cfg: &Config{
228+
ClientConfig: confighttp.ClientConfig{
229+
Endpoint: "http://localhost:4318",
230+
},
231+
RetryConfig: RetryConfig{
232+
NonRetryableStatus: []int{99},
233+
},
234+
},
235+
wantErr: true,
236+
},
237+
{
238+
name: "invalid status code too high",
239+
cfg: &Config{
240+
ClientConfig: confighttp.ClientConfig{
241+
Endpoint: "http://localhost:4318",
242+
},
243+
RetryConfig: RetryConfig{
244+
NonRetryableStatus: []int{600},
245+
},
246+
},
247+
wantErr: true,
248+
},
249+
{
250+
name: "invalid status code in list",
251+
cfg: &Config{
252+
ClientConfig: confighttp.ClientConfig{
253+
Endpoint: "http://localhost:4318",
254+
},
255+
RetryConfig: RetryConfig{
256+
NonRetryableStatus: []int{429, 0, 503},
257+
},
258+
},
259+
wantErr: true,
260+
},
199261
}
200262

201263
for _, tt := range tests {
202264
t.Run(tt.name, func(t *testing.T) {
203265
err := tt.cfg.Validate()
204266
if tt.wantErr {
205267
require.Error(t, err)
206-
assert.Contains(t, err.Error(), "at least one endpoint must be specified")
207268
} else {
208269
assert.NoError(t, err)
209270
}

exporter/otlphttpexporter/factory.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@ func createDefaultConfig() component.Config {
4444
clientConfig.WriteBufferSize = 512 * 1024
4545

4646
return &Config{
47-
RetryConfig: configretry.NewDefaultBackOffConfig(),
47+
RetryConfig: RetryConfig{
48+
BackOffConfig: configretry.NewDefaultBackOffConfig(),
49+
NonRetryableStatus: nil, // Empty by default - preserves existing retry behavior for 429, 502, 503, 504
50+
},
4851
QueueConfig: configoptional.Some(exporterhelper.NewDefaultQueueConfig()),
4952
Encoding: EncodingProto,
5053
ClientConfig: clientConfig,
@@ -96,7 +99,7 @@ func createTraces(
9699
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
97100
// explicitly disable since we rely on http.Client timeout logic.
98101
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
99-
exporterhelper.WithRetry(oCfg.RetryConfig),
102+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
100103
exporterhelper.WithQueue(oCfg.QueueConfig))
101104
}
102105

@@ -122,7 +125,7 @@ func createMetrics(
122125
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
123126
// explicitly disable since we rely on http.Client timeout logic.
124127
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
125-
exporterhelper.WithRetry(oCfg.RetryConfig),
128+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
126129
exporterhelper.WithQueue(oCfg.QueueConfig))
127130
}
128131

@@ -147,7 +150,7 @@ func createLogs(
147150
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
148151
// explicitly disable since we rely on http.Client timeout logic.
149152
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
150-
exporterhelper.WithRetry(oCfg.RetryConfig),
153+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
151154
exporterhelper.WithQueue(oCfg.QueueConfig))
152155
}
153156

@@ -173,6 +176,6 @@ func createProfiles(
173176
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
174177
// explicitly disable since we rely on http.Client timeout logic.
175178
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
176-
exporterhelper.WithRetry(oCfg.RetryConfig),
179+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
177180
exporterhelper.WithQueue(oCfg.QueueConfig))
178181
}

exporter/otlphttpexporter/otlp.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net/http"
1313
"net/url"
1414
"runtime"
15+
"slices"
1516
"strconv"
1617
"time"
1718

@@ -222,7 +223,7 @@ func (e *baseExporter) export(ctx context.Context, url string, request []byte, p
222223
}
223224
formattedErr = statusutil.NewStatusFromMsgAndHTTPCode(errString, resp.StatusCode).Err()
224225

225-
if !isRetryableStatusCode(resp.StatusCode) {
226+
if !e.isRetryableStatusCode(resp.StatusCode) {
226227
return consumererror.NewPermanent(formattedErr)
227228
}
228229

@@ -253,7 +254,13 @@ func (e *baseExporter) export(ctx context.Context, url string, request []byte, p
253254

254255
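// isRetryableStatusCode reports whether the status code should be retried: codes listed in retry_on_failure.non_retryable_status are never retried; all other codes follow the specification.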
255256
// For more, see https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#failures-1
256-
func isRetryableStatusCode(code int) bool {
257+
func (e *baseExporter) isRetryableStatusCode(code int) bool {
258+
// First, check if this status code is explicitly configured as non-retryable
259+
if slices.Contains(e.config.RetryConfig.NonRetryableStatus, code) {
260+
return false
261+
}
262+
263+
// Then check the default retryable status codes per OTLP spec
257264
switch code {
258265
case http.StatusTooManyRequests:
259266
return true

0 commit comments

Comments
 (0)