Skip to content

Commit 13e6247

Browse files
committed
[exporter/otlphttp] Add retryable_statuses to control which HTTP codes trigger retries
Fixes #14228
1 parent 7c31dd5 commit 13e6247

File tree

8 files changed

+293
-30
lines changed

8 files changed

+293
-30
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'enhancement'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp)
7+
component: exporter/otlphttp
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add `retryable_statuses` configuration option to control which HTTP status codes trigger retries.
11+
12+
# One or more tracking issues or pull requests related to the change
13+
issues: [14228]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
The new `retry_on_failure.retryable_statuses` field (default: [429, 502, 503, 504]) allows
20+
configuring which HTTP status codes should trigger retries. Codes not in this list are treated
21+
as permanent errors. This is useful in gateway mode to prevent retrying rate limit errors (429)
22+
while still retrying server errors.
23+
24+
# Optional: The change log or logs in which this entry should be included.
25+
# e.g. '[user]' or '[user, api]'
26+
# Include 'user' if the change is relevant to end users.
27+
# Include 'api' if there is a change to a library API.
28+
# Default: '[user]'
29+
change_logs: [user]

exporter/otlphttpexporter/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ The following settings can be optionally configured:
4444
- `write_buffer_size` (default = 512 * 1024): WriteBufferSize for HTTP client.
4545
- `encoding` (default = proto): The encoding to use for the messages (valid options: `proto`, `json`)
4646
- `retry_on_failure`: see [Retry on Failure](../exporterhelper/README.md#retry-on-failure) for the full set of available options.
47+
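- `retryable_statuses` (default = [429, 502, 503, 504]): List of HTTP status codes that trigger retries; set it under `retry_on_failure`. Responses with any other status code are treated as permanent errors and are not retried. Use this option to stop retrying codes that a backend is known to return for conditions that are not actually retryable. Each code must be between 100 and 599.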
4748
- `sending_queue`: see [Sending Queue](../exporterhelper/README.md#sending-queue) for the full set of available options.
4849

4950
Example:
@@ -72,5 +73,16 @@ exporters:
7273
encoding: json
7374
```
7475

76+
To customize which HTTP status codes should trigger retries (useful in gateway mode):
77+
78+
```yaml
79+
exporters:
80+
otlp_http:
81+
endpoint: https://backend:4318
82+
retry_on_failure:
83+
enabled: true
84+
retryable_statuses: [502, 503, 504] # Don't retry 429 rate limits
85+
```
86+
7587
The full list of settings exposed for this exporter is documented [here](./config.go)
7688
with detailed sample configurations [here](./testdata/config.yaml).

exporter/otlphttpexporter/config.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,22 @@ func (e *EncodingType) UnmarshalText(text []byte) error {
4444
return nil
4545
}
4646

47+
// RetryConfig extends the standard BackOffConfig with additional HTTP-specific options.
48+
type RetryConfig struct {
49+
configretry.BackOffConfig `mapstructure:",squash"`
50+
51+
// RetryableStatuses is a list of HTTP status codes that should trigger retries.
52+
// By default, this is set to [429, 502, 503, 504] per OTLP spec.
53+
// Modify this list to control which codes are retried - codes not in this list
54+
// will be treated as permanent errors. An empty list means no status code is retried.
55+
RetryableStatuses []int `mapstructure:"retryable_statuses"`
56+
}
57+
4758
// Config defines configuration for OTLP/HTTP exporter.
4859
type Config struct {
4960
ClientConfig confighttp.ClientConfig `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct.
5061
QueueConfig configoptional.Optional[exporterhelper.QueueBatchConfig] `mapstructure:"sending_queue"`
51-
RetryConfig configretry.BackOffConfig `mapstructure:"retry_on_failure"`
62+
RetryConfig RetryConfig `mapstructure:"retry_on_failure"`
5263

5364
// The URL to send traces to. If omitted the Endpoint + "/v1/traces" will be used.
5465
TracesEndpoint string `mapstructure:"traces_endpoint"`
@@ -73,5 +84,13 @@ func (cfg *Config) Validate() error {
7384
if cfg.ClientConfig.Endpoint == "" && cfg.TracesEndpoint == "" && cfg.MetricsEndpoint == "" && cfg.LogsEndpoint == "" && cfg.ProfilesEndpoint == "" {
7485
return errors.New("at least one endpoint must be specified")
7586
}
87+
88+
// Validate retryable status codes
89+
for _, code := range cfg.RetryConfig.RetryableStatuses {
90+
if code < 100 || code > 599 {
91+
return fmt.Errorf("invalid HTTP status code in retry_on_failure.retryable_statuses: %d (must be between 100-599)", code)
92+
}
93+
}
94+
7695
return nil
7796
}

exporter/otlphttpexporter/config_test.go

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,16 @@ func TestUnmarshalConfig(t *testing.T) {
4545
require.NoError(t, cm.Unmarshal(&cfg))
4646
assert.Equal(t,
4747
&Config{
48-
RetryConfig: configretry.BackOffConfig{
49-
Enabled: true,
50-
InitialInterval: 10 * time.Second,
51-
RandomizationFactor: 0.7,
52-
Multiplier: 1.3,
53-
MaxInterval: 1 * time.Minute,
54-
MaxElapsedTime: 10 * time.Minute,
48+
RetryConfig: RetryConfig{
49+
BackOffConfig: configretry.BackOffConfig{
50+
Enabled: true,
51+
InitialInterval: 10 * time.Second,
52+
RandomizationFactor: 0.7,
53+
Multiplier: 1.3,
54+
MaxInterval: 1 * time.Minute,
55+
MaxElapsedTime: 10 * time.Minute,
56+
},
57+
RetryableStatuses: []int{429, 502, 503, 504},
5558
},
5659
QueueConfig: configoptional.Some(exporterhelper.QueueBatchConfig{
5760
Sizer: exporterhelper.RequestSizerTypeRequests,
@@ -196,14 +199,73 @@ func TestConfigValidate(t *testing.T) {
196199
},
197200
wantErr: false,
198201
},
202+
{
203+
name: "valid retryable status codes",
204+
cfg: &Config{
205+
ClientConfig: confighttp.ClientConfig{
206+
Endpoint: "http://localhost:4318",
207+
},
208+
RetryConfig: RetryConfig{
209+
RetryableStatuses: []int{429, 502, 503, 504},
210+
},
211+
},
212+
wantErr: false,
213+
},
214+
{
215+
name: "empty retryable status list",
216+
cfg: &Config{
217+
ClientConfig: confighttp.ClientConfig{
218+
Endpoint: "http://localhost:4318",
219+
},
220+
RetryConfig: RetryConfig{
221+
RetryableStatuses: []int{},
222+
},
223+
},
224+
wantErr: false,
225+
},
226+
{
227+
name: "invalid status code too low",
228+
cfg: &Config{
229+
ClientConfig: confighttp.ClientConfig{
230+
Endpoint: "http://localhost:4318",
231+
},
232+
RetryConfig: RetryConfig{
233+
RetryableStatuses: []int{99},
234+
},
235+
},
236+
wantErr: true,
237+
},
238+
{
239+
name: "invalid status code too high",
240+
cfg: &Config{
241+
ClientConfig: confighttp.ClientConfig{
242+
Endpoint: "http://localhost:4318",
243+
},
244+
RetryConfig: RetryConfig{
245+
RetryableStatuses: []int{600},
246+
},
247+
},
248+
wantErr: true,
249+
},
250+
{
251+
name: "invalid status code in list",
252+
cfg: &Config{
253+
ClientConfig: confighttp.ClientConfig{
254+
Endpoint: "http://localhost:4318",
255+
},
256+
RetryConfig: RetryConfig{
257+
RetryableStatuses: []int{429, 0, 503},
258+
},
259+
},
260+
wantErr: true,
261+
},
199262
}
200263

201264
for _, tt := range tests {
202265
t.Run(tt.name, func(t *testing.T) {
203266
err := tt.cfg.Validate()
204267
if tt.wantErr {
205268
require.Error(t, err)
206-
assert.Contains(t, err.Error(), "at least one endpoint must be specified")
207269
} else {
208270
assert.NoError(t, err)
209271
}

exporter/otlphttpexporter/factory.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ func createDefaultConfig() component.Config {
4545
clientConfig.WriteBufferSize = 512 * 1024
4646

4747
return &Config{
48-
RetryConfig: configretry.NewDefaultBackOffConfig(),
48+
RetryConfig: RetryConfig{
49+
BackOffConfig: configretry.NewDefaultBackOffConfig(),
50+
RetryableStatuses: []int{429, 502, 503, 504}, // Default retryable codes per OTLP spec
51+
},
4952
QueueConfig: configoptional.Some(exporterhelper.NewDefaultQueueConfig()),
5053
Encoding: EncodingProto,
5154
ClientConfig: clientConfig,
@@ -97,7 +100,7 @@ func createTraces(
97100
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
98101
// explicitly disable since we rely on http.Client timeout logic.
99102
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
100-
exporterhelper.WithRetry(oCfg.RetryConfig),
103+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
101104
exporterhelper.WithQueue(oCfg.QueueConfig))
102105
}
103106

@@ -123,7 +126,7 @@ func createMetrics(
123126
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
124127
// explicitly disable since we rely on http.Client timeout logic.
125128
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
126-
exporterhelper.WithRetry(oCfg.RetryConfig),
129+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
127130
exporterhelper.WithQueue(oCfg.QueueConfig))
128131
}
129132

@@ -148,7 +151,7 @@ func createLogs(
148151
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
149152
// explicitly disable since we rely on http.Client timeout logic.
150153
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
151-
exporterhelper.WithRetry(oCfg.RetryConfig),
154+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
152155
exporterhelper.WithQueue(oCfg.QueueConfig))
153156
}
154157

@@ -174,6 +177,6 @@ func createProfiles(
174177
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
175178
// explicitly disable since we rely on http.Client timeout logic.
176179
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
177-
exporterhelper.WithRetry(oCfg.RetryConfig),
180+
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
178181
exporterhelper.WithQueue(oCfg.QueueConfig))
179182
}

exporter/otlphttpexporter/otlp.go

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net/http"
1313
"net/url"
1414
"runtime"
15+
"slices"
1516
"strconv"
1617
"time"
1718

@@ -222,7 +223,7 @@ func (e *baseExporter) export(ctx context.Context, url string, request []byte, p
222223
}
223224
formattedErr = statusutil.NewStatusFromMsgAndHTTPCode(errString, resp.StatusCode).Err()
224225

225-
if !isRetryableStatusCode(resp.StatusCode) {
226+
if !e.isRetryableStatusCode(resp.StatusCode) {
226227
return consumererror.NewPermanent(formattedErr)
227228
}
228229

@@ -251,21 +252,10 @@ func (e *baseExporter) export(ctx context.Context, url string, request []byte, p
251252
return formattedErr
252253
}
253254

254-
// Determine if the status code is retryable according to the specification.
255+
// isRetryableStatusCode reports whether code is in the configured retry_on_failure.retryable_statuses list.
255256
// For more, see https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#failures-1
256-
func isRetryableStatusCode(code int) bool {
257-
switch code {
258-
case http.StatusTooManyRequests:
259-
return true
260-
case http.StatusBadGateway:
261-
return true
262-
case http.StatusServiceUnavailable:
263-
return true
264-
case http.StatusGatewayTimeout:
265-
return true
266-
default:
267-
return false
268-
}
257+
func (e *baseExporter) isRetryableStatusCode(code int) bool {
258+
return slices.Contains(e.config.RetryConfig.RetryableStatuses, code)
269259
}
270260

271261
func readResponseBody(resp *http.Response) ([]byte, error) {

0 commit comments

Comments
 (0)