Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .chloggen/otlphttp-non-retryable-status.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'enhancement'

# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp)
component: exporter/otlphttp

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `retryable_statuses` configuration option to control which HTTP status codes trigger retries.

# One or more tracking issues or pull requests related to the change
issues: [14228]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
The new `retry_on_failure.retryable_statuses` field (default: [429, 502, 503, 504]) configures
which HTTP status codes trigger retries. Codes not in this list are treated as permanent
errors. This is useful in gateway mode, for example to stop retrying rate-limit responses (429)
while still retrying server errors (502, 503, 504).

# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
12 changes: 12 additions & 0 deletions exporter/otlphttpexporter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ The following settings can be optionally configured:
- `write_buffer_size` (default = 512 * 1024): WriteBufferSize for HTTP client.
- `encoding` (default = proto): The encoding to use for the messages (valid options: `proto`, `json`)
- `retry_on_failure`: see [Retry on Failure](../exporterhelper/README.md#retry-on-failure) for the full set of available options.
- `retryable_statuses` (default = [429, 502, 503, 504]): List of HTTP status codes that should trigger retries; it is set under `retry_on_failure`. Any status code not in this list is treated as a permanent error and is not retried. This option allows limiting which codes will be retried in case a backend is known to return codes that are not actually retryable.
- `sending_queue`: see [Sending Queue](../exporterhelper/README.md#sending-queue) for the full set of available options.

Example:
Expand Down Expand Up @@ -72,5 +73,16 @@ exporters:
encoding: json
```

To customize which HTTP status codes should trigger retries (useful in gateway mode):

```yaml
exporters:
otlp_http:
endpoint: https://backend:4318
retry_on_failure:
enabled: true
retryable_statuses: [502, 503, 504] # Don't retry 429 rate limits
```

The full list of settings exposed for this exporter is documented [here](./config.go)
with detailed sample configurations [here](./testdata/config.yaml).
21 changes: 20 additions & 1 deletion exporter/otlphttpexporter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,22 @@ func (e *EncodingType) UnmarshalText(text []byte) error {
return nil
}

// RetryConfig extends the standard configretry.BackOffConfig with additional
// HTTP-specific retry options. Config exposes it under the `retry_on_failure` key.
type RetryConfig struct {
// BackOffConfig is squashed, so its fields are decoded from the same
// mapping level as retryable_statuses.
configretry.BackOffConfig `mapstructure:",squash"`

// RetryableStatuses is the list of HTTP status codes that should trigger retries.
// The default configuration sets it to [429, 502, 503, 504] per the OTLP spec.
// An error response whose status code is not in this list is returned as a
// permanent error, so it is not retried; an empty list therefore makes every
// HTTP error status permanent.
// Config.Validate rejects any entry outside the 100-599 range.
RetryableStatuses []int `mapstructure:"retryable_statuses"`
}

// Config defines configuration for OTLP/HTTP exporter.
type Config struct {
ClientConfig confighttp.ClientConfig `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct.
QueueConfig configoptional.Optional[exporterhelper.QueueBatchConfig] `mapstructure:"sending_queue"`
RetryConfig configretry.BackOffConfig `mapstructure:"retry_on_failure"`
RetryConfig RetryConfig `mapstructure:"retry_on_failure"`

// The URL to send traces to. If omitted the Endpoint + "/v1/traces" will be used.
TracesEndpoint string `mapstructure:"traces_endpoint"`
Expand All @@ -73,5 +84,13 @@ func (cfg *Config) Validate() error {
if cfg.ClientConfig.Endpoint == "" && cfg.TracesEndpoint == "" && cfg.MetricsEndpoint == "" && cfg.LogsEndpoint == "" && cfg.ProfilesEndpoint == "" {
return errors.New("at least one endpoint must be specified")
}

// Validate retryable status codes
for _, code := range cfg.RetryConfig.RetryableStatuses {
if code < 100 || code > 599 {
return fmt.Errorf("invalid HTTP status code in retry_on_failure.retryable_statuses: %d (must be between 100-599)", code)
}
}

return nil
}
78 changes: 70 additions & 8 deletions exporter/otlphttpexporter/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,16 @@ func TestUnmarshalConfig(t *testing.T) {
require.NoError(t, cm.Unmarshal(&cfg))
assert.Equal(t,
&Config{
RetryConfig: configretry.BackOffConfig{
Enabled: true,
InitialInterval: 10 * time.Second,
RandomizationFactor: 0.7,
Multiplier: 1.3,
MaxInterval: 1 * time.Minute,
MaxElapsedTime: 10 * time.Minute,
RetryConfig: RetryConfig{
BackOffConfig: configretry.BackOffConfig{
Enabled: true,
InitialInterval: 10 * time.Second,
RandomizationFactor: 0.7,
Multiplier: 1.3,
MaxInterval: 1 * time.Minute,
MaxElapsedTime: 10 * time.Minute,
},
RetryableStatuses: []int{429, 502, 503, 504},
},
QueueConfig: configoptional.Some(exporterhelper.QueueBatchConfig{
Sizer: exporterhelper.RequestSizerTypeRequests,
Expand Down Expand Up @@ -196,14 +199,73 @@ func TestConfigValidate(t *testing.T) {
},
wantErr: false,
},
{
name: "valid retryable status codes",
cfg: &Config{
ClientConfig: confighttp.ClientConfig{
Endpoint: "http://localhost:4318",
},
RetryConfig: RetryConfig{
RetryableStatuses: []int{429, 502, 503, 504},
},
},
wantErr: false,
},
{
name: "empty retryable status list",
cfg: &Config{
ClientConfig: confighttp.ClientConfig{
Endpoint: "http://localhost:4318",
},
RetryConfig: RetryConfig{
RetryableStatuses: []int{},
},
},
wantErr: false,
},
{
name: "invalid status code too low",
cfg: &Config{
ClientConfig: confighttp.ClientConfig{
Endpoint: "http://localhost:4318",
},
RetryConfig: RetryConfig{
RetryableStatuses: []int{99},
},
},
wantErr: true,
},
{
name: "invalid status code too high",
cfg: &Config{
ClientConfig: confighttp.ClientConfig{
Endpoint: "http://localhost:4318",
},
RetryConfig: RetryConfig{
RetryableStatuses: []int{600},
},
},
wantErr: true,
},
{
name: "invalid status code in list",
cfg: &Config{
ClientConfig: confighttp.ClientConfig{
Endpoint: "http://localhost:4318",
},
RetryConfig: RetryConfig{
RetryableStatuses: []int{429, 0, 503},
},
},
wantErr: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.cfg.Validate()
if tt.wantErr {
require.Error(t, err)
assert.Contains(t, err.Error(), "at least one endpoint must be specified")
} else {
assert.NoError(t, err)
}
Expand Down
13 changes: 8 additions & 5 deletions exporter/otlphttpexporter/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ func createDefaultConfig() component.Config {
clientConfig.WriteBufferSize = 512 * 1024

return &Config{
RetryConfig: configretry.NewDefaultBackOffConfig(),
RetryConfig: RetryConfig{
BackOffConfig: configretry.NewDefaultBackOffConfig(),
RetryableStatuses: []int{429, 502, 503, 504}, // Default retryable codes per OTLP spec
},
QueueConfig: configoptional.Some(exporterhelper.NewDefaultQueueConfig()),
Encoding: EncodingProto,
ClientConfig: clientConfig,
Expand Down Expand Up @@ -97,7 +100,7 @@ func createTraces(
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
// explicitly disable since we rely on http.Client timeout logic.
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
exporterhelper.WithRetry(oCfg.RetryConfig),
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
exporterhelper.WithQueue(oCfg.QueueConfig))
}

Expand All @@ -123,7 +126,7 @@ func createMetrics(
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
// explicitly disable since we rely on http.Client timeout logic.
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
exporterhelper.WithRetry(oCfg.RetryConfig),
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
exporterhelper.WithQueue(oCfg.QueueConfig))
}

Expand All @@ -148,7 +151,7 @@ func createLogs(
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
// explicitly disable since we rely on http.Client timeout logic.
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
exporterhelper.WithRetry(oCfg.RetryConfig),
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
exporterhelper.WithQueue(oCfg.QueueConfig))
}

Expand All @@ -174,6 +177,6 @@ func createProfiles(
exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: false}),
// explicitly disable since we rely on http.Client timeout logic.
exporterhelper.WithTimeout(exporterhelper.TimeoutConfig{Timeout: 0}),
exporterhelper.WithRetry(oCfg.RetryConfig),
exporterhelper.WithRetry(oCfg.RetryConfig.BackOffConfig),
exporterhelper.WithQueue(oCfg.QueueConfig))
}
20 changes: 5 additions & 15 deletions exporter/otlphttpexporter/otlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"net/http"
"net/url"
"runtime"
"slices"
"strconv"
"time"

Expand Down Expand Up @@ -222,7 +223,7 @@ func (e *baseExporter) export(ctx context.Context, url string, request []byte, p
}
formattedErr = statusutil.NewStatusFromMsgAndHTTPCode(errString, resp.StatusCode).Err()

if !isRetryableStatusCode(resp.StatusCode) {
if !e.isRetryableStatusCode(resp.StatusCode) {
return consumererror.NewPermanent(formattedErr)
}

Expand Down Expand Up @@ -251,21 +252,10 @@ func (e *baseExporter) export(ctx context.Context, url string, request []byte, p
return formattedErr
}

// Determine if the status code is retryable according to the specification.
// Determine if the status code is retryable based on the configured retryable statuses.
// For more, see https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#failures-1
func isRetryableStatusCode(code int) bool {
switch code {
case http.StatusTooManyRequests:
return true
case http.StatusBadGateway:
return true
case http.StatusServiceUnavailable:
return true
case http.StatusGatewayTimeout:
return true
default:
return false
}
func (e *baseExporter) isRetryableStatusCode(code int) bool {
return slices.Contains(e.config.RetryConfig.RetryableStatuses, code)
}

func readResponseBody(resp *http.Response) ([]byte, error) {
Expand Down
Loading