Skip to content

Commit f1449fe

Browse files
Evrard-NilPierreLeGuenclaude
authored
fix: don't retry and log at warn for 4xx provider errors (#424)
* fix: don't retry and log at warn for 4xx provider errors HTTP 4xx errors (e.g., max_tokens exceeding context window) are client errors that won't be resolved by retrying with another provider. Return the original HttpError immediately so it's logged at warn, not error. * fix: exclude 429/408 from non-retryable 4xx, sanitize errors, add tests Address PR review comments: - Exclude 429 (rate limit) and 408 (timeout) from non-retryable 4xx errors since other providers may have capacity - Sanitize error messages before returning on 4xx path to prevent leaking internal infrastructure details (IPs, URLs) - Use 400..=499 inclusive range for readability - Add tests for 4xx short-circuit, 429/408 retry, and sanitization Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Pierre LE GUEN <26087574+PierreLeGuen@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent febae58 commit f1449fe

1 file changed

Lines changed: 146 additions & 0 deletions

File tree

  • crates/services/src/inference_provider_pool

crates/services/src/inference_provider_pool/mod.rs

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,26 @@ impl InferenceProviderPool {
928928
return Ok((result, provider.clone()));
929929
}
930930
Err(e) => {
931+
// For HTTP client errors (4xx), don't retry with other providers.
932+
// The request itself is invalid (e.g., too many tokens), so retrying won't help.
933+
// Exception: 429 (rate limit) and 408 (request timeout) are retryable
934+
// as other providers may have capacity or better connectivity.
935+
if let CompletionError::HttpError { status_code, .. } = &e {
936+
if (400..=499).contains(status_code)
937+
&& *status_code != 429
938+
&& *status_code != 408
939+
{
940+
tracing::warn!(
941+
model_id = %model_id,
942+
attempt = attempt + 1,
943+
status_code,
944+
operation = operation_name,
945+
"Client error from provider, not retrying"
946+
);
947+
return Err(Self::sanitize_completion_error(e, model_id));
948+
}
949+
}
950+
931951
// Log the failure for debugging
932952
tracing::warn!(
933953
model_id = %model_id,
@@ -2393,4 +2413,130 @@ mod tests {
23932413
let external = pool.external_providers.read().await;
23942414
assert_eq!(external.len(), 1);
23952415
}
2416+
2417+
// ==================== 4xx Retry Behavior Tests ====================
2418+
2419+
/// Helper to create a pool with a registered mock provider
2420+
async fn pool_with_mock_provider() -> (InferenceProviderPool, String) {
2421+
let pool = InferenceProviderPool::new(
2422+
"http://localhost:8080/models".to_string(),
2423+
None,
2424+
5,
2425+
ExternalProvidersConfig::default(),
2426+
);
2427+
let mock_provider = Arc::new(inference_providers::mock::MockProvider::new());
2428+
let model_id = "Qwen/Qwen3-30B-A3B-Instruct-2507".to_string();
2429+
pool.register_provider(model_id.clone(), mock_provider)
2430+
.await;
2431+
(pool, model_id)
2432+
}
2433+
2434+
#[tokio::test]
2435+
async fn test_4xx_error_does_not_retry() {
2436+
let (pool, model_id) = pool_with_mock_provider().await;
2437+
2438+
let result: Result<((), _), _> = pool
2439+
.retry_with_fallback(&model_id, "test_op", None, |_provider| async {
2440+
Err(CompletionError::HttpError {
2441+
status_code: 400,
2442+
message: "Bad request".to_string(),
2443+
is_external: false,
2444+
})
2445+
})
2446+
.await;
2447+
2448+
assert!(result.is_err());
2449+
let err = result.err().expect("Expected an error");
2450+
match err {
2451+
CompletionError::HttpError { status_code, .. } => {
2452+
assert_eq!(status_code, 400);
2453+
}
2454+
other => panic!("Expected HttpError, got: {:?}", other),
2455+
}
2456+
}
2457+
2458+
#[tokio::test]
2459+
async fn test_429_error_retries_with_fallback() {
2460+
let (pool, model_id) = pool_with_mock_provider().await;
2461+
2462+
// 429 should NOT short-circuit - it should fall through to the normal retry path
2463+
let result: Result<((), _), _> = pool
2464+
.retry_with_fallback(&model_id, "test_op", None, |_provider| async {
2465+
Err(CompletionError::HttpError {
2466+
status_code: 429,
2467+
message: "Rate limit exceeded".to_string(),
2468+
is_external: false,
2469+
})
2470+
})
2471+
.await;
2472+
2473+
// Should fail after trying all providers (not short-circuit on 429)
2474+
// The error should be sanitized (go through the normal retry path with sanitize_completion_error)
2475+
assert!(result.is_err());
2476+
let err = result.err().expect("Expected an error");
2477+
let err_msg = err.to_string();
2478+
assert!(
2479+
err_msg.contains("Provider failed for model"),
2480+
"Expected sanitized error (went through retry path), got: {}",
2481+
err_msg
2482+
);
2483+
}
2484+
2485+
#[tokio::test]
2486+
async fn test_408_error_retries_with_fallback() {
2487+
let (pool, model_id) = pool_with_mock_provider().await;
2488+
2489+
// 408 should NOT short-circuit - it should fall through to the normal retry path
2490+
let result: Result<((), _), _> = pool
2491+
.retry_with_fallback(&model_id, "test_op", None, |_provider| async {
2492+
Err(CompletionError::HttpError {
2493+
status_code: 408,
2494+
message: "Request timeout".to_string(),
2495+
is_external: false,
2496+
})
2497+
})
2498+
.await;
2499+
2500+
assert!(result.is_err());
2501+
let err = result.err().expect("Expected an error");
2502+
let err_msg = err.to_string();
2503+
assert!(
2504+
err_msg.contains("Provider failed for model"),
2505+
"Expected sanitized error (went through retry path), got: {}",
2506+
err_msg
2507+
);
2508+
}
2509+
2510+
#[tokio::test]
2511+
async fn test_4xx_error_is_sanitized() {
2512+
let (pool, model_id) = pool_with_mock_provider().await;
2513+
2514+
let result: Result<((), _), _> = pool
2515+
.retry_with_fallback(&model_id, "test_op", None, |_provider| async {
2516+
Err(CompletionError::HttpError {
2517+
status_code: 400,
2518+
message: "Error at http://192.168.0.1:8000/v1/chat/completions".to_string(),
2519+
is_external: false,
2520+
})
2521+
})
2522+
.await;
2523+
2524+
assert!(result.is_err());
2525+
let err = result.err().expect("Expected an error");
2526+
match err {
2527+
CompletionError::HttpError { message, .. } => {
2528+
assert!(
2529+
!message.contains("192.168.0.1"),
2530+
"Error message should be sanitized, but contained IP: {}",
2531+
message
2532+
);
2533+
assert!(
2534+
message.contains("[URL_REDACTED]"),
2535+
"Error message should contain redacted URL marker: {}",
2536+
message
2537+
);
2538+
}
2539+
other => panic!("Expected HttpError, got: {:?}", other),
2540+
}
2541+
}
23962542
}

0 commit comments

Comments
 (0)