@@ -928,6 +928,26 @@ impl InferenceProviderPool {
928928 return Ok ( ( result, provider. clone ( ) ) ) ;
929929 }
930930 Err ( e) => {
931+ // For HTTP client errors (4xx), don't retry with other providers.
932+ // The request itself is invalid (e.g., too many tokens), so retrying won't help.
933+ // Exception: 429 (rate limit) and 408 (request timeout) are retryable
934+ // as other providers may have capacity or better connectivity.
935+ if let CompletionError :: HttpError { status_code, .. } = & e {
936+ if ( 400 ..=499 ) . contains ( status_code)
937+ && * status_code != 429
938+ && * status_code != 408
939+ {
940+ tracing:: warn!(
941+ model_id = %model_id,
942+ attempt = attempt + 1 ,
943+ status_code,
944+ operation = operation_name,
945+ "Client error from provider, not retrying"
946+ ) ;
947+ return Err ( Self :: sanitize_completion_error ( e, model_id) ) ;
948+ }
949+ }
950+
931951 // Log the failure for debugging
932952 tracing:: warn!(
933953 model_id = %model_id,
@@ -2393,4 +2413,130 @@ mod tests {
23932413 let external = pool. external_providers . read ( ) . await ;
23942414 assert_eq ! ( external. len( ) , 1 ) ;
23952415 }
2416+
2417+ // ==================== 4xx Retry Behavior Tests ====================
2418+
2419+ /// Helper to create a pool with a registered mock provider
2420+ async fn pool_with_mock_provider ( ) -> ( InferenceProviderPool , String ) {
2421+ let pool = InferenceProviderPool :: new (
2422+ "http://localhost:8080/models" . to_string ( ) ,
2423+ None ,
2424+ 5 ,
2425+ ExternalProvidersConfig :: default ( ) ,
2426+ ) ;
2427+ let mock_provider = Arc :: new ( inference_providers:: mock:: MockProvider :: new ( ) ) ;
2428+ let model_id = "Qwen/Qwen3-30B-A3B-Instruct-2507" . to_string ( ) ;
2429+ pool. register_provider ( model_id. clone ( ) , mock_provider)
2430+ . await ;
2431+ ( pool, model_id)
2432+ }
2433+
2434+ #[ tokio:: test]
2435+ async fn test_4xx_error_does_not_retry ( ) {
2436+ let ( pool, model_id) = pool_with_mock_provider ( ) . await ;
2437+
2438+ let result: Result < ( ( ) , _ ) , _ > = pool
2439+ . retry_with_fallback ( & model_id, "test_op" , None , |_provider| async {
2440+ Err ( CompletionError :: HttpError {
2441+ status_code : 400 ,
2442+ message : "Bad request" . to_string ( ) ,
2443+ is_external : false ,
2444+ } )
2445+ } )
2446+ . await ;
2447+
2448+ assert ! ( result. is_err( ) ) ;
2449+ let err = result. err ( ) . expect ( "Expected an error" ) ;
2450+ match err {
2451+ CompletionError :: HttpError { status_code, .. } => {
2452+ assert_eq ! ( status_code, 400 ) ;
2453+ }
2454+ other => panic ! ( "Expected HttpError, got: {:?}" , other) ,
2455+ }
2456+ }
2457+
2458+ #[ tokio:: test]
2459+ async fn test_429_error_retries_with_fallback ( ) {
2460+ let ( pool, model_id) = pool_with_mock_provider ( ) . await ;
2461+
2462+ // 429 should NOT short-circuit - it should fall through to the normal retry path
2463+ let result: Result < ( ( ) , _ ) , _ > = pool
2464+ . retry_with_fallback ( & model_id, "test_op" , None , |_provider| async {
2465+ Err ( CompletionError :: HttpError {
2466+ status_code : 429 ,
2467+ message : "Rate limit exceeded" . to_string ( ) ,
2468+ is_external : false ,
2469+ } )
2470+ } )
2471+ . await ;
2472+
2473+ // Should fail after trying all providers (not short-circuit on 429)
2474+ // The error should be sanitized (go through the normal retry path with sanitize_completion_error)
2475+ assert ! ( result. is_err( ) ) ;
2476+ let err = result. err ( ) . expect ( "Expected an error" ) ;
2477+ let err_msg = err. to_string ( ) ;
2478+ assert ! (
2479+ err_msg. contains( "Provider failed for model" ) ,
2480+ "Expected sanitized error (went through retry path), got: {}" ,
2481+ err_msg
2482+ ) ;
2483+ }
2484+
2485+ #[ tokio:: test]
2486+ async fn test_408_error_retries_with_fallback ( ) {
2487+ let ( pool, model_id) = pool_with_mock_provider ( ) . await ;
2488+
2489+ // 408 should NOT short-circuit - it should fall through to the normal retry path
2490+ let result: Result < ( ( ) , _ ) , _ > = pool
2491+ . retry_with_fallback ( & model_id, "test_op" , None , |_provider| async {
2492+ Err ( CompletionError :: HttpError {
2493+ status_code : 408 ,
2494+ message : "Request timeout" . to_string ( ) ,
2495+ is_external : false ,
2496+ } )
2497+ } )
2498+ . await ;
2499+
2500+ assert ! ( result. is_err( ) ) ;
2501+ let err = result. err ( ) . expect ( "Expected an error" ) ;
2502+ let err_msg = err. to_string ( ) ;
2503+ assert ! (
2504+ err_msg. contains( "Provider failed for model" ) ,
2505+ "Expected sanitized error (went through retry path), got: {}" ,
2506+ err_msg
2507+ ) ;
2508+ }
2509+
2510+ #[ tokio:: test]
2511+ async fn test_4xx_error_is_sanitized ( ) {
2512+ let ( pool, model_id) = pool_with_mock_provider ( ) . await ;
2513+
2514+ let result: Result < ( ( ) , _ ) , _ > = pool
2515+ . retry_with_fallback ( & model_id, "test_op" , None , |_provider| async {
2516+ Err ( CompletionError :: HttpError {
2517+ status_code : 400 ,
2518+ message : "Error at http://192.168.0.1:8000/v1/chat/completions" . to_string ( ) ,
2519+ is_external : false ,
2520+ } )
2521+ } )
2522+ . await ;
2523+
2524+ assert ! ( result. is_err( ) ) ;
2525+ let err = result. err ( ) . expect ( "Expected an error" ) ;
2526+ match err {
2527+ CompletionError :: HttpError { message, .. } => {
2528+ assert ! (
2529+ !message. contains( "192.168.0.1" ) ,
2530+ "Error message should be sanitized, but contained IP: {}" ,
2531+ message
2532+ ) ;
2533+ assert ! (
2534+ message. contains( "[URL_REDACTED]" ) ,
2535+ "Error message should contain redacted URL marker: {}" ,
2536+ message
2537+ ) ;
2538+ }
2539+ other => panic ! ( "Expected HttpError, got: {:?}" , other) ,
2540+ }
2541+ }
23962542}
0 commit comments