Skip to content

Commit 94e3d9a

Browse files
fix(agent): restore safe non-streaming fallback after stream failures (NousResearch#3020)
After streaming retries are exhausted on transient errors, fall back to non-streaming instead of propagating the error. Also fall back for any other pre-delivery stream error (not just 'streaming not supported'). Added user-facing message when streaming is not supported by a model/ provider, directing users to set display.streaming: false in config.yaml to avoid the fallback delay. Cherry-picked from PR NousResearch#3008 by kshitijk4poor. Added UX message for streaming-not-supported detection. Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
1 parent 0dcd6ab commit 94e3d9a

File tree

2 files changed

+68
-21
lines changed

2 files changed

+68
-21
lines changed

run_agent.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3824,10 +3824,8 @@ def _call():
38243824
)
38253825

38263826
if _is_timeout or _is_conn_err:
3827-
# Transient network / timeout error. Retry the
3828-
# streaming request with a fresh connection rather
3829-
# than falling back to non-streaming (which would
3830-
# hang for up to 15 min on the same dead server).
3827+
# Transient network / timeout error. Retry the
3828+
# streaming request with a fresh connection first.
38313829
if _stream_attempt < _max_stream_retries:
38323830
logger.info(
38333831
"Streaming attempt %s/%s failed (%s: %s), "
@@ -3845,30 +3843,34 @@ def _call():
38453843
)
38463844
request_client_holder["client"] = None
38473845
continue
3848-
# Exhausted retries — propagate to outer loop
38493846
logger.warning(
3850-
"Streaming exhausted %s retries on transient error: %s",
3847+
"Streaming exhausted %s retries on transient error, "
3848+
"falling back to non-streaming: %s",
38513849
_max_stream_retries + 1,
38523850
e,
38533851
)
3854-
result["error"] = e
3855-
return
3856-
3857-
# Non-transient error (e.g. "streaming not supported",
3858-
# auth error, 4xx). Fall back to non-streaming once.
3859-
err_msg = str(e).lower()
3860-
if "stream" in err_msg and "not supported" in err_msg:
3852+
else:
3853+
_err_lower = str(e).lower()
3854+
_is_stream_unsupported = (
3855+
"stream" in _err_lower
3856+
and "not supported" in _err_lower
3857+
)
3858+
if _is_stream_unsupported:
3859+
self._safe_print(
3860+
"\n⚠ Streaming is not supported for this "
3861+
"model/provider. Falling back to non-streaming.\n"
3862+
" To avoid this delay, set display.streaming: false "
3863+
"in config.yaml\n"
3864+
)
38613865
logger.info(
3862-
"Streaming not supported, falling back to non-streaming: %s", e
3866+
"Streaming failed before delivery, falling back to non-streaming: %s",
3867+
e,
38633868
)
3864-
try:
3865-
result["response"] = self._interruptible_api_call(api_kwargs)
3866-
except Exception as fallback_err:
3867-
result["error"] = fallback_err
3868-
return
38693869

3870-
# Unknown error — propagate to outer retry loop
3871-
result["error"] = e
3870+
try:
3871+
result["response"] = self._interruptible_api_call(api_kwargs)
3872+
except Exception as fallback_err:
3873+
result["error"] = fallback_err
38723874
return
38733875
finally:
38743876
request_client = request_client_holder.get("client")

tests/test_streaming.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,51 @@ def test_fallback_error_propagates(self, mock_close, mock_create, mock_non_strea
487487
with pytest.raises(Exception, match="Rate limit exceeded"):
488488
agent._interruptible_streaming_api_call({})
489489

490+
@patch("run_agent.AIAgent._interruptible_api_call")
491+
@patch("run_agent.AIAgent._create_request_openai_client")
492+
@patch("run_agent.AIAgent._close_request_openai_client")
493+
def test_exhausted_transient_stream_error_falls_back(self, mock_close, mock_create, mock_non_stream):
494+
"""Transient stream errors retry first, then fall back after retries are exhausted."""
495+
from run_agent import AIAgent
496+
import httpx
497+
498+
mock_client = MagicMock()
499+
mock_client.chat.completions.create.side_effect = httpx.ConnectError("socket closed")
500+
mock_create.return_value = mock_client
501+
502+
fallback_response = SimpleNamespace(
503+
id="fallback",
504+
model="test",
505+
choices=[SimpleNamespace(
506+
index=0,
507+
message=SimpleNamespace(
508+
role="assistant",
509+
content="fallback after retries exhausted",
510+
tool_calls=None,
511+
reasoning_content=None,
512+
),
513+
finish_reason="stop",
514+
)],
515+
usage=None,
516+
)
517+
mock_non_stream.return_value = fallback_response
518+
519+
agent = AIAgent(
520+
model="test/model",
521+
quiet_mode=True,
522+
skip_context_files=True,
523+
skip_memory=True,
524+
)
525+
agent.api_mode = "chat_completions"
526+
agent._interrupt_requested = False
527+
528+
response = agent._interruptible_streaming_api_call({})
529+
530+
assert response.choices[0].message.content == "fallback after retries exhausted"
531+
assert mock_client.chat.completions.create.call_count == 3
532+
mock_non_stream.assert_called_once()
533+
assert mock_close.call_count >= 1
534+
490535

491536
# ── Test: Reasoning Streaming ────────────────────────────────────────────
492537

0 commit comments

Comments
 (0)