@@ -237,6 +237,11 @@ def _is_stream_transient_error(exc: BaseException) -> bool:
237237
238238 Transient errors (recoverable=True): network issues, server errors, timeouts.
239239 Permanent errors (recoverable=False): auth, bad request, context window, etc.
240+
241+ NOTE: "Failed to parse tool call arguments" (malformed LLM output) is NOT
242+ transient at the stream level — retrying with the same messages produces the
243+ same malformed output. This error is handled at the EventLoopNode level
244+ where the conversation can be modified before retrying.
240245 """
241246 try :
242247 from litellm .exceptions import (
@@ -917,30 +922,6 @@ async def stream(
917922 # and we skip the retry path — nothing was yielded in vain.)
918923 has_content = accumulated_text or tool_calls_acc
919924 if not has_content :
920- # If the conversation ends with an assistant or tool
921- # message, an empty stream is expected — the LLM has
922- # nothing new to say. Don't burn retries on this;
923- # let the caller (EventLoopNode) decide what to do.
924- # Typical case: client_facing node where the LLM set
925- # all outputs via set_output tool calls, and the tool
926- # results are the last messages.
927- last_role = next (
928- (m ["role" ] for m in reversed (full_messages ) if m .get ("role" ) != "system" ),
929- None ,
930- )
931- if last_role in ("assistant" , "tool" ):
932- logger .warning (
933- "[stream] %s returned empty stream after %s message "
934- "(no text, no tool calls). Treating as a no-op turn. "
935- "If this repeats, the agent may be stuck — check for "
936- "ghost empty assistant messages in conversation history." ,
937- self .model ,
938- last_role ,
939- )
940- for event in tail_events :
941- yield event
942- return
943-
944925 # finish_reason=length means the model exhausted
945926 # max_tokens before producing content. Retrying with
946927 # the same max_tokens will never help.
@@ -958,10 +939,16 @@ async def stream(
958939 yield event
959940 return
960941
961- # Empty stream after a user message — use short fixed
962- # retries, not the rate-limit backoff. This is likely
963- # a deterministic conversation-structure issue, so long
964- # exponential waits don't help.
942+ # Empty stream — always retry regardless of last message
943+ # role. Ghost empty streams after tool results are NOT
944+ # expected no-ops; they create infinite loops when the
945+ # conversation doesn't change between iterations.
946+ # After retries, return the empty result and let the
947+ # caller (EventLoopNode) decide how to handle it.
948+ last_role = next (
949+ (m ["role" ] for m in reversed (full_messages ) if m .get ("role" ) != "system" ),
950+ None ,
951+ )
965952 if attempt < EMPTY_STREAM_MAX_RETRIES :
966953 token_count , token_method = _estimate_tokens (
967954 self .model ,
@@ -974,7 +961,8 @@ async def stream(
974961 attempt = attempt ,
975962 )
976963 logger .warning (
977- f"[stream-retry] { self .model } returned empty stream — "
964+ f"[stream-retry] { self .model } returned empty stream "
965+ f"after { last_role } message — "
978966 f"~{ token_count } tokens ({ token_method } ). "
979967 f"Request dumped to: { dump_path } . "
980968 f"Retrying in { EMPTY_STREAM_RETRY_DELAY } s "
@@ -983,7 +971,17 @@ async def stream(
983971 await asyncio .sleep (EMPTY_STREAM_RETRY_DELAY )
984972 continue
985973
986- # Success (or final attempt) — flush remaining events.
974+ # All retries exhausted — log and return the empty
975+ # result. EventLoopNode's empty-response guard will
976+ # accept the turn if all outputs are set, or handle the
977+ # ghost-stream case if outputs are still missing.
978+ logger .error (
979+ f"[stream] { self .model } returned empty stream after "
980+ f"{ EMPTY_STREAM_MAX_RETRIES } retries "
981+ f"(last_role={ last_role } ). Returning empty result."
982+ )
983+
984+ # Success (or empty after exhausted retries) — flush events.
987985 for event in tail_events :
988986 yield event
989987 return
0 commit comments