From df882783ca7548e3e635d368f153847002899458 Mon Sep 17 00:00:00 2001 From: 0xhsn Date: Tue, 27 Jan 2026 16:52:24 +0100 Subject: [PATCH] Fix duplicate API calls causing hangs with Anthropic streaming Remove duplicate litellm.acompletion() calls that were causing every streaming request to make two identical API calls, with the first result being discarded. This doubled latency and caused apparent hanging behavior, especially noticeable with the Anthropic API. --- src/cai/sdk/agents/models/openai_chatcompletions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py index 8931edd63..b981d5da4 100644 --- a/src/cai/sdk/agents/models/openai_chatcompletions.py +++ b/src/cai/sdk/agents/models/openai_chatcompletions.py @@ -3305,7 +3305,6 @@ async def _fetch_response_litellm_openai( try: if stream: # Standard LiteLLM handling for streaming - ret = await litellm.acompletion(**kwargs) stream_obj = await litellm.acompletion(**kwargs) response = Response( @@ -3359,7 +3358,6 @@ async def _fetch_response_litellm_openai( kwargs["messages"] = messages # Retry once, silently if stream: - ret = await litellm.acompletion(**kwargs) stream_obj = await litellm.acompletion(**kwargs) response = Response( id=FAKE_RESPONSES_ID,