From df882783ca7548e3e635d368f153847002899458 Mon Sep 17 00:00:00 2001 From: 0xhsn Date: Tue, 27 Jan 2026 16:52:24 +0100 Subject: [PATCH] Fix duplicate API calls causing hangs with Anthropic streaming Remove duplicate litellm.acompletion() calls that were causing every streaming request to make two identical API calls, with the first result being discarded. This doubled latency and caused apparent hanging behavior, especially noticeable with the Anthropic API. --- src/cai/sdk/agents/models/openai_chatcompletions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py index 8931edd63..b981d5da4 100644 --- a/src/cai/sdk/agents/models/openai_chatcompletions.py +++ b/src/cai/sdk/agents/models/openai_chatcompletions.py @@ -3305,7 +3305,6 @@ async def _fetch_response_litellm_openai( try: if stream: # Standard LiteLLM handling for streaming - ret = await litellm.acompletion(**kwargs) stream_obj = await litellm.acompletion(**kwargs) response = Response( @@ -3359,7 +3358,6 @@ async def _fetch_response_litellm_openai( kwargs["messages"] = messages # Retry once, silently if stream: - ret = await litellm.acompletion(**kwargs) stream_obj = await litellm.acompletion(**kwargs) response = Response( id=FAKE_RESPONSES_ID,