Skip to content

Commit 551c956

Browse files
authored
feat: implement EmptyModelOutputError for handling empty responses across providers and enhance retry logic in ToolLoopAgentRunner (#7104)
closes: #7044
1 parent 1070804 commit 551c956

File tree

9 files changed

+320
-38
lines changed

9 files changed

+320
-38
lines changed

astrbot/core/agent/runners/tool_loop_agent_runner.py

Lines changed: 64 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,18 @@
1616
TextContent,
1717
TextResourceContents,
1818
)
19+
from tenacity import (
20+
AsyncRetrying,
21+
retry_if_exception_type,
22+
stop_after_attempt,
23+
wait_exponential,
24+
)
1925

2026
from astrbot import logger
2127
from astrbot.core.agent.message import ImageURLPart, TextPart, ThinkPart
2228
from astrbot.core.agent.tool import ToolSet
2329
from astrbot.core.agent.tool_image_cache import tool_image_cache
30+
from astrbot.core.exceptions import EmptyModelOutputError
2431
from astrbot.core.message.components import Json
2532
from astrbot.core.message.message_event_result import (
2633
MessageChain,
@@ -95,6 +102,10 @@ class _ToolExecutionInterrupted(Exception):
95102

96103

97104
class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
105+
EMPTY_OUTPUT_RETRY_ATTEMPTS = 3
106+
EMPTY_OUTPUT_RETRY_WAIT_MIN_S = 1
107+
EMPTY_OUTPUT_RETRY_WAIT_MAX_S = 4
108+
98109
def _get_persona_custom_error_message(self) -> str | None:
99110
"""Read persona-level custom error message from event extras when available."""
100111
event = getattr(self.run_context.context, "event", None)
@@ -279,31 +290,61 @@ async def _iter_llm_responses_with_fallback(
279290
candidate_id,
280291
)
281292
self.provider = candidate
282-
has_stream_output = False
283293
try:
284-
async for resp in self._iter_llm_responses(include_model=idx == 0):
285-
if resp.is_chunk:
286-
has_stream_output = True
287-
yield resp
288-
continue
289-
290-
if (
291-
resp.role == "err"
292-
and not has_stream_output
293-
and (not is_last_candidate)
294-
):
295-
last_err_response = resp
296-
logger.warning(
297-
"Chat Model %s returns error response, trying fallback to next provider.",
298-
candidate_id,
299-
)
300-
break
301-
302-
yield resp
303-
return
294+
retrying = AsyncRetrying(
295+
retry=retry_if_exception_type(EmptyModelOutputError),
296+
stop=stop_after_attempt(self.EMPTY_OUTPUT_RETRY_ATTEMPTS),
297+
wait=wait_exponential(
298+
multiplier=1,
299+
min=self.EMPTY_OUTPUT_RETRY_WAIT_MIN_S,
300+
max=self.EMPTY_OUTPUT_RETRY_WAIT_MAX_S,
301+
),
302+
reraise=True,
303+
)
304304

305-
if has_stream_output:
306-
return
305+
async for attempt in retrying:
306+
has_stream_output = False
307+
with attempt:
308+
try:
309+
async for resp in self._iter_llm_responses(
310+
include_model=idx == 0
311+
):
312+
if resp.is_chunk:
313+
has_stream_output = True
314+
yield resp
315+
continue
316+
317+
if (
318+
resp.role == "err"
319+
and not has_stream_output
320+
and (not is_last_candidate)
321+
):
322+
last_err_response = resp
323+
logger.warning(
324+
"Chat Model %s returns error response, trying fallback to next provider.",
325+
candidate_id,
326+
)
327+
break
328+
329+
yield resp
330+
return
331+
332+
if has_stream_output:
333+
return
334+
except EmptyModelOutputError:
335+
if has_stream_output:
336+
logger.warning(
337+
"Chat Model %s returned empty output after streaming started; skipping empty-output retry.",
338+
candidate_id,
339+
)
340+
else:
341+
logger.warning(
342+
"Chat Model %s returned empty output on attempt %s/%s.",
343+
candidate_id,
344+
attempt.retry_state.attempt_number,
345+
self.EMPTY_OUTPUT_RETRY_ATTEMPTS,
346+
)
347+
raise
307348
except Exception as exc: # noqa: BLE001
308349
last_exception = exc
309350
logger.warning(

astrbot/core/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,7 @@ class AstrBotError(Exception):
77

88
class ProviderNotFoundError(AstrBotError):
99
"""Raised when a specified provider is not found."""
10+
11+
12+
class EmptyModelOutputError(AstrBotError):
    """Signal that a model response held no usable assistant output.

    Being a distinct ``AstrBotError`` subclass lets callers catch this
    condition specifically instead of matching on a generic ``Exception``.
    """

astrbot/core/provider/sources/anthropic_source.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from astrbot import logger
1414
from astrbot.api.provider import Provider
1515
from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
16+
from astrbot.core.exceptions import EmptyModelOutputError
1617
from astrbot.core.provider.entities import LLMResponse, TokenUsage
1718
from astrbot.core.provider.func_tool_manager import ToolSet
1819
from astrbot.core.utils.io import download_image_by_url
@@ -29,6 +30,23 @@
2930
"Anthropic Claude API 提供商适配器",
3031
)
3132
class ProviderAnthropic(Provider):
33+
@staticmethod
34+
def _ensure_usable_response(
35+
llm_response: LLMResponse,
36+
*,
37+
completion_id: str | None = None,
38+
stop_reason: str | None = None,
39+
) -> None:
40+
has_text_output = bool((llm_response.completion_text or "").strip())
41+
has_reasoning_output = bool(llm_response.reasoning_content.strip())
42+
has_tool_output = bool(llm_response.tools_call_args)
43+
if has_text_output or has_reasoning_output or has_tool_output:
44+
return
45+
raise EmptyModelOutputError(
46+
"Anthropic completion has no usable output. "
47+
f"completion_id={completion_id}, stop_reason={stop_reason}"
48+
)
49+
3250
@staticmethod
3351
def _normalize_custom_headers(provider_config: dict) -> dict[str, str] | None:
3452
custom_headers = provider_config.get("custom_headers", {})
@@ -289,7 +307,9 @@ async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
289307
logger.debug(f"completion: {completion}")
290308

291309
if len(completion.content) == 0:
292-
raise Exception("API 返回的 completion 为空。")
310+
raise EmptyModelOutputError(
311+
f"Anthropic completion is empty. completion_id={completion.id}"
312+
)
293313

294314
llm_response = LLMResponse(role="assistant")
295315

@@ -317,10 +337,9 @@ async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
317337
if not llm_response.completion_text and not llm_response.tools_call_args:
318338
# Guard clause: raise early if no valid content at all
319339
if not llm_response.reasoning_content:
320-
raise ValueError(
321-
f"Anthropic API returned unparsable completion: "
322-
f"no text, tool_use, or thinking content found. "
323-
f"Completion: {completion}"
340+
raise EmptyModelOutputError(
341+
"Anthropic completion has no usable output. "
342+
f"completion_id={completion.id}, stop_reason={completion.stop_reason}"
324343
)
325344

326345
# We have reasoning content (ThinkingBlock) - this is valid
@@ -330,6 +349,11 @@ async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
330349
)
331350
llm_response.completion_text = "" # Ensure empty string, not None
332351

352+
self._ensure_usable_response(
353+
llm_response,
354+
completion_id=completion.id,
355+
stop_reason=completion.stop_reason,
356+
)
333357
return llm_response
334358

335359
async def _query_stream(
@@ -481,6 +505,11 @@ async def _query_stream(
481505
final_response.tools_call_name = [call["name"] for call in final_tool_calls]
482506
final_response.tools_call_ids = [call["id"] for call in final_tool_calls]
483507

508+
self._ensure_usable_response(
509+
final_response,
510+
completion_id=id,
511+
stop_reason=None,
512+
)
484513
yield final_response
485514

486515
async def text_chat(

astrbot/core/provider/sources/gemini_source.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from astrbot import logger
1515
from astrbot.api.provider import Provider
1616
from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
17+
from astrbot.core.exceptions import EmptyModelOutputError
1718
from astrbot.core.message.message_event_result import MessageChain
1819
from astrbot.core.provider.entities import LLMResponse, TokenUsage
1920
from astrbot.core.provider.func_tool_manager import ToolSet
@@ -444,6 +445,23 @@ def _extract_usage(
444445
output=usage_metadata.candidates_token_count or 0,
445446
)
446447

448+
@staticmethod
449+
def _ensure_usable_response(
450+
llm_response: LLMResponse,
451+
*,
452+
response_id: str | None = None,
453+
finish_reason: str | None = None,
454+
) -> None:
455+
has_text_output = bool((llm_response.completion_text or "").strip())
456+
has_reasoning_output = bool(llm_response.reasoning_content.strip())
457+
has_tool_output = bool(llm_response.tools_call_args)
458+
if has_text_output or has_reasoning_output or has_tool_output:
459+
return
460+
raise EmptyModelOutputError(
461+
"Gemini completion has no usable output. "
462+
f"response_id={response_id}, finish_reason={finish_reason}"
463+
)
464+
447465
def _process_content_parts(
448466
self,
449467
candidate: types.Candidate,
@@ -452,7 +470,10 @@ def _process_content_parts(
452470
"""处理内容部分并构建消息链"""
453471
if not candidate.content:
454472
logger.warning(f"收到的 candidate.content 为空: {candidate}")
455-
raise Exception("API 返回的 candidate.content 为空。")
473+
raise EmptyModelOutputError(
474+
"Gemini candidate content is empty. "
475+
f"finish_reason={candidate.finish_reason}"
476+
)
456477

457478
finish_reason = candidate.finish_reason
458479
result_parts: list[types.Part] | None = candidate.content.parts
@@ -474,7 +495,10 @@ def _process_content_parts(
474495

475496
if not result_parts:
476497
logger.warning(f"收到的 candidate.content.parts 为空: {candidate}")
477-
raise Exception("API 返回的 candidate.content.parts 为空。")
498+
raise EmptyModelOutputError(
499+
"Gemini candidate content parts are empty. "
500+
f"finish_reason={candidate.finish_reason}"
501+
)
478502

479503
# 提取 reasoning content
480504
reasoning = self._extract_reasoning_content(candidate)
@@ -525,7 +549,14 @@ def _process_content_parts(
525549
if ts := part.thought_signature:
526550
# only keep the last thinking signature
527551
llm_response.reasoning_signature = base64.b64encode(ts).decode("utf-8")
528-
return MessageChain(chain=chain)
552+
chain_result = MessageChain(chain=chain)
553+
llm_response.result_chain = chain_result
554+
self._ensure_usable_response(
555+
llm_response,
556+
response_id=None,
557+
finish_reason=str(finish_reason) if finish_reason is not None else None,
558+
)
559+
return chain_result
529560

530561
async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
531562
"""非流式请求 Gemini API"""
@@ -727,9 +758,12 @@ async def _query_stream(
727758
final_response.result_chain = MessageChain(
728759
chain=[Comp.Plain(accumulated_text)],
729760
)
730-
elif not final_response.result_chain:
731-
# If no text was accumulated and no final response was set, provide empty space
732-
final_response.result_chain = MessageChain(chain=[Comp.Plain(" ")])
761+
762+
self._ensure_usable_response(
763+
final_response,
764+
response_id=getattr(final_response, "id", None),
765+
finish_reason=None,
766+
)
733767

734768
yield final_response
735769

astrbot/core/provider/sources/openai_source.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from astrbot.api.provider import Provider
2727
from astrbot.core.agent.message import ContentPart, ImageURLPart, Message, TextPart
2828
from astrbot.core.agent.tool import ToolSet
29+
from astrbot.core.exceptions import EmptyModelOutputError
2930
from astrbot.core.message.message_event_result import MessageChain
3031
from astrbot.core.provider.entities import LLMResponse, TokenUsage, ToolCallsResult
3132
from astrbot.core.utils.io import download_image_by_url
@@ -696,7 +697,9 @@ async def _parse_openai_completion(
696697
llm_response = LLMResponse("assistant")
697698

698699
if not completion.choices:
699-
raise Exception("API 返回的 completion 为空。")
700+
raise EmptyModelOutputError(
701+
f"OpenAI completion has no choices. response_id={completion.id}"
702+
)
700703
choice = completion.choices[0]
701704

702705
# parse the text completion
@@ -714,6 +717,10 @@ async def _parse_openai_completion(
714717
# Also clean up orphan </think> tags that may leak from some models
715718
completion_text = re.sub(r"</think>\s*$", "", completion_text).strip()
716719
llm_response.result_chain = MessageChain().message(completion_text)
720+
elif refusal := getattr(choice.message, "refusal", None):
721+
refusal_text = self._normalize_content(refusal)
722+
if refusal_text:
723+
llm_response.result_chain = MessageChain().message(refusal_text)
717724

718725
# parse the reasoning content if any
719726
# the priority is higher than the <think> tag extraction
@@ -761,9 +768,18 @@ async def _parse_openai_completion(
761768
raise Exception(
762769
"API 返回的 completion 由于内容安全过滤被拒绝(非 AstrBot)。",
763770
)
764-
if llm_response.completion_text is None and not llm_response.tools_call_args:
765-
logger.error(f"API 返回的 completion 无法解析:{completion}。")
766-
raise Exception(f"API 返回的 completion 无法解析:{completion}。")
771+
has_text_output = bool((llm_response.completion_text or "").strip())
772+
has_reasoning_output = bool(llm_response.reasoning_content.strip())
773+
if (
774+
not has_text_output
775+
and not has_reasoning_output
776+
and not llm_response.tools_call_args
777+
):
778+
logger.error(f"OpenAI completion has no usable output: {completion}.")
779+
raise EmptyModelOutputError(
780+
"OpenAI completion has no usable output. "
781+
f"response_id={completion.id}, finish_reason={choice.finish_reason}"
782+
)
767783

768784
llm_response.raw_completion = completion
769785
llm_response.id = completion.id

tests/test_anthropic_kimi_code_provider.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import httpx
2+
import pytest
23

34
import astrbot.core.provider.sources.anthropic_source as anthropic_source
45
import astrbot.core.provider.sources.kimi_code_source as kimi_code_source
6+
from astrbot.core.exceptions import EmptyModelOutputError
7+
from astrbot.core.provider.entities import LLMResponse
58

69

710
class _FakeAsyncAnthropic:
@@ -79,3 +82,14 @@ def test_kimi_code_provider_restores_required_user_agent_when_blank(monkeypatch)
7982
assert provider.custom_headers == {
8083
"User-Agent": kimi_code_source.KIMI_CODE_USER_AGENT,
8184
}
85+
86+
87+
def test_anthropic_empty_output_raises_empty_model_output_error():
    """A freshly built assistant response with no output set is rejected."""
    validate = anthropic_source.ProviderAnthropic._ensure_usable_response
    blank_response = LLMResponse(role="assistant")

    with pytest.raises(EmptyModelOutputError):
        validate(blank_response, completion_id="msg_empty", stop_reason="end_turn")

tests/test_gemini_source.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import pytest
2+
3+
from astrbot.core.exceptions import EmptyModelOutputError
4+
from astrbot.core.provider.entities import LLMResponse
5+
from astrbot.core.provider.sources.gemini_source import ProviderGoogleGenAI
6+
7+
8+
def test_gemini_empty_output_raises_empty_model_output_error():
    """A freshly built assistant response with no output set is rejected."""
    validate = ProviderGoogleGenAI._ensure_usable_response
    blank_response = LLMResponse(role="assistant")

    with pytest.raises(EmptyModelOutputError):
        validate(blank_response, response_id="resp_empty", finish_reason="STOP")
17+
18+
19+
def test_gemini_reasoning_only_output_is_allowed():
    """A response whose only content is reasoning text passes validation."""
    validate = ProviderGoogleGenAI._ensure_usable_response
    reasoning_only = LLMResponse(
        role="assistant",
        reasoning_content="chain of thought placeholder",
    )

    # No assertion needed: the check passes simply by not raising.
    validate(reasoning_only, response_id="resp_reasoning", finish_reason="STOP")

0 commit comments

Comments
 (0)