Skip to content

Commit 109ec12

Browse files
committed
Recover the ReAct loop from MAX_TOKENS truncation mid-tool-call
When a turn stops on MAX_TOKENS while a tool call is pending, the tool_use block is truncated and never executed. The loop previously exited on any non-TOOL_USE stop reason, leaving a dangling tool_use in history; the next send() then replays it and the provider rejects the request with a 400 (tool_use without a following tool_result). The loop now continues while a tool call is pending and, on a truncated turn, answers each pending tool_use with a synthetic failure result instead of executing it (the call's input is incomplete and unsafe to run). This repairs the conversation and prompts the model to retry with a smaller change. A guard aborts after repeated consecutive truncations to avoid an unrecoverable loop. Also nudges toward smaller edits in the edit_file tool description.
1 parent 8f47a83 commit 109ec12

3 files changed

Lines changed: 150 additions & 15 deletions

File tree

ddev/src/ddev/ai/react/process.py

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,23 @@
1212
from ddev.ai.react.types import ReActResult
1313
from ddev.ai.tools.core.types import ToolResult
1414

15+
# A turn that stops on MAX_TOKENS while a tool call is pending was truncated mid-call: the
# tool_use block is incomplete and was never executed. We must still answer every tool_use
# with a tool_result, otherwise the next send() replays a dangling tool_use and the provider
# rejects the request. This synthetic result repairs the conversation and nudges the model
# toward a smaller follow-up. The text is used as the `error` of a failed ToolResult, one per
# pending tool call of the truncated turn.
TRUNCATED_TOOL_CALL_ERROR = (
    "This tool call was NOT executed: your previous response was truncated after reaching the "
    "maximum output token limit, so the tool call is incomplete. Retry with a smaller, more "
    "targeted change — edit a single small unique region instead of rewriting a whole file, or "
    "split the work across several sequential tool calls. For a full-file rewrite prefer "
    "create_file over one huge edit_file."
)

# Upper bound on back-to-back truncated turns before we give up, to avoid an unrecoverable loop
# where the model keeps emitting an oversized tool call that never fits in the output budget.
# The ReAct loop raises only once its counter *exceeds* this value, so with a bound of 2 the
# first two consecutive truncated turns are repaired and the third one aborts. Any
# non-truncated tool-call turn resets the counter.
MAX_CONSECUTIVE_TRUNCATIONS = 2
31+
1532

1633
class ReActProcess:
1734
"""
@@ -77,6 +94,22 @@ def _is_compact_needed(self, response: AgentResponse) -> bool:
7794
return False
7895
return True
7996

97+
async def _execute_tool_calls(self, tool_calls: list) -> list[ToolResult]:
    """Execute every requested tool concurrently and return one result per call.

    A tool that raises does not abort the batch: the exception is captured by
    ``asyncio.gather(..., return_exceptions=True)`` and reported as a failed
    ``ToolResult`` whose error reads ``"<ExceptionType>: <message>"``.

    Args:
        tool_calls: Tool calls of the current turn; each must expose ``name`` and ``input``.

    Returns:
        The tool results, in the same order as ``tool_calls``.
    """
    pending = [self._tool_registry.run(call.name, call.input) for call in tool_calls]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    results: list[ToolResult] = []
    for outcome in outcomes:
        if isinstance(outcome, ToolResult):
            results.append(outcome)
        else:
            # Anything that is not a ToolResult (in practice a captured exception) is
            # surfaced to the model as a failure instead of crashing the loop.
            results.append(ToolResult(success=False, error=f"{type(outcome).__name__}: {outcome}"))
    return results
107+
108+
@staticmethod
def _truncated_tool_results(tool_calls: list) -> list[ToolResult]:
    """Build one synthetic failure per tool call of a turn cut off by the output token limit.

    Nothing is executed; every pending call is answered with a distinct failed
    ``ToolResult`` carrying ``TRUNCATED_TOOL_CALL_ERROR``.
    """
    synthetic: list[ToolResult] = []
    for _ in tool_calls:
        # A fresh instance per call, so results are never shared between tool calls.
        synthetic.append(ToolResult(success=False, error=TRUNCATED_TOOL_CALL_ERROR))
    return synthetic
112+
80113
async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> ReActResult:
81114
"""
82115
Run the ReAct loop for a single task.
@@ -104,18 +137,24 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re
104137
await self._callbacks.fire_agent_response(self._scope, response, iterations)
105138

106139
# No iteration cap — this is an interactive CLI tool; the user can Ctrl+C to stop.
107-
while response.stop_reason == StopReason.TOOL_USE:
108-
if not response.tool_calls:
109-
raise AgentError("Agent returned stop_reason=TOOL_USE with no tool calls")
110-
111-
raw_results = await asyncio.gather(
112-
*[self._tool_registry.run(tc.name, tc.input) for tc in response.tool_calls],
113-
return_exceptions=True,
114-
)
115-
tool_results: list[ToolResult] = [
116-
r if isinstance(r, ToolResult) else ToolResult(success=False, error=f"{type(r).__name__}: {r}")
117-
for r in raw_results
118-
]
140+
# Loop while a tool call is pending. A MAX_TOKENS turn can also carry a (truncated)
141+
# tool_use that must be answered, so we key off tool_calls rather than the stop reason.
142+
consecutive_truncations = 0
143+
while response.tool_calls:
144+
truncated = response.stop_reason == StopReason.MAX_TOKENS
145+
if truncated:
146+
consecutive_truncations += 1
147+
if consecutive_truncations > MAX_CONSECUTIVE_TRUNCATIONS:
148+
raise AgentError(
149+
"Agent response was truncated by the output token limit on "
150+
f"{consecutive_truncations} consecutive turns while a tool call was "
151+
"pending; aborting to avoid an unrecoverable loop. Reduce the amount "
152+
"of work attempted in a single tool call."
153+
)
154+
tool_results: list[ToolResult] = self._truncated_tool_results(response.tool_calls)
155+
else:
156+
consecutive_truncations = 0
157+
tool_results = await self._execute_tool_calls(response.tool_calls)
119158
total_input += sum(result.total_input_tokens for result in tool_results)
120159
total_output += sum(result.total_output_tokens for result in tool_results)
121160

@@ -140,6 +179,9 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re
140179
total_input += compact_in
141180
total_output += compact_out
142181

182+
if response.stop_reason == StopReason.TOOL_USE:
183+
raise AgentError("Agent returned stop_reason=TOOL_USE with no tool calls")
184+
143185
react_result = ReActResult(
144186
final_response=response,
145187
iterations=iterations,

ddev/src/ddev/ai/tools/fs/edit_file.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ class EditFileInput(BaseToolInput):
1919
Field(
2020
description=(
2121
"Exact non-empty text to replace. Must appear exactly once in the file "
22-
"(hint: include surrounding context if needed)."
22+
"(hint: include surrounding context if needed). Keep this to the smallest "
23+
"unique region that needs changing — do not paste the whole file. To rewrite "
24+
"a file extensively, apply several small edits rather than one huge one."
2325
),
2426
min_length=1,
2527
),
@@ -30,7 +32,9 @@ class EditFileInput(BaseToolInput):
3032
class EditFileTool(FileRegistryTool[EditFileInput]):
3133
"""Edits a file by replacing an exact string with a new one.
3234
Fails if the file was modified since the last read.
33-
old_string must appear exactly once in the file — if it appears multiple times, the call fails."""
35+
old_string must appear exactly once in the file — if it appears multiple times, the call fails.
36+
Prefer small, targeted edits over large ones: a single edit that spans most of the file can
37+
exceed the response token limit and be truncated. Break big rewrites into several edits."""
3438

3539
@property
3640
def name(self) -> str:

ddev/tests/ai/react/test_process.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from ddev.ai.agent.base import BaseAgent
1111
from ddev.ai.agent.build import AgentRuntime
12-
from ddev.ai.agent.exceptions import AgentConnectionError
12+
from ddev.ai.agent.exceptions import AgentConnectionError, AgentError
1313
from ddev.ai.agent.scope import AgentRole, AgentScope
1414
from ddev.ai.agent.types import AgentResponse, ContextUsage, StopReason, TokenUsage, ToolCall, ToolResultMessage
1515
from ddev.ai.callbacks.callbacks import Callbacks, CallbackSet
@@ -368,6 +368,95 @@ async def test_partial_batch_failure_only_affects_raising_tool() -> None:
368368
assert "RuntimeError" in (results["tc_02"].error or "")
369369

370370

371+
# ---------------------------------------------------------------------------
372+
# MAX_TOKENS truncation while a tool call is pending
373+
# ---------------------------------------------------------------------------
374+
375+
376+
async def test_truncated_tool_call_is_not_executed_but_answered() -> None:
    """A truncated (MAX_TOKENS) turn carrying a tool call skips execution yet still replies.

    The tool must never run, but a failing tool_result has to be sent back for the pending
    tool_use so the conversation stays valid.
    """
    truncated_call = make_tool_call("tc_01", "edit_file")
    scripted = [
        make_response(StopReason.MAX_TOKENS, tool_calls=[truncated_call]),
        make_response(StopReason.END_TURN),
    ]
    tool_registry = MockToolRegistry()
    mock_agent = MockAgent(scripted)
    process = make_process(mock_agent, registry=tool_registry)

    outcome = await process.start("Rewrite the file")

    assert outcome.final_response.stop_reason == StopReason.END_TURN
    assert outcome.iterations == 2
    # The truncated call never reached the registry.
    assert tool_registry.run_calls == []
    # The second send() carries exactly one failing tool_result for the dangling tool_use.
    reply = mock_agent.send_calls[1]
    assert isinstance(reply, list)
    assert len(reply) == 1
    answer = reply[0]
    assert answer.tool_call_id == "tc_01"
    assert answer.result.success is False
399+
400+
401+
async def test_truncated_tool_call_fires_tool_call_callback() -> None:
    """The tool-call callback is recorded once, with a failed result, for a truncated call."""
    truncated_call = make_tool_call("tc_01", "edit_file")
    scripted = [
        make_response(StopReason.MAX_TOKENS, tool_calls=[truncated_call]),
        make_response(StopReason.END_TURN),
    ]
    mock_agent = MockAgent(scripted)
    recorder = CallbackRecorder()
    process = make_process(mock_agent, callbacks=Callbacks([recorder.callback_set]))

    await process.start("x")

    assert len(recorder.tool_calls_seen) == 1
    observed = recorder.tool_calls_seen[0]
    _, seen_call, seen_result, _ = observed
    # The callback receives the very same ToolCall object and the synthetic failure.
    assert seen_call is truncated_call
    assert seen_result.success is False
417+
418+
419+
async def test_truncation_then_recovery_continues_loop() -> None:
    """A truncated turn is followed by a normal retry; the loop ends once no tool is requested."""
    truncated_call = make_tool_call("tc_01", "edit_file")
    retry_call = make_tool_call("tc_02", "edit_file")
    scripted = [
        make_response(StopReason.MAX_TOKENS, tool_calls=[truncated_call]),
        make_response(StopReason.TOOL_USE, tool_calls=[retry_call]),
        make_response(StopReason.END_TURN),
    ]
    tool_registry = MockToolRegistry()
    mock_agent = MockAgent(scripted)
    process = make_process(mock_agent, registry=tool_registry)

    outcome = await process.start("Rewrite the file")

    assert outcome.final_response.stop_reason == StopReason.END_TURN
    assert outcome.iterations == 3
    # Of the two requested calls, only the complete (non-truncated) one was run.
    assert len(tool_registry.run_calls) == 1
436+
437+
438+
async def test_repeated_truncation_aborts_with_agent_error() -> None:
    """Endless truncation on a pending tool call must raise AgentError rather than loop forever."""
    scripted = []
    for index in range(10):
        call = make_tool_call(f"tc_{index:02d}", "edit_file")
        scripted.append(make_response(StopReason.MAX_TOKENS, tool_calls=[call]))
    mock_agent = MockAgent(scripted)
    process = make_process(mock_agent)

    with pytest.raises(AgentError, match="truncated by the output token limit"):
        await process.start("Rewrite the file")
447+
448+
449+
async def test_max_tokens_without_tool_calls_returns_immediately() -> None:
    """A text-only turn cut off by MAX_TOKENS is returned as-is, without the repair path."""
    only_response = make_response(StopReason.MAX_TOKENS)
    mock_agent = MockAgent([only_response])
    process = make_process(mock_agent)

    outcome = await process.start("Write a long essay")

    assert outcome.final_response.stop_reason == StopReason.MAX_TOKENS
    assert outcome.iterations == 1
    # Only the initial prompt was sent; no follow-up tool_result message.
    assert len(mock_agent.send_calls) == 1
458+
459+
371460
# ---------------------------------------------------------------------------
372461
# Callbacks fired correctly
373462
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)