Recover the ReAct loop from MAX_TOKENS truncation mid-tool-call

AAraKKe · AAraKKe · commit 109ec129a70f · 2026-06-30T15:47:49.000+02:00
When a turn stops on MAX_TOKENS while a tool call is pending, the tool_use
block is truncated and never executed. The loop previously exited on any
non-TOOL_USE stop reason, leaving a dangling tool_use in history; the next
send() then replays it and the provider rejects the request with a 400
(tool_use without a following tool_result).

The loop now continues while a tool call is pending and, on a truncated turn,
answers each pending tool_use with a synthetic failure result instead of
executing it (the call's input is incomplete and unsafe to run). This repairs
the conversation and prompts the model to retry with a smaller change. A guard
aborts with an AgentError once more than two consecutive turns are truncated this
way, so a tool call that never fits the output budget cannot loop forever.

Also nudges the model toward smaller edits in the edit_file tool docstring and
input field description.
diff --git a/ddev/src/ddev/ai/react/process.py b/ddev/src/ddev/ai/react/process.py
@@ -12,6 +12,23 @@
 from ddev.ai.react.types import ReActResult
 from ddev.ai.tools.core.types import ToolResult
 
+# A turn that stops on MAX_TOKENS while a tool call is pending was truncated mid-call: the
+# tool_use block is incomplete and was never executed. We must still answer every tool_use
+# with a tool_result, otherwise the next send() replays a dangling tool_use and the provider
+# rejects the request. This synthetic result repairs the conversation and nudges the model
+# toward a smaller follow-up.
+TRUNCATED_TOOL_CALL_ERROR = (
+    "This tool call was NOT executed: your previous response was truncated after reaching the "
+    "maximum output token limit, so the tool call is incomplete. Retry with a smaller, more "
+    "targeted change — edit a single small unique region instead of rewriting a whole file, or "
+    "split the work across several sequential tool calls. For a full-file rewrite prefer "
+    "create_file over one huge edit_file."
+)
+
+# Number of back-to-back truncated turns we repair before giving up: the loop aborts on the
+# next one, so a model that keeps emitting an oversized tool call cannot loop forever.
+MAX_CONSECUTIVE_TRUNCATIONS = 2
+
 
 class ReActProcess:
     """
@@ -77,6 +94,22 @@ def _is_compact_needed(self, response: AgentResponse) -> bool:
             return False
         return True
 
+    async def _execute_tool_calls(self, tool_calls: list) -> list[ToolResult]:
+        """Run all tool calls concurrently; results keep call order and raised exceptions become failure results."""
+        raw_results = await asyncio.gather(
+            *[self._tool_registry.run(tc.name, tc.input) for tc in tool_calls],
+            return_exceptions=True,
+        )
+        return [
+            r if isinstance(r, ToolResult) else ToolResult(success=False, error=f"{type(r).__name__}: {r}")
+            for r in raw_results
+        ]
+
+    @staticmethod
+    def _truncated_tool_results(tool_calls: list) -> list[ToolResult]:
+        """One synthetic failure result per pending tool call of a turn truncated by the output token limit."""
+        return [ToolResult(success=False, error=TRUNCATED_TOOL_CALL_ERROR) for _ in tool_calls]
+
     async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> ReActResult:
         """
         Run the ReAct loop for a single task.
@@ -104,18 +137,24 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re
             await self._callbacks.fire_agent_response(self._scope, response, iterations)
 
             # No iteration cap — this is an interactive CLI tool; the user can Ctrl+C to stop.
-            while response.stop_reason == StopReason.TOOL_USE:
-                if not response.tool_calls:
-                    raise AgentError("Agent returned stop_reason=TOOL_USE with no tool calls")
-
-                raw_results = await asyncio.gather(
-                    *[self._tool_registry.run(tc.name, tc.input) for tc in response.tool_calls],
-                    return_exceptions=True,
-                )
-                tool_results: list[ToolResult] = [
-                    r if isinstance(r, ToolResult) else ToolResult(success=False, error=f"{type(r).__name__}: {r}")
-                    for r in raw_results
-                ]
+            # Loop while a tool call is pending. A MAX_TOKENS turn can also carry a (truncated)
+            # tool_use that must be answered, so we key off tool_calls rather than the stop reason.
+            consecutive_truncations = 0
+            while response.tool_calls:
+                truncated = response.stop_reason == StopReason.MAX_TOKENS
+                if truncated:
+                    consecutive_truncations += 1
+                    if consecutive_truncations > MAX_CONSECUTIVE_TRUNCATIONS:
+                        raise AgentError(
+                            "Agent response was truncated by the output token limit on "
+                            f"{consecutive_truncations} consecutive turns while a tool call was "
+                            "pending; aborting to avoid an unrecoverable loop. Reduce the amount "
+                            "of work attempted in a single tool call."
+                        )
+                    tool_results: list[ToolResult] = self._truncated_tool_results(response.tool_calls)
+                else:
+                    consecutive_truncations = 0
+                    tool_results = await self._execute_tool_calls(response.tool_calls)
                 total_input += sum(result.total_input_tokens for result in tool_results)
                 total_output += sum(result.total_output_tokens for result in tool_results)
 
@@ -140,6 +179,9 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re
                     total_input += compact_in
                     total_output += compact_out
 
+            if response.stop_reason == StopReason.TOOL_USE:
+                raise AgentError("Agent returned stop_reason=TOOL_USE with no tool calls")
+
             react_result = ReActResult(
                 final_response=response,
                 iterations=iterations,
diff --git a/ddev/src/ddev/ai/tools/fs/edit_file.py b/ddev/src/ddev/ai/tools/fs/edit_file.py
@@ -19,7 +19,9 @@ class EditFileInput(BaseToolInput):
         Field(
             description=(
                 "Exact non-empty text to replace. Must appear exactly once in the file "
-                "(hint: include surrounding context if needed)."
+                "(hint: include surrounding context if needed). Keep this to the smallest "
+                "unique region that needs changing — do not paste the whole file. To rewrite "
+                "a file extensively, apply several small edits rather than one huge one."
             ),
             min_length=1,
         ),
@@ -30,7 +32,9 @@ class EditFileInput(BaseToolInput):
 class EditFileTool(FileRegistryTool[EditFileInput]):
     """Edits a file by replacing an exact string with a new one.
     Fails if the file was modified since the last read.
-    old_string must appear exactly once in the file — if it appears multiple times, the call fails."""
+    old_string must appear exactly once in the file — if it appears multiple times, the call fails.
+    Prefer small, targeted edits over large ones: a single edit that spans most of the file can
+    exceed the response token limit and be truncated. Break big rewrites into several edits."""
 
     @property
     def name(self) -> str:
diff --git a/ddev/tests/ai/react/test_process.py b/ddev/tests/ai/react/test_process.py
@@ -9,7 +9,7 @@
 
 from ddev.ai.agent.base import BaseAgent
 from ddev.ai.agent.build import AgentRuntime
-from ddev.ai.agent.exceptions import AgentConnectionError
+from ddev.ai.agent.exceptions import AgentConnectionError, AgentError
 from ddev.ai.agent.scope import AgentRole, AgentScope
 from ddev.ai.agent.types import AgentResponse, ContextUsage, StopReason, TokenUsage, ToolCall, ToolResultMessage
 from ddev.ai.callbacks.callbacks import Callbacks, CallbackSet
@@ -368,6 +368,95 @@ async def test_partial_batch_failure_only_affects_raising_tool() -> None:
     assert "RuntimeError" in (results["tc_02"].error or "")
 
 
+# ---------------------------------------------------------------------------
+# MAX_TOKENS truncation while a tool call is pending
+# ---------------------------------------------------------------------------
+
+
+async def test_truncated_tool_call_is_not_executed_but_answered() -> None:
+    """A MAX_TOKENS turn with a pending tool call must NOT run the tool, but must still send a
+    failure tool_result back so the conversation stays valid."""
+    tc = make_tool_call("tc_01", "edit_file")
+    responses = [
+        make_response(StopReason.MAX_TOKENS, tool_calls=[tc]),
+        make_response(StopReason.END_TURN),
+    ]
+    registry = MockToolRegistry()
+    agent = MockAgent(responses)
+
+    result = await make_process(agent, registry=registry).start("Rewrite the file")
+
+    assert result.final_response.stop_reason == StopReason.END_TURN
+    assert result.iterations == 2
+    # Tool was never executed because the call was truncated.
+    assert registry.run_calls == []
+    # A failure tool_result was still sent back for the dangling tool_use.
+    sent_back = agent.send_calls[1]
+    assert isinstance(sent_back, list)
+    assert len(sent_back) == 1
+    assert sent_back[0].tool_call_id == "tc_01"
+    assert sent_back[0].result.success is False
+
+
+async def test_truncated_tool_call_fires_tool_call_callback() -> None:
+    tc = make_tool_call("tc_01", "edit_file")
+    agent = MockAgent(
+        [
+            make_response(StopReason.MAX_TOKENS, tool_calls=[tc]),
+            make_response(StopReason.END_TURN),
+        ]
+    )
+    recorder = CallbackRecorder()
+
+    await make_process(agent, callbacks=Callbacks([recorder.callback_set])).start("x")
+
+    assert len(recorder.tool_calls_seen) == 1
+    _, seen_call, seen_result, _ = recorder.tool_calls_seen[0]
+    assert seen_call is tc
+    assert seen_result.success is False
+
+
+async def test_truncation_then_recovery_continues_loop() -> None:
+    """After a truncated turn the model can retry; once it stops requesting tools, the loop ends."""
+    tc = make_tool_call("tc_01", "edit_file")
+    responses = [
+        make_response(StopReason.MAX_TOKENS, tool_calls=[tc]),
+        make_response(StopReason.TOOL_USE, tool_calls=[make_tool_call("tc_02", "edit_file")]),
+        make_response(StopReason.END_TURN),
+    ]
+    registry = MockToolRegistry()
+    agent = MockAgent(responses)
+
+    result = await make_process(agent, registry=registry).start("Rewrite the file")
+
+    assert result.final_response.stop_reason == StopReason.END_TURN
+    assert result.iterations == 3
+    # Only the second, non-truncated tool call was executed.
+    assert len(registry.run_calls) == 1
+
+
+async def test_repeated_truncation_aborts_with_agent_error() -> None:
+    """If the model keeps truncating on a pending tool call, the loop aborts instead of looping forever."""
+    truncated = [
+        make_response(StopReason.MAX_TOKENS, tool_calls=[make_tool_call(f"tc_{i:02d}", "edit_file")]) for i in range(10)
+    ]
+    agent = MockAgent(truncated)
+
+    with pytest.raises(AgentError, match="truncated by the output token limit"):
+        await make_process(agent).start("Rewrite the file")
+
+
+async def test_max_tokens_without_tool_calls_returns_immediately() -> None:
+    """A truncated text-only turn is valid on its own and must not trigger the repair path."""
+    agent = MockAgent([make_response(StopReason.MAX_TOKENS)])
+
+    result = await make_process(agent).start("Write a long essay")
+
+    assert result.final_response.stop_reason == StopReason.MAX_TOKENS
+    assert result.iterations == 1
+    assert len(agent.send_calls) == 1
+
+
 # ---------------------------------------------------------------------------
 # Callbacks fired correctly
 # ---------------------------------------------------------------------------