Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 54 additions & 12 deletions ddev/src/ddev/ai/react/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,23 @@
from ddev.ai.react.types import ReActResult
from ddev.ai.tools.core.types import ToolResult

# A turn that stops on MAX_TOKENS while a tool call is pending was truncated mid-call: the
# tool_use block is incomplete and was never executed. We must still answer every tool_use
# with a tool_result, otherwise the next send() replays a dangling tool_use and the provider
# rejects the request. This synthetic result repairs the conversation and nudges the model
# toward a smaller follow-up.
TRUNCATED_TOOL_CALL_ERROR = (
"This tool call was NOT executed: your previous response was truncated after reaching the "
"maximum output token limit, so the tool call is incomplete. Retry with a smaller, more "
"targeted change — edit a single small unique region instead of rewriting a whole file, or "
"split the work across several sequential tool calls. For a full-file rewrite prefer "
"create_file over one huge edit_file."

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, we don't have any way of easily rewriting an entire file. The only way is using edit_file, since create_file only creates a file if it doesn't exist. Maybe, as a follow up, we could include a flag in create_file to allow overwriting if it already exists.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem still persists: if it wants to edit (or create) a file that is too big, it will run into a max_tokens error. We could add an overwrite option to write a file, but if we do not mention the token limit that might still be an issue.

Maybe, since we know the configured max tokens, we could include that in the prompt itself? Inject it as a guideline so the agent knows not to do weird things?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, saw your last commit and looks good. Thank you!

)

# Upper bound on back-to-back truncated turns before we give up, to avoid an unrecoverable loop
# where the model keeps emitting an oversized tool call that never fits in the output budget.
# The ReAct loop raises AgentError once the consecutive count exceeds this value, so with 2 the
# third truncated turn in a row aborts; a non-truncated tool turn resets the count to zero.
MAX_CONSECUTIVE_TRUNCATIONS = 2


class ReActProcess:
"""
Expand Down Expand Up @@ -77,6 +94,22 @@ def _is_compact_needed(self, response: AgentResponse) -> bool:
return False
return True

async def _execute_tool_calls(self, tool_calls: list) -> list[ToolResult]:
    """Dispatch every requested tool call concurrently and normalise the outcomes.

    Each call is run through ``self._tool_registry.run`` with the call's ``name`` and
    ``input``. ``asyncio.gather`` is asked to return exceptions rather than raise them, so a
    failing tool does not abort its siblings. Any outcome that is not a ``ToolResult`` is
    wrapped in a failed ``ToolResult`` whose error reads ``"<ExceptionType>: <message>"``.

    Returns:
        One ``ToolResult`` per entry of ``tool_calls``, in the same order.
    """
    pending = [self._tool_registry.run(call.name, call.input) for call in tool_calls]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    results: list[ToolResult] = []
    for outcome in outcomes:
        if isinstance(outcome, ToolResult):
            results.append(outcome)
        else:
            # The tool raised: surface the exception type and message as a failure result.
            results.append(ToolResult(success=False, error=f"{type(outcome).__name__}: {outcome}"))
    return results

@staticmethod
def _truncated_tool_results(tool_calls: list) -> list[ToolResult]:
    """Build one failed ``ToolResult`` per pending call of a turn cut off by the token limit.

    The calls themselves are not inspected or executed; every result carries the shared
    ``TRUNCATED_TOOL_CALL_ERROR`` message.
    """
    results: list[ToolResult] = []
    for _ in tool_calls:
        results.append(ToolResult(success=False, error=TRUNCATED_TOOL_CALL_ERROR))
    return results

async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> ReActResult:
"""
Run the ReAct loop for a single task.
Expand Down Expand Up @@ -104,18 +137,24 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re
await self._callbacks.fire_agent_response(self._scope, response, iterations)

# No iteration cap — this is an interactive CLI tool; the user can Ctrl+C to stop.
while response.stop_reason == StopReason.TOOL_USE:
if not response.tool_calls:
raise AgentError("Agent returned stop_reason=TOOL_USE with no tool calls")

raw_results = await asyncio.gather(
*[self._tool_registry.run(tc.name, tc.input) for tc in response.tool_calls],
return_exceptions=True,
)
tool_results: list[ToolResult] = [
r if isinstance(r, ToolResult) else ToolResult(success=False, error=f"{type(r).__name__}: {r}")
for r in raw_results
]
# Loop while a tool call is pending. A MAX_TOKENS turn can also carry a (truncated)
# tool_use that must be answered, so we key off tool_calls rather than the stop reason.
consecutive_truncations = 0
while response.tool_calls:
truncated = response.stop_reason == StopReason.MAX_TOKENS
Comment on lines +151 to +152

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve pending max-token tool calls before compacting

When a follow-up response contains tool_calls with StopReason.MAX_TOKENS, this new loop keeps it pending, but the auto-compaction block below can still run before the synthetic tool_results are sent. If that response is over the compaction threshold, compact(response) treats it as a normal non-TOOL_USE turn and sends a summary request after an unresolved tool_use, which recreates the provider 400 this change is trying to avoid; this occurs on truncated retries or any tool-result turn that truncates while requesting another tool in a high-context conversation.

Useful? React with 👍 / 👎.

if truncated:
consecutive_truncations += 1
if consecutive_truncations > MAX_CONSECUTIVE_TRUNCATIONS:
raise AgentError(
"Agent response was truncated by the output token limit on "
f"{consecutive_truncations} consecutive turns while a tool call was "
"pending; aborting to avoid an unrecoverable loop. Reduce the amount "
"of work attempted in a single tool call."
)
tool_results: list[ToolResult] = self._truncated_tool_results(response.tool_calls)
else:
consecutive_truncations = 0
tool_results = await self._execute_tool_calls(response.tool_calls)
total_input += sum(result.total_input_tokens for result in tool_results)
total_output += sum(result.total_output_tokens for result in tool_results)

Expand All @@ -140,6 +179,9 @@ async def start(self, prompt: str, allowed_tools: list[str] | None = None) -> Re
total_input += compact_in
total_output += compact_out

if response.stop_reason == StopReason.TOOL_USE:
raise AgentError("Agent returned stop_reason=TOOL_USE with no tool calls")

react_result = ReActResult(
final_response=response,
iterations=iterations,
Expand Down
8 changes: 6 additions & 2 deletions ddev/src/ddev/ai/tools/fs/edit_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ class EditFileInput(BaseToolInput):
Field(
description=(
"Exact non-empty text to replace. Must appear exactly once in the file "
"(hint: include surrounding context if needed)."
"(hint: include surrounding context if needed). Keep this to the smallest "
"unique region that needs changing — do not paste the whole file. To rewrite "
"a file extensively, apply several small edits rather than one huge one."
),
min_length=1,
),
Expand All @@ -30,7 +32,9 @@ class EditFileInput(BaseToolInput):
class EditFileTool(FileRegistryTool[EditFileInput]):
"""Edits a file by replacing an exact string with a new one.
Fails if the file was modified since the last read.
old_string must appear exactly once in the file — if it appears multiple times, the call fails."""
old_string must appear exactly once in the file — if it appears multiple times, the call fails.
Prefer small, targeted edits over large ones: a single edit that spans most of the file can
exceed the response token limit and be truncated. Break big rewrites into several edits."""

@property
def name(self) -> str:
Expand Down
91 changes: 90 additions & 1 deletion ddev/tests/ai/react/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from ddev.ai.agent.base import BaseAgent
from ddev.ai.agent.build import AgentRuntime
from ddev.ai.agent.exceptions import AgentConnectionError
from ddev.ai.agent.exceptions import AgentConnectionError, AgentError
from ddev.ai.agent.scope import AgentRole, AgentScope
from ddev.ai.agent.types import AgentResponse, ContextUsage, StopReason, TokenUsage, ToolCall, ToolResultMessage
from ddev.ai.callbacks.callbacks import Callbacks, CallbackSet
Expand Down Expand Up @@ -368,6 +368,95 @@ async def test_partial_batch_failure_only_affects_raising_tool() -> None:
assert "RuntimeError" in (results["tc_02"].error or "")


# ---------------------------------------------------------------------------
# MAX_TOKENS truncation while a tool call is pending
# ---------------------------------------------------------------------------


async def test_truncated_tool_call_is_not_executed_but_answered() -> None:
    """A tool call cut off by MAX_TOKENS is never run, yet still gets a failing tool_result.

    The scripted agent first replies with MAX_TOKENS plus one pending ``edit_file`` call and
    then ends the turn. The registry must record no executions, and the second ``send`` must
    carry exactly one unsuccessful result addressed to that call.
    """
    pending_call = make_tool_call("tc_01", "edit_file")
    scripted = [
        make_response(StopReason.MAX_TOKENS, tool_calls=[pending_call]),
        make_response(StopReason.END_TURN),
    ]
    registry = MockToolRegistry()
    agent = MockAgent(scripted)

    outcome = await make_process(agent, registry=registry).start("Rewrite the file")

    assert outcome.final_response.stop_reason == StopReason.END_TURN
    assert outcome.iterations == 2
    # The truncated call must never have reached the registry.
    assert registry.run_calls == []
    # The dangling tool_use is still answered, with a failure result.
    answer = agent.send_calls[1]
    assert isinstance(answer, list)
    assert len(answer) == 1
    reply = answer[0]
    assert reply.tool_call_id == "tc_01"
    assert reply.result.success is False


async def test_truncated_tool_call_fires_tool_call_callback() -> None:
    """The tool-call callback still fires once for a truncated call, reporting a failed result."""
    pending_call = make_tool_call("tc_01", "edit_file")
    scripted = [
        make_response(StopReason.MAX_TOKENS, tool_calls=[pending_call]),
        make_response(StopReason.END_TURN),
    ]
    agent = MockAgent(scripted)
    recorder = CallbackRecorder()
    callbacks = Callbacks([recorder.callback_set])

    await make_process(agent, callbacks=callbacks).start("x")

    assert len(recorder.tool_calls_seen) == 1
    recorded = recorder.tool_calls_seen[0]
    _, seen_call, seen_result, _ = recorded
    # The callback must receive the very same call object the agent emitted.
    assert seen_call is pending_call
    assert seen_result.success is False


async def test_truncation_then_recovery_continues_loop() -> None:
    """A truncated turn followed by a normal tool turn keeps the loop going until END_TURN.

    Script: MAX_TOKENS with a pending call, then a regular TOOL_USE call, then END_TURN. The
    run must take three iterations and execute only the second call.
    """
    truncated_call = make_tool_call("tc_01", "edit_file")
    retry_call = make_tool_call("tc_02", "edit_file")
    scripted = [
        make_response(StopReason.MAX_TOKENS, tool_calls=[truncated_call]),
        make_response(StopReason.TOOL_USE, tool_calls=[retry_call]),
        make_response(StopReason.END_TURN),
    ]
    registry = MockToolRegistry()
    agent = MockAgent(scripted)

    outcome = await make_process(agent, registry=registry).start("Rewrite the file")

    assert outcome.final_response.stop_reason == StopReason.END_TURN
    assert outcome.iterations == 3
    # Only the non-truncated retry reached the registry.
    assert len(registry.run_calls) == 1


async def test_repeated_truncation_aborts_with_agent_error() -> None:
    """Endless truncation on a pending tool call ends in AgentError rather than an infinite loop."""
    scripted = []
    for i in range(10):
        call = make_tool_call(f"tc_{i:02d}", "edit_file")
        scripted.append(make_response(StopReason.MAX_TOKENS, tool_calls=[call]))
    agent = MockAgent(scripted)

    with pytest.raises(AgentError, match="truncated by the output token limit"):
        await make_process(agent).start("Rewrite the file")


async def test_max_tokens_without_tool_calls_returns_immediately() -> None:
    """A MAX_TOKENS turn with no tool calls is returned as-is, without any repair round-trip."""
    text_only = make_response(StopReason.MAX_TOKENS)
    agent = MockAgent([text_only])

    outcome = await make_process(agent).start("Write a long essay")

    assert outcome.final_response.stop_reason == StopReason.MAX_TOKENS
    assert outcome.iterations == 1
    # Only the initial prompt was sent; no synthetic tool_result follow-up.
    assert len(agent.send_calls) == 1


# ---------------------------------------------------------------------------
# Callbacks fired correctly
# ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion ddev/tests/ai/tools/test_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def test_available_tool_names_includes_web_fetch():
def test_from_names_multiple_native(tmp_path):
registry = from_names(["web_search", "web_fetch"], tmp_path)
assert registry.definitions == []
assert registry.native_tool_names == ["web_search", "web_fetch"]
assert registry.native_tool_names == ("web_search", "web_fetch")


def test_from_names_native_only(tmp_path):
Expand Down
Loading