Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,20 +1303,25 @@ class RetryPromptPart:
part_kind: Literal['retry-prompt'] = 'retry-prompt'
"""Part type identifier, this is available on all parts as a discriminator."""

def model_response(self) -> str:
"""Return a string message describing why the retry is requested."""
def error_description(self) -> str:
"""Return the error description without the retry instruction suffix.

This is suitable for UI display where the LLM-facing "Fix the errors and try again."
instruction is not appropriate. For the full model-facing text, use [`model_response()`][pydantic_ai.messages.RetryPromptPart.model_response].
"""
if isinstance(self.content, str):
if self.tool_name is None:
description = f'Validation feedback:\n{self.content}'
return f'Validation feedback:\n{self.content}'
else:
description = self.content
return self.content
else:
json_errors = error_details_ta.dump_json(self.content, exclude={'__all__': {'ctx'}}, indent=2)
plural = isinstance(self.content, list) and len(self.content) != 1
description = (
f'{len(self.content)} validation error{"s" if plural else ""}:\n```json\n{json_errors.decode()}\n```'
)
return f'{description}\n\nFix the errors and try again.'
return f'{len(self.content)} validation error{"s" if plural else ""}:\n```json\n{json_errors.decode()}\n```'

def model_response(self) -> str:
"""Return a string message describing why the retry is requested."""
return f'{self.error_description()}\n\nFix the errors and try again.'

def otel_event(self, settings: InstrumentationSettings) -> LogRecord:
if self.tool_name is None:
Expand Down
15 changes: 12 additions & 3 deletions pydantic_ai_slim/pydantic_ai/ui/vercel_ai/_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,11 +391,14 @@ def load_messages(cls, messages: Sequence[UIMessage]) -> list[ModelMessage]: #
ToolReturnPart(tool_name=tool_name, tool_call_id=tool_call_id, content=part.output)
)
elif part.state == 'output-error':
# Prefer model_response from metadata (preserves LLM-facing
# retry suffix for cache fidelity), fall back to error_text.
error_content = provider_meta.get('model_response') or part.error_text
builder.add(
ToolReturnPart(
tool_name=tool_name,
tool_call_id=tool_call_id,
content=part.error_text,
content=error_content,
outcome='failed',
)
)
Expand Down Expand Up @@ -649,14 +652,20 @@ def _dump_tool_call_part(
# Check for Vercel AI chunks returned by tool calls via metadata.
ui_parts.extend(_extract_metadata_ui_parts(tool_result))
elif isinstance(tool_result, RetryPromptPart):
# error_description() returns the error text without the LLM-facing
# "Fix the errors and try again." suffix — suitable for UI display.
# The full model_response() is preserved in metadata so load_messages()
# can restore it for LLM cache fidelity.
error_meta = call_provider_metadata or {}
error_meta.setdefault('pydantic_ai', {})['model_response'] = tool_result.model_response()
ui_parts.append(
ToolOutputErrorPart(
type=tool_type,
tool_call_id=part.tool_call_id,
input=part.args_as_dict(),
error_text=tool_result.model_response(),
error_text=tool_result.error_description(),
provider_executed=False,
call_provider_metadata=call_provider_metadata,
call_provider_metadata=error_meta,
)
)
else:
Expand Down
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pydantic_ai/ui/vercel_ai/_event_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ async def handle_function_tool_result(self, event: FunctionToolResultEvent) -> A
if self.sdk_version >= 6 and isinstance(part, ToolReturnPart) and part.outcome == 'denied':
yield ToolOutputDeniedChunk(tool_call_id=tool_call_id)
elif isinstance(part, RetryPromptPart):
yield ToolOutputErrorChunk(tool_call_id=tool_call_id, error_text=part.model_response())
yield ToolOutputErrorChunk(tool_call_id=tool_call_id, error_text=part.error_description())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Streaming path silently drops retry suffix, breaking cache fidelity for the primary Vercel AI use case

The PR's stated goal is to preserve LLM cache fidelity by storing the full `model_response()` text (including the "Fix the errors and try again." suffix) in metadata. This works for the `dump_messages` path (`_adapter.py:660`), but the streaming path at `_event_stream.py:281` sends `error_description()` (no suffix) via `ToolOutputErrorChunk`, which has no `provider_metadata` field to carry the full text. When a Vercel AI client reconstructs its message history from stream events and sends it back on the next turn, `load_messages` at `_adapter.py:396` evaluates `provider_meta.get('model_response') or part.error_text`. Because the stream-derived metadata has no `model_response` key, this falls back to `part.error_text` (the suffix-free version). The resulting `ToolReturnPart.content` then differs from what the model originally saw during the agent run, which invalidates prompt caches (e.g. Anthropic's) and changes the model's view of its own history.

Before this PR, `_event_stream.py:281` used `part.model_response()` (with the suffix), so the stream→load round-trip was consistent. This PR introduces a regression specifically in the streaming path, which is the primary path for Vercel AI SDK (`useChat`) users.

Prompt for agents
In `pydantic_ai_slim/pydantic_ai/ui/vercel_ai/_event_stream.py` at line 281, the `ToolOutputErrorChunk` is created with `error_description()` (no retry suffix), and there is no way to attach the full `model_response()` as metadata because `ToolOutputErrorChunk` lacks a `provider_metadata` field.

Two possible fixes:

1. (Minimal) Revert line 281 to use `part.model_response()` instead of `part.error_description()`. This keeps the streaming path consistent with the pre-PR behavior and ensures cache fidelity, at the cost of showing the LLM-facing suffix in the UI during streaming.

2. (Complete) Add a `provider_metadata` field to `ToolOutputErrorChunk` (in `response_types.py`), populate it with the full `model_response()` text in `handle_function_tool_result`, and update `load_messages` to extract it. This would achieve the PR's goal of clean UI text plus cache fidelity for both the streaming and `dump_messages` paths.

The test at `tests/test_vercel_ai.py` line 2248 (`test_run_stream_response_error`) would need to be updated accordingly.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

elif isinstance(part, ToolReturnPart) and part.outcome == 'failed':
yield ToolOutputErrorChunk(tool_call_id=tool_call_id, error_text=part.model_response_str())
else:
Expand Down
30 changes: 11 additions & 19 deletions tests/test_vercel_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2245,11 +2245,7 @@ async def stream_function(
{
'type': 'tool-output-error',
'toolCallId': IsStr(),
'errorText': """\
Unknown tool name: 'unknown_tool'. No tools available.

Fix the errors and try again.\
""",
'errorText': "Unknown tool name: 'unknown_tool'. No tools available.",
},
{'type': 'finish-step'},
{'type': 'start-step'},
Expand Down Expand Up @@ -4136,21 +4132,20 @@ async def test_adapter_dump_messages_with_retry():
'raw_input': None,
'input': {'arg': 'value'},
'provider_executed': False,
'error_text': """\
Tool failed with error

Fix the errors and try again.\
""",
'call_provider_metadata': None,
'error_text': 'Tool failed with error',
'call_provider_metadata': {
'pydantic_ai': {
'model_response': 'Tool failed with error\n\nFix the errors and try again.',
}
},
'approval': None,
}
],
},
]
)

# Verify roundtrip — load_messages now produces ToolReturnPart(outcome='failed')
# instead of RetryPromptPart for tool errors from the Vercel AI format
# Verify roundtrip — load_messages uses model_response from metadata for cache fidelity
reloaded_messages = VercelAIAdapter.load_messages(ui_messages)
tool_error_part = reloaded_messages[2].parts[0]
assert isinstance(tool_error_part, ToolReturnPart)
Expand Down Expand Up @@ -5691,16 +5686,13 @@ async def test_adapter_dump_messages_tool_error_with_provider_metadata():
'raw_input': None,
'input': {'x': 1},
'provider_executed': False,
'error_text': """\
Tool execution failed

Fix the errors and try again.\
""",
'error_text': 'Tool execution failed',
'call_provider_metadata': {
'pydantic_ai': {
'id': 'call_fail_id',
'provider_name': 'google',
'provider_details': {'attempt': 1},
'model_response': 'Tool execution failed\n\nFix the errors and try again.',
}
},
'approval': None,
Expand All @@ -5710,7 +5702,7 @@ async def test_adapter_dump_messages_tool_error_with_provider_metadata():
]
)

# Verify roundtrip — load_messages now produces ToolReturnPart(outcome='failed')
# Verify roundtrip — uses model_response from metadata for cache fidelity
reloaded_messages = VercelAIAdapter.load_messages(ui_messages)
tool_error_part = reloaded_messages[2].parts[0]
assert isinstance(tool_error_part, ToolReturnPart)
Expand Down
Loading