Skip to content

Commit abace67

Browse files
committed
feat: separate cache breakpoints for static vs dynamic instructions (Anthropic)
1 parent 0b1e3f8 commit abace67

File tree

8 files changed

+618
-32
lines changed

8 files changed

+618
-32
lines changed

docs/models/anthropic.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,77 @@ result = agent.run_sync([
233233
print(result.output)
234234
```
235235

236+
### Example 4: Separate Cache Breakpoints for Static vs Dynamic Instructions
237+
238+
When using `anthropic_cache_instructions`, static instructions (from `system_prompt` / `instructions`) and
239+
dynamic instructions (from `@agent.instructions`) are joined into a single cached block. This means any change
240+
in the dynamic part invalidates the cache for the entire combined block — even if the static part hasn't changed.
241+
242+
To solve this, you can cache them as separate blocks:
243+
244+
- **`anthropic_static_cache_instructions`**: Caches the static system prompt as its own independent block.
245+
- **`@agent.instructions(add_cache_breakpoint=True)`**: Adds a cache breakpoint after a specific dynamic instruction.
246+
247+
This way, the expensive static instructions stay cached even when dynamic context changes between requests.
248+
249+
```python {test="skip"}
250+
from dataclasses import dataclass
251+
252+
from pydantic_ai import Agent, RunContext
253+
from pydantic_ai.models.anthropic import AnthropicModelSettings
254+
255+
256+
@dataclass
257+
class Deps:
258+
user_name: str
259+
current_date: str
260+
261+
262+
agent = Agent(
263+
'anthropic:claude-sonnet-4-6',
264+
system_prompt='You are a helpful assistant. Here are the detailed guidelines...', # (1)!
265+
deps_type=Deps,
266+
model_settings=AnthropicModelSettings(
267+
anthropic_static_cache_instructions='1h', # (2)!
268+
),
269+
)
270+
271+
272+
@agent.instructions(add_cache_breakpoint=True) # (3)!
273+
def dynamic_context(ctx: RunContext[Deps]) -> str:
274+
return f'Today is {ctx.deps.current_date}. The user is {ctx.deps.user_name}.'
275+
276+
277+
result = agent.run_sync(
278+
'Hello!',
279+
deps=Deps(user_name='Alice', current_date='2025-03-25'),
280+
)
281+
print(result.output)
282+
```
283+
284+
1. The static system prompt — this stays the same across all requests.
285+
2. Cache the static system prompt with a 1-hour TTL. This uses one cache point.
286+
3. `add_cache_breakpoint=True` adds a separate cache point after this instruction block (default TTL `'5m'`). You can also pass `'1h'` directly: `add_cache_breakpoint='1h'`.
287+
288+
This sends two separate system prompt blocks to the Anthropic API:
289+
290+
| Block | Content | Cached | Cache Points Used |
291+
|-------|---------|--------|-------------------|
292+
| 1 | Static system prompt | ✅ TTL 1h | 1 |
293+
| 2 | Dynamic instruction | ✅ TTL 5m | 1 |
294+
| | | **Total** | **2 of 4** |
295+
296+
Without this feature, using `anthropic_cache_instructions=True` would join both into a single block —
297+
so changing the date or user name would invalidate the cache for the entire system prompt.
298+
299+
!!! tip "When to use which setting"
300+
- **`anthropic_cache_instructions`**: Use when your instructions don't change between requests, or when you want the simplest setup.
301+
- **`anthropic_static_cache_instructions` + `add_cache_breakpoint`**: Use when you have a large, stable system prompt combined with small, frequently changing dynamic context (e.g. user name, current date, session state).
302+
303+
!!! note
304+
Both `anthropic_static_cache_instructions` and `add_cache_breakpoint` each use one of Anthropic's 4 available cache points per request.
305+
Keep this in mind when combining with `anthropic_cache_tool_definitions`, `anthropic_cache_messages`, or `CachePoint` markers.
306+
236307
### Accessing Cache Usage Statistics
237308

238309
Access cache usage statistics via `result.usage()`:

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from . import _output, _system_prompt, exceptions, messages as _messages, models, result, usage as _usage
3131
from ._run_context import set_current_run_context
3232
from .exceptions import ToolRetryError
33+
from .messages import InstructionPart
3334
from .output import OutputDataT, OutputSpec
3435
from .settings import ModelSettings
3536
from .tools import (
@@ -126,7 +127,7 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]):
126127
usage_limits: _usage.UsageLimits
127128
max_result_retries: int
128129
end_strategy: EndStrategy
129-
get_instructions: Callable[[RunContext[DepsT]], Awaitable[str | None]]
130+
get_instructions: Callable[[RunContext[DepsT]], Awaitable[tuple[str | None, list[InstructionPart] | None]]]
130131

131132
output_schema: _output.OutputSchema[OutputDataT]
132133
output_validators: list[_output.OutputValidator[DepsT, OutputDataT]]
@@ -215,6 +216,7 @@ async def run( # noqa: C901
215216

216217
run_context: RunContext[DepsT] | None = None
217218
instructions: str | None = None
219+
instruction_parts: list[InstructionPart] | None = None
218220

219221
if messages and (last_message := messages[-1]):
220222
if isinstance(last_message, _messages.ModelRequest) and self.user_prompt is None:
@@ -243,7 +245,7 @@ async def run( # noqa: C901
243245
elif isinstance(last_message, _messages.ModelResponse):
244246
if self.user_prompt is None:
245247
run_context = build_run_context(ctx)
246-
instructions = await ctx.deps.get_instructions(run_context)
248+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
247249
if not instructions:
248250
# If there's no new prompt or instructions, skip ModelRequestNode and go directly to CallToolsNode
249251
return CallToolsNode[DepsT, NodeRunEndT](last_message)
@@ -254,7 +256,7 @@ async def run( # noqa: C901
254256

255257
if not run_context:
256258
run_context = build_run_context(ctx)
257-
instructions = await ctx.deps.get_instructions(run_context)
259+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
258260

259261
if messages:
260262
await self._reevaluate_dynamic_prompts(messages, run_context)
@@ -269,7 +271,9 @@ async def run( # noqa: C901
269271
if self.user_prompt is not None:
270272
parts.append(_messages.UserPromptPart(self.user_prompt))
271273

272-
next_message = _messages.ModelRequest(parts=parts)
274+
next_message = _messages.ModelRequest(
275+
parts=parts, instructions=instructions, instruction_parts=instruction_parts
276+
)
273277

274278
next_message.instructions = instructions
275279

@@ -876,9 +880,11 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
876880
# in the hope the model will return a non-empty response this time.
877881
ctx.state.increment_retries(ctx.deps.max_result_retries)
878882
run_context = build_run_context(ctx)
879-
instructions = await ctx.deps.get_instructions(run_context)
883+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
880884
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
881-
_messages.ModelRequest(parts=[], instructions=instructions)
885+
_messages.ModelRequest(
886+
parts=[], instructions=instructions, instruction_parts=instruction_parts
887+
)
882888
)
883889
return
884890

@@ -948,9 +954,11 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
948954
except ToolRetryError as e:
949955
ctx.state.increment_retries(ctx.deps.max_result_retries, error=e)
950956
run_context = build_run_context(ctx)
951-
instructions = await ctx.deps.get_instructions(run_context)
957+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
952958
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
953-
_messages.ModelRequest(parts=[e.tool_retry], instructions=instructions)
959+
_messages.ModelRequest(
960+
parts=[e.tool_retry], instructions=instructions, instruction_parts=instruction_parts
961+
)
954962
)
955963

956964
self._events_iterator = _run_stream()
@@ -991,9 +999,11 @@ async def _handle_tool_calls(
991999
if self.user_prompt is not None:
9921000
output_parts.append(_messages.UserPromptPart(self.user_prompt))
9931001

994-
instructions = await ctx.deps.get_instructions(run_context)
1002+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
9951003
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
996-
_messages.ModelRequest(parts=output_parts, instructions=instructions)
1004+
_messages.ModelRequest(
1005+
parts=output_parts, instructions=instructions, instruction_parts=instruction_parts
1006+
)
9971007
)
9981008

9991009
@staticmethod

pydantic_ai_slim/pydantic_ai/_system_prompt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import inspect
44
from collections.abc import Awaitable, Callable
55
from dataclasses import dataclass, field
6-
from typing import Any, Generic, cast
6+
from typing import Any, Generic, Literal, cast
77

88
from . import _utils
99
from ._run_context import AgentDepsT, RunContext
@@ -14,6 +14,7 @@
1414
class SystemPromptRunner(Generic[AgentDepsT]):
1515
function: SystemPromptFunc[AgentDepsT]
1616
dynamic: bool = False
17+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False
1718
_takes_ctx: bool = field(init=False)
1819
_is_async: bool = field(init=False)
1920

pydantic_ai_slim/pydantic_ai/agent/__init__.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from ..capabilities import AbstractCapability, CombinedCapability
5050
from ..capabilities.builtin_tool import BuiltinTool as BuiltinToolCap
5151
from ..capabilities.history_processor import HistoryProcessor as HistoryProcessorCap
52+
from ..messages import InstructionPart
5253
from ..models.instrumented import InstrumentationSettings, InstrumentedModel, instrument_model
5354
from ..output import OutputDataT, OutputSpec, StructuredDict
5455
from ..run import AgentRun, AgentRunResult
@@ -62,6 +63,7 @@
6263
DocstringFormat,
6364
GenerateToolJsonSchema,
6465
RunContext,
66+
SystemPromptFunc,
6567
Tool,
6668
ToolFuncContext,
6769
ToolFuncEither,
@@ -1220,16 +1222,30 @@ def get_model_settings(run_context: RunContext[AgentDepsT]) -> ModelSettings | N
12201222
cap_instructions=cap_instructions,
12211223
)
12221224

1223-
async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
1224-
parts = [
1225-
instructions_literal,
1226-
*[await func.run(run_context) for func in instructions_functions],
1227-
]
1225+
async def get_instructions(
1226+
run_context: RunContext[AgentDepsT],
1227+
) -> tuple[str | None, list[InstructionPart] | None]:
1228+
instruction_parts: list[InstructionPart] = []
1229+
1230+
if instructions_literal:
1231+
instruction_parts.append(InstructionPart(content=instructions_literal))
1232+
1233+
for func in instructions_functions:
1234+
text = await func.run(run_context)
1235+
if text:
1236+
instruction_parts.append(
1237+
InstructionPart(
1238+
content=text,
1239+
add_cache_breakpoint=func.add_cache_breakpoint,
1240+
)
1241+
)
12281242

1229-
parts = [p for p in parts if p]
1230-
if not parts:
1231-
return None
1232-
return '\n\n'.join(parts).strip()
1243+
if not instruction_parts:
1244+
return None, None
1245+
1246+
joined = '\n\n'.join(p.content for p in instruction_parts).strip()
1247+
has_breakpoints = any(p.add_cache_breakpoint for p in instruction_parts)
1248+
return joined, instruction_parts if has_breakpoints else None
12331249

12341250
graph_deps = _agent_graph.GraphAgentDeps[AgentDepsT, OutputDataT](
12351251
user_deps=deps,
@@ -1749,13 +1765,17 @@ def instructions(self, func: Callable[[], Awaitable[str | None]], /) -> Callable
17491765

17501766
@overload
17511767
def instructions(
1752-
self, /
1753-
) -> Callable[[_system_prompt.SystemPromptFunc[AgentDepsT]], _system_prompt.SystemPromptFunc[AgentDepsT]]: ...
1768+
self,
1769+
/,
1770+
*,
1771+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False,
1772+
) -> Callable[[SystemPromptFunc[AgentDepsT]], SystemPromptFunc[AgentDepsT]]: ...
17541773

17551774
def instructions(
17561775
self,
17571776
func: _system_prompt.SystemPromptFunc[AgentDepsT] | None = None,
17581777
/,
1778+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False,
17591779
) -> (
17601780
Callable[[_system_prompt.SystemPromptFunc[AgentDepsT]], _system_prompt.SystemPromptFunc[AgentDepsT]]
17611781
| _system_prompt.SystemPromptFunc[AgentDepsT]
@@ -1790,12 +1810,16 @@ async def async_instructions(ctx: RunContext[str]) -> str:
17901810
def decorator(
17911811
func_: _system_prompt.SystemPromptFunc[AgentDepsT],
17921812
) -> _system_prompt.SystemPromptFunc[AgentDepsT]:
1793-
self._instructions.append(func_)
1813+
instruction_runner = _system_prompt.SystemPromptRunner[AgentDepsT](
1814+
func_, add_cache_breakpoint=add_cache_breakpoint
1815+
)
1816+
self._instructions.append(instruction_runner) # pyright: ignore[reportArgumentType]
17941817
return func_
17951818

17961819
return decorator
17971820
else:
1798-
self._instructions.append(func)
1821+
runner = _system_prompt.SystemPromptRunner[AgentDepsT](func, add_cache_breakpoint=add_cache_breakpoint)
1822+
self._instructions.append(runner) # pyright: ignore[reportArgumentType]
17991823
return func
18001824

18011825
@overload
@@ -2298,6 +2322,8 @@ def _get_instructions(
22982322
for instruction in instructions:
22992323
if isinstance(instruction, str):
23002324
literal_parts.append(instruction)
2325+
elif isinstance(instruction, _system_prompt.SystemPromptRunner):
2326+
functions.append(instruction) # pyright: ignore[reportUnknownArgumentType]
23012327
else:
23022328
# TemplateStr instances land here too: they are callable with a
23032329
# RunContext parameter, so SystemPromptRunner handles them like

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,6 +1359,14 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
13591359
"""A message part sent by Pydantic AI to a model."""
13601360

13611361

1362+
@dataclass(repr=False)
1363+
class InstructionPart:
1364+
"""A single instruction block with optional cache control metadata."""
1365+
1366+
content: str
1367+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False
1368+
1369+
13621370
@dataclass(repr=False)
13631371
class ModelRequest:
13641372
"""A request generated by Pydantic AI and sent to a model, e.g. a message from the Pydantic AI app to the model."""
@@ -1376,6 +1384,9 @@ class ModelRequest:
13761384
instructions: str | None = None
13771385
"""The instructions for the model."""
13781386

1387+
instruction_parts: list[InstructionPart] | None = None
1388+
"""Structured instruction parts for models that support per-part cache control (e.g. Anthropic)."""
1389+
13791390
kind: Literal['request'] = 'request'
13801391
"""Message type identifier, this is available on all parts as a discriminator."""
13811392

0 commit comments

Comments
 (0)