Skip to content

Commit abace67

Browse files
committed
feat: separate cache breakpoints for static vs dynamic instructions (Anthropic)
1 parent 0b1e3f8 commit abace67

File tree

8 files changed

+618
-32
lines changed

8 files changed

+618
-32
lines changed

docs/models/anthropic.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,77 @@ result = agent.run_sync([
233233
print(result.output)
234234
```
235235

236+
### Example 4: Separate Cache Breakpoints for Static vs Dynamic Instructions
237+
238+
When using `anthropic_cache_instructions`, static instructions (from `system_prompt` / `instructions`) and
239+
dynamic instructions (from `@agent.instructions`) are joined into a single cached block. This means any change
240+
in the dynamic part invalidates the cache for the entire combined block — even if the static part hasn't changed.
241+
242+
To solve this, you can cache them as separate blocks:
243+
244+
- **`anthropic_static_cache_instructions`**: Caches the static system prompt as its own independent block.
245+
- **`@agent.instructions(add_cache_breakpoint=True)`**: Adds a cache breakpoint after a specific dynamic instruction.
246+
247+
This way, the expensive static instructions stay cached even when dynamic context changes between requests.
248+
249+
```python {test="skip"}
250+
from dataclasses import dataclass
251+
252+
from pydantic_ai import Agent, RunContext
253+
from pydantic_ai.models.anthropic import AnthropicModelSettings
254+
255+
256+
@dataclass
257+
class Deps:
258+
user_name: str
259+
current_date: str
260+
261+
262+
agent = Agent(
263+
'anthropic:claude-sonnet-4-6',
264+
system_prompt='You are a helpful assistant. Here are the detailed guidelines...', # (1)!
265+
deps_type=Deps,
266+
model_settings=AnthropicModelSettings(
267+
anthropic_static_cache_instructions='1h', # (2)!
268+
),
269+
)
270+
271+
272+
@agent.instructions(add_cache_breakpoint=True) # (3)!
273+
def dynamic_context(ctx: RunContext[Deps]) -> str:
274+
return f'Today is {ctx.deps.current_date}. The user is {ctx.deps.user_name}.'
275+
276+
277+
result = agent.run_sync(
278+
'Hello!',
279+
deps=Deps(user_name='Alice', current_date='2025-03-25'),
280+
)
281+
print(result.output)
282+
```
283+
284+
1. The static system prompt — this stays the same across all requests.
285+
2. Cache the static system prompt with a 1-hour TTL. This uses one cache point.
286+
3. `add_cache_breakpoint=True` adds a separate cache point after this instruction block (default TTL `'5m'`). You can also pass `'1h'` directly: `add_cache_breakpoint='1h'`.
287+
288+
This sends two separate system prompt blocks to the Anthropic API:
289+
290+
| Block | Content | Cached | Cache Points Used |
291+
|-------|---------|--------|-------------------|
292+
| 1 | Static system prompt | ✅ TTL 1h | 1 |
293+
| 2 | Dynamic instruction | ✅ TTL 5m | 1 |
294+
| | | **Total** | **2 of 4** |
295+
296+
Without this feature, using `anthropic_cache_instructions=True` would join both into a single block —
297+
so changing the date or user name would invalidate the cache for the entire system prompt.
298+
299+
!!! tip "When to use which setting"
300+
- **`anthropic_cache_instructions`**: Use when your instructions don't change between requests, or when you want the simplest setup.
301+
- **`anthropic_static_cache_instructions` + `add_cache_breakpoint`**: Use when you have a large, stable system prompt combined with small, frequently changing dynamic context (e.g. user name, current date, session state).
302+
303+
!!! note
304+
Both `anthropic_static_cache_instructions` and `add_cache_breakpoint` each use one of Anthropic's 4 available cache points per request.
305+
Keep this in mind when combining with `anthropic_cache_tool_definitions`, `anthropic_cache_messages`, or `CachePoint` markers.
306+
236307
### Accessing Cache Usage Statistics
237308

238309
Access cache usage statistics via `result.usage()`:

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from . import _output, _system_prompt, exceptions, messages as _messages, models, result, usage as _usage
3131
from ._run_context import set_current_run_context
3232
from .exceptions import ToolRetryError
33+
from .messages import InstructionPart
3334
from .output import OutputDataT, OutputSpec
3435
from .settings import ModelSettings
3536
from .tools import (
@@ -126,7 +127,7 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]):
126127
usage_limits: _usage.UsageLimits
127128
max_result_retries: int
128129
end_strategy: EndStrategy
129-
get_instructions: Callable[[RunContext[DepsT]], Awaitable[str | None]]
130+
get_instructions: Callable[[RunContext[DepsT]], Awaitable[tuple[str | None, list[InstructionPart] | None]]]
130131

131132
output_schema: _output.OutputSchema[OutputDataT]
132133
output_validators: list[_output.OutputValidator[DepsT, OutputDataT]]
@@ -215,6 +216,7 @@ async def run( # noqa: C901
215216

216217
run_context: RunContext[DepsT] | None = None
217218
instructions: str | None = None
219+
instruction_parts: list[InstructionPart] | None = None
218220

219221
if messages and (last_message := messages[-1]):
220222
if isinstance(last_message, _messages.ModelRequest) and self.user_prompt is None:
@@ -243,7 +245,7 @@ async def run( # noqa: C901
243245
elif isinstance(last_message, _messages.ModelResponse):
244246
if self.user_prompt is None:
245247
run_context = build_run_context(ctx)
246-
instructions = await ctx.deps.get_instructions(run_context)
248+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
247249
if not instructions:
248250
# If there's no new prompt or instructions, skip ModelRequestNode and go directly to CallToolsNode
249251
return CallToolsNode[DepsT, NodeRunEndT](last_message)
@@ -254,7 +256,7 @@ async def run( # noqa: C901
254256

255257
if not run_context:
256258
run_context = build_run_context(ctx)
257-
instructions = await ctx.deps.get_instructions(run_context)
259+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
258260

259261
if messages:
260262
await self._reevaluate_dynamic_prompts(messages, run_context)
@@ -269,7 +271,9 @@ async def run( # noqa: C901
269271
if self.user_prompt is not None:
270272
parts.append(_messages.UserPromptPart(self.user_prompt))
271273

272-
next_message = _messages.ModelRequest(parts=parts)
274+
next_message = _messages.ModelRequest(
275+
parts=parts, instructions=instructions, instruction_parts=instruction_parts
276+
)
273277

274278
next_message.instructions = instructions
275279

@@ -876,9 +880,11 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
876880
# in the hope the model will return a non-empty response this time.
877881
ctx.state.increment_retries(ctx.deps.max_result_retries)
878882
run_context = build_run_context(ctx)
879-
instructions = await ctx.deps.get_instructions(run_context)
883+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
880884
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
881-
_messages.ModelRequest(parts=[], instructions=instructions)
885+
_messages.ModelRequest(
886+
parts=[], instructions=instructions, instruction_parts=instruction_parts
887+
)
882888
)
883889
return
884890

@@ -948,9 +954,11 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
948954
except ToolRetryError as e:
949955
ctx.state.increment_retries(ctx.deps.max_result_retries, error=e)
950956
run_context = build_run_context(ctx)
951-
instructions = await ctx.deps.get_instructions(run_context)
957+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
952958
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
953-
_messages.ModelRequest(parts=[e.tool_retry], instructions=instructions)
959+
_messages.ModelRequest(
960+
parts=[e.tool_retry], instructions=instructions, instruction_parts=instruction_parts
961+
)
954962
)
955963

956964
self._events_iterator = _run_stream()
@@ -991,9 +999,11 @@ async def _handle_tool_calls(
991999
if self.user_prompt is not None:
9921000
output_parts.append(_messages.UserPromptPart(self.user_prompt))
9931001

994-
instructions = await ctx.deps.get_instructions(run_context)
1002+
instructions, instruction_parts = await ctx.deps.get_instructions(run_context)
9951003
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
996-
_messages.ModelRequest(parts=output_parts, instructions=instructions)
1004+
_messages.ModelRequest(
1005+
parts=output_parts, instructions=instructions, instruction_parts=instruction_parts
1006+
)
9971007
)
9981008

9991009
@staticmethod

pydantic_ai_slim/pydantic_ai/_system_prompt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import inspect
44
from collections.abc import Awaitable, Callable
55
from dataclasses import dataclass, field
6-
from typing import Any, Generic, cast
6+
from typing import Any, Generic, Literal, cast
77

88
from . import _utils
99
from ._run_context import AgentDepsT, RunContext
@@ -14,6 +14,7 @@
1414
class SystemPromptRunner(Generic[AgentDepsT]):
1515
function: SystemPromptFunc[AgentDepsT]
1616
dynamic: bool = False
17+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False
1718
_takes_ctx: bool = field(init=False)
1819
_is_async: bool = field(init=False)
1920

pydantic_ai_slim/pydantic_ai/agent/__init__.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from ..capabilities import AbstractCapability, CombinedCapability
5050
from ..capabilities.builtin_tool import BuiltinTool as BuiltinToolCap
5151
from ..capabilities.history_processor import HistoryProcessor as HistoryProcessorCap
52+
from ..messages import InstructionPart
5253
from ..models.instrumented import InstrumentationSettings, InstrumentedModel, instrument_model
5354
from ..output import OutputDataT, OutputSpec, StructuredDict
5455
from ..run import AgentRun, AgentRunResult
@@ -62,6 +63,7 @@
6263
DocstringFormat,
6364
GenerateToolJsonSchema,
6465
RunContext,
66+
SystemPromptFunc,
6567
Tool,
6668
ToolFuncContext,
6769
ToolFuncEither,
@@ -1220,16 +1222,30 @@ def get_model_settings(run_context: RunContext[AgentDepsT]) -> ModelSettings | N
12201222
cap_instructions=cap_instructions,
12211223
)
12221224

1223-
async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
1224-
parts = [
1225-
instructions_literal,
1226-
*[await func.run(run_context) for func in instructions_functions],
1227-
]
1225+
async def get_instructions(
1226+
run_context: RunContext[AgentDepsT],
1227+
) -> tuple[str | None, list[InstructionPart] | None]:
1228+
instruction_parts: list[InstructionPart] = []
1229+
1230+
if instructions_literal:
1231+
instruction_parts.append(InstructionPart(content=instructions_literal))
1232+
1233+
for func in instructions_functions:
1234+
text = await func.run(run_context)
1235+
if text:
1236+
instruction_parts.append(
1237+
InstructionPart(
1238+
content=text,
1239+
add_cache_breakpoint=func.add_cache_breakpoint,
1240+
)
1241+
)
12281242

1229-
parts = [p for p in parts if p]
1230-
if not parts:
1231-
return None
1232-
return '\n\n'.join(parts).strip()
1243+
if not instruction_parts:
1244+
return None, None
1245+
1246+
joined = '\n\n'.join(p.content for p in instruction_parts).strip()
1247+
has_breakpoints = any(p.add_cache_breakpoint for p in instruction_parts)
1248+
return joined, instruction_parts if has_breakpoints else None
12331249

12341250
graph_deps = _agent_graph.GraphAgentDeps[AgentDepsT, OutputDataT](
12351251
user_deps=deps,
@@ -1749,13 +1765,17 @@ def instructions(self, func: Callable[[], Awaitable[str | None]], /) -> Callable
17491765

17501766
@overload
17511767
def instructions(
1752-
self, /
1753-
) -> Callable[[_system_prompt.SystemPromptFunc[AgentDepsT]], _system_prompt.SystemPromptFunc[AgentDepsT]]: ...
1768+
self,
1769+
/,
1770+
*,
1771+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False,
1772+
) -> Callable[[SystemPromptFunc[AgentDepsT]], SystemPromptFunc[AgentDepsT]]: ...
17541773

17551774
def instructions(
17561775
self,
17571776
func: _system_prompt.SystemPromptFunc[AgentDepsT] | None = None,
17581777
/,
1778+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False,
17591779
) -> (
17601780
Callable[[_system_prompt.SystemPromptFunc[AgentDepsT]], _system_prompt.SystemPromptFunc[AgentDepsT]]
17611781
| _system_prompt.SystemPromptFunc[AgentDepsT]
@@ -1790,12 +1810,16 @@ async def async_instructions(ctx: RunContext[str]) -> str:
17901810
def decorator(
17911811
func_: _system_prompt.SystemPromptFunc[AgentDepsT],
17921812
) -> _system_prompt.SystemPromptFunc[AgentDepsT]:
1793-
self._instructions.append(func_)
1813+
instruction_runner = _system_prompt.SystemPromptRunner[AgentDepsT](
1814+
func_, add_cache_breakpoint=add_cache_breakpoint
1815+
)
1816+
self._instructions.append(instruction_runner) # pyright: ignore[reportArgumentType]
17941817
return func_
17951818

17961819
return decorator
17971820
else:
1798-
self._instructions.append(func)
1821+
runner = _system_prompt.SystemPromptRunner[AgentDepsT](func, add_cache_breakpoint=add_cache_breakpoint)
1822+
self._instructions.append(runner) # pyright: ignore[reportArgumentType]
17991823
return func
18001824

18011825
@overload
@@ -2298,6 +2322,8 @@ def _get_instructions(
22982322
for instruction in instructions:
22992323
if isinstance(instruction, str):
23002324
literal_parts.append(instruction)
2325+
elif isinstance(instruction, _system_prompt.SystemPromptRunner):
2326+
functions.append(instruction) # pyright: ignore[reportUnknownArgumentType]
23012327
else:
23022328
# TemplateStr instances land here too: they are callable with a
23032329
# RunContext parameter, so SystemPromptRunner handles them like

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,6 +1359,14 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
13591359
"""A message part sent by Pydantic AI to a model."""
13601360

13611361

1362+
@dataclass(repr=False)
1363+
class InstructionPart:
1364+
"""A single instruction block with optional cache control metadata."""
1365+
1366+
content: str
1367+
add_cache_breakpoint: bool | Literal['5m', '1h'] = False
1368+
1369+
13621370
@dataclass(repr=False)
13631371
class ModelRequest:
13641372
"""A request generated by Pydantic AI and sent to a model, e.g. a message from the Pydantic AI app to the model."""
@@ -1376,6 +1384,9 @@ class ModelRequest:
13761384
instructions: str | None = None
13771385
"""The instructions for the model."""
13781386

1387+
instruction_parts: list[InstructionPart] | None = None
1388+
"""Structured instruction parts for models that support per-part cache control (e.g. Anthropic)."""
1389+
13791390
kind: Literal['request'] = 'request'
13801391
"""Message type identifier, this is available on all parts as a discriminator."""
13811392

0 commit comments

Comments
 (0)