Skip to content

Commit 12ec83f

Browse files
Enable Gemini prompt cache markers (#3090)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
1 parent c9a4fd5 commit 12ec83f

4 files changed

Lines changed: 42 additions & 3 deletions

File tree

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,8 +1356,8 @@ def is_caching_prompt_active(self) -> bool:
13561356
"""
13571357
if not self.caching_prompt:
13581358
return False
1359-
# We don't need to look-up model_info, because
1360-
# only Anthropic models need explicit caching breakpoints
1359+
# We don't need to look up model_info because explicit caching
1360+
# breakpoint support is tracked in the local feature table.
13611361
return (
13621362
self.caching_prompt
13631363
and get_features(self._model_name_for_capabilities()).supports_prompt_cache
@@ -1397,7 +1397,8 @@ def _apply_prompt_caching(self, messages: list[Message]) -> None:
13971397
# Single block: mark it for caching
13981398
sys_content[0].cache_prompt = True
13991399

1400-
# NOTE: this is only needed for anthropic
1400+
# Anthropic and Gemini both use these cache_control markers. LiteLLM
1401+
# performs the provider-specific cache setup for Gemini downstream.
14011402
for message in reversed(messages):
14021403
if message.role in ("user", "tool"):
14031404
message.content[

openhands-sdk/openhands/sdk/llm/utils/model_features.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ def _supports_reasoning_effort(model: str | None) -> bool:
112112
"claude-opus-4-6",
113113
"claude-opus-4-7",
114114
"claude-sonnet-4-6",
115+
# Gemini uses the same cache_control marker format as Anthropic. LiteLLM
116+
# handles Vertex/Gemini context-cache creation when these markers are present.
117+
"gemini-2.5",
118+
"gemini-3",
115119
]
116120

117121
# Models that support a top-level prompt_cache_retention parameter

tests/sdk/llm/test_model_features.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ def test_extended_thinking_support(model, expected_extended_thinking):
123123
("anthropic.claude-3-5-sonnet-20241022", True),
124124
("anthropic.claude-3-haiku-20240307", True),
125125
("anthropic.claude-3-opus-20240229", True),
126+
# Gemini 2.5 and 3.x use explicit context caching through LiteLLM.
127+
("gemini-2.5-pro", True),
128+
("gemini-3.1-pro-preview", True),
129+
("litellm_proxy/gemini-3.1-pro-preview", True),
126130
("gpt-4o", False), # OpenAI doesn't support explicit prompt caching
127131
("gemini-1.5-pro", False),
128132
("unknown-model", False),

tests/sdk/llm/test_prompt_caching_cross_conversation.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,36 @@ def on_event(event):
144144
assert messages[1].content[-1].cache_prompt is True
145145

146146

147+
def test_gemini_prompt_caching_marks_formatted_messages():
148+
"""Gemini models should emit cache_control markers when caching is enabled."""
149+
llm = LLM(
150+
model="litellm_proxy/gemini-3.1-pro-preview",
151+
usage_id="test",
152+
caching_prompt=True,
153+
)
154+
messages = [
155+
Message(
156+
role="system",
157+
content=[
158+
TextContent(text="Static system prompt"),
159+
TextContent(text="Dynamic context"),
160+
],
161+
),
162+
Message(
163+
role="user",
164+
content=[TextContent(text="Hello")],
165+
),
166+
]
167+
168+
formatted_messages = llm.format_messages_for_llm(messages)
169+
170+
system_content = formatted_messages[0]["content"]
171+
user_content = formatted_messages[1]["content"]
172+
assert system_content[0]["cache_control"] == {"type": "ephemeral"}
173+
assert "cache_control" not in system_content[1]
174+
assert user_content[-1]["cache_control"] == {"type": "ephemeral"}
175+
176+
147177
@pytest.mark.parametrize(
148178
("first_suffix", "second_suffix"),
149179
[

0 commit comments

Comments
 (0)