
Commit 4c101c5

implementing prompt caching in openrouter
1 parent 44b10d0 commit 4c101c5

File tree

5 files changed: +79 −11 lines changed

camel/configs/openrouter_config.py
camel/models/openrouter_model.py
pyproject.toml
test/models/test_openrouter_model.py
uv.lock

camel/configs/openrouter_config.py

Lines changed: 10 additions & 3 deletions

@@ -13,7 +13,7 @@
 # ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
 from __future__ import annotations
 
-from typing import Dict, Optional, Sequence, Union
+from typing import Any, Dict, Optional, Sequence, Union
 
 from camel.configs.base_config import BaseConfig
 from camel.types import NotGiven
@@ -86,6 +86,13 @@ class OpenRouterConfig(BaseConfig):
             forces the model to call that tool. :obj:`"none"` is the default
             when no tools are present. :obj:`"auto"` is the default if tools
             are present. (default: :obj:`None`)
+        extra_body (dict, optional): Used to pass provider-specific parameters
+            to OpenRouter, such as provider caching options, e.g.
+            ``{"anthropic": {"cache_control": {"type": "ephemeral"}}}``, or
+            other beta features. (default: :obj:`None`)
+        include_usage (bool, optional): Whether to include token usage in the
+            response, which is essential for tracking cache hits/misses.
+            (default: :obj:`None`)
     """
 
     temperature: Optional[float] = None
@@ -101,8 +108,8 @@ class OpenRouterConfig(BaseConfig):
     tool_choice: Optional[
         Union[Dict[str, Union[str, Dict[str, str]]], str]
     ] = None
-
-
+    enable_prompt_caching: bool = False
+    ttl: str = "5m"
 
 OPENROUTER_API_PARAMS = {
     param for param in OpenRouterConfig.model_fields.keys()
 }
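With these two fields, callers opt in to caching per model. A minimal usage sketch, assuming the constructor pattern used in the tests below; the model slug and the one-hour TTL choice are illustrative:

import os

from camel.models import OpenRouterModel

# enable_prompt_caching gates the message rewrite; ttl is only honored
# as "1h" on Anthropic routes, otherwise the "5m" default applies.
config = {"enable_prompt_caching": True, "ttl": "1h"}

model = OpenRouterModel(
    "anthropic/claude-3.5-sonnet",  # illustrative model slug
    model_config_dict=config,
    api_key=os.environ.get("OPENROUTER_API_KEY"),
)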

camel/models/openrouter_model.py

Lines changed: 45 additions & 1 deletion

@@ -12,7 +12,7 @@
 # limitations under the License.
 # ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
 import os
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from camel.configs import OpenRouterConfig
 from camel.models.openai_compatible_model import OpenAICompatibleModel
@@ -81,3 +81,47 @@ def __init__(
             max_retries=max_retries,
             **kwargs,
         )
+
+    def _prepare_messages(
+        self, messages: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        # Leave messages untouched unless caching was requested.
+        if not self.model_config_dict.get("enable_prompt_caching"):
+            return messages
+
+        # These providers cache prompts implicitly, so no explicit
+        # cache_control markers are needed.
+        implicit_caching_prefixes = (
+            "openai/",
+            "deepseek/",
+            "google/gemini-2.5-pro",
+            "google/gemini-2.5-flash",
+            "x-ai/",
+            "moonshotai/",
+        )
+
+        model_name = str(self.model_type).lower()
+
+        needs_explicit_caching = not any(
+            model_name.startswith(p) for p in implicit_caching_prefixes
+        )
+
+        # Apply the transformation only to the leading system message.
+        if needs_explicit_caching and messages:
+            if messages[0].get("role") == "system":
+                sys_msg = messages[0]
+                content = sys_msg.get("content")
+
+                ttl = self.model_config_dict.get("ttl", "5m")
+                cache_obj = {"type": "ephemeral"}
+
+                # Only Anthropic routes support the extended 1-hour TTL.
+                if model_name.startswith("anthropic/") and ttl == "1h":
+                    cache_obj["ttl"] = "1h"
+
+                if isinstance(content, str):
+                    sys_msg["content"] = [
+                        {
+                            "type": "text",
+                            "text": content,
+                            "cache_control": cache_obj,
+                        }
+                    ]
+                elif isinstance(content, list) and content:
+                    # Mark the last block of already-structured content.
+                    content[-1]["cache_control"] = cache_obj
+
+        return messages
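To make the rewrite concrete, this is the shape _prepare_messages produces for a plain string system prompt on a route outside the implicit-caching list; a sketch assuming an Anthropic slug with ttl="1h":

before = [{"role": "system", "content": "You are a helpful assistant."}]

# After _prepare_messages on an "anthropic/..." route with ttl="1h",
# the string content becomes a single text block carrying cache_control:
after = [
    {
        "role": "system",
        "content": [
            {
                "type": "text",
                "text": "You are a helpful assistant.",
                "cache_control": {"type": "ephemeral", "ttl": "1h"},
            }
        ],
    }
]

When the content is already a list of blocks, the method instead stamps cache_control onto the last block, which under Anthropic-style prompt caching marks the prefix up to and including that block as cacheable.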

pyproject.toml

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@ dependencies = [
     "pillow>=10.0.0",
     "google-search-results>=2.4.2",
     "pyyaml>=6.0.3",
+    "pytest>=7.4.4",
 ]
 
 [tool.uv]

test/models/test_openrouter_model.py

Lines changed: 20 additions & 6 deletions

@@ -18,8 +18,7 @@
 from camel.models import OpenRouterModel
 from camel.types import ModelType
 from camel.utils import OpenAITokenCounter
-
-
+import os
 
 @pytest.mark.model_backend
 @pytest.mark.parametrize(
     "model_type",
@@ -38,8 +37,23 @@ def test_openrouter_model(model_type: ModelType):
     assert isinstance(model.model_type.value_for_tiktoken, str)
     assert isinstance(model.model_type.token_limit, int)
 
+def test_openrouter_gemma_caching(model_type="google/gemma-3-27b-it:free"):
+    config = {"enable_prompt_caching": True}
+    model = OpenRouterModel(
+        model_type,
+        model_config_dict=config,
+        api_key=os.environ.get("OPENROUTER_API_KEY"),
+    )
+
+    messages = [{"role": "system", "content": "Large context..."}]
+    prepared = model._prepare_messages(messages)
+
+    assert isinstance(prepared[0]["content"], list)
+    assert "cache_control" in prepared[0]["content"][0]
+    assert prepared[0]["content"][0]["cache_control"]["type"] == "ephemeral"
 
-@pytest.mark.model_backend
-def test_openrouter_model_stream_property():
-    model = OpenRouterModel(ModelType.OPENROUTER_LLAMA_3_1_70B)
-    assert model.stream is False
+def test_openrouter_openai_implicit_caching(model_type="openai/gpt-4o-mini"):
+    config = {"enable_prompt_caching": True}
+    model = OpenRouterModel(
+        model_type,
+        model_config_dict=config,
+        api_key=os.environ.get("OPENROUTER_API_KEY"),
+    )
+
+    messages = [{"role": "system", "content": "Large context..."}]
+    prepared = model._prepare_messages(messages)
+
+    assert isinstance(prepared[0]["content"], str)
+    assert "cache_control" not in prepared[0]["content"]

uv.lock

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default.
