Add SiliconFlow as an LLM provider and test it out with various test cases

ron-42 · ron-42 · commit af8fe89436f6 · 2025-10-22T22:45:40.000+05:30
diff --git a/mem0/llms/configs.py b/mem0/llms/configs.py
@@ -15,6 +15,7 @@ def validate_config(cls, v, values):
             "ollama",
             "anthropic",
             "groq",
+            "siliconflow",
             "together",
             "aws_bedrock",
             "litellm",
diff --git a/mem0/llms/siliconflow.py b/mem0/llms/siliconflow.py
@@ -0,0 +1,117 @@
+import json
+import os
+import requests
+from typing import Dict, List, Optional, Any
+
+from mem0.configs.llms.base import BaseLlmConfig
+from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
+
+
+class SiliconFlowLLM(LLMBase):
+    """
+    SiliconFlow chat completion provider.
+
+    Posts to the ``/chat/completions`` endpoint with ``requests`` and parses the
+    reply assuming an OpenAI-like ``choices[0].message`` layout.
+    Docs:
+      https://docs.siliconflow.com/en/api-reference/chat-completions/chat-completions
+    """
+
+    def __init__(self, config: Optional[BaseLlmConfig] = None):
+        """
+        Resolve the model, API key and base URL, then pre-build the HTTP headers.
+
+        Args:
+            config: LLM configuration, forwarded to ``LLMBase.__init__``. ``config.api_key``
+                and ``config.base_url`` (when that attribute exists) take precedence over
+                the ``SILICONFLOW_API_KEY`` / ``SILICONFLOW_BASE_URL`` environment variables.
+
+        Raises:
+            ValueError: If no API key is found in the config or the environment.
+        """
+        super().__init__(config)
+
+        # Fall back to a default model when the config does not name one.
+        if not self.config.model:
+            self.config.model = "Qwen/Qwen2.5-7B-Instruct"
+
+        self.api_key = self.config.api_key or os.getenv("SILICONFLOW_API_KEY")
+        if not self.api_key:
+            raise ValueError("SiliconFlow API key not found. Set SILICONFLOW_API_KEY or pass via config.api_key.")
+
+        # Allow override of base URL via config or environment (docs show .com domain).
+        # Trailing slashes are stripped so _endpoint() never yields ".../v1//chat/completions".
+        self.base_url = (
+            getattr(self.config, "base_url", None)
+            or os.getenv("SILICONFLOW_BASE_URL")
+            or "https://api.siliconflow.com/v1"
+        ).rstrip("/")
+
+        # Pre-build headers
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+    def _endpoint(self) -> str:
+        """Return the full chat-completions URL for the configured base URL."""
+        return f"{self.base_url}/chat/completions"
+
+    def _parse_response(self, data: Dict[str, Any], tools: Optional[List[Dict]]) -> Any:
+        """
+        Extract the assistant reply from an OpenAI-like response body.
+
+        Args:
+            data: Decoded JSON body of the chat-completions response.
+            tools: Tools sent with the request; when truthy, tool calls are returned too.
+
+        Returns:
+            The message content when no tools were supplied, otherwise a dict
+            ``{"content": ..., "tool_calls": [{"name": ..., "arguments": ...}, ...]}``.
+
+        Raises:
+            ValueError: If ``data`` has no usable ``choices[0]`` entry.
+        """
+        try:
+            # A null "message" is treated like a missing one.
+            message = data["choices"][0].get("message") or {}
+            if not isinstance(message, dict):
+                raise TypeError("message is not a JSON object")
+        except (KeyError, IndexError, TypeError, AttributeError) as exc:
+            raise ValueError(f"Unexpected SiliconFlow response format: {data}") from exc
+
+        if not tools:
+            return message.get("content")
+
+        processed = {"content": message.get("content"), "tool_calls": []}
+        # If SiliconFlow returns tool_calls similar to OpenAI:
+        for tc in message.get("tool_calls") or []:
+            try:
+                name = tc["function"]["name"]
+                raw_args = tc["function"].get("arguments", "{}")
+                # Ensure JSON object parsing
+                args = json.loads(extract_json(raw_args))
+                processed["tool_calls"].append({"name": name, "arguments": args})
+            except (AttributeError, KeyError, TypeError, ValueError):
+                # Fallback raw: keep whatever name/arguments are present, unparsed.
+                # The guards stop the fallback itself from failing on a null
+                # "function" or on an entry that is not a dict.
+                function = tc.get("function") if isinstance(tc, dict) else None
+                if not isinstance(function, dict):
+                    function = {}
+                processed["tool_calls"].append(
+                    {
+                        "name": function.get("name"),
+                        "arguments": function.get("arguments"),
+                    }
+                )
+        return processed
+
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        response_format=None,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: str = "auto",
+    ):
+        """
+        Create chat completion via SiliconFlow.
+        Adjust request body keys if docs differ.
+
+        Args:
+            messages: Chat messages, each a dict with ``role`` and ``content``.
+            response_format: Optional value sent as ``response_format``
+                (e.g. ``{"type": "json_object"}``).
+            tools: Optional tool definitions; when given, ``tool_choice`` is sent as well.
+            tool_choice: Tool selection mode forwarded to the API. Defaults to "auto".
+
+        Returns:
+            The assistant text, or a dict with ``content`` and ``tool_calls`` when
+            ``tools`` is given.
+
+        Raises:
+            RuntimeError: If the API answers with an HTTP status of 400 or above.
+            ValueError: If the response body does not have the expected structure.
+
+        Exceptions raised by ``requests.post`` itself propagate unchanged.
+        """
+        payload: Dict[str, Any] = {
+            "model": self.config.model,
+            "messages": messages,
+            "temperature": self.config.temperature,
+            "top_p": self.config.top_p,
+            "max_tokens": self.config.max_tokens,
+        }
+
+        # Response format (if SiliconFlow supports 'response_format': {"type": "json_object"})
+        if response_format:
+            payload["response_format"] = response_format
+
+        # Tool / function calling (verify exact schema in docs; may differ)
+        if tools:
+            payload["tools"] = tools
+            # Some APIs expect {"type":"function","function":{...}} structures
+            # tool_choice might be "auto" / {"type":"function","function":{"name":"..."}}
+            payload["tool_choice"] = tool_choice
+
+        resp = requests.post(self._endpoint(), headers=self.headers, json=payload, timeout=60)
+        if resp.status_code >= 400:
+            extra_hint = ""
+            if resp.status_code == 401:
+                # Point at the two usual causes: a wrong key, or a key issued for another domain.
+                extra_hint = (
+                    " (401 Unauthorized: Verify SILICONFLOW_API_KEY is correct and matches the domain "
+                    f"{self.base_url.split('/v1')[0]}; you can also set SILICONFLOW_BASE_URL if needed)"
+                )
+            raise RuntimeError(f"SiliconFlow error {resp.status_code}: {resp.text}{extra_hint}")
+
+        data = resp.json()
+        return self._parse_response(data, tools)
diff --git a/mem0/utils/factory.py b/mem0/utils/factory.py
@@ -36,6 +36,7 @@ class LlmFactory:
         "ollama": ("mem0.llms.ollama.OllamaLLM", OllamaConfig),
         "openai": ("mem0.llms.openai.OpenAILLM", OpenAIConfig),
         "groq": ("mem0.llms.groq.GroqLLM", BaseLlmConfig),
+        "siliconflow": ("mem0.llms.siliconflow.SiliconFlowLLM", BaseLlmConfig),
         "together": ("mem0.llms.together.TogetherLLM", BaseLlmConfig),
         "aws_bedrock": ("mem0.llms.aws_bedrock.AWSBedrockLLM", BaseLlmConfig),
         "litellm": ("mem0.llms.litellm.LiteLLM", BaseLlmConfig),
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "pytz>=2024.1",
     "sqlalchemy>=2.0.31",
     "protobuf>=5.29.0,<6.0.0",
+    "requests>=2.32.0",
 ]
 
 [project.optional-dependencies]
diff --git a/tests/llms/test_siliconflow.py b/tests/llms/test_siliconflow.py
@@ -0,0 +1,113 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from mem0.configs.llms.base import BaseLlmConfig
+from mem0.llms.siliconflow import SiliconFlowLLM
+
+
+@patch("mem0.llms.siliconflow.requests.post")
+def test_generate_response_without_tools(mock_post, monkeypatch):
+    """A plain chat request posts the configured model and messages and returns the reply text."""
+    monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
+
+    # Canned HTTP 200 reply in the OpenAI-like shape the provider parses.
+    fake_reply = Mock(status_code=200)
+    fake_reply.json.return_value = {"choices": [{"message": {"content": "Hello back!"}}]}
+    mock_post.return_value = fake_reply
+
+    cfg = BaseLlmConfig(model="Qwen/Qwen2.5-7B-Instruct", temperature=0.3, max_tokens=64, top_p=1.0)
+    provider = SiliconFlowLLM(cfg)
+    conversation = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello SiliconFlow"},
+    ]
+
+    answer = provider.generate_response(conversation)
+
+    # Exactly one HTTP call, carrying the expected JSON body.
+    mock_post.assert_called_once()
+    sent_body = mock_post.call_args.kwargs["json"]
+    assert sent_body["model"] == cfg.model
+    assert sent_body["messages"][1]["content"] == "Hello SiliconFlow"
+    assert answer == "Hello back!"
+
+
+@patch("mem0.llms.siliconflow.requests.post")
+def test_generate_response_with_tools(mock_post, monkeypatch):
+    """When tools are supplied they are forwarded and tool calls come back with parsed arguments."""
+    monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
+
+    echo_parameters = {
+        "type": "object",
+        "properties": {"text": {"type": "string"}},
+        "required": ["text"],
+    }
+    echo_tool = {
+        "type": "function",
+        "function": {
+            "name": "echo",
+            "description": "Echo input",
+            "parameters": echo_parameters,
+        },
+    }
+    tools = [echo_tool]
+
+    # Canned reply: one tool call whose arguments arrive as a JSON string.
+    reply_message = {
+        "content": "Tool called.",
+        "tool_calls": [{"function": {"name": "echo", "arguments": '{"text":"hi"}'}}],
+    }
+    fake_reply = Mock(status_code=200)
+    fake_reply.json.return_value = {"choices": [{"message": reply_message}]}
+    mock_post.return_value = fake_reply
+
+    cfg = BaseLlmConfig(model="Qwen/Qwen2.5-7B-Instruct", temperature=0.3, max_tokens=64, top_p=1.0)
+    provider = SiliconFlowLLM(cfg)
+    conversation = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Call a tool"},
+    ]
+
+    result = provider.generate_response(conversation, tools=tools)
+
+    mock_post.assert_called_once()
+    sent_body = mock_post.call_args.kwargs["json"]
+    assert sent_body["tools"] == tools
+    assert result["content"] == "Tool called."
+    assert len(result["tool_calls"]) == 1
+    only_call = result["tool_calls"][0]
+    assert only_call["name"] == "echo"
+    assert only_call["arguments"]["text"] == "hi"
+
+
+@patch("mem0.llms.siliconflow.requests.post")
+def test_generate_response_error(mock_post, monkeypatch):
+    """An HTTP status of 400 or above is surfaced as RuntimeError naming the status code."""
+    monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
+    config = BaseLlmConfig(model="Qwen/Qwen2.5-7B-Instruct", temperature=0.3, max_tokens=64, top_p=1.0)
+    llm = SiliconFlowLLM(config)
+
+    mock_post.return_value = Mock(status_code=500, text="Internal Error")
+
+    # `match` pins the error to the HTTP-status branch, so an unrelated
+    # RuntimeError cannot make this test pass by accident.
+    with pytest.raises(RuntimeError, match="SiliconFlow error 500"):
+        llm.generate_response([{"role": "user", "content": "Hi"}])
+
+    mock_post.assert_called_once()
+
+
+# ------------------------- LIVE INTEGRATION (optional) ------------------------- #
+@pytest.mark.skipif(not os.getenv("SILICONFLOW_API_KEY"), reason="No SiliconFlow API key set")
+def test_siliconflow_live_basic():
+    """Non-mocked round trip against the SiliconFlow API; skipped when no key is set.
+    max_tokens is kept small to limit cost.
+    The model can be overridden through SILICONFLOW_MODEL.
+    """
+    question = "In one concise sentence, say hello from SiliconFlow integration test."
+    live_llm = SiliconFlowLLM(
+        BaseLlmConfig(
+            model=os.getenv("SILICONFLOW_MODEL", "Qwen/QwQ-32B"),
+            temperature=0.2,
+            max_tokens=64,
+            top_p=0.9,
+        )
+    )
+
+    reply = live_llm.generate_response([{"role": "user", "content": question}])
+
+    # The reply must be non-empty text that is not just the prompt echoed back.
+    assert isinstance(reply, str)
+    stripped = reply.strip()
+    assert stripped and stripped != question

Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@ dependencies = [`
`21`	`21`	`"pytz>=2024.1",`
`22`	`22`	`"sqlalchemy>=2.0.31",`
`23`	`23`	`"protobuf>=5.29.0,<6.0.0",`
	`24`	`+ "requests>=2.32.0",`
`24`	`25`	`]`
`25`	`26`
`26`	`27`	`[project.optional-dependencies]`