Skip to content

Commit 30cebb1

Browse files
committed
feat: Tool calling
1 parent 1abc110 commit 30cebb1

File tree

6 files changed

+173
-5
lines changed

6 files changed

+173
-5
lines changed

Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 11.4.0
4+
- Tool calling for chat tasks where the model supports it
5+
36
## 11.3.0
47

58
- Drop support for python3.9

aleph_alpha_client/chat.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from dataclasses import asdict, dataclass
33
from enum import Enum
44
from io import BytesIO
5-
from typing import Any, Dict, List, Mapping, Optional, Sequence, Union
5+
from typing import Any, Dict, List, Mapping, Optional, Sequence, Union, Literal
66

77
from pydantic import BaseModel
88
from aleph_alpha_client.structured_output import ResponseFormat
@@ -41,6 +41,40 @@ def to_json(self) -> Mapping[str, Any]:
4141
return result
4242

4343

44+
@dataclass(frozen=True)
class FunctionCall:
    """A function invocation requested by the model.

    `arguments` is the JSON-serialized argument object exactly as the API
    returned it; callers are responsible for parsing it.
    """

    name: str
    arguments: str


@dataclass(frozen=True)
class ToolCall:
    """One tool call emitted by the model inside an assistant message."""

    id: str
    type: str
    function: FunctionCall

    @staticmethod
    def from_json(json: Dict[str, Any]) -> "ToolCall":
        """Build a ToolCall from its API wire representation."""
        fn = json["function"]
        return ToolCall(
            id=json["id"],
            type=json["type"],
            function=FunctionCall(name=fn["name"], arguments=fn["arguments"]),
        )

    def to_json(self) -> Mapping[str, Any]:
        """Serialize back to the API wire representation.

        `asdict` recurses into the nested FunctionCall, yielding the same
        ``{"id", "type", "function": {"name", "arguments"}}`` shape the API
        expects.
        """
        return asdict(self)
76+
77+
4478
# We introduce a more specific message type because chat responses can only
4579
# contain text at the moment. This enables static type checking to proof that
4680
# `content` is always a string.
@@ -59,12 +93,17 @@ class TextMessage:
5993

6094
role: Role
6195
content: str
96+
tool_calls: Optional[List[ToolCall]] = None
6297

6398
@staticmethod
6499
def from_json(json: Dict[str, Any]) -> "TextMessage":
100+
tool_calls = json.get("tool_calls")
65101
return TextMessage(
66102
role=Role(json["role"]),
67103
content=json["content"],
104+
tool_calls=None
105+
if tool_calls is None
106+
else [ToolCall.from_json(tool_call) for tool_call in tool_calls],
68107
)
69108

70109
# In multi-turn conversations the returned TextMessage is part of the chat
@@ -76,6 +115,8 @@ def to_json(self) -> Mapping[str, Any]:
76115
"role": self.role.value,
77116
"content": _message_content_to_json(self.content),
78117
}
118+
if self.tool_calls is not None:
119+
result["tool_calls"] = [t.to_json() for t in self.tool_calls]
79120
return result
80121

81122

@@ -122,6 +163,12 @@ class StreamOptions:
122163
include_usage: bool
123164

124165

166+
@dataclass(frozen=True)
class ToolFunction:
    """A `tool_choice` value forcing the model to call one specific function."""

    # The API only defines the "function" tool type today.
    type: Literal["function"]
    # Function selector payload as the API expects it; left untyped because
    # its schema is defined by the (OpenAI-compatible) endpoint, not here.
    function: Any
170+
171+
125172
@dataclass(frozen=True)
126173
class ChatRequest:
127174
"""
@@ -141,6 +188,12 @@ class ChatRequest:
141188
steering_concepts: Optional[List[str]] = None
142189
response_format: Optional[ResponseFormat] = None
143190

191+
tools: Optional[List[Any]] = None
192+
tool_choice: Optional[Union[Literal["auto", "required", "none"], ToolFunction]] = (
193+
None
194+
)
195+
parallel_tool_calls: Optional[bool] = None
196+
144197
def to_json(self) -> Mapping[str, Any]:
145198
payload = {k: v for k, v in asdict(self).items() if v is not None}
146199
payload["messages"] = [message.to_json() for message in self.messages]
@@ -164,7 +217,7 @@ class FinishReason(str, Enum):
164217
"""
165218
The reason the model stopped generating tokens.
166219
167-
This will be stop if the model hit a natural stop point or a provided stop
220+
This will be `stop` if the model hit a natural stop point or a provided stop
168221
sequence or length if the maximum number of tokens specified in the request
169222
was reached. If the API is unable to understand the stop reason emitted by
170223
one of the workers, content_filter is returned.
@@ -173,6 +226,7 @@ class FinishReason(str, Enum):
173226
Stop = "stop"
174227
Length = "length"
175228
ContentFilter = "content_filter"
229+
ToolCalls = "tool_calls"
176230

177231

178232
@dataclass(frozen=True)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "aleph-alpha-client"
3-
version = "11.3.0"
3+
version = "11.4.0"
44
description = "python client to interact with Aleph Alpha api endpoints"
55
authors = [{ name = "Aleph Alpha", email = "[email protected]" }]
66
requires-python = ">=3.10,<3.14"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
interactions:
2+
- request:
3+
body:
4+
messages:
5+
- content: You are a helpful assistant.
6+
role: system
7+
- content: What is the weather like in Paris today?
8+
role: user
9+
model: qwen3-32b-tool
10+
tools:
11+
- function:
12+
description: Get current temperature for a given location.
13+
name: get_weather
14+
parameters:
15+
additionalProperties: false
16+
properties:
17+
location:
18+
description: "City and country e.g. Bogot\xE1, Colombia"
19+
type: string
20+
required:
21+
- location
22+
type: object
23+
strict: true
24+
type: function
25+
headers: {}
26+
method: POST
27+
uri: https://inference-api.stage.product.pharia.com/chat/completions
28+
response:
29+
body:
30+
string: '{"id":"chatcmpl-11b3f640-841a-478a-93cb-0c7ac98fc3da","choices":[{"finish_reason":"tool_calls","index":0,"message":{"role":"assistant","content":"\n\n","reasoning_content":"\nOkay,
31+
the user is asking about the weather in Paris today. I need to figure out
32+
which function to use. The available tool is get_weather, which requires a
33+
location parameter. Paris is the city mentioned, and the country is France.
34+
So I should format the location as \"Paris, France\". Let me make sure there
35+
are no other parameters needed. The function only needs the location, so I''ll
36+
construct the tool call with that.\n","tool_calls":[{"id":"chatcmpl-tool-2370633f184e43d8a700b78806cb1083","type":"function","function":{"name":"get_weather","arguments":"{\"location\":
37+
\"Paris, France\"}"}}]},"logprobs":null}],"created":1755691940,"model":"qwen3-32b-tool","system_fingerprint":null,"object":"chat.completion","usage":{"prompt_tokens":188,"completion_tokens":114,"total_tokens":302}}'
38+
headers:
39+
Access-Control-Allow-Credentials:
40+
- 'true'
41+
Access-Control-Expose-Headers:
42+
- content-type
43+
Connection:
44+
- keep-alive
45+
Content-Encoding:
46+
- gzip
47+
Content-Type:
48+
- application/json
49+
Date:
50+
- Wed, 20 Aug 2025 12:12:23 GMT
51+
Strict-Transport-Security:
52+
- max-age=31536000; includeSubDomains
53+
Transfer-Encoding:
54+
- chunked
55+
Vary:
56+
- Origin, Access-Control-Request-Method, Access-Control-Request-Headers
57+
- accept-encoding
58+
status:
59+
code: 200
60+
message: OK
61+
version: 1

tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ def structured_output_model_name() -> str:
116116
return "qwen3-32b-tool"
117117

118118

119+
@pytest.fixture(scope="session")
def tool_calling_model_name() -> str:
    """Name of a deployed model known to support tool calling."""
    return "qwen3-32b-tool"
122+
123+
119124
@pytest.fixture(scope="session")
120125
def dummy_model_name() -> str:
121126
return "dummy-model"

tests/test_chat.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,53 @@ async def test_can_chat_with_async_client(
6565
assert response.message.content is not None
6666

6767

68+
# OpenAI-compatible tool schema shared by the tool-calling tests: a single
# `get_weather` function with one required string parameter, `location`.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current temperature for a given location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and country e.g. Bogotá, Colombia",
                    }
                },
                "required": ["location"],
                "additionalProperties": False,
            },
            "strict": True,
        },
    }
]
89+
90+
91+
@pytest.mark.vcr
async def test_can_chat_with_tools(
    async_client: AsyncClient, tool_calling_model_name: str
):
    """A chat request carrying `tools` yields an assistant message with
    exactly one parsed tool call for the declared `get_weather` function.

    Runs against the recorded VCR cassette, so no live endpoint is needed.
    """
    conversation = [
        Message(role=Role.System, content="You are a helpful assistant."),
        Message(
            role=Role.User, content="What is the weather like in Paris today?"
        ),
    ]
    request = ChatRequest(
        messages=conversation,
        model=tool_calling_model_name,
        tools=TOOLS,
    )

    response = await async_client.chat(request, model=tool_calling_model_name)

    message = response.message
    assert message.role == Role.Assistant
    assert message.content is not None
    assert message.tool_calls is not None
    assert len(message.tool_calls) == 1
    (call,) = message.tool_calls
    assert call.type == "function"
    assert call.function.name == "get_weather"
114+
68115
@pytest.mark.vcr
69116
async def test_can_chat_with_streaming_support(
70117
async_client: AsyncClient, chat_model_name: str
@@ -263,7 +310,6 @@ def test_response_format_json_schema(
263310
assert field in json_response.keys(), (
264311
f"Required field '{field}' is missing from response"
265312
)
266-
267313
# Validate field types
268314
assert isinstance(json_response["nemo"], str), "Field 'nemo' should be a string"
269315
assert isinstance(json_response["species"], str), (
@@ -273,7 +319,6 @@ def test_response_format_json_schema(
273319
assert isinstance(json_response["size_cm"], (int, float)), (
274320
"Field 'size_cm' should be a number"
275321
)
276-
277322
# Validate size constraints
278323
assert 0.1 <= json_response["size_cm"] <= 100.0, (
279324
"Field 'size_cm' should be between 0.1 and 100.0"

0 commit comments

Comments
 (0)