@@ -13,10 +13,14 @@
 # ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
 
 import os
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union
 
+from openai import AsyncStream, Stream
+from pydantic import BaseModel
+
+from camel.messages import OpenAIMessage
 from camel.models.openai_compatible_model import OpenAICompatibleModel
-from camel.types import ModelType
+from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
 from camel.utils import (
     BaseTokenCounter,
     api_keys_required,
@@ -85,3 +89,142 @@ def __init__( |
             max_retries,
             **kwargs,
         )
+        # Store the last reasoning_content from the model response.
+        # It will be injected into the next request's assistant message.
+        self._last_reasoning_content: Optional[str] = None
+
+    def _inject_reasoning_content(
+        self,
+        messages: List[OpenAIMessage],
+    ) -> List[OpenAIMessage]:
+        r"""Inject the last reasoning_content into assistant messages.
+
+        For Volcano Engine's doubao-seed models with deep thinking enabled,
+        the reasoning_content from the model response needs to be passed back
+        in subsequent requests for proper context management.
+
+        Args:
+            messages: The original messages list.
+
+        Returns:
+            Messages with reasoning_content added to the last assistant
+            message that has tool_calls.
+        """
+        if not self._last_reasoning_content:
+            return messages
+
+        # Find the last assistant message with tool_calls and inject
+        # reasoning_content
+        processed: List[OpenAIMessage] = []
+        reasoning_injected = False
+
+        for msg in reversed(messages):
+            if (
+                not reasoning_injected
+                and isinstance(msg, dict)
+                and msg.get("role") == "assistant"
+                and msg.get("tool_calls")
+                and "reasoning_content" not in msg
+            ):
+                # Inject reasoning_content into this message
+                new_msg = dict(msg)
+                new_msg["reasoning_content"] = self._last_reasoning_content
+                processed.append(new_msg)  # type: ignore[arg-type]
+                reasoning_injected = True
+            else:
+                processed.append(msg)
+
+        # Only clear after successful injection
+        if reasoning_injected:
+            self._last_reasoning_content = None
+
+        return list(reversed(processed))
+
+    def _extract_reasoning_content(
+        self, response: ChatCompletion
+    ) -> Optional[str]:
+        r"""Extract reasoning_content from the model response.
+
+        Args:
+            response: The model response.
+
+        Returns:
+            The reasoning_content if available, None otherwise.
+        """
+        if response.choices:
+            return getattr(
+                response.choices[0].message, "reasoning_content", None
+            )
+        return None
+
+    def run(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
+        r"""Runs inference of Volcano Engine chat completion.
+
+        Overrides the base run method to inject reasoning_content from
+        previous responses into subsequent requests, as required by
+        Volcano Engine's doubao-seed models with deep thinking enabled.
+
+        Args:
+            messages: Message list with the chat history in OpenAI API format.
+            response_format: The format of the response.
+            tools: The schema of the tools to use for the request.
+
+        Returns:
+            ChatCompletion in the non-stream mode, or
+            Stream[ChatCompletionChunk] in the stream mode.
+        """
+        # Inject reasoning_content from the previous response
+        processed_messages = self._inject_reasoning_content(messages)
+
+        # Call the parent's run
+        response = super().run(processed_messages, response_format, tools)
+
+        # Extract and store reasoning_content for the next request
+        if isinstance(response, ChatCompletion):
+            self._last_reasoning_content = self._extract_reasoning_content(
+                response
+            )
+
+        return response
+
+    async def arun(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+        r"""Runs async inference of Volcano Engine chat completion.
+
+        Overrides the base arun method to inject reasoning_content from
+        previous responses into subsequent requests, as required by
+        Volcano Engine's doubao-seed models with deep thinking enabled.
+
+        Args:
+            messages: Message list with the chat history in OpenAI API format.
+            response_format: The format of the response.
+            tools: The schema of the tools to use for the request.
+
+        Returns:
+            ChatCompletion in the non-stream mode, or
+            AsyncStream[ChatCompletionChunk] in the stream mode.
+        """
+        # Inject reasoning_content from the previous response
+        processed_messages = self._inject_reasoning_content(messages)
+
+        # Call the parent's arun
+        response = await super().arun(
+            processed_messages, response_format, tools
+        )
+
+        # Extract and store reasoning_content for the next request
+        if isinstance(response, ChatCompletion):
+            self._last_reasoning_content = self._extract_reasoning_content(
+                response
+            )
+
+        return response
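
The sketch below illustrates the injection half of this round-trip. It is illustrative only: the module path camel.models.volcano_model and the class name VolcanoModel are assumed from context, the message payloads are hypothetical, and __new__ is used to skip the constructor so no API key or network access is needed.

    from typing import Any, Dict, List

    from camel.models.volcano_model import VolcanoModel  # path assumed

    # Hypothetical chat history after a tool-calling turn.
    history: List[Dict[str, Any]] = [
        {"role": "user", "content": "What's the weather in Hangzhou?"},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_0",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": '{"city": "Hangzhou"}',
                    },
                }
            ],
        },
        {"role": "tool", "tool_call_id": "call_0", "content": "22C, sunny"},
    ]

    model = VolcanoModel.__new__(VolcanoModel)  # bypass __init__ for the demo
    model._last_reasoning_content = "User wants weather; call the tool."

    out = model._inject_reasoning_content(history)
    # The cached reasoning lands on the assistant tool-call message, and the
    # cache is cleared so it is not re-injected on later requests.
    assert out[1]["reasoning_content"] == "User wants weather; call the tool."
    assert model._last_reasoning_content is None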
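The extraction half can be exercised the same way. reasoning_content is a provider-specific extra field that is not part of the typed OpenAI message, which is why the diff reads it with getattr(); this sketch assumes camel.types.ChatCompletion is the openai SDK model, which permits extra fields, and the response payload is made up.

    from camel.types import ChatCompletion

    resp = ChatCompletion.model_validate(
        {
            "id": "chatcmpl-0",
            "object": "chat.completion",
            "created": 0,
            "model": "doubao-seed",
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "tool_calls",
                    "message": {
                        "role": "assistant",
                        "content": None,
                        # Extra field surfaced by Volcano Engine responses.
                        "reasoning_content": "Check the weather tool first.",
                    },
                }
            ],
        }
    )
    print(model._extract_reasoning_content(resp))
    # -> "Check the weather tool first."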