From abb34249dc04e023542d51e13741a6f1279436d6 Mon Sep 17 00:00:00 2001
From: Xiangyu Shi <sxyu.shi@gmail.com>
Date: Tue, 10 Feb 2026 10:30:05 +0100
Subject: [PATCH 1/6] fix bugs

---
 camel/agents/chat_agent.py | 388 +++++++++++++++++++++++++++++++------
 1 file changed, 332 insertions(+), 56 deletions(-)

diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py
index c6337408d7..458443d3b0 100644
--- a/camel/agents/chat_agent.py
+++ b/camel/agents/chat_agent.py
@@ -2669,65 +2669,74 @@ def _is_called_from_registered_toolkit(self) -> bool:
 
         return False
 
-    def _apply_prompt_based_parsing(
+    def _apply_prompt_based_parsing_to_message(
         self,
-        response: ModelResponse,
+        message: BaseMessage,
         original_response_format: Type[BaseModel],
     ) -> None:
-        r"""Apply manual parsing when using prompt-based formatting.
+        r"""Apply prompt-based parsing to a single message.
 
         Args:
-            response: The model response to parse.
+            message: The message to parse.
             original_response_format: The original response format class.
         """
-        for message in response.output_messages:
-            if message.content:
-                try:
-                    # Try to extract JSON from the response content
-                    import json
+        if not message.content:
+            return
+        try:
+            import json
 
-                    from pydantic import ValidationError
+            from pydantic import ValidationError
 
-                    # Try to find JSON in the content
-                    content = message.content.strip()
+            content = message.content.strip()
 
-                    # Try direct parsing first
-                    try:
-                        parsed_json = json.loads(content)
-                        message.parsed = (
-                            original_response_format.model_validate(
-                                parsed_json
-                            )
-                        )
-                        continue
-                    except (json.JSONDecodeError, ValidationError):
-                        pass
+            # Try direct parsing first
+            try:
+                parsed_json = json.loads(content)
+                message.parsed = original_response_format.model_validate(
+                    parsed_json
+                )
+                return
+            except (json.JSONDecodeError, ValidationError):
+                pass
 
-                    # Try to extract JSON from text
-                    json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
-                    json_matches = re.findall(json_pattern, content, re.DOTALL)
+            # Try to extract JSON from text
+            json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
+            json_matches = re.findall(json_pattern, content, re.DOTALL)
 
-                    for json_str in json_matches:
-                        try:
-                            parsed_json = json.loads(json_str)
-                            message.parsed = (
-                                original_response_format.model_validate(
-                                    parsed_json
-                                )
-                            )
-                            # Update content to just the JSON for consistency
-                            message.content = json.dumps(parsed_json)
-                            break
-                        except (json.JSONDecodeError, ValidationError):
-                            continue
+            for json_str in json_matches:
+                try:
+                    parsed_json = json.loads(json_str)
+                    message.parsed = original_response_format.model_validate(
+                        parsed_json
+                    )
+                    message.content = json.dumps(parsed_json)
+                    break
+                except (json.JSONDecodeError, ValidationError):
+                    continue
 
-                    if not message.parsed:
-                        logger.warning(
-                            f"Failed to parse JSON from response: {content}"
-                        )
+            if not message.parsed:
+                logger.warning(
+                    f"Failed to parse JSON from response: {content}"
+                )
 
-                except Exception as e:
-                    logger.warning(f"Error during prompt-based parsing: {e}")
+        except Exception as e:
+            logger.warning(f"Error during prompt-based parsing: {e}")
+
+    def _apply_prompt_based_parsing(
+        self,
+        response: ModelResponse,
+        original_response_format: Type[BaseModel],
+    ) -> None:
+        r"""Apply manual parsing when using prompt-based formatting.
+
+        Args:
+            response: The model response to parse.
+            original_response_format: The original response format class.
+        """
+        for message in response.output_messages:
+            self._apply_prompt_based_parsing_to_message(
+                message, original_response_format
+            )
 
     def _format_response_if_needed(
         self,
@@ -4177,7 +4186,8 @@ def _stream(
                 available.
         """
         # Handle response format compatibility with non-strict tools
-        input_message, response_format, _ = (
+        original_response_format = response_format
+        input_message, response_format, used_prompt_formatting = (
             self._handle_response_format_with_non_strict_tools(
                 input_message, response_format
             )
@@ -4203,7 +4213,11 @@ def _stream(
 
         # Start streaming response
         yield from self._stream_response(
-            openai_messages, num_tokens, response_format
+            openai_messages,
+            num_tokens,
+            response_format,
+            original_response_format,
+            used_prompt_formatting,
         )
 
     def _get_token_count(self, content: str) -> int:
@@ -4245,6 +4259,8 @@ def _stream_response(
         openai_messages: List[OpenAIMessage],
         num_tokens: int,
         response_format: Optional[Type[BaseModel]] = None,
+        original_response_format: Optional[Type[BaseModel]] = None,
+        used_prompt_formatting: bool = False,
     ) -> Generator[ChatAgentResponse, None, None]:
         r"""Internal method to handle streaming responses with tool calls."""
 
@@ -4304,6 +4320,8 @@ def _stream_response(
                     tool_call_records,
                     step_token_usage,
                     response_format,
+                    original_response_format,
+                    used_prompt_formatting,
                 )
 
                 if tool_calls_complete:
@@ -4374,15 +4392,76 @@ def _stream_response(
                     # Get final completion and record final message
                     try:
                         final_completion = stream.get_final_completion()
-                        final_content = (
-                            final_completion.choices[0].message.content or ""
+
+                        # Check if the model wants to call tools
+                        final_choice = final_completion.choices[0]
+                        final_tool_calls = getattr(
+                            final_choice.message, 'tool_calls', None
                         )
+                        if final_tool_calls:
+                            # Convert tool calls to accumulated format
+                            # and execute them
+                            for tc in final_tool_calls:
+                                tc_data = {
+                                    'id': tc.id,
+                                    'function': {
+                                        'name': tc.function.name,
+                                        'arguments': tc.function.arguments,
+                                    },
+                                    'complete': True,
+                                }
+                                accumulated_tool_calls[tc.id] = tc_data
+
+                            # Record assistant message with tool calls
+                            # and execute tools
+                            for status_response in (
+                                self
+                            )._execute_tools_sync_with_status_accumulator(
+                                accumulated_tool_calls,
+                                tool_call_records,
+                            ):
+                                yield status_response
+
+                            if tool_call_records:
+                                logger.info("Sending back result to model")
+
+                            # Update usage
+                            if final_completion.usage:
+                                self._update_token_usage_tracker(
+                                    step_token_usage,
+                                    safe_model_dump(final_completion.usage),
+                                )
+
+                            # Continue the loop for next model call
+                            accumulated_tool_calls.clear()
+                            if tool_call_records and (
+                                self.max_iteration is None
+                                or iteration_count < self.max_iteration
+                            ):
+                                try:
+                                    openai_messages, num_tokens = (
+                                        self.memory.get_context()
+                                    )
+                                except RuntimeError as e:
+                                    yield self._step_terminate(
+                                        e.args[1],
+                                        tool_call_records,
+                                        "max_tokens_exceeded",
+                                    )
+                                    return
+                                content_accumulator.reset_streaming_content()
+                                continue
+                            else:
+                                break
+
+                        final_content = final_choice.message.content or ""
                         final_reasoning = (
                             content_accumulator.get_full_reasoning_content()
                             or None
                         )
 
-                        final_message = BaseMessage(
+                        # Record full content to memory
+                        record_message = BaseMessage(
                             role_name=self.role_name,
                             role_type=self.role_type,
                             meta_dict={},
@@ -4394,7 +4473,46 @@ def _stream_response(
                             reasoning_content=final_reasoning,
                         )
 
-                        self.record_message(final_message)
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not record_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                record_message, original_response_format
+                            )
+
+                        self.record_message(record_message)
+
+                        # In delta mode, final response content should
+                        # be empty since all content was already yielded
+                        # incrementally
+                        display_content = (
+                            final_content if self.stream_accumulate else ""
+                        )
+                        display_reasoning = (
+                            final_reasoning if self.stream_accumulate else None
+                        )
+                        final_message = BaseMessage(
+                            role_name=self.role_name,
+                            role_type=self.role_type,
+                            meta_dict={},
+                            content=display_content,
+                            parsed=cast(
+                                "BaseModel | dict[str, Any] | None",
+                                parsed_object,
+                            ),  # type: ignore[arg-type]
+                            reasoning_content=display_reasoning,
+                        )
+
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not final_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                final_message, original_response_format
+                            )
 
                         # Create final response
                         final_response = ChatAgentResponse(
@@ -4454,6 +4572,8 @@ def _process_stream_chunks_with_accumulator(
         tool_call_records: List[ToolCallingRecord],
         step_token_usage: Dict[str, int],
         response_format: Optional[Type[BaseModel]] = None,
+        original_response_format: Optional[Type[BaseModel]] = None,
+        used_prompt_formatting: bool = False,
     ) -> Generator[ChatAgentResponse, None, Tuple[bool, bool]]:
         r"""Process streaming chunks with content accumulator."""
 
@@ -4552,6 +4672,15 @@ def _process_stream_chunks_with_accumulator(
                                 final_message, response_format
                             )
 
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not final_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                final_message, original_response_format
+                            )
+
                         self.record_message(final_message)
             if chunk.usage:
                 # Handle final usage chunk, whether or not choices are present.
@@ -4591,6 +4720,15 @@ def _process_stream_chunks_with_accumulator(
                                 final_message, response_format
                             )
 
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not final_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                final_message, original_response_format
+                            )
+
                         # Create final response with final usage (not partial)
                         final_response = ChatAgentResponse(
                             msgs=[final_message],
@@ -5132,6 +5270,14 @@ async def _astream(
     ) -> AsyncGenerator[ChatAgentResponse, None]:
         r"""Asynchronous version of stream method."""
 
+        # Handle response format compatibility with non-strict tools
+        original_response_format = response_format
+        input_message, response_format, used_prompt_formatting = (
+            self._handle_response_format_with_non_strict_tools(
+                input_message, response_format
+            )
+        )
+
         # Convert input message to BaseMessage if necessary
         if isinstance(input_message, str):
             input_message = BaseMessage.make_user_message(
@@ -5154,7 +5300,11 @@ async def _astream(
         # Start async streaming response
         last_response = None
         async for response in self._astream_response(
-            openai_messages, num_tokens, response_format
+            openai_messages,
+            num_tokens,
+            response_format,
+            original_response_format,
+            used_prompt_formatting,
         ):
             last_response = response
             yield response
@@ -5171,6 +5321,8 @@ async def _astream_response(
         openai_messages: List[OpenAIMessage],
         num_tokens: int,
         response_format: Optional[Type[BaseModel]] = None,
+        original_response_format: Optional[Type[BaseModel]] = None,
+        used_prompt_formatting: bool = False,
     ) -> AsyncGenerator[ChatAgentResponse, None]:
         r"""Async method to handle streaming responses with tool calls."""
 
@@ -5234,6 +5386,8 @@ async def _astream_response(
                     tool_call_records,
                     step_token_usage,
                     response_format,
+                    original_response_format,
+                    used_prompt_formatting,
                 ):
                     if isinstance(item, tuple):
                         # This is the final return value (stream_completed,
@@ -5313,15 +5467,78 @@ async def _astream_response(
                     # Get final completion and record final message
                     try:
                         final_completion = await stream.get_final_completion()
-                        final_content = (
-                            final_completion.choices[0].message.content or ""
+
+                        # Check if the model wants to call tools
+                        final_choice = final_completion.choices[0]
+                        final_tool_calls = getattr(
+                            final_choice.message, 'tool_calls', None
                         )
+                        if final_tool_calls:
+                            # Convert tool calls to accumulated format
+                            # and execute them
+                            for tc in final_tool_calls:
+                                tc_data = {
+                                    'id': tc.id,
+                                    'function': {
+                                        'name': tc.function.name,
+                                        'arguments': tc.function.arguments,
+                                    },
+                                    'complete': True,
+                                }
+                                accumulated_tool_calls[tc.id] = tc_data
+
+                            # Record assistant message with tool calls
+                            # and execute tools
+                            async for status_response in (
+                                self
+                            )._execute_tools_async_with_status_accumulator(
+                                accumulated_tool_calls,
+                                content_accumulator,
+                                step_token_usage,
+                                tool_call_records,
+                            ):
+                                yield status_response
+
+                            if tool_call_records:
+                                logger.info("Sending back result to model")
+
+                            # Update usage
+                            if final_completion.usage:
+                                self._update_token_usage_tracker(
+                                    step_token_usage,
+                                    safe_model_dump(final_completion.usage),
+                                )
+
+                            # Continue the loop for next model call
+                            accumulated_tool_calls.clear()
+                            if tool_call_records and (
+                                self.max_iteration is None
+                                or iteration_count < self.max_iteration
+                            ):
+                                try:
+                                    openai_messages, num_tokens = (
+                                        self.memory.get_context()
+                                    )
+                                except RuntimeError as e:
+                                    yield self._step_terminate(
+                                        e.args[1],
+                                        tool_call_records,
+                                        "max_tokens_exceeded",
+                                    )
+                                    return
+                                content_accumulator.reset_streaming_content()
+                                continue
+                            else:
+                                break
+
+                        final_content = final_choice.message.content or ""
                         final_reasoning = (
                             content_accumulator.get_full_reasoning_content()
                             or None
                         )
 
-                        final_message = BaseMessage(
+                        # Record full content to memory
+                        record_message = BaseMessage(
                             role_name=self.role_name,
                             role_type=self.role_type,
                             meta_dict={},
@@ -5333,7 +5550,46 @@ async def _astream_response(
                             reasoning_content=final_reasoning,
                         )
 
-                        self.record_message(final_message)
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not record_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                record_message, original_response_format
+                            )
+
+                        self.record_message(record_message)
+
+                        # In delta mode, final response content should
+                        # be empty since all content was already yielded
+                        # incrementally
+                        display_content = (
+                            final_content if self.stream_accumulate else ""
+                        )
+                        display_reasoning = (
+                            final_reasoning if self.stream_accumulate else None
+                        )
+                        final_message = BaseMessage(
+                            role_name=self.role_name,
+                            role_type=self.role_type,
+                            meta_dict={},
+                            content=display_content,
+                            parsed=cast(
+                                "BaseModel | dict[str, Any] | None",
+                                parsed_object,
+                            ),  # type: ignore[arg-type]
+                            reasoning_content=display_reasoning,
+                        )
+
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not final_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                final_message, original_response_format
+                            )
 
                         # Create final response
                         final_response = ChatAgentResponse(
@@ -5489,6 +5745,8 @@ async def _aprocess_stream_chunks_with_accumulator(
         tool_call_records: List[ToolCallingRecord],
         step_token_usage: Dict[str, int],
         response_format: Optional[Type[BaseModel]] = None,
+        original_response_format: Optional[Type[BaseModel]] = None,
+        used_prompt_formatting: bool = False,
     ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]:
         r"""Async version of process streaming chunks with
         content accumulator.
@@ -5586,6 +5844,15 @@ async def _aprocess_stream_chunks_with_accumulator(
                                 final_message, response_format
                             )
 
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not final_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                final_message, original_response_format
+                            )
+
                         self.record_message(final_message)
             if chunk.usage:
                 # Handle final usage chunk, whether or not choices are present.
@@ -5625,6 +5892,15 @@ async def _aprocess_stream_chunks_with_accumulator(
                                 final_message, response_format
                             )
 
+                        if (
+                            used_prompt_formatting
+                            and original_response_format
+                            and not final_message.parsed
+                        ):
+                            self._apply_prompt_based_parsing_to_message(
+                                final_message, original_response_format
+                            )
+
                         # Create final response with final usage (not partial)
                         final_response = ChatAgentResponse(
                             msgs=[final_message],

From ec431f21828f971d13474bc6e20f0ae3dd8cf918 Mon Sep 17 00:00:00 2001
From: Xiangyu Shi <sxyu.shi@gmail.com>
Date: Tue, 10 Feb 2026 10:42:52 +0100
Subject: [PATCH 2/6] add example

---
 .../chatagent_stream_structured_output.py     | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 examples/agents/chatagent_stream_structured_output.py

diff --git a/examples/agents/chatagent_stream_structured_output.py b/examples/agents/chatagent_stream_structured_output.py
new file mode 100644
index 0000000000..6318c2e137
--- /dev/null
+++ b/examples/agents/chatagent_stream_structured_output.py
@@ -0,0 +1,108 @@
+# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
+
+"""
+Example: Streaming + Tool Calls + Structured Output
+
+Demonstrates using ChatAgent in streaming mode with both tools and
+response_format (structured output) simultaneously, in both sync and async.
+"""
+
+import asyncio
+
+from pydantic import BaseModel, Field
+
+from camel.agents import ChatAgent
+from camel.models import ModelFactory
+from camel.toolkits import MathToolkit
+from camel.types import ModelPlatformType, ModelType
+
+
+class Result(BaseModel):
+    sum_result: str = Field(description="Result of the addition")
+    product_result: str = Field(description="Result of the multiplication")
+    division_result: str = Field(description="Result of the division")
+    capital_result: str = Field(description="Result of the capital search")
+
+
+USER_MESSAGE = (
+    "Calculate: 1) 123.45 + 678.90  2) 100 * 3.14159  3) 1000 / 7, "
+    "also search what is the capital of Germany"
+)
+
+
+def create_agent() -> ChatAgent:
+    streaming_model = ModelFactory.create(
+        model_platform=ModelPlatformType.DEFAULT,
+        model_type=ModelType.DEFAULT,
+        model_config_dict={
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        },
+    )
+    return ChatAgent(
+        system_message="You are a helpful assistant.",
+        model=streaming_model,
+        tools=MathToolkit().get_tools(),
+        stream_accumulate=False,  # Delta mode
+    )
+
+
+def sync_example():
+    """Sync streaming with tools + structured output."""
+    print("=== Sync Example ===")
+    agent = create_agent()
+
+    streaming_response = agent.step(USER_MESSAGE, response_format=Result)
+
+    content_parts = []
+    for chunk in streaming_response:
+        if chunk.msgs[0].content:
+            content_parts.append(chunk.msgs[0].content)
+            print(chunk.msgs[0].content, end="", flush=True)
+
+    print()
+
+    # Print tool call records
+    tool_calls = streaming_response.info.get("tool_calls", [])
+    if tool_calls:
+        print(f"\nTool calls made: {len(tool_calls)}")
+        for i, tc in enumerate(tool_calls, 1):
+            print(f"  {i}. {tc.tool_name}({tc.args}) = {tc.result}")
+
+    # Check parsed output
+    final_msg = streaming_response.msgs[0]
+    if final_msg.parsed:
+        print(f"\nParsed result: {final_msg.parsed}")
+
+
+async def async_example():
+    """Async streaming with tools + structured output."""
+    print("\n=== Async Example ===")
+    agent = create_agent()
+
+    content_parts = []
+    async for chunk in await agent.astep(USER_MESSAGE, response_format=Result):
+        if chunk.msgs[0].content:
+            content_parts.append(chunk.msgs[0].content)
+            print(chunk.msgs[0].content, end="", flush=True)
+
+    print()
+    full_content = "".join(content_parts)
+    print(f"\nFull content: {full_content}")
+
+
+if __name__ == "__main__":
+    sync_example()
+    asyncio.run(async_example())

From 8d62cc32ea3417caefda1f60117b74ad8572e701 Mon Sep 17 00:00:00 2001
From: Xiangyu Shi <sxyu.shi@gmail.com>
Date: Tue, 10 Feb 2026 10:53:36 +0100
Subject: [PATCH 3/6] improvement

---
 camel/agents/chat_agent.py | 358 ++++++++++++++++++-------------------
 1 file changed, 172 insertions(+), 186 deletions(-)

diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py
index 458443d3b0..a2e93754e9 100644
--- a/camel/agents/chat_agent.py
+++ b/camel/agents/chat_agent.py
@@ -2525,6 +2525,128 @@ def _try_format_message(
         except ValidationError:
             return False
 
+    def _format_message_with_fallback(
+        self,
+        message: BaseMessage,
+        response_format: Optional[Type[BaseModel]],
+        original_response_format: Optional[Type[BaseModel]] = None,
+        used_prompt_formatting: bool = False,
+    ) -> None:
+        r"""Format a message with optional prompt-based parsing fallback.
+
+        First tries standard JSON validation via ``_try_format_message``.
+        If that fails and prompt-based formatting was used, falls back to
+        ``_apply_prompt_based_parsing_to_message`` which can extract JSON
+        from free-form text.
+
+        Args:
+            message: The message to format.
+            response_format: The (possibly modified) response format passed
+                to the model. May be ``None`` when prompt-based formatting
+                replaced it.
+            original_response_format: The original response format before
+                prompt-based conversion.
+            used_prompt_formatting: Whether prompt-based formatting was used.
+        """
+        if response_format:
+            self._try_format_message(message, response_format)
+        if (
+            used_prompt_formatting
+            and original_response_format
+            and not message.parsed
+        ):
+            self._apply_prompt_based_parsing_to_message(
+                message, original_response_format
+            )
+
+    @staticmethod
+    def _collect_tool_calls_from_completion(
+        tool_calls: List[Any],
+        accumulated_tool_calls: Dict[str, Any],
+    ) -> None:
+        r"""Convert tool calls from a ChatCompletion into the accumulated
+        tool-call dictionary format used by the streaming pipeline.
+
+        Args:
+            tool_calls: Tool call objects from
+                ``completion.choices[0].message.tool_calls``.
+            accumulated_tool_calls: Mutable dict that will be populated with
+                the converted entries.
+        """
+        for tc in tool_calls:
+            accumulated_tool_calls[tc.id] = {
+                'id': tc.id,
+                'function': {
+                    'name': tc.function.name,
+                    'arguments': tc.function.arguments,
+                },
+                'complete': True,
+            }
+
+    def _record_and_build_display_message(
+        self,
+        final_content: str,
+        parsed_object: Any,
+        final_reasoning: Optional[str],
+        response_format: Optional[Type[BaseModel]],
+        original_response_format: Optional[Type[BaseModel]],
+        used_prompt_formatting: bool,
+    ) -> BaseMessage:
+        r"""Record the full message to memory and build a display message.
+
+        In delta mode the display message has empty content because all
+        content was already yielded incrementally.  In accumulate mode the
+        display message carries the full content.
+
+        Args:
+            final_content: The full final content string.
+            parsed_object: The parsed object from structured output stream.
+            final_reasoning: The reasoning content, if any.
+            response_format: The (possibly modified) response format.
+            original_response_format: The original response format.
+            used_prompt_formatting: Whether prompt-based formatting was used.
+
+        Returns:
+            BaseMessage: The display message to yield to the caller.
+        """
+        parsed_cast = cast("BaseModel | dict[str, Any] | None", parsed_object)  # type: ignore[arg-type]
+
+        # Record full content to memory
+        record_msg = BaseMessage(
+            role_name=self.role_name,
+            role_type=self.role_type,
+            meta_dict={},
+            content=final_content,
+            parsed=parsed_cast,
+            reasoning_content=final_reasoning,
+        )
+        self._format_message_with_fallback(
+            record_msg,
+            response_format,
+            original_response_format,
+            used_prompt_formatting,
+        )
+        self.record_message(record_msg)
+
+        # Build display message (empty content in delta mode)
+        display_content = final_content if self.stream_accumulate else ""
+        display_reasoning = final_reasoning if self.stream_accumulate else None
+        display_msg = BaseMessage(
+            role_name=self.role_name,
+            role_type=self.role_type,
+            meta_dict={},
+            content=display_content,
+            parsed=parsed_cast,
+            reasoning_content=display_reasoning,
+        )
+        self._format_message_with_fallback(
+            display_msg,
+            response_format,
+            original_response_format,
+            used_prompt_formatting,
+        )
+        return display_msg
+
     def _check_tools_strict_compatibility(self) -> bool:
         r"""Check if all tools are compatible with OpenAI strict mode.
 
@@ -4399,21 +4521,12 @@ def _stream_response(
                             final_choice.message, 'tool_calls', None
                         )
                         if final_tool_calls:
-                            # Convert tool calls to accumulated format
-                            # and execute them
-                            for tc in final_tool_calls:
-                                tc_data = {
-                                    'id': tc.id,
-                                    'function': {
-                                        'name': tc.function.name,
-                                        'arguments': tc.function.arguments,
-                                    },
-                                    'complete': True,
-                                }
-                                accumulated_tool_calls[tc.id] = tc_data
-
-                            # Record assistant message with tool calls
-                            # and execute tools
+                            self._collect_tool_calls_from_completion(
+                                final_tool_calls,
+                                accumulated_tool_calls,
+                            )
+
+                            # Execute tools
                             for status_response in (
                                 self
                             )._execute_tools_sync_with_status_accumulator(
@@ -4460,60 +4573,15 @@ def _stream_response(
                             or None
                         )
 
-                        # Record full content to memory
-                        record_message = BaseMessage(
-                            role_name=self.role_name,
-                            role_type=self.role_type,
-                            meta_dict={},
-                            content=final_content,
-                            parsed=cast(
-                                "BaseModel | dict[str, Any] | None",
-                                parsed_object,
-                            ),  # type: ignore[arg-type]
-                            reasoning_content=final_reasoning,
+                        final_message = self._record_and_build_display_message(
+                            final_content,
+                            parsed_object,
+                            final_reasoning,
+                            response_format,
+                            original_response_format,
+                            used_prompt_formatting,
                         )
 
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not record_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                record_message, original_response_format
-                            )
-
-                        self.record_message(record_message)
-
-                        # In delta mode, final response content should
-                        # be empty since all content was already yielded
-                        # incrementally
-                        display_content = (
-                            final_content if self.stream_accumulate else ""
-                        )
-                        display_reasoning = (
-                            final_reasoning if self.stream_accumulate else None
-                        )
-                        final_message = BaseMessage(
-                            role_name=self.role_name,
-                            role_type=self.role_type,
-                            meta_dict={},
-                            content=display_content,
-                            parsed=cast(
-                                "BaseModel | dict[str, Any] | None",
-                                parsed_object,
-                            ),  # type: ignore[arg-type]
-                            reasoning_content=display_reasoning,
-                        )
-
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not final_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                final_message, original_response_format
-                            )
-
                         # Create final response
                         final_response = ChatAgentResponse(
                             msgs=[final_message],
@@ -4667,19 +4735,12 @@ def _process_stream_chunks_with_accumulator(
                             reasoning_content=final_reasoning,
                         )
 
-                        if response_format:
-                            self._try_format_message(
-                                final_message, response_format
-                            )
-
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not final_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                final_message, original_response_format
-                            )
+                        self._format_message_with_fallback(
+                            final_message,
+                            response_format,
+                            original_response_format,
+                            used_prompt_formatting,
+                        )
 
                         self.record_message(final_message)
             if chunk.usage:
@@ -4715,19 +4776,12 @@ def _process_stream_chunks_with_accumulator(
                             reasoning_content=display_reasoning,
                         )
 
-                        if response_format:
-                            self._try_format_message(
-                                final_message, response_format
-                            )
-
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not final_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                final_message, original_response_format
-                            )
+                        self._format_message_with_fallback(
+                            final_message,
+                            response_format,
+                            original_response_format,
+                            used_prompt_formatting,
+                        )
 
                         # Create final response with final usage (not partial)
                         final_response = ChatAgentResponse(
@@ -5474,21 +5528,12 @@ async def _astream_response(
                             final_choice.message, 'tool_calls', None
                         )
                         if final_tool_calls:
-                            # Convert tool calls to accumulated format
-                            # and execute them
-                            for tc in final_tool_calls:
-                                tc_data = {
-                                    'id': tc.id,
-                                    'function': {
-                                        'name': tc.function.name,
-                                        'arguments': tc.function.arguments,
-                                    },
-                                    'complete': True,
-                                }
-                                accumulated_tool_calls[tc.id] = tc_data
-
-                            # Record assistant message with tool calls
-                            # and execute tools
+                            self._collect_tool_calls_from_completion(
+                                final_tool_calls,
+                                accumulated_tool_calls,
+                            )
+
+                            # Execute tools
                             async for status_response in (
                                 self
                             )._execute_tools_async_with_status_accumulator(
@@ -5537,60 +5582,15 @@ async def _astream_response(
                             or None
                         )
 
-                        # Record full content to memory
-                        record_message = BaseMessage(
-                            role_name=self.role_name,
-                            role_type=self.role_type,
-                            meta_dict={},
-                            content=final_content,
-                            parsed=cast(
-                                "BaseModel | dict[str, Any] | None",
-                                parsed_object,
-                            ),  # type: ignore[arg-type]
-                            reasoning_content=final_reasoning,
+                        final_message = self._record_and_build_display_message(
+                            final_content,
+                            parsed_object,
+                            final_reasoning,
+                            response_format,
+                            original_response_format,
+                            used_prompt_formatting,
                         )
 
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not record_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                record_message, original_response_format
-                            )
-
-                        self.record_message(record_message)
-
-                        # In delta mode, final response content should
-                        # be empty since all content was already yielded
-                        # incrementally
-                        display_content = (
-                            final_content if self.stream_accumulate else ""
-                        )
-                        display_reasoning = (
-                            final_reasoning if self.stream_accumulate else None
-                        )
-                        final_message = BaseMessage(
-                            role_name=self.role_name,
-                            role_type=self.role_type,
-                            meta_dict={},
-                            content=display_content,
-                            parsed=cast(
-                                "BaseModel | dict[str, Any] | None",
-                                parsed_object,
-                            ),  # type: ignore[arg-type]
-                            reasoning_content=display_reasoning,
-                        )
-
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not final_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                final_message, original_response_format
-                            )
-
                         # Create final response
                         final_response = ChatAgentResponse(
                             msgs=[final_message],
@@ -5839,19 +5839,12 @@ async def _aprocess_stream_chunks_with_accumulator(
                             content=final_content,
                         )
 
-                        if response_format:
-                            self._try_format_message(
-                                final_message, response_format
-                            )
-
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not final_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                final_message, original_response_format
-                            )
+                        self._format_message_with_fallback(
+                            final_message,
+                            response_format,
+                            original_response_format,
+                            used_prompt_formatting,
+                        )
 
                         self.record_message(final_message)
             if chunk.usage:
@@ -5887,19 +5880,12 @@ async def _aprocess_stream_chunks_with_accumulator(
                             reasoning_content=display_reasoning,
                         )
 
-                        if response_format:
-                            self._try_format_message(
-                                final_message, response_format
-                            )
-
-                        if (
-                            used_prompt_formatting
-                            and original_response_format
-                            and not final_message.parsed
-                        ):
-                            self._apply_prompt_based_parsing_to_message(
-                                final_message, original_response_format
-                            )
+                        self._format_message_with_fallback(
+                            final_message,
+                            response_format,
+                            original_response_format,
+                            used_prompt_formatting,
+                        )
 
                         # Create final response with final usage (not partial)
                         final_response = ChatAgentResponse(

From 7970d8975d9eca9af3cf8fa63c4107700fae1195 Mon Sep 17 00:00:00 2001
From: Tao Sun <168447269+fengju0213@users.noreply.github.com>
Date: Tue, 10 Feb 2026 19:30:55 +0800
Subject: [PATCH 4/6] Remove _format_message_with_fallback method

Removed the _format_message_with_fallback method and its usages throughout the chat_agent.py file.
---
 camel/agents/chat_agent.py | 344 ++-----------------------------------
 1 file changed, 19 insertions(+), 325 deletions(-)

diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py
index a2e93754e9..40f70cbee8 100644
--- a/camel/agents/chat_agent.py
+++ b/camel/agents/chat_agent.py
@@ -2525,40 +2525,6 @@ def _try_format_message(
         except ValidationError:
             return False
 
-    def _format_message_with_fallback(
-        self,
-        message: BaseMessage,
-        response_format: Optional[Type[BaseModel]],
-        original_response_format: Optional[Type[BaseModel]] = None,
-        used_prompt_formatting: bool = False,
-    ) -> None:
-        r"""Format a message with optional prompt-based parsing fallback.
-
-        First tries standard JSON validation via ``_try_format_message``.
-        If that fails and prompt-based formatting was used, falls back to
-        ``_apply_prompt_based_parsing_to_message`` which can extract JSON
-        from free-form text.
-
-        Args:
-            message: The message to format.
-            response_format: The (possibly modified) response format passed
-                to the model. May be ``None`` when prompt-based formatting
-                replaced it.
-            original_response_format: The original response format before
-                prompt-based conversion.
-            used_prompt_formatting: Whether prompt-based formatting was used.
-        """
-        if response_format:
-            self._try_format_message(message, response_format)
-        if (
-            used_prompt_formatting
-            and original_response_format
-            and not message.parsed
-        ):
-            self._apply_prompt_based_parsing_to_message(
-                message, original_response_format
-            )
-
     @staticmethod
     def _collect_tool_calls_from_completion(
         tool_calls: List[Any],
@@ -2589,8 +2555,6 @@ def _record_and_build_display_message(
         parsed_object: Any,
         final_reasoning: Optional[str],
         response_format: Optional[Type[BaseModel]],
-        original_response_format: Optional[Type[BaseModel]],
-        used_prompt_formatting: bool,
     ) -> BaseMessage:
         r"""Record the full message to memory and build a display message.
 
@@ -2603,8 +2567,6 @@ def _record_and_build_display_message(
             parsed_object: The parsed object from structured output stream.
             final_reasoning: The reasoning content, if any.
             response_format: The (possibly modified) response format.
-            original_response_format: The original response format.
-            used_prompt_formatting: Whether prompt-based formatting was used.
 
         Returns:
             BaseMessage: The display message to yield to the caller.
@@ -2620,12 +2582,8 @@ def _record_and_build_display_message(
             parsed=parsed_cast,
             reasoning_content=final_reasoning,
         )
-        self._format_message_with_fallback(
-            record_msg,
-            response_format,
-            original_response_format,
-            used_prompt_formatting,
-        )
+        if response_format:
+            self._try_format_message(record_msg, response_format)
         self.record_message(record_msg)
 
         # Build display message (empty content in delta mode)
@@ -2636,134 +2594,11 @@ def _record_and_build_display_message(
             role_type=self.role_type,
             meta_dict={},
             content=display_content,
-            parsed=parsed_cast,
+            parsed=record_msg.parsed,
             reasoning_content=display_reasoning,
         )
-        self._format_message_with_fallback(
-            display_msg,
-            response_format,
-            original_response_format,
-            used_prompt_formatting,
-        )
         return display_msg
 
-    def _check_tools_strict_compatibility(self) -> bool:
-        r"""Check if all tools are compatible with OpenAI strict mode.
-
-        Returns:
-            bool: True if all tools are strict mode compatible,
-                False otherwise.
-        """
-        tool_schemas = self._get_full_tool_schemas()
-        for schema in tool_schemas:
-            if not schema.get("function", {}).get("strict", True):
-                return False
-        return True
-
-    def _convert_response_format_to_prompt(
-        self, response_format: Type[BaseModel]
-    ) -> str:
-        r"""Convert a Pydantic response format to a prompt instruction.
-
-        Args:
-            response_format (Type[BaseModel]): The Pydantic model class.
-
-        Returns:
-            str: A prompt instruction requesting the specific format.
-        """
-        try:
-            # Get the JSON schema from the Pydantic model
-            schema = response_format.model_json_schema()
-
-            # Create a prompt based on the schema
-            format_instruction = (
-                "\n\nPlease respond in the following JSON format:\n{\n"
-            )
-
-            properties = schema.get("properties", {})
-            for field_name, field_info in properties.items():
-                field_type = field_info.get("type", "string")
-                description = field_info.get("description", "")
-
-                if field_type == "array":
-                    format_instruction += (
-                        f'    "{field_name}": ["array of values"]'
-                    )
-                elif field_type == "object":
-                    format_instruction += f'    "{field_name}": {{"object"}}'
-                elif field_type == "boolean":
-                    format_instruction += f'    "{field_name}": true'
-                elif field_type == "number":
-                    format_instruction += f'    "{field_name}": 0'
-                else:
-                    format_instruction += f'    "{field_name}": "string value"'
-
-                if description:
-                    format_instruction += f'  // {description}'
-
-                # Add comma if not the last item
-                if field_name != list(properties.keys())[-1]:
-                    format_instruction += ","
-                format_instruction += "\n"
-
-            format_instruction += "}"
-            return format_instruction
-
-        except Exception as e:
-            logger.warning(
-                f"Failed to convert response_format to prompt: {e}. "
-                f"Using generic format instruction."
-            )
-            return (
-                "\n\nPlease respond in a structured JSON format "
-                "that matches the requested schema."
-            )
-
-    def _handle_response_format_with_non_strict_tools(
-        self,
-        input_message: Union[BaseMessage, str],
-        response_format: Optional[Type[BaseModel]] = None,
-    ) -> Tuple[Union[BaseMessage, str], Optional[Type[BaseModel]], bool]:
-        r"""Handle response format when tools are not strict mode compatible.
-
-        Args:
-            input_message: The original input message.
-            response_format: The requested response format.
-
-        Returns:
-            Tuple: (modified_message, modified_response_format,
-                   used_prompt_formatting)
-        """
-        if response_format is None:
-            return input_message, response_format, False
-
-        # Check if tools are strict mode compatible
-        if self._check_tools_strict_compatibility():
-            return input_message, response_format, False
-
-        # Tools are not strict compatible, convert to prompt
-        logger.info(
-            "Non-strict tools detected. Converting response_format to "
-            "prompt-based formatting."
-        )
-
-        format_prompt = self._convert_response_format_to_prompt(
-            response_format
-        )
-
-        # Modify the message to include format instruction
-        modified_message: Union[BaseMessage, str]
-        if isinstance(input_message, str):
-            modified_message = input_message + format_prompt
-        else:
-            modified_message = input_message.create_new_instance(
-                input_message.content + format_prompt
-            )
-
-        # Return None for response_format to avoid strict mode conflicts
-        # and True to indicate we used prompt formatting
-        return modified_message, None, True
-
     def _is_called_from_registered_toolkit(self) -> bool:
         r"""Check if current step/astep call originates from a
         RegisteredAgentToolkit.
@@ -2791,75 +2626,6 @@ def _is_called_from_registered_toolkit(self) -> bool:
 
         return False
 
-    def _apply_prompt_based_parsing_to_message(
-        self,
-        message: BaseMessage,
-        original_response_format: Type[BaseModel],
-    ) -> None:
-        r"""Apply prompt-based parsing to a single message.
-
-        Args:
-            message: The message to parse.
-            original_response_format: The original response format class.
-        """
-        if not message.content:
-            return
-        try:
-            import json
-
-            from pydantic import ValidationError
-
-            content = message.content.strip()
-
-            # Try direct parsing first
-            try:
-                parsed_json = json.loads(content)
-                message.parsed = original_response_format.model_validate(
-                    parsed_json
-                )
-                return
-            except (json.JSONDecodeError, ValidationError):
-                pass
-
-            # Try to extract JSON from text
-            json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
-            json_matches = re.findall(json_pattern, content, re.DOTALL)
-
-            for json_str in json_matches:
-                try:
-                    parsed_json = json.loads(json_str)
-                    message.parsed = original_response_format.model_validate(
-                        parsed_json
-                    )
-                    message.content = json.dumps(parsed_json)
-                    break
-                except (json.JSONDecodeError, ValidationError):
-                    continue
-
-            if not message.parsed:
-                logger.warning(
-                    f"Failed to parse JSON from response: {content}"
-                )
-
-        except Exception as e:
-            logger.warning(f"Error during prompt-based parsing: {e}")
-
-    def _apply_prompt_based_parsing(
-        self,
-        response: ModelResponse,
-        original_response_format: Type[BaseModel],
-    ) -> None:
-        r"""Apply manual parsing when using prompt-based formatting.
-
-        Args:
-            response: The model response to parse.
-            original_response_format: The original response format class.
-        """
-        for message in response.output_messages:
-            self._apply_prompt_based_parsing_to_message(
-                message, original_response_format
-            )
-
     def _format_response_if_needed(
         self,
         response: ModelResponse,
@@ -2998,14 +2764,6 @@ def _step_impl(
         # use
         disable_tools = self._is_called_from_registered_toolkit()
 
-        # Handle response format compatibility with non-strict tools
-        original_response_format = response_format
-        input_message, response_format, used_prompt_formatting = (
-            self._handle_response_format_with_non_strict_tools(
-                input_message, response_format
-            )
-        )
-
         # Convert input message to BaseMessage if necessary
         if isinstance(input_message, str):
             input_message = BaseMessage.make_user_message(
@@ -3182,12 +2940,6 @@ def _step_impl(
 
         self._format_response_if_needed(response, response_format)
 
-        # Apply manual parsing if we used prompt-based formatting
-        if used_prompt_formatting and original_response_format:
-            self._apply_prompt_based_parsing(
-                response, original_response_format
-            )
-
         # Only record final output if we haven't already recorded tool calls
         # for this response (to avoid duplicate assistant messages)
         if not recorded_tool_calls:
@@ -3301,14 +3053,6 @@ async def _astep_non_streaming_task(
         # use
         disable_tools = self._is_called_from_registered_toolkit()
 
-        # Handle response format compatibility with non-strict tools
-        original_response_format = response_format
-        input_message, response_format, used_prompt_formatting = (
-            self._handle_response_format_with_non_strict_tools(
-                input_message, response_format
-            )
-        )
-
         if isinstance(input_message, str):
             input_message = BaseMessage.make_user_message(
                 role_name="User", content=input_message
@@ -3484,12 +3228,6 @@ async def _astep_non_streaming_task(
 
         await self._aformat_response_if_needed(response, response_format)
 
-        # Apply manual parsing if we used prompt-based formatting
-        if used_prompt_formatting and original_response_format:
-            self._apply_prompt_based_parsing(
-                response, original_response_format
-            )
-
         # Only record final output if we haven't already recorded tool calls
         # for this response (to avoid duplicate assistant messages)
         if not recorded_tool_calls:
@@ -4307,14 +4045,6 @@ def _stream(
                 content, tool calls, and other information as they become
                 available.
         """
-        # Handle response format compatibility with non-strict tools
-        original_response_format = response_format
-        input_message, response_format, used_prompt_formatting = (
-            self._handle_response_format_with_non_strict_tools(
-                input_message, response_format
-            )
-        )
-
         # Convert input message to BaseMessage if necessary
         if isinstance(input_message, str):
             input_message = BaseMessage.make_user_message(
@@ -4338,8 +4068,6 @@ def _stream(
             openai_messages,
             num_tokens,
             response_format,
-            original_response_format,
-            used_prompt_formatting,
         )
 
     def _get_token_count(self, content: str) -> int:
@@ -4381,8 +4109,6 @@ def _stream_response(
         openai_messages: List[OpenAIMessage],
         num_tokens: int,
         response_format: Optional[Type[BaseModel]] = None,
-        original_response_format: Optional[Type[BaseModel]] = None,
-        used_prompt_formatting: bool = False,
     ) -> Generator[ChatAgentResponse, None, None]:
         r"""Internal method to handle streaming responses with tool calls."""
 
@@ -4442,8 +4168,6 @@ def _stream_response(
                     tool_call_records,
                     step_token_usage,
                     response_format,
-                    original_response_format,
-                    used_prompt_formatting,
                 )
 
                 if tool_calls_complete:
@@ -4578,8 +4302,6 @@ def _stream_response(
                             parsed_object,
                             final_reasoning,
                             response_format,
-                            original_response_format,
-                            used_prompt_formatting,
                         )
 
                         # Create final response
@@ -4640,8 +4362,6 @@ def _process_stream_chunks_with_accumulator(
         tool_call_records: List[ToolCallingRecord],
         step_token_usage: Dict[str, int],
         response_format: Optional[Type[BaseModel]] = None,
-        original_response_format: Optional[Type[BaseModel]] = None,
-        used_prompt_formatting: bool = False,
     ) -> Generator[ChatAgentResponse, None, Tuple[bool, bool]]:
         r"""Process streaming chunks with content accumulator."""
 
@@ -4735,12 +4455,10 @@ def _process_stream_chunks_with_accumulator(
                             reasoning_content=final_reasoning,
                         )
 
-                        self._format_message_with_fallback(
-                            final_message,
-                            response_format,
-                            original_response_format,
-                            used_prompt_formatting,
-                        )
+                        if response_format:
+                            self._try_format_message(
+                                final_message, response_format
+                            )
 
                         self.record_message(final_message)
             if chunk.usage:
@@ -4776,12 +4494,10 @@ def _process_stream_chunks_with_accumulator(
                             reasoning_content=display_reasoning,
                         )
 
-                        self._format_message_with_fallback(
-                            final_message,
-                            response_format,
-                            original_response_format,
-                            used_prompt_formatting,
-                        )
+                        if response_format:
+                            self._try_format_message(
+                                final_message, response_format
+                            )
 
                         # Create final response with final usage (not partial)
                         final_response = ChatAgentResponse(
@@ -5324,14 +5040,6 @@ async def _astream(
     ) -> AsyncGenerator[ChatAgentResponse, None]:
         r"""Asynchronous version of stream method."""
 
-        # Handle response format compatibility with non-strict tools
-        original_response_format = response_format
-        input_message, response_format, used_prompt_formatting = (
-            self._handle_response_format_with_non_strict_tools(
-                input_message, response_format
-            )
-        )
-
         # Convert input message to BaseMessage if necessary
         if isinstance(input_message, str):
             input_message = BaseMessage.make_user_message(
@@ -5357,8 +5065,6 @@ async def _astream(
             openai_messages,
             num_tokens,
             response_format,
-            original_response_format,
-            used_prompt_formatting,
         ):
             last_response = response
             yield response
@@ -5375,8 +5081,6 @@ async def _astream_response(
         openai_messages: List[OpenAIMessage],
         num_tokens: int,
         response_format: Optional[Type[BaseModel]] = None,
-        original_response_format: Optional[Type[BaseModel]] = None,
-        used_prompt_formatting: bool = False,
     ) -> AsyncGenerator[ChatAgentResponse, None]:
         r"""Async method to handle streaming responses with tool calls."""
 
@@ -5440,8 +5144,6 @@ async def _astream_response(
                     tool_call_records,
                     step_token_usage,
                     response_format,
-                    original_response_format,
-                    used_prompt_formatting,
                 ):
                     if isinstance(item, tuple):
                         # This is the final return value (stream_completed,
@@ -5587,8 +5289,6 @@ async def _astream_response(
                             parsed_object,
                             final_reasoning,
                             response_format,
-                            original_response_format,
-                            used_prompt_formatting,
                         )
 
                         # Create final response
@@ -5745,8 +5445,6 @@ async def _aprocess_stream_chunks_with_accumulator(
         tool_call_records: List[ToolCallingRecord],
         step_token_usage: Dict[str, int],
         response_format: Optional[Type[BaseModel]] = None,
-        original_response_format: Optional[Type[BaseModel]] = None,
-        used_prompt_formatting: bool = False,
     ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]:
         r"""Async version of process streaming chunks with
         content accumulator.
@@ -5839,12 +5537,10 @@ async def _aprocess_stream_chunks_with_accumulator(
                             content=final_content,
                         )
 
-                        self._format_message_with_fallback(
-                            final_message,
-                            response_format,
-                            original_response_format,
-                            used_prompt_formatting,
-                        )
+                        if response_format:
+                            self._try_format_message(
+                                final_message, response_format
+                            )
 
                         self.record_message(final_message)
             if chunk.usage:
@@ -5880,12 +5576,10 @@ async def _aprocess_stream_chunks_with_accumulator(
                             reasoning_content=display_reasoning,
                         )
 
-                        self._format_message_with_fallback(
-                            final_message,
-                            response_format,
-                            original_response_format,
-                            used_prompt_formatting,
-                        )
+                        if response_format:
+                            self._try_format_message(
+                                final_message, response_format
+                            )
 
                         # Create final response with final usage (not partial)
                         final_response = ChatAgentResponse(

From 72d73767a6333b540ceca0ca99053e97cc0f7a31 Mon Sep 17 00:00:00 2001
From: Xiangyu Shi <sxyu.shi@gmail.com>
Date: Wed, 11 Feb 2026 09:49:04 +0100
Subject: [PATCH 5/6] support gemini

---
 camel/models/gemini_model.py | 233 +++++++++++++++++++----------------
 1 file changed, 130 insertions(+), 103 deletions(-)

diff --git a/camel/models/gemini_model.py b/camel/models/gemini_model.py
index e4a2d60e13..f4b5491f1a 100644
--- a/camel/models/gemini_model.py
+++ b/camel/models/gemini_model.py
@@ -24,6 +24,10 @@
 )
 
 from openai import AsyncStream, Stream
+from openai.lib.streaming.chat import (
+    AsyncChatCompletionStreamManager,
+    ChatCompletionStreamManager,
+)
 from pydantic import BaseModel
 
 from camel.configs import GeminiConfig
@@ -451,6 +455,47 @@ async def async_thought_preserving_generator():
 
         return async_thought_preserving_generator()
 
+    @staticmethod
+    def _clean_gemini_tools(
+        tools: Optional[List[Dict[str, Any]]],
+    ) -> Optional[List[Dict[str, Any]]]:
+        r"""Clean tools for Gemini API compatibility.
+
+        Removes unsupported fields like strict, anyOf, and restricts
+        enum/format to allowed types.
+        """
+        if not tools:
+            return tools
+        import copy
+
+        tools = copy.deepcopy(tools)
+        for tool in tools:
+            function_dict = tool.get('function', {})
+            function_dict.pop("strict", None)
+
+            if 'parameters' in function_dict:
+                params = function_dict['parameters']
+                if 'properties' in params:
+                    for prop_name, prop_value in params['properties'].items():
+                        if 'anyOf' in prop_value:
+                            first_type = prop_value['anyOf'][0]
+                            params['properties'][prop_name] = first_type
+                            if 'description' in prop_value:
+                                params['properties'][prop_name][
+                                    'description'
+                                ] = prop_value['description']
+
+                        if prop_value.get('type') != 'string':
+                            prop_value.pop('enum', None)
+
+                        if prop_value.get('type') not in [
+                            'string',
+                            'integer',
+                            'number',
+                        ]:
+                            prop_value.pop('format', None)
+        return tools
+
     @observe()
     def _run(
         self,
@@ -479,19 +524,18 @@ def _run(
             "response_format", None
         )
         messages = self._process_messages(messages)
+        is_streaming = self.model_config_dict.get("stream", False)
+
         if response_format:
-            if tools:
-                raise ValueError(
-                    "Gemini does not support function calling with "
-                    "response format."
+            tools = self._clean_gemini_tools(tools)
+            if is_streaming:
+                return self._request_stream_parse(  # type: ignore[return-value]
+                    messages, response_format, tools
                 )
-            result: Union[ChatCompletion, Stream[ChatCompletionChunk]] = (
-                self._request_parse(messages, response_format)
-            )
+            else:
+                return self._request_parse(messages, response_format, tools)
         else:
-            result = self._request_chat_completion(messages, tools)
-
-        return result
+            return self._request_chat_completion(messages, tools)
 
     @observe()
     async def _arun(
@@ -521,67 +565,90 @@ async def _arun(
             "response_format", None
         )
         messages = self._process_messages(messages)
+        is_streaming = self.model_config_dict.get("stream", False)
+
         if response_format:
-            if tools:
-                raise ValueError(
-                    "Gemini does not support function calling with "
-                    "response format."
+            tools = self._clean_gemini_tools(tools)
+            if is_streaming:
+                return await self._arequest_stream_parse(  # type: ignore[return-value]
+                    messages, response_format, tools
+                )
+            else:
+                return await self._arequest_parse(
+                    messages, response_format, tools
                 )
-            result: Union[
-                ChatCompletion, AsyncStream[ChatCompletionChunk]
-            ] = await self._arequest_parse(messages, response_format)
         else:
-            result = await self._arequest_chat_completion(messages, tools)
+            return await self._arequest_chat_completion(messages, tools)
+
+    @staticmethod
+    def _build_gemini_response_format(
+        response_format: Type[BaseModel],
+    ) -> Dict[str, Any]:
+        r"""Convert a Pydantic model to Gemini-compatible response_format."""
+        schema = response_format.model_json_schema()
+        # Remove $defs and other unsupported fields for Gemini
+        schema.pop("$defs", None)
+        schema.pop("definitions", None)
+        return {
+            "type": "json_schema",
+            "json_schema": {
+                "name": response_format.__name__,
+                "schema": schema,
+            },
+        }
+
+    def _request_stream_parse(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Type[BaseModel],
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> ChatCompletionStreamManager[BaseModel]:
+        r"""Gemini-specific streaming structured output.
+
+        Uses regular streaming with response_format as JSON schema
+        instead of OpenAI's beta streaming API which is incompatible
+        with Gemini's tool call delta format.
+        """
+        request_config = self._prepare_request_config(tools)
+        request_config["stream"] = True
+        request_config["response_format"] = self._build_gemini_response_format(
+            response_format
+        )
+
+        response = self._client.chat.completions.create(
+            messages=messages,
+            model=self.model_type,
+            **request_config,
+        )
+        return self._preserve_thought_signatures(response)  # type: ignore[return-value]
 
-        return result
+    async def _arequest_stream_parse(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Type[BaseModel],
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> AsyncChatCompletionStreamManager[BaseModel]:
+        r"""Gemini-specific async streaming structured output."""
+        request_config = self._prepare_request_config(tools)
+        request_config["stream"] = True
+        request_config["response_format"] = self._build_gemini_response_format(
+            response_format
+        )
+
+        response = await self._async_client.chat.completions.create(
+            messages=messages,
+            model=self.model_type,
+            **request_config,
+        )
+        return self._preserve_thought_signatures(response)  # type: ignore[return-value]
 
     def _request_chat_completion(
         self,
         messages: List[OpenAIMessage],
         tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
-        import copy
-
-        request_config = copy.deepcopy(self.model_config_dict)
-        # Remove strict and anyOf from each tool's function parameters since
-        # Gemini does not support them
-        if tools:
-            for tool in tools:
-                function_dict = tool.get('function', {})
-                function_dict.pop("strict", None)
-
-                # Process parameters to remove anyOf and handle enum/format
-                if 'parameters' in function_dict:
-                    params = function_dict['parameters']
-                    if 'properties' in params:
-                        for prop_name, prop_value in params[
-                            'properties'
-                        ].items():
-                            if 'anyOf' in prop_value:
-                                # Replace anyOf with the first type in the list
-                                first_type = prop_value['anyOf'][0]
-                                params['properties'][prop_name] = first_type
-                                # Preserve description if it exists
-                                if 'description' in prop_value:
-                                    params['properties'][prop_name][
-                                        'description'
-                                    ] = prop_value['description']
-
-                            # Handle enum and format restrictions for Gemini
-                            # API enum: only allowed for string type
-                            if prop_value.get('type') != 'string':
-                                prop_value.pop('enum', None)
-
-                            # format: only allowed for string, integer, and
-                            # number types
-                            if prop_value.get('type') not in [
-                                'string',
-                                'integer',
-                                'number',
-                            ]:
-                                prop_value.pop('format', None)
-
-            request_config["tools"] = tools
+        tools = self._clean_gemini_tools(tools)
+        request_config = self._prepare_request_config(tools)
 
         response = self._client.chat.completions.create(
             messages=messages,
@@ -597,48 +664,8 @@ async def _arequest_chat_completion(
         messages: List[OpenAIMessage],
         tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
-        import copy
-
-        request_config = copy.deepcopy(self.model_config_dict)
-        # Remove strict and anyOf from each tool's function parameters since
-        # Gemini does not support them
-        if tools:
-            for tool in tools:
-                function_dict = tool.get('function', {})
-                function_dict.pop("strict", None)
-
-                # Process parameters to remove anyOf and handle enum/format
-                if 'parameters' in function_dict:
-                    params = function_dict['parameters']
-                    if 'properties' in params:
-                        for prop_name, prop_value in params[
-                            'properties'
-                        ].items():
-                            if 'anyOf' in prop_value:
-                                # Replace anyOf with the first type in the list
-                                first_type = prop_value['anyOf'][0]
-                                params['properties'][prop_name] = first_type
-                                # Preserve description if it exists
-                                if 'description' in prop_value:
-                                    params['properties'][prop_name][
-                                        'description'
-                                    ] = prop_value['description']
-
-                            # Handle enum and format restrictions for Gemini
-                            # API enum: only allowed for string type
-                            if prop_value.get('type') != 'string':
-                                prop_value.pop('enum', None)
-
-                            # format: only allowed for string, integer, and
-                            # number types
-                            if prop_value.get('type') not in [
-                                'string',
-                                'integer',
-                                'number',
-                            ]:
-                                prop_value.pop('format', None)
-
-            request_config["tools"] = tools
+        tools = self._clean_gemini_tools(tools)
+        request_config = self._prepare_request_config(tools)
 
         response = await self._async_client.chat.completions.create(
             messages=messages,

From c9d588a4ea28c645cd7be03fd585dbc5b6355601 Mon Sep 17 00:00:00 2001
From: Xiangyu Shi <sxyu.shi@gmail.com>
Date: Wed, 11 Feb 2026 10:06:06 +0100
Subject: [PATCH 6/6] support anthropic

---
 camel/models/anthropic_model.py               | 66 +++++++++++++------
 .../chatagent_stream_structured_output.py     | 12 ++--
 2 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/camel/models/anthropic_model.py b/camel/models/anthropic_model.py
index a362a99df5..cd868392bd 100644
--- a/camel/models/anthropic_model.py
+++ b/camel/models/anthropic_model.py
@@ -14,7 +14,6 @@
 import json
 import os
 import time
-import warnings
 from typing import Any, Dict, List, Optional, Type, Union, cast
 
 from openai import AsyncStream, Stream
@@ -636,6 +635,37 @@ def _convert_anthropic_stream_to_openai_chunk(
             usage=usage,
         )
 
+    @staticmethod
+    def _add_additional_properties_false(schema: Dict[str, Any]) -> None:
+        r"""Recursively add additionalProperties: false to all object types."""
+        if schema.get("type") == "object":
+            schema["additionalProperties"] = False
+        for value in schema.values():
+            if isinstance(value, dict):
+                AnthropicModel._add_additional_properties_false(value)
+            elif isinstance(value, list):
+                for item in value:
+                    if isinstance(item, dict):
+                        AnthropicModel._add_additional_properties_false(item)
+
+    @staticmethod
+    def _build_output_config(
+        response_format: Type[BaseModel],
+    ) -> Dict[str, Any]:
+        r"""Convert a Pydantic model to Anthropic's output_config format."""
+        schema = response_format.model_json_schema()
+        # Remove unsupported fields
+        schema.pop("$defs", None)
+        schema.pop("definitions", None)
+        # Anthropic requires additionalProperties: false on all object types
+        AnthropicModel._add_additional_properties_false(schema)
+        return {
+            "format": {
+                "type": "json_schema",
+                "schema": schema,
+            }
+        }
+
     def _convert_openai_tools_to_anthropic(
         self, tools: Optional[List[Dict[str, Any]]]
     ) -> Optional[List[Dict[str, Any]]]:
@@ -678,7 +708,7 @@ def _run(
             messages (List[OpenAIMessage]): Message list with the chat history
                 in OpenAI API format.
             response_format (Optional[Type[BaseModel]]): The format of the
-                response. (Not supported by Anthropic API directly)
+                response.
             tools (Optional[List[Dict[str, Any]]]): The schema of the tools to
                 use for the request.
 
@@ -687,15 +717,6 @@ def _run(
                 `ChatCompletion` in the non-stream mode, or
                 `Stream[ChatCompletionChunk]` in the stream mode.
         """
-        if response_format is not None:
-            warnings.warn(
-                "The 'response_format' parameter is not supported by the "
-                "Anthropic API and will be ignored. Consider using tools "
-                "for structured output instead.",
-                UserWarning,
-                stacklevel=2,
-            )
-
         # Update Langfuse trace with current agent session and metadata
         agent_session_id = get_current_agent_session_id()
         if agent_session_id:
@@ -764,6 +785,12 @@ def _run(
             if key in self.model_config_dict:
                 request_params[key] = self.model_config_dict[key]
 
+        # Add structured output via output_config
+        if response_format is not None:
+            request_params["output_config"] = self._build_output_config(
+                response_format
+            )
+
         # Convert tools
         anthropic_tools = self._convert_openai_tools_to_anthropic(tools)
         if anthropic_tools:
@@ -803,7 +830,7 @@ async def _arun(
             messages (List[OpenAIMessage]): Message list with the chat history
                 in OpenAI API format.
             response_format (Optional[Type[BaseModel]]): The format of the
-                response. (Not supported by Anthropic API directly)
+                response.
             tools (Optional[List[Dict[str, Any]]]): The schema of the tools to
                 use for the request.
 
@@ -812,15 +839,6 @@ async def _arun(
                 `ChatCompletion` in the non-stream mode, or
                 `AsyncStream[ChatCompletionChunk]` in the stream mode.
         """
-        if response_format is not None:
-            warnings.warn(
-                "The 'response_format' parameter is not supported by the "
-                "Anthropic API and will be ignored. Consider using tools "
-                "for structured output instead.",
-                UserWarning,
-                stacklevel=2,
-            )
-
         # Update Langfuse trace with current agent session and metadata
         agent_session_id = get_current_agent_session_id()
         if agent_session_id:
@@ -889,6 +907,12 @@ async def _arun(
             if key in self.model_config_dict:
                 request_params[key] = self.model_config_dict[key]
 
+        # Add structured output via output_config
+        if response_format is not None:
+            request_params["output_config"] = self._build_output_config(
+                response_format
+            )
+
         # Convert tools
         anthropic_tools = self._convert_openai_tools_to_anthropic(tools)
         if anthropic_tools:
diff --git a/examples/agents/chatagent_stream_structured_output.py b/examples/agents/chatagent_stream_structured_output.py
index 6318c2e137..5f332d308e 100644
--- a/examples/agents/chatagent_stream_structured_output.py
+++ b/examples/agents/chatagent_stream_structured_output.py
@@ -30,10 +30,14 @@
 
 
 class Result(BaseModel):
-    sum_result: str = Field(description="Result of the addition")
-    product_result: str = Field(description="Result of the multiplication")
-    division_result: str = Field(description="Result of the division")
-    capital_result: str = Field(description="Result of the capital search")
+    sum_result: str = Field(description="Only the result of the addition")
+    product_result: str = Field(
+        description="Only the result of the multiplication"
+    )
+    division_result: str = Field(description="Only the result of the division")
+    capital_result: str = Field(
+        description="Only the result of the capital search"
+    )
 
 
 USER_MESSAGE = (