From abb34249dc04e023542d51e13741a6f1279436d6 Mon Sep 17 00:00:00 2001 From: Xiangyu Shi Date: Tue, 10 Feb 2026 10:30:05 +0100 Subject: [PATCH 1/6] fix bugs --- camel/agents/chat_agent.py | 388 +++++++++++++++++++++++++++++++------ 1 file changed, 332 insertions(+), 56 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index c6337408d7..458443d3b0 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -2669,65 +2669,74 @@ def _is_called_from_registered_toolkit(self) -> bool: return False - def _apply_prompt_based_parsing( + def _apply_prompt_based_parsing_to_message( self, - response: ModelResponse, + message: BaseMessage, original_response_format: Type[BaseModel], ) -> None: - r"""Apply manual parsing when using prompt-based formatting. + r"""Apply prompt-based parsing to a single message. Args: - response: The model response to parse. + message: The message to parse. original_response_format: The original response format class. """ - for message in response.output_messages: - if message.content: - try: - # Try to extract JSON from the response content - import json + if not message.content: + return + try: + import json - from pydantic import ValidationError + from pydantic import ValidationError - # Try to find JSON in the content - content = message.content.strip() + content = message.content.strip() - # Try direct parsing first - try: - parsed_json = json.loads(content) - message.parsed = ( - original_response_format.model_validate( - parsed_json - ) - ) - continue - except (json.JSONDecodeError, ValidationError): - pass + # Try direct parsing first + try: + parsed_json = json.loads(content) + message.parsed = original_response_format.model_validate( + parsed_json + ) + return + except (json.JSONDecodeError, ValidationError): + pass - # Try to extract JSON from text - json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}' - json_matches = re.findall(json_pattern, content, re.DOTALL) + # Try to extract JSON from text + json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}' + json_matches = re.findall(json_pattern, content, re.DOTALL) - for json_str in json_matches: - try: - parsed_json = json.loads(json_str) - message.parsed = ( - original_response_format.model_validate( - parsed_json - ) - ) - # Update content to just the JSON for consistency - message.content = json.dumps(parsed_json) - break - except (json.JSONDecodeError, ValidationError): - continue + for json_str in json_matches: + try: + parsed_json = json.loads(json_str) + message.parsed = original_response_format.model_validate( + parsed_json + ) + message.content = json.dumps(parsed_json) + break + except (json.JSONDecodeError, ValidationError): + continue - if not message.parsed: - logger.warning( - f"Failed to parse JSON from response: {content}" - ) + if not message.parsed: + logger.warning( + f"Failed to parse JSON from response: {content}" + ) - except Exception as e: - logger.warning(f"Error during prompt-based parsing: {e}") + except Exception as e: + logger.warning(f"Error during prompt-based parsing: {e}") + + def _apply_prompt_based_parsing( + self, + response: ModelResponse, + original_response_format: Type[BaseModel], + ) -> None: + r"""Apply manual parsing when using prompt-based formatting. + + Args: + response: The model response to parse. + original_response_format: The original response format class. + """ + for message in response.output_messages: + self._apply_prompt_based_parsing_to_message( + message, original_response_format + ) def _format_response_if_needed( self, @@ -4177,7 +4186,8 @@ def _stream( available. """ # Handle response format compatibility with non-strict tools - input_message, response_format, _ = ( + original_response_format = response_format + input_message, response_format, used_prompt_formatting = ( self._handle_response_format_with_non_strict_tools( input_message, response_format ) @@ -4203,7 +4213,11 @@ def _stream( # Start streaming response yield from self._stream_response( - openai_messages, num_tokens, response_format + openai_messages, + num_tokens, + response_format, + original_response_format, + used_prompt_formatting, ) def _get_token_count(self, content: str) -> int: @@ -4245,6 +4259,8 @@ def _stream_response( openai_messages: List[OpenAIMessage], num_tokens: int, response_format: Optional[Type[BaseModel]] = None, + original_response_format: Optional[Type[BaseModel]] = None, + used_prompt_formatting: bool = False, ) -> Generator[ChatAgentResponse, None, None]: r"""Internal method to handle streaming responses with tool calls.""" @@ -4304,6 +4320,8 @@ def _stream_response( tool_call_records, step_token_usage, response_format, + original_response_format, + used_prompt_formatting, ) if tool_calls_complete: @@ -4374,15 +4392,76 @@ def _stream_response( # Get final completion and record final message try: final_completion = stream.get_final_completion() - final_content = ( - final_completion.choices[0].message.content or "" + + # Check if the model wants to call tools + final_choice = final_completion.choices[0] + final_tool_calls = getattr( + final_choice.message, 'tool_calls', None ) + if final_tool_calls: + # Convert tool calls to accumulated format + # and execute them + for tc in final_tool_calls: + tc_data = { + 'id': tc.id, + 'function': { + 'name': tc.function.name, + 'arguments': tc.function.arguments, + }, + 'complete': True, + } + accumulated_tool_calls[tc.id] = tc_data + + # Record assistant message with tool calls + # and execute tools + for status_response in ( + self + )._execute_tools_sync_with_status_accumulator( + accumulated_tool_calls, + tool_call_records, + ): + yield status_response + + if tool_call_records: + logger.info("Sending back result to model") + + # Update usage + if final_completion.usage: + self._update_token_usage_tracker( + step_token_usage, + safe_model_dump(final_completion.usage), + ) + + # Continue the loop for next model call + accumulated_tool_calls.clear() + if tool_call_records and ( + self.max_iteration is None + or iteration_count < self.max_iteration + ): + try: + openai_messages, num_tokens = ( + self.memory.get_context() + ) + except RuntimeError as e: + yield self._step_terminate( + e.args[1], + tool_call_records, + "max_tokens_exceeded", + ) + return + content_accumulator.reset_streaming_content() + continue + else: + break + + final_content = final_choice.message.content or "" final_reasoning = ( content_accumulator.get_full_reasoning_content() or None ) - final_message = BaseMessage( + # Record full content to memory + record_message = BaseMessage( role_name=self.role_name, role_type=self.role_type, meta_dict={}, @@ -4394,7 +4473,46 @@ def _stream_response( reasoning_content=final_reasoning, ) - self.record_message(final_message) + if ( + used_prompt_formatting + and original_response_format + and not record_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + record_message, original_response_format + ) + + self.record_message(record_message) + + # In delta mode, final response content should + # be empty since all content was already yielded + # incrementally + display_content = ( + final_content if self.stream_accumulate else "" + ) + display_reasoning = ( + final_reasoning if self.stream_accumulate else None + ) + final_message = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content=display_content, + parsed=cast( + "BaseModel | dict[str, Any] | None", + parsed_object, + ), # type: ignore[arg-type] + reasoning_content=display_reasoning, + ) + + if ( + used_prompt_formatting + and original_response_format + and not final_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + final_message, original_response_format + ) # Create final response final_response = ChatAgentResponse( @@ -4454,6 +4572,8 @@ def _process_stream_chunks_with_accumulator( tool_call_records: List[ToolCallingRecord], step_token_usage: Dict[str, int], response_format: Optional[Type[BaseModel]] = None, + original_response_format: Optional[Type[BaseModel]] = None, + used_prompt_formatting: bool = False, ) -> Generator[ChatAgentResponse, None, Tuple[bool, bool]]: r"""Process streaming chunks with content accumulator.""" @@ -4552,6 +4672,15 @@ def _process_stream_chunks_with_accumulator( final_message, response_format ) + if ( + used_prompt_formatting + and original_response_format + and not final_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + final_message, original_response_format + ) + self.record_message(final_message) if chunk.usage: # Handle final usage chunk, whether or not choices are present. @@ -4591,6 +4720,15 @@ def _process_stream_chunks_with_accumulator( final_message, response_format ) + if ( + used_prompt_formatting + and original_response_format + and not final_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + final_message, original_response_format + ) + # Create final response with final usage (not partial) final_response = ChatAgentResponse( msgs=[final_message], @@ -5132,6 +5270,14 @@ async def _astream( ) -> AsyncGenerator[ChatAgentResponse, None]: r"""Asynchronous version of stream method.""" + # Handle response format compatibility with non-strict tools + original_response_format = response_format + input_message, response_format, used_prompt_formatting = ( + self._handle_response_format_with_non_strict_tools( + input_message, response_format + ) + ) + # Convert input message to BaseMessage if necessary if isinstance(input_message, str): input_message = BaseMessage.make_user_message( @@ -5154,7 +5300,11 @@ async def _astream( # Start async streaming response last_response = None async for response in self._astream_response( - openai_messages, num_tokens, response_format + openai_messages, + num_tokens, + response_format, + original_response_format, + used_prompt_formatting, ): last_response = response yield response @@ -5171,6 +5321,8 @@ async def _astream_response( openai_messages: List[OpenAIMessage], num_tokens: int, response_format: Optional[Type[BaseModel]] = None, + original_response_format: Optional[Type[BaseModel]] = None, + used_prompt_formatting: bool = False, ) -> AsyncGenerator[ChatAgentResponse, None]: r"""Async method to handle streaming responses with tool calls.""" @@ -5234,6 +5386,8 @@ async def _astream_response( tool_call_records, step_token_usage, response_format, + original_response_format, + used_prompt_formatting, ): if isinstance(item, tuple): # This is the final return value (stream_completed, @@ -5313,15 +5467,78 @@ async def _astream_response( # Get final completion and record final message try: final_completion = await stream.get_final_completion() - final_content = ( - final_completion.choices[0].message.content or "" + + # Check if the model wants to call tools + final_choice = final_completion.choices[0] + final_tool_calls = getattr( + final_choice.message, 'tool_calls', None ) + if final_tool_calls: + # Convert tool calls to accumulated format + # and execute them + for tc in final_tool_calls: + tc_data = { + 'id': tc.id, + 'function': { + 'name': tc.function.name, + 'arguments': tc.function.arguments, + }, + 'complete': True, + } + accumulated_tool_calls[tc.id] = tc_data + + # Record assistant message with tool calls + # and execute tools + async for status_response in ( + self + )._execute_tools_async_with_status_accumulator( + accumulated_tool_calls, + content_accumulator, + step_token_usage, + tool_call_records, + ): + yield status_response + + if tool_call_records: + logger.info("Sending back result to model") + + # Update usage + if final_completion.usage: + self._update_token_usage_tracker( + step_token_usage, + safe_model_dump(final_completion.usage), + ) + + # Continue the loop for next model call + accumulated_tool_calls.clear() + if tool_call_records and ( + self.max_iteration is None + or iteration_count < self.max_iteration + ): + try: + openai_messages, num_tokens = ( + self.memory.get_context() + ) + except RuntimeError as e: + yield self._step_terminate( + e.args[1], + tool_call_records, + "max_tokens_exceeded", + ) + return + content_accumulator.reset_streaming_content() + continue + else: + break + + final_content = final_choice.message.content or "" final_reasoning = ( content_accumulator.get_full_reasoning_content() or None ) - final_message = BaseMessage( + # Record full content to memory + record_message = BaseMessage( role_name=self.role_name, role_type=self.role_type, meta_dict={}, @@ -5333,7 +5550,46 @@ async def _astream_response( reasoning_content=final_reasoning, ) - self.record_message(final_message) + if ( + used_prompt_formatting + and original_response_format + and not record_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + record_message, original_response_format + ) + + self.record_message(record_message) + + # In delta mode, final response content should + # be empty since all content was already yielded + # incrementally + display_content = ( + final_content if self.stream_accumulate else "" + ) + display_reasoning = ( + final_reasoning if self.stream_accumulate else None + ) + final_message = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content=display_content, + parsed=cast( + "BaseModel | dict[str, Any] | None", + parsed_object, + ), # type: ignore[arg-type] + reasoning_content=display_reasoning, + ) + + if ( + used_prompt_formatting + and original_response_format + and not final_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + final_message, original_response_format + ) # Create final response final_response = ChatAgentResponse( @@ -5489,6 +5745,8 @@ async def _aprocess_stream_chunks_with_accumulator( tool_call_records: List[ToolCallingRecord], step_token_usage: Dict[str, int], response_format: Optional[Type[BaseModel]] = None, + original_response_format: Optional[Type[BaseModel]] = None, + used_prompt_formatting: bool = False, ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]: r"""Async version of process streaming chunks with content accumulator. @@ -5586,6 +5844,15 @@ async def _aprocess_stream_chunks_with_accumulator( final_message, response_format ) + if ( + used_prompt_formatting + and original_response_format + and not final_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + final_message, original_response_format + ) + self.record_message(final_message) if chunk.usage: # Handle final usage chunk, whether or not choices are present. @@ -5625,6 +5892,15 @@ async def _aprocess_stream_chunks_with_accumulator( final_message, response_format ) + if ( + used_prompt_formatting + and original_response_format + and not final_message.parsed + ): + self._apply_prompt_based_parsing_to_message( + final_message, original_response_format + ) + # Create final response with final usage (not partial) final_response = ChatAgentResponse( msgs=[final_message], From ec431f21828f971d13474bc6e20f0ae3dd8cf918 Mon Sep 17 00:00:00 2001 From: Xiangyu Shi Date: Tue, 10 Feb 2026 10:42:52 +0100 Subject: [PATCH 2/6] add example --- .../chatagent_stream_structured_output.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 examples/agents/chatagent_stream_structured_output.py diff --git a/examples/agents/chatagent_stream_structured_output.py b/examples/agents/chatagent_stream_structured_output.py new file mode 100644 index 0000000000..6318c2e137 --- /dev/null +++ b/examples/agents/chatagent_stream_structured_output.py @@ -0,0 +1,108 @@ +# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. ========= + +""" +Example: Streaming + Tool Calls + Structured Output + +Demonstrates using ChatAgent in streaming mode with both tools and +response_format (structured output) simultaneously, in both sync and async. +""" + +import asyncio + +from pydantic import BaseModel, Field + +from camel.agents import ChatAgent +from camel.models import ModelFactory +from camel.toolkits import MathToolkit +from camel.types import ModelPlatformType, ModelType + + +class Result(BaseModel): + sum_result: str = Field(description="Result of the addition") + product_result: str = Field(description="Result of the multiplication") + division_result: str = Field(description="Result of the division") + capital_result: str = Field(description="Result of the capital search") + + +USER_MESSAGE = ( + "Calculate: 1) 123.45 + 678.90 2) 100 * 3.14159 3) 1000 / 7, " + "also search what is the capital of Germany" +) + + +def create_agent() -> ChatAgent: + streaming_model = ModelFactory.create( + model_platform=ModelPlatformType.DEFAULT, + model_type=ModelType.DEFAULT, + model_config_dict={ + "stream": True, + "stream_options": {"include_usage": True}, + }, + ) + return ChatAgent( + system_message="You are a helpful assistant.", + model=streaming_model, + tools=MathToolkit().get_tools(), + stream_accumulate=False, # Delta mode + ) + + +def sync_example(): + """Sync streaming with tools + structured output.""" + print("=== Sync Example ===") + agent = create_agent() + + streaming_response = agent.step(USER_MESSAGE, response_format=Result) + + content_parts = [] + for chunk in streaming_response: + if chunk.msgs[0].content: + content_parts.append(chunk.msgs[0].content) + print(chunk.msgs[0].content, end="", flush=True) + + print() + + # Print tool call records + tool_calls = streaming_response.info.get("tool_calls", []) + if tool_calls: + print(f"\nTool calls made: {len(tool_calls)}") + for i, tc in enumerate(tool_calls, 1): + print(f" {i}. {tc.tool_name}({tc.args}) = {tc.result}") + + # Check parsed output + final_msg = streaming_response.msgs[0] + if final_msg.parsed: + print(f"\nParsed result: {final_msg.parsed}") + + +async def async_example(): + """Async streaming with tools + structured output.""" + print("\n=== Async Example ===") + agent = create_agent() + + content_parts = [] + async for chunk in await agent.astep(USER_MESSAGE, response_format=Result): + if chunk.msgs[0].content: + content_parts.append(chunk.msgs[0].content) + print(chunk.msgs[0].content, end="", flush=True) + + print() + full_content = "".join(content_parts) + print(f"\nFull content: {full_content}") + + +if __name__ == "__main__": + sync_example() + asyncio.run(async_example()) From 8d62cc32ea3417caefda1f60117b74ad8572e701 Mon Sep 17 00:00:00 2001 From: Xiangyu Shi Date: Tue, 10 Feb 2026 10:53:36 +0100 Subject: [PATCH 3/6] improvement --- camel/agents/chat_agent.py | 358 ++++++++++++++++++------------------- 1 file changed, 172 insertions(+), 186 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 458443d3b0..a2e93754e9 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -2525,6 +2525,128 @@ def _try_format_message( except ValidationError: return False + def _format_message_with_fallback( + self, + message: BaseMessage, + response_format: Optional[Type[BaseModel]], + original_response_format: Optional[Type[BaseModel]] = None, + used_prompt_formatting: bool = False, + ) -> None: + r"""Format a message with optional prompt-based parsing fallback. + + First tries standard JSON validation via ``_try_format_message``. + If that fails and prompt-based formatting was used, falls back to + ``_apply_prompt_based_parsing_to_message`` which can extract JSON + from free-form text. + + Args: + message: The message to format. + response_format: The (possibly modified) response format passed + to the model. May be ``None`` when prompt-based formatting + replaced it. + original_response_format: The original response format before + prompt-based conversion. + used_prompt_formatting: Whether prompt-based formatting was used. + """ + if response_format: + self._try_format_message(message, response_format) + if ( + used_prompt_formatting + and original_response_format + and not message.parsed + ): + self._apply_prompt_based_parsing_to_message( + message, original_response_format + ) + + @staticmethod + def _collect_tool_calls_from_completion( + tool_calls: List[Any], + accumulated_tool_calls: Dict[str, Any], + ) -> None: + r"""Convert tool calls from a ChatCompletion into the accumulated + tool-call dictionary format used by the streaming pipeline. + + Args: + tool_calls: Tool call objects from + ``completion.choices[0].message.tool_calls``. + accumulated_tool_calls: Mutable dict that will be populated with + the converted entries. + """ + for tc in tool_calls: + accumulated_tool_calls[tc.id] = { + 'id': tc.id, + 'function': { + 'name': tc.function.name, + 'arguments': tc.function.arguments, + }, + 'complete': True, + } + + def _record_and_build_display_message( + self, + final_content: str, + parsed_object: Any, + final_reasoning: Optional[str], + response_format: Optional[Type[BaseModel]], + original_response_format: Optional[Type[BaseModel]], + used_prompt_formatting: bool, + ) -> BaseMessage: + r"""Record the full message to memory and build a display message. + + In delta mode the display message has empty content because all + content was already yielded incrementally. In accumulate mode the + display message carries the full content. + + Args: + final_content: The full final content string. + parsed_object: The parsed object from structured output stream. + final_reasoning: The reasoning content, if any. + response_format: The (possibly modified) response format. + original_response_format: The original response format. + used_prompt_formatting: Whether prompt-based formatting was used. + + Returns: + BaseMessage: The display message to yield to the caller. + """ + parsed_cast = cast("BaseModel | dict[str, Any] | None", parsed_object) # type: ignore[arg-type] + + # Record full content to memory + record_msg = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content=final_content, + parsed=parsed_cast, + reasoning_content=final_reasoning, + ) + self._format_message_with_fallback( + record_msg, + response_format, + original_response_format, + used_prompt_formatting, + ) + self.record_message(record_msg) + + # Build display message (empty content in delta mode) + display_content = final_content if self.stream_accumulate else "" + display_reasoning = final_reasoning if self.stream_accumulate else None + display_msg = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content=display_content, + parsed=parsed_cast, + reasoning_content=display_reasoning, + ) + self._format_message_with_fallback( + display_msg, + response_format, + original_response_format, + used_prompt_formatting, + ) + return display_msg + def _check_tools_strict_compatibility(self) -> bool: r"""Check if all tools are compatible with OpenAI strict mode. @@ -4399,21 +4521,12 @@ def _stream_response( final_choice.message, 'tool_calls', None ) if final_tool_calls: - # Convert tool calls to accumulated format - # and execute them - for tc in final_tool_calls: - tc_data = { - 'id': tc.id, - 'function': { - 'name': tc.function.name, - 'arguments': tc.function.arguments, - }, - 'complete': True, - } - accumulated_tool_calls[tc.id] = tc_data - - # Record assistant message with tool calls - # and execute tools + self._collect_tool_calls_from_completion( + final_tool_calls, + accumulated_tool_calls, + ) + + # Execute tools for status_response in ( self )._execute_tools_sync_with_status_accumulator( @@ -4460,60 +4573,15 @@ def _stream_response( or None ) - # Record full content to memory - record_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content=final_content, - parsed=cast( - "BaseModel | dict[str, Any] | None", - parsed_object, - ), # type: ignore[arg-type] - reasoning_content=final_reasoning, + final_message = self._record_and_build_display_message( + final_content, + parsed_object, + final_reasoning, + response_format, + original_response_format, + used_prompt_formatting, ) - if ( - used_prompt_formatting - and original_response_format - and not record_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - record_message, original_response_format - ) - - self.record_message(record_message) - - # In delta mode, final response content should - # be empty since all content was already yielded - # incrementally - display_content = ( - final_content if self.stream_accumulate else "" - ) - display_reasoning = ( - final_reasoning if self.stream_accumulate else None - ) - final_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content=display_content, - parsed=cast( - "BaseModel | dict[str, Any] | None", - parsed_object, - ), # type: ignore[arg-type] - reasoning_content=display_reasoning, - ) - - if ( - used_prompt_formatting - and original_response_format - and not final_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - final_message, original_response_format - ) - # Create final response final_response = ChatAgentResponse( msgs=[final_message], @@ -4667,19 +4735,12 @@ def _process_stream_chunks_with_accumulator( reasoning_content=final_reasoning, ) - if response_format: - self._try_format_message( - final_message, response_format - ) - - if ( - used_prompt_formatting - and original_response_format - and not final_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - final_message, original_response_format - ) + self._format_message_with_fallback( + final_message, + response_format, + original_response_format, + used_prompt_formatting, + ) self.record_message(final_message) if chunk.usage: @@ -4715,19 +4776,12 @@ def _process_stream_chunks_with_accumulator( reasoning_content=display_reasoning, ) - if response_format: - self._try_format_message( - final_message, response_format - ) - - if ( - used_prompt_formatting - and original_response_format - and not final_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - final_message, original_response_format - ) + self._format_message_with_fallback( + final_message, + response_format, + original_response_format, + used_prompt_formatting, + ) # Create final response with final usage (not partial) final_response = ChatAgentResponse( @@ -5474,21 +5528,12 @@ async def _astream_response( final_choice.message, 'tool_calls', None ) if final_tool_calls: - # Convert tool calls to accumulated format - # and execute them - for tc in final_tool_calls: - tc_data = { - 'id': tc.id, - 'function': { - 'name': tc.function.name, - 'arguments': tc.function.arguments, - }, - 'complete': True, - } - accumulated_tool_calls[tc.id] = tc_data - - # Record assistant message with tool calls - # and execute tools + self._collect_tool_calls_from_completion( + final_tool_calls, + accumulated_tool_calls, + ) + + # Execute tools async for status_response in ( self )._execute_tools_async_with_status_accumulator( @@ -5537,60 +5582,15 @@ async def _astream_response( or None ) - # Record full content to memory - record_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content=final_content, - parsed=cast( - "BaseModel | dict[str, Any] | None", - parsed_object, - ), # type: ignore[arg-type] - reasoning_content=final_reasoning, + final_message = self._record_and_build_display_message( + final_content, + parsed_object, + final_reasoning, + response_format, + original_response_format, + used_prompt_formatting, ) - if ( - used_prompt_formatting - and original_response_format - and not record_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - record_message, original_response_format - ) - - self.record_message(record_message) - - # In delta mode, final response content should - # be empty since all content was already yielded - # incrementally - display_content = ( - final_content if self.stream_accumulate else "" - ) - display_reasoning = ( - final_reasoning if self.stream_accumulate else None - ) - final_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content=display_content, - parsed=cast( - "BaseModel | dict[str, Any] | None", - parsed_object, - ), # type: ignore[arg-type] - reasoning_content=display_reasoning, - ) - - if ( - used_prompt_formatting - and original_response_format - and not final_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - final_message, original_response_format - ) - # Create final response final_response = ChatAgentResponse( msgs=[final_message], @@ -5839,19 +5839,12 @@ async def _aprocess_stream_chunks_with_accumulator( content=final_content, ) - if response_format: - self._try_format_message( - final_message, response_format - ) - - if ( - used_prompt_formatting - and original_response_format - and not final_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - final_message, original_response_format - ) + self._format_message_with_fallback( + final_message, + response_format, + original_response_format, + used_prompt_formatting, + ) self.record_message(final_message) if chunk.usage: @@ -5887,19 +5880,12 @@ async def _aprocess_stream_chunks_with_accumulator( reasoning_content=display_reasoning, ) - if response_format: - self._try_format_message( - final_message, response_format - ) - - if ( - used_prompt_formatting - and original_response_format - and not final_message.parsed - ): - self._apply_prompt_based_parsing_to_message( - final_message, original_response_format - ) + self._format_message_with_fallback( + final_message, + response_format, + original_response_format, + used_prompt_formatting, + ) # Create final response with final usage (not partial) final_response = ChatAgentResponse( From 7970d8975d9eca9af3cf8fa63c4107700fae1195 Mon Sep 17 00:00:00 2001 From: Tao Sun <168447269+fengju0213@users.noreply.github.com> Date: Tue, 10 Feb 2026 19:30:55 +0800 Subject: [PATCH 4/6] Remove _format_message_with_fallback method Removed the _format_message_with_fallback method and its usages throughout the chat_agent.py file. --- camel/agents/chat_agent.py | 344 ++----------------------------------- 1 file changed, 19 insertions(+), 325 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index a2e93754e9..40f70cbee8 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -2525,40 +2525,6 @@ def _try_format_message( except ValidationError: return False - def _format_message_with_fallback( - self, - message: BaseMessage, - response_format: Optional[Type[BaseModel]], - original_response_format: Optional[Type[BaseModel]] = None, - used_prompt_formatting: bool = False, - ) -> None: - r"""Format a message with optional prompt-based parsing fallback. - - First tries standard JSON validation via ``_try_format_message``. - If that fails and prompt-based formatting was used, falls back to - ``_apply_prompt_based_parsing_to_message`` which can extract JSON - from free-form text. - - Args: - message: The message to format. - response_format: The (possibly modified) response format passed - to the model. May be ``None`` when prompt-based formatting - replaced it. - original_response_format: The original response format before - prompt-based conversion. - used_prompt_formatting: Whether prompt-based formatting was used. - """ - if response_format: - self._try_format_message(message, response_format) - if ( - used_prompt_formatting - and original_response_format - and not message.parsed - ): - self._apply_prompt_based_parsing_to_message( - message, original_response_format - ) - @staticmethod def _collect_tool_calls_from_completion( tool_calls: List[Any], @@ -2589,8 +2555,6 @@ def _record_and_build_display_message( parsed_object: Any, final_reasoning: Optional[str], response_format: Optional[Type[BaseModel]], - original_response_format: Optional[Type[BaseModel]], - used_prompt_formatting: bool, ) -> BaseMessage: r"""Record the full message to memory and build a display message. @@ -2603,8 +2567,6 @@ def _record_and_build_display_message( parsed_object: The parsed object from structured output stream. final_reasoning: The reasoning content, if any. response_format: The (possibly modified) response format. - original_response_format: The original response format. - used_prompt_formatting: Whether prompt-based formatting was used. Returns: BaseMessage: The display message to yield to the caller. @@ -2620,12 +2582,8 @@ def _record_and_build_display_message( parsed=parsed_cast, reasoning_content=final_reasoning, ) - self._format_message_with_fallback( - record_msg, - response_format, - original_response_format, - used_prompt_formatting, - ) + if response_format: + self._try_format_message(record_msg, response_format) self.record_message(record_msg) # Build display message (empty content in delta mode) @@ -2636,134 +2594,11 @@ def _record_and_build_display_message( role_type=self.role_type, meta_dict={}, content=display_content, - parsed=parsed_cast, + parsed=record_msg.parsed, reasoning_content=display_reasoning, ) - self._format_message_with_fallback( - display_msg, - response_format, - original_response_format, - used_prompt_formatting, - ) return display_msg - def _check_tools_strict_compatibility(self) -> bool: - r"""Check if all tools are compatible with OpenAI strict mode. - - Returns: - bool: True if all tools are strict mode compatible, - False otherwise. - """ - tool_schemas = self._get_full_tool_schemas() - for schema in tool_schemas: - if not schema.get("function", {}).get("strict", True): - return False - return True - - def _convert_response_format_to_prompt( - self, response_format: Type[BaseModel] - ) -> str: - r"""Convert a Pydantic response format to a prompt instruction. - - Args: - response_format (Type[BaseModel]): The Pydantic model class. - - Returns: - str: A prompt instruction requesting the specific format. - """ - try: - # Get the JSON schema from the Pydantic model - schema = response_format.model_json_schema() - - # Create a prompt based on the schema - format_instruction = ( - "\n\nPlease respond in the following JSON format:\n{\n" - ) - - properties = schema.get("properties", {}) - for field_name, field_info in properties.items(): - field_type = field_info.get("type", "string") - description = field_info.get("description", "") - - if field_type == "array": - format_instruction += ( - f' "{field_name}": ["array of values"]' - ) - elif field_type == "object": - format_instruction += f' "{field_name}": {{"object"}}' - elif field_type == "boolean": - format_instruction += f' "{field_name}": true' - elif field_type == "number": - format_instruction += f' "{field_name}": 0' - else: - format_instruction += f' "{field_name}": "string value"' - - if description: - format_instruction += f' // {description}' - - # Add comma if not the last item - if field_name != list(properties.keys())[-1]: - format_instruction += "," - format_instruction += "\n" - - format_instruction += "}" - return format_instruction - - except Exception as e: - logger.warning( - f"Failed to convert response_format to prompt: {e}. " - f"Using generic format instruction." - ) - return ( - "\n\nPlease respond in a structured JSON format " - "that matches the requested schema." - ) - - def _handle_response_format_with_non_strict_tools( - self, - input_message: Union[BaseMessage, str], - response_format: Optional[Type[BaseModel]] = None, - ) -> Tuple[Union[BaseMessage, str], Optional[Type[BaseModel]], bool]: - r"""Handle response format when tools are not strict mode compatible. - - Args: - input_message: The original input message. - response_format: The requested response format. - - Returns: - Tuple: (modified_message, modified_response_format, - used_prompt_formatting) - """ - if response_format is None: - return input_message, response_format, False - - # Check if tools are strict mode compatible - if self._check_tools_strict_compatibility(): - return input_message, response_format, False - - # Tools are not strict compatible, convert to prompt - logger.info( - "Non-strict tools detected. Converting response_format to " - "prompt-based formatting." - ) - - format_prompt = self._convert_response_format_to_prompt( - response_format - ) - - # Modify the message to include format instruction - modified_message: Union[BaseMessage, str] - if isinstance(input_message, str): - modified_message = input_message + format_prompt - else: - modified_message = input_message.create_new_instance( - input_message.content + format_prompt - ) - - # Return None for response_format to avoid strict mode conflicts - # and True to indicate we used prompt formatting - return modified_message, None, True - def _is_called_from_registered_toolkit(self) -> bool: r"""Check if current step/astep call originates from a RegisteredAgentToolkit. @@ -2791,75 +2626,6 @@ def _is_called_from_registered_toolkit(self) -> bool: return False - def _apply_prompt_based_parsing_to_message( - self, - message: BaseMessage, - original_response_format: Type[BaseModel], - ) -> None: - r"""Apply prompt-based parsing to a single message. - - Args: - message: The message to parse. - original_response_format: The original response format class. - """ - if not message.content: - return - try: - import json - - from pydantic import ValidationError - - content = message.content.strip() - - # Try direct parsing first - try: - parsed_json = json.loads(content) - message.parsed = original_response_format.model_validate( - parsed_json - ) - return - except (json.JSONDecodeError, ValidationError): - pass - - # Try to extract JSON from text - json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}' - json_matches = re.findall(json_pattern, content, re.DOTALL) - - for json_str in json_matches: - try: - parsed_json = json.loads(json_str) - message.parsed = original_response_format.model_validate( - parsed_json - ) - message.content = json.dumps(parsed_json) - break - except (json.JSONDecodeError, ValidationError): - continue - - if not message.parsed: - logger.warning( - f"Failed to parse JSON from response: {content}" - ) - - except Exception as e: - logger.warning(f"Error during prompt-based parsing: {e}") - - def _apply_prompt_based_parsing( - self, - response: ModelResponse, - original_response_format: Type[BaseModel], - ) -> None: - r"""Apply manual parsing when using prompt-based formatting. - - Args: - response: The model response to parse. - original_response_format: The original response format class. - """ - for message in response.output_messages: - self._apply_prompt_based_parsing_to_message( - message, original_response_format - ) - def _format_response_if_needed( self, response: ModelResponse, @@ -2998,14 +2764,6 @@ def _step_impl( # use disable_tools = self._is_called_from_registered_toolkit() - # Handle response format compatibility with non-strict tools - original_response_format = response_format - input_message, response_format, used_prompt_formatting = ( - self._handle_response_format_with_non_strict_tools( - input_message, response_format - ) - ) - # Convert input message to BaseMessage if necessary if isinstance(input_message, str): input_message = BaseMessage.make_user_message( @@ -3182,12 +2940,6 @@ def _step_impl( self._format_response_if_needed(response, response_format) - # Apply manual parsing if we used prompt-based formatting - if used_prompt_formatting and original_response_format: - self._apply_prompt_based_parsing( - response, original_response_format - ) - # Only record final output if we haven't already recorded tool calls # for this response (to avoid duplicate assistant messages) if not recorded_tool_calls: @@ -3301,14 +3053,6 @@ async def _astep_non_streaming_task( # use disable_tools = self._is_called_from_registered_toolkit() - # Handle response format compatibility with non-strict tools - original_response_format = response_format - input_message, response_format, used_prompt_formatting = ( - self._handle_response_format_with_non_strict_tools( - input_message, response_format - ) - ) - if isinstance(input_message, str): input_message = BaseMessage.make_user_message( role_name="User", content=input_message @@ -3484,12 +3228,6 @@ async def _astep_non_streaming_task( await self._aformat_response_if_needed(response, response_format) - # Apply manual parsing if we used prompt-based formatting - if used_prompt_formatting and original_response_format: - self._apply_prompt_based_parsing( - response, original_response_format - ) - # Only record final output if we haven't already recorded tool calls # for this response (to avoid duplicate assistant messages) if not recorded_tool_calls: @@ -4307,14 +4045,6 @@ def _stream( content, tool calls, and other information as they become available. """ - # Handle response format compatibility with non-strict tools - original_response_format = response_format - input_message, response_format, used_prompt_formatting = ( - self._handle_response_format_with_non_strict_tools( - input_message, response_format - ) - ) - # Convert input message to BaseMessage if necessary if isinstance(input_message, str): input_message = BaseMessage.make_user_message( @@ -4338,8 +4068,6 @@ def _stream( openai_messages, num_tokens, response_format, - original_response_format, - used_prompt_formatting, ) def _get_token_count(self, content: str) -> int: @@ -4381,8 +4109,6 @@ def _stream_response( openai_messages: List[OpenAIMessage], num_tokens: int, response_format: Optional[Type[BaseModel]] = None, - original_response_format: Optional[Type[BaseModel]] = None, - used_prompt_formatting: bool = False, ) -> Generator[ChatAgentResponse, None, None]: r"""Internal method to handle streaming responses with tool calls.""" @@ -4442,8 +4168,6 @@ def _stream_response( tool_call_records, step_token_usage, response_format, - original_response_format, - used_prompt_formatting, ) if tool_calls_complete: @@ -4578,8 +4302,6 @@ def _stream_response( parsed_object, final_reasoning, response_format, - original_response_format, - used_prompt_formatting, ) # Create final response @@ -4640,8 +4362,6 @@ def _process_stream_chunks_with_accumulator( tool_call_records: List[ToolCallingRecord], step_token_usage: Dict[str, int], response_format: Optional[Type[BaseModel]] = None, - original_response_format: Optional[Type[BaseModel]] = None, - used_prompt_formatting: bool = False, ) -> Generator[ChatAgentResponse, None, Tuple[bool, bool]]: r"""Process streaming chunks with content accumulator.""" @@ -4735,12 +4455,10 @@ def _process_stream_chunks_with_accumulator( reasoning_content=final_reasoning, ) - self._format_message_with_fallback( - final_message, - response_format, - original_response_format, - used_prompt_formatting, - ) + if response_format: + self._try_format_message( + final_message, response_format + ) self.record_message(final_message) if chunk.usage: @@ -4776,12 +4494,10 @@ def _process_stream_chunks_with_accumulator( reasoning_content=display_reasoning, ) - self._format_message_with_fallback( - final_message, - response_format, - original_response_format, - used_prompt_formatting, - ) + if response_format: + self._try_format_message( + final_message, response_format + ) # Create final response with final usage (not partial) final_response = ChatAgentResponse( @@ -5324,14 +5040,6 @@ async def _astream( ) -> AsyncGenerator[ChatAgentResponse, None]: r"""Asynchronous version of stream method.""" - # Handle response format compatibility with non-strict tools - original_response_format = response_format - input_message, response_format, used_prompt_formatting = ( - self._handle_response_format_with_non_strict_tools( - input_message, response_format - ) - ) - # Convert input message to BaseMessage if necessary if isinstance(input_message, str): input_message = BaseMessage.make_user_message( @@ -5357,8 +5065,6 @@ async def _astream( openai_messages, num_tokens, response_format, - original_response_format, - used_prompt_formatting, ): last_response = response yield response @@ -5375,8 +5081,6 @@ async def _astream_response( openai_messages: List[OpenAIMessage], num_tokens: int, response_format: Optional[Type[BaseModel]] = None, - original_response_format: Optional[Type[BaseModel]] = None, - used_prompt_formatting: bool = False, ) -> AsyncGenerator[ChatAgentResponse, None]: r"""Async method to handle streaming responses with tool calls.""" @@ -5440,8 +5144,6 @@ async def _astream_response( tool_call_records, step_token_usage, response_format, - original_response_format, - used_prompt_formatting, ): if isinstance(item, tuple): # This is the final return value (stream_completed, @@ -5587,8 +5289,6 @@ async def _astream_response( parsed_object, final_reasoning, response_format, - original_response_format, - used_prompt_formatting, ) # Create final response @@ -5745,8 +5445,6 @@ async def _aprocess_stream_chunks_with_accumulator( tool_call_records: List[ToolCallingRecord], step_token_usage: Dict[str, int], response_format: Optional[Type[BaseModel]] = None, - original_response_format: Optional[Type[BaseModel]] = None, - used_prompt_formatting: bool = False, ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]: r"""Async version of process streaming chunks with content accumulator. @@ -5839,12 +5537,10 @@ async def _aprocess_stream_chunks_with_accumulator( content=final_content, ) - self._format_message_with_fallback( - final_message, - response_format, - original_response_format, - used_prompt_formatting, - ) + if response_format: + self._try_format_message( + final_message, response_format + ) self.record_message(final_message) if chunk.usage: @@ -5880,12 +5576,10 @@ async def _aprocess_stream_chunks_with_accumulator( reasoning_content=display_reasoning, ) - self._format_message_with_fallback( - final_message, - response_format, - original_response_format, - used_prompt_formatting, - ) + if response_format: + self._try_format_message( + final_message, response_format + ) # Create final response with final usage (not partial) final_response = ChatAgentResponse( From 72d73767a6333b540ceca0ca99053e97cc0f7a31 Mon Sep 17 00:00:00 2001 From: Xiangyu Shi Date: Wed, 11 Feb 2026 09:49:04 +0100 Subject: [PATCH 5/6] support gemini --- camel/models/gemini_model.py | 233 +++++++++++++++++++---------------- 1 file changed, 130 insertions(+), 103 deletions(-) diff --git a/camel/models/gemini_model.py b/camel/models/gemini_model.py index e4a2d60e13..f4b5491f1a 100644 --- a/camel/models/gemini_model.py +++ b/camel/models/gemini_model.py @@ -24,6 +24,10 @@ ) from openai import AsyncStream, Stream +from openai.lib.streaming.chat import ( + AsyncChatCompletionStreamManager, + ChatCompletionStreamManager, +) from pydantic import BaseModel from camel.configs import GeminiConfig @@ -451,6 +455,47 @@ async def async_thought_preserving_generator(): return async_thought_preserving_generator() + @staticmethod + def _clean_gemini_tools( + tools: Optional[List[Dict[str, Any]]], + ) -> Optional[List[Dict[str, Any]]]: + r"""Clean tools for Gemini API compatibility. + + Removes unsupported fields like strict, anyOf, and restricts + enum/format to allowed types. + """ + if not tools: + return tools + import copy + + tools = copy.deepcopy(tools) + for tool in tools: + function_dict = tool.get('function', {}) + function_dict.pop("strict", None) + + if 'parameters' in function_dict: + params = function_dict['parameters'] + if 'properties' in params: + for prop_name, prop_value in params['properties'].items(): + if 'anyOf' in prop_value: + first_type = prop_value['anyOf'][0] + params['properties'][prop_name] = first_type + if 'description' in prop_value: + params['properties'][prop_name][ + 'description' + ] = prop_value['description'] + + if prop_value.get('type') != 'string': + prop_value.pop('enum', None) + + if prop_value.get('type') not in [ + 'string', + 'integer', + 'number', + ]: + prop_value.pop('format', None) + return tools + @observe() def _run( self, @@ -479,19 +524,18 @@ def _run( "response_format", None ) messages = self._process_messages(messages) + is_streaming = self.model_config_dict.get("stream", False) + if response_format: - if tools: - raise ValueError( - "Gemini does not support function calling with " - "response format." + tools = self._clean_gemini_tools(tools) + if is_streaming: + return self._request_stream_parse( # type: ignore[return-value] + messages, response_format, tools ) - result: Union[ChatCompletion, Stream[ChatCompletionChunk]] = ( - self._request_parse(messages, response_format) - ) + else: + return self._request_parse(messages, response_format, tools) else: - result = self._request_chat_completion(messages, tools) - - return result + return self._request_chat_completion(messages, tools) @observe() async def _arun( @@ -521,67 +565,90 @@ async def _arun( "response_format", None ) messages = self._process_messages(messages) + is_streaming = self.model_config_dict.get("stream", False) + if response_format: - if tools: - raise ValueError( - "Gemini does not support function calling with " - "response format." + tools = self._clean_gemini_tools(tools) + if is_streaming: + return await self._arequest_stream_parse( # type: ignore[return-value] + messages, response_format, tools + ) + else: + return await self._arequest_parse( + messages, response_format, tools ) - result: Union[ - ChatCompletion, AsyncStream[ChatCompletionChunk] - ] = await self._arequest_parse(messages, response_format) else: - result = await self._arequest_chat_completion(messages, tools) + return await self._arequest_chat_completion(messages, tools) + + @staticmethod + def _build_gemini_response_format( + response_format: Type[BaseModel], + ) -> Dict[str, Any]: + r"""Convert a Pydantic model to Gemini-compatible response_format.""" + schema = response_format.model_json_schema() + # Remove $defs and other unsupported fields for Gemini + schema.pop("$defs", None) + schema.pop("definitions", None) + return { + "type": "json_schema", + "json_schema": { + "name": response_format.__name__, + "schema": schema, + }, + } + + def _request_stream_parse( + self, + messages: List[OpenAIMessage], + response_format: Type[BaseModel], + tools: Optional[List[Dict[str, Any]]] = None, + ) -> ChatCompletionStreamManager[BaseModel]: + r"""Gemini-specific streaming structured output. + + Uses regular streaming with response_format as JSON schema + instead of OpenAI's beta streaming API which is incompatible + with Gemini's tool call delta format. + """ + request_config = self._prepare_request_config(tools) + request_config["stream"] = True + request_config["response_format"] = self._build_gemini_response_format( + response_format + ) + + response = self._client.chat.completions.create( + messages=messages, + model=self.model_type, + **request_config, + ) + return self._preserve_thought_signatures(response) # type: ignore[return-value] - return result + async def _arequest_stream_parse( + self, + messages: List[OpenAIMessage], + response_format: Type[BaseModel], + tools: Optional[List[Dict[str, Any]]] = None, + ) -> AsyncChatCompletionStreamManager[BaseModel]: + r"""Gemini-specific async streaming structured output.""" + request_config = self._prepare_request_config(tools) + request_config["stream"] = True + request_config["response_format"] = self._build_gemini_response_format( + response_format + ) + + response = await self._async_client.chat.completions.create( + messages=messages, + model=self.model_type, + **request_config, + ) + return self._preserve_thought_signatures(response) # type: ignore[return-value] def _request_chat_completion( self, messages: List[OpenAIMessage], tools: Optional[List[Dict[str, Any]]] = None, ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]: - import copy - - request_config = copy.deepcopy(self.model_config_dict) - # Remove strict and anyOf from each tool's function parameters since - # Gemini does not support them - if tools: - for tool in tools: - function_dict = tool.get('function', {}) - function_dict.pop("strict", None) - - # Process parameters to remove anyOf and handle enum/format - if 'parameters' in function_dict: - params = function_dict['parameters'] - if 'properties' in params: - for prop_name, prop_value in params[ - 'properties' - ].items(): - if 'anyOf' in prop_value: - # Replace anyOf with the first type in the list - first_type = prop_value['anyOf'][0] - params['properties'][prop_name] = first_type - # Preserve description if it exists - if 'description' in prop_value: - params['properties'][prop_name][ - 'description' - ] = prop_value['description'] - - # Handle enum and format restrictions for Gemini - # API enum: only allowed for string type - if prop_value.get('type') != 'string': - prop_value.pop('enum', None) - - # format: only allowed for string, integer, and - # number types - if prop_value.get('type') not in [ - 'string', - 'integer', - 'number', - ]: - prop_value.pop('format', None) - - request_config["tools"] = tools + tools = self._clean_gemini_tools(tools) + request_config = self._prepare_request_config(tools) response = self._client.chat.completions.create( messages=messages, @@ -597,48 +664,8 @@ async def _arequest_chat_completion( messages: List[OpenAIMessage], tools: Optional[List[Dict[str, Any]]] = None, ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]: - import copy - - request_config = copy.deepcopy(self.model_config_dict) - # Remove strict and anyOf from each tool's function parameters since - # Gemini does not support them - if tools: - for tool in tools: - function_dict = tool.get('function', {}) - function_dict.pop("strict", None) - - # Process parameters to remove anyOf and handle enum/format - if 'parameters' in function_dict: - params = function_dict['parameters'] - if 'properties' in params: - for prop_name, prop_value in params[ - 'properties' - ].items(): - if 'anyOf' in prop_value: - # Replace anyOf with the first type in the list - first_type = prop_value['anyOf'][0] - params['properties'][prop_name] = first_type - # Preserve description if it exists - if 'description' in prop_value: - params['properties'][prop_name][ - 'description' - ] = prop_value['description'] - - # Handle enum and format restrictions for Gemini - # API enum: only allowed for string type - if prop_value.get('type') != 'string': - prop_value.pop('enum', None) - - # format: only allowed for string, integer, and - # number types - if prop_value.get('type') not in [ - 'string', - 'integer', - 'number', - ]: - prop_value.pop('format', None) - - request_config["tools"] = tools + tools = self._clean_gemini_tools(tools) + request_config = self._prepare_request_config(tools) response = await self._async_client.chat.completions.create( messages=messages, From c9d588a4ea28c645cd7be03fd585dbc5b6355601 Mon Sep 17 00:00:00 2001 From: Xiangyu Shi Date: Wed, 11 Feb 2026 10:06:06 +0100 Subject: [PATCH 6/6] support anthropic --- camel/models/anthropic_model.py | 66 +++++++++++++------ .../chatagent_stream_structured_output.py | 12 ++-- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/camel/models/anthropic_model.py b/camel/models/anthropic_model.py index a362a99df5..cd868392bd 100644 --- a/camel/models/anthropic_model.py +++ b/camel/models/anthropic_model.py @@ -14,7 +14,6 @@ import json import os import time -import warnings from typing import Any, Dict, List, Optional, Type, Union, cast from openai import AsyncStream, Stream @@ -636,6 +635,37 @@ def _convert_anthropic_stream_to_openai_chunk( usage=usage, ) + @staticmethod + def _add_additional_properties_false(schema: Dict[str, Any]) -> None: + r"""Recursively add additionalProperties: false to all object types.""" + if schema.get("type") == "object": + schema["additionalProperties"] = False + for value in schema.values(): + if isinstance(value, dict): + AnthropicModel._add_additional_properties_false(value) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + AnthropicModel._add_additional_properties_false(item) + + @staticmethod + def _build_output_config( + response_format: Type[BaseModel], + ) -> Dict[str, Any]: + r"""Convert a Pydantic model to Anthropic's output_config format.""" + schema = response_format.model_json_schema() + # Remove unsupported fields + schema.pop("$defs", None) + schema.pop("definitions", None) + # Anthropic requires additionalProperties: false on all object types + AnthropicModel._add_additional_properties_false(schema) + return { + "format": { + "type": "json_schema", + "schema": schema, + } + } + def _convert_openai_tools_to_anthropic( self, tools: Optional[List[Dict[str, Any]]] ) -> Optional[List[Dict[str, Any]]]: @@ -678,7 +708,7 @@ def _run( messages (List[OpenAIMessage]): Message list with the chat history in OpenAI API format. response_format (Optional[Type[BaseModel]]): The format of the - response. (Not supported by Anthropic API directly) + response. tools (Optional[List[Dict[str, Any]]]): The schema of the tools to use for the request. @@ -687,15 +717,6 @@ def _run( `ChatCompletion` in the non-stream mode, or `Stream[ChatCompletionChunk]` in the stream mode. """ - if response_format is not None: - warnings.warn( - "The 'response_format' parameter is not supported by the " - "Anthropic API and will be ignored. Consider using tools " - "for structured output instead.", - UserWarning, - stacklevel=2, - ) - # Update Langfuse trace with current agent session and metadata agent_session_id = get_current_agent_session_id() if agent_session_id: @@ -764,6 +785,12 @@ def _run( if key in self.model_config_dict: request_params[key] = self.model_config_dict[key] + # Add structured output via output_config + if response_format is not None: + request_params["output_config"] = self._build_output_config( + response_format + ) + # Convert tools anthropic_tools = self._convert_openai_tools_to_anthropic(tools) if anthropic_tools: @@ -803,7 +830,7 @@ async def _arun( messages (List[OpenAIMessage]): Message list with the chat history in OpenAI API format. response_format (Optional[Type[BaseModel]]): The format of the - response. (Not supported by Anthropic API directly) + response. tools (Optional[List[Dict[str, Any]]]): The schema of the tools to use for the request. @@ -812,15 +839,6 @@ async def _arun( `ChatCompletion` in the non-stream mode, or `AsyncStream[ChatCompletionChunk]` in the stream mode. """ - if response_format is not None: - warnings.warn( - "The 'response_format' parameter is not supported by the " - "Anthropic API and will be ignored. Consider using tools " - "for structured output instead.", - UserWarning, - stacklevel=2, - ) - # Update Langfuse trace with current agent session and metadata agent_session_id = get_current_agent_session_id() if agent_session_id: @@ -889,6 +907,12 @@ async def _arun( if key in self.model_config_dict: request_params[key] = self.model_config_dict[key] + # Add structured output via output_config + if response_format is not None: + request_params["output_config"] = self._build_output_config( + response_format + ) + # Convert tools anthropic_tools = self._convert_openai_tools_to_anthropic(tools) if anthropic_tools: diff --git a/examples/agents/chatagent_stream_structured_output.py b/examples/agents/chatagent_stream_structured_output.py index 6318c2e137..5f332d308e 100644 --- a/examples/agents/chatagent_stream_structured_output.py +++ b/examples/agents/chatagent_stream_structured_output.py @@ -30,10 +30,14 @@ class Result(BaseModel): - sum_result: str = Field(description="Result of the addition") - product_result: str = Field(description="Result of the multiplication") - division_result: str = Field(description="Result of the division") - capital_result: str = Field(description="Result of the capital search") + sum_result: str = Field(description="Only the result of the addition") + product_result: str = Field( + description="Only the result of the multiplication" + ) + division_result: str = Field(description="Only the result of the division") + capital_result: str = Field( + description="Only the result of the capital search" + ) USER_MESSAGE = (