From 1b6ee867c748a5b1c594e4ad6ba64018fcede49f Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 24 Oct 2025 13:34:07 +0300 Subject: [PATCH 01/17] add abstraction of camel layer --- camel/agents/_types.py | 15 +- camel/core/__init__.py | 20 +++ camel/core/messages.py | 163 +++++++++++++++++++ camel/responses/__init__.py | 6 + camel/responses/adapters/__init__.py | 16 ++ camel/responses/adapters/chat_completions.py | 119 ++++++++++++++ camel/responses/model_response.py | 63 +++++++ test/responses/test_chat_adapter.py | 98 +++++++++++ 8 files changed, 491 insertions(+), 9 deletions(-) create mode 100644 camel/core/__init__.py create mode 100644 camel/core/messages.py create mode 100644 camel/responses/adapters/__init__.py create mode 100644 camel/responses/adapters/chat_completions.py create mode 100644 camel/responses/model_response.py create mode 100644 test/responses/test_chat_adapter.py diff --git a/camel/agents/_types.py b/camel/agents/_types.py index 07a86f9cd8..8e9a57e991 100644 --- a/camel/agents/_types.py +++ b/camel/agents/_types.py @@ -11,14 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional -from openai import AsyncStream, Stream -from openai.types.chat import ChatCompletionChunk from pydantic import BaseModel, ConfigDict from camel.messages import BaseMessage -from camel.types import ChatCompletion class ToolCallRequest(BaseModel): @@ -33,11 +30,11 @@ class ModelResponse(BaseModel): r"""The response from the model.""" model_config = ConfigDict(arbitrary_types_allowed=True) - response: Union[ - ChatCompletion, - Stream[ChatCompletionChunk], - AsyncStream[ChatCompletionChunk], - ] + # Phase 1: relax the annotation to decouple from provider schemas. + # Existing call sites do not rely on static typing here and tests + # often pass MagicMock; this change avoids tight coupling to + # ChatCompletion when adapters introduce unified responses. + response: Any tool_call_requests: Optional[List[ToolCallRequest]] output_messages: List[BaseMessage] finish_reasons: List[str] diff --git a/camel/core/__init__.py b/camel/core/__init__.py new file mode 100644 index 0000000000..0060770374 --- /dev/null +++ b/camel/core/__init__.py @@ -0,0 +1,20 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Core abstractions for CAMEL runtime. + +This package hosts model-agnostic message types that can be adapted to +either legacy Chat Completions or the newer OpenAI Responses API. 
+""" + +__all__: list[str] = [] diff --git a/camel/core/messages.py b/camel/core/messages.py new file mode 100644 index 0000000000..42c7bd07b3 --- /dev/null +++ b/camel/core/messages.py @@ -0,0 +1,163 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Model-agnostic message abstractions and converters. + +Phase 1 introduces `CamelMessage` to decouple CAMEL from the legacy +OpenAI Chat Completions message schema while keeping behaviour identical +via adapter conversion. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Literal, Optional, cast + +from pydantic import BaseModel, Field + +from camel.messages import OpenAIMessage + + +class CamelContentPart(BaseModel): + """A single content fragment. + + This is intentionally minimal for Phase 1. It currently models the + fragments we already consume through Chat Completions: text and image. + Tool-related parts are represented at the response layer for now. + """ + + type: Literal["text", "image_url"] + payload: Dict[str, Any] = Field(default_factory=dict) + + +class CamelMessage(BaseModel): + """A model-agnostic chat message used by CAMEL runtime. + + The schema is compatible with both legacy Chat Completions and the + newer Responses API after conversion. + """ + + role: Literal["system", "user", "assistant", "tool", "developer"] + content: List[CamelContentPart] = Field(default_factory=list) + name: Optional[str] = None + tool_call_id: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None + + +def openai_messages_to_camel( + messages: List[OpenAIMessage], +) -> List[CamelMessage]: + """Convert OpenAI ChatCompletion-style messages to `CamelMessage`. + + Notes: + - Only text and image_url items are converted in Phase 1. + - Other fields are carried over when present (name, tool_call_id). 
+ """ + result: List[CamelMessage] = [] + for msg in messages: + role = msg.get("role", "user") # type: ignore[assignment] + parts: List[CamelContentPart] = [] + + content = msg.get("content") + if isinstance(content, str): + if content.strip(): + parts.append( + CamelContentPart(type="text", payload={"text": content}) + ) + elif isinstance(content, list): + for item in content: + item_t = item.get("type") if isinstance(item, dict) else None + if item_t == "text": + text = item.get("text", "") + parts.append( + CamelContentPart(type="text", payload={"text": text}) + ) + elif item_t == "image_url": + image_url = item.get("image_url", {}) + payload = { + "url": image_url.get("url"), + "detail": image_url.get("detail"), + } + parts.append( + CamelContentPart(type="image_url", payload=payload) + ) + + name_val = cast(Optional[str], msg.get("name", None)) + tool_call_id = ( + cast(Optional[str], msg.get("tool_call_id", None)) + if role == "tool" + else None + ) + + result.append( + CamelMessage( + role=cast(Any, role), # mypy: role literal narrowing from dict + content=parts, + name=name_val, + tool_call_id=tool_call_id, + ) + ) + + return result + + +def camel_messages_to_openai( + messages: List[CamelMessage], +) -> List[OpenAIMessage]: + """Convert `CamelMessage` back to OpenAI ChatCompletion-style messages. + + This is lossless for the text/image_url subset used in Phase 1. + """ + result: List[OpenAIMessage] = [] + for cmsg in messages: + if cmsg.role == "tool": + # Tool message expects string content + tool_call_id + text_parts = [ + p.payload.get("text", "") + for p in cmsg.content + if p.type == "text" + ] + content_str = "\n".join([t for t in text_parts if t]) + d: Dict[str, Any] = {"role": "tool", "content": content_str} + if cmsg.tool_call_id: + d["tool_call_id"] = cmsg.tool_call_id + result.append(cast(OpenAIMessage, d)) + continue + + # Non-tool roles: use hybrid content list + hybrid: List[Dict[str, Any]] = [] + for part in cmsg.content: + if part.type == "text": + hybrid.append( + {"type": "text", "text": part.payload.get("text", "")} + ) + elif part.type == "image_url": + url = part.payload.get("url") + detail = part.payload.get("detail") or "auto" + hybrid.append( + { + "type": "image_url", + "image_url": {"url": url, "detail": detail}, + } + ) + + d = {"role": cmsg.role, "content": hybrid or ""} + if cmsg.name and cmsg.role in { + "system", + "user", + "assistant", + "developer", + }: + d["name"] = cmsg.name + result.append(cast(OpenAIMessage, d)) + + return result diff --git a/camel/responses/__init__.py b/camel/responses/__init__.py index 527a586dea..6ed90e7e03 100644 --- a/camel/responses/__init__.py +++ b/camel/responses/__init__.py @@ -11,6 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Unified response abstractions and adapters. + +This package contains agent-visible response containers and provider-agnostic +response models/adapters used across CAMEL. +""" + from .agent_responses import ChatAgentResponse __all__ = [ diff --git a/camel/responses/adapters/__init__.py b/camel/responses/adapters/__init__.py new file mode 100644 index 0000000000..6bcb9e11ec --- /dev/null +++ b/camel/responses/adapters/__init__.py @@ -0,0 +1,16 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Response adapters from provider-specific schemas to CAMEL types.""" + +__all__: list[str] = [] diff --git a/camel/responses/adapters/chat_completions.py b/camel/responses/adapters/chat_completions.py new file mode 100644 index 0000000000..cfa5f26ac4 --- /dev/null +++ b/camel/responses/adapters/chat_completions.py @@ -0,0 +1,119 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Adapters for mapping OpenAI Chat Completions to CAMEL abstractions.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from camel.messages.base import BaseMessage +from camel.responses.model_response import ( + CamelModelResponse, + CamelToolCall, + CamelUsage, +) +from camel.types import ChatCompletion, RoleType + + +def _choice_tool_calls_to_camel( + choice_msg: Any, +) -> Optional[List[CamelToolCall]]: + tool_calls = getattr(choice_msg, "tool_calls", None) + if not tool_calls: + return None + result: List[CamelToolCall] = [] + for tc in tool_calls: + func = getattr(tc, "function", None) + name = getattr(func, "name", None) if func else None + args_str = getattr(func, "arguments", "{}") if func else "{}" + try: + import json + + args = json.loads(args_str) if isinstance(args_str, str) else {} + except Exception: + args = {} + result.append( + CamelToolCall(id=getattr(tc, "id", ""), name=name or "", args=args) + ) + return result + + +def adapt_chat_to_camel_response( + response: ChatCompletion, +) -> CamelModelResponse: + """Convert an OpenAI ChatCompletion into a CamelModelResponse. + + This performs the minimal mapping needed in Phase 1 and keeps the + original response accessible via the `raw` field. 
+ """ + output_messages: List[BaseMessage] = [] + finish_reasons: List[str] = [] + tool_call_requests: Optional[List[CamelToolCall]] = None + + for _, choice in enumerate(response.choices): + finish_reasons.append(str(choice.finish_reason)) + + msg = choice.message + # Skip empty (no content and no tool calls) + if ( + getattr(msg, "content", None) is None + or str(getattr(msg, "content", "")).strip() == "" + ) and not getattr(msg, "tool_calls", None): + continue + + bm = BaseMessage( + role_name="assistant", + role_type=RoleType.ASSISTANT, + meta_dict={}, + content=getattr(msg, "content", "") or "", + parsed=getattr(msg, "parsed", None), + ) + output_messages.append(bm) + + # Collect tool calls from the first non-empty choice only + # (align with existing usage) + if tool_call_requests is None: + tool_call_requests = _choice_tool_calls_to_camel(msg) + + usage_raw: Dict[str, Any] = {} + usage_obj: Optional[Any] = getattr(response, "usage", None) + if usage_obj is not None: + try: + # Pydantic model -> dict + usage_raw = usage_obj.model_dump() # type: ignore[no-any-return] + except Exception: + try: + import dataclasses + + usage_raw = dataclasses.asdict(usage_obj) # type: ignore[arg-type] + except Exception: + usage_raw = {} + + usage = CamelUsage( + input_tokens=(usage_raw or {}).get("prompt_tokens"), + output_tokens=(usage_raw or {}).get("completion_tokens"), + total_tokens=(usage_raw or {}).get("total_tokens"), + raw=usage_raw or None, + ) + + return CamelModelResponse( + id=getattr(response, "id", ""), + model=getattr(response, "model", None), + created=getattr(response, "created", None), + output_messages=output_messages, + tool_call_requests=tool_call_requests, + finish_reasons=finish_reasons, + usage=usage, + raw=response, + ) diff --git a/camel/responses/model_response.py b/camel/responses/model_response.py new file mode 100644 index 0000000000..f1d5df221c --- /dev/null +++ b/camel/responses/model_response.py @@ -0,0 +1,63 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Unified response models used by CAMEL runtime. + +These types are model-agnostic and can be populated from both legacy +Chat Completions and the newer OpenAI Responses API. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + +from camel.messages.base import BaseMessage + + +class CamelToolCall(BaseModel): + """Represents a single tool call request emitted by the model.""" + + id: str + name: str + args: Dict[str, Any] = Field(default_factory=dict) + + +class CamelUsage(BaseModel): + """Normalized usage counters with raw response attached for reference.""" + + input_tokens: Optional[int] = None + output_tokens: Optional[int] = None + total_tokens: Optional[int] = None + raw: Optional[Dict[str, Any]] = None + + +class CamelModelResponse(BaseModel): + """Unified model response returned by adapters/backends. + + Fields mirror the needs of ChatAgent and friends without exposing + provider-specific schemas. + """ + + id: str + model: Optional[str] = None + created: Optional[int] = None + + output_messages: List[BaseMessage] = Field(default_factory=list) + tool_call_requests: Optional[List[CamelToolCall]] = None + finish_reasons: List[str] = Field(default_factory=list) + usage: CamelUsage = Field(default_factory=CamelUsage) + + # Keep a handle to the original provider response for debugging/tests + raw: Any = None diff --git a/test/responses/test_chat_adapter.py b/test/responses/test_chat_adapter.py new file mode 100644 index 0000000000..ec2039b1d3 --- /dev/null +++ b/test/responses/test_chat_adapter.py @@ -0,0 +1,98 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +import json + +from camel.responses.adapters.chat_completions import ( + adapt_chat_to_camel_response, +) +from camel.types import ChatCompletion, ChatCompletionMessage +from camel.types.enums import RoleType + + +def _make_chat_completion( + *, content: str, finish_reason: str = "stop" +) -> ChatCompletion: + """Construct a minimal ChatCompletion using pydantic.construct. + + This avoids importing heavy client dependencies and mirrors how the + codebase itself synthesizes ChatCompletion in adapters elsewhere. + """ + choice = dict( + index=0, + message=ChatCompletionMessage.construct( + role="assistant", content=content, tool_calls=None + ), + finish_reason=finish_reason, + ) + return ChatCompletion.construct( + id="chatcmpl-test-001", + choices=[choice], + created=1730000000, + model="gpt-4o-mini", + object="chat.completion", + usage=None, + ) + + +def test_adapt_chat_to_camel_response_basic(): + cc = _make_chat_completion(content="Hello, world!", finish_reason="stop") + cmr = adapt_chat_to_camel_response(cc) + + assert cmr.id == "chatcmpl-test-001" + assert cmr.model == "gpt-4o-mini" + assert cmr.finish_reasons == ["stop"] + assert ( + cmr.output_messages + and cmr.output_messages[0].content == "Hello, world!" 
+ ) + assert cmr.output_messages[0].role_type == RoleType.ASSISTANT + # usage is optional in this minimal object + assert cmr.usage is not None + # raw holds the original object for debugging + assert cmr.raw is cc + + +def test_adapt_tool_calls_if_present(): + # Build a ChatCompletion-like object with a function tool call + tool_call = { + "id": "call_1", + "type": "function", + "function": {"name": "search", "arguments": json.dumps({"q": "x"})}, + } + + choice = dict( + index=0, + message=ChatCompletionMessage.construct( + role="assistant", content="", tool_calls=[tool_call] + ), + finish_reason="tool_calls", + ) + + cc = ChatCompletion.construct( + id="chatcmpl-test-002", + choices=[choice], + created=1730000001, + model="gpt-4o-mini", + object="chat.completion", + usage=None, + ) + + cmr = adapt_chat_to_camel_response(cc) + assert cmr.finish_reasons == ["tool_calls"] + assert ( + cmr.tool_call_requests is not None and len(cmr.tool_call_requests) == 1 + ) + tc = cmr.tool_call_requests[0] + assert tc.id == "call_1" and tc.name == "search" and tc.args == {"q": "x"} From ce19defa409fae348d940c67eb353ce5c8370b51 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 24 Oct 2025 14:20:35 +0300 Subject: [PATCH 02/17] enrich Camel message design --- camel/core/messages.py | 103 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/camel/core/messages.py b/camel/core/messages.py index 42c7bd07b3..14245e1c9b 100644 --- a/camel/core/messages.py +++ b/camel/core/messages.py @@ -30,12 +30,20 @@ class CamelContentPart(BaseModel): """A single content fragment. - This is intentionally minimal for Phase 1. It currently models the - fragments we already consume through Chat Completions: text and image. - Tool-related parts are represented at the response layer for now. + Phase 1.5: Extend to cover Responses API inputs while staying compatible + with Chat Completions. Supported types: + - text, image_url (Chat-compatible) + - input_text, input_image, input_file, input_audio (Responses-compatible) """ - type: Literal["text", "image_url"] + type: Literal[ + "text", + "image_url", + "input_text", + "input_image", + "input_file", + "input_audio", + ] payload: Dict[str, Any] = Field(default_factory=dict) @@ -161,3 +169,90 @@ def camel_messages_to_openai( result.append(cast(OpenAIMessage, d)) return result + + +def _part_to_responses_fragment(part: CamelContentPart) -> Dict[str, Any]: + """Convert a single CamelContentPart into a Responses API content item. 
+ + Mapping rules: + - text -> {type: input_text, text} + - image_url -> {type: input_image, image_url} + - input_text -> passthrough as {type: input_text, text} + - input_image -> passthrough as {type: input_image, image_url} + - input_file -> {type: input_file, file_id|file_url} + - input_audio -> {type: input_audio, input_audio: {data, format}} + """ + t = part.type + p = part.payload or {} + + if t == "text" or t == "input_text": + text_val = p.get("text", "") + return {"type": "input_text", "text": text_val} + + if t == "image_url" or t == "input_image": + url = p.get("image_url") or p.get("url") + return {"type": "input_image", "image_url": url} + + if t == "input_file": + if "file_id" in p: + return {"type": "input_file", "file_id": p.get("file_id")} + if "file_url" in p: + return {"type": "input_file", "file_url": p.get("file_url")} + # Fallback: allow data URL + if "file_data" in p: + return {"type": "input_file", "file_data": p.get("file_data")} + return {"type": "input_file"} + + if t == "input_audio": + audio = { + "data": p.get("data"), + "format": p.get("format", "wav"), + } + return {"type": "input_audio", "input_audio": audio} + + # Default safe fallback: treat as text + return {"type": "input_text", "text": str(p.get("text", ""))} + + +def camel_messages_to_responses_request( + messages: List[CamelMessage], +) -> Dict[str, Any]: + """Build a minimal Responses API request body shape. + + Returns a dict with optional `instructions` and an `input` list of + message objects, where each message has a `role` and typed `content`. + + Notes: + - System messages are concatenated into `instructions` (in order). + - Non-system messages are emitted as `role: user` items for maximal + compatibility (Responses tolerates user/assistant; we default to user). + - Chat-only parts (text/image_url) converted to input_text/input_image. 
+ """ + instructions_parts: List[str] = [] + input_messages: List[Dict[str, Any]] = [] + + for msg in messages: + if msg.role == "system" or msg.role == "developer": + # Fold into instructions + for part in msg.content: + frag = _part_to_responses_fragment(part) + if frag.get("type") == "input_text": + txt = frag.get("text") or "" + if txt: + instructions_parts.append(str(txt)) + continue + + # Map other roles to a user message for broad compat in Responses + role = "user" + content_frags = [_part_to_responses_fragment(p) for p in msg.content] + input_messages.append( + { + "role": role, + "content": content_frags, + } + ) + + body: Dict[str, Any] = {"input": input_messages} + if instructions_parts: + body["instructions"] = "\n\n".join(instructions_parts) + return body From f8b8d8e214541a9fc677a071e8e4bf818d942830 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 24 Oct 2025 14:29:23 +0300 Subject: [PATCH 03/17] fix _choice_tool_calls_to_camel --- camel/responses/adapters/chat_completions.py | 49 +++++++++++++++----- camel/responses/model_response.py | 3 +- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/camel/responses/adapters/chat_completions.py b/camel/responses/adapters/chat_completions.py index cfa5f26ac4..409eb5a624 100644 --- a/camel/responses/adapters/chat_completions.py +++ b/camel/responses/adapters/chat_completions.py @@ -26,25 +26,52 @@ from camel.types import ChatCompletion, RoleType +def _get(obj: Any, key: str, default: Any = None) -> Any: + """Get attribute or dict item uniformly.""" + if isinstance(obj, dict): + return obj.get(key, default) + return getattr(obj, key, default) + + +def _json_loads_safe(val: Any) -> Dict[str, Any]: + if isinstance(val, dict): + return val + if isinstance(val, str): + try: + import json + + return json.loads(val) + except Exception: + return {} + return {} + + def _choice_tool_calls_to_camel( choice_msg: Any, ) -> Optional[List[CamelToolCall]]: - tool_calls = getattr(choice_msg, "tool_calls", None) + tool_calls = _get(choice_msg, "tool_calls", None) if not tool_calls: return None result: List[CamelToolCall] = [] for tc in tool_calls: - func = getattr(tc, "function", None) - name = getattr(func, "name", None) if func else None - args_str = getattr(func, "arguments", "{}") if func else "{}" - try: - import json - - args = json.loads(args_str) if isinstance(args_str, str) else {} - except Exception: - args = {} + func = _get(tc, "function", None) + # Prefer nested function fields; fall back to flat keys if present + name = ( + _get(func, "name", None) + if func is not None + else _get(tc, "name", None) + ) + args_src = ( + _get(func, "arguments", None) + if func is not None + else _get(tc, "arguments", None) + ) + args = _json_loads_safe(args_src) + call_id = _get(tc, "id", "") result.append( - CamelToolCall(id=getattr(tc, "id", ""), name=name or "", args=args) + CamelToolCall( + id=str(call_id or ""), name=str(name or ""), args=args + ) ) return result diff --git a/camel/responses/model_response.py b/camel/responses/model_response.py index f1d5df221c..bc3ecc5758 100644 --- a/camel/responses/model_response.py +++ b/camel/responses/model_response.py @@ -21,7 +21,7 @@ from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from camel.messages.base import BaseMessage @@ -50,6 +50,7 @@ class CamelModelResponse(BaseModel): provider-specific schemas. 
""" + model_config = ConfigDict(arbitrary_types_allowed=True) id: str model: Optional[str] = None created: Optional[int] = None From 3196cfabd74b39f5406e5ef1c58e00d929d1d27b Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Thu, 30 Oct 2025 16:28:56 +0300 Subject: [PATCH 04/17] add response api model --- camel/agents/chat_agent.py | 107 +++++- camel/models/model_factory.py | 2 + camel/models/openai_model.py | 57 +++- camel/models/openai_responses_model.py | 320 ++++++++++++++++++ camel/types/enums.py | 1 + .../openai_responses_quickstart_example.py | 69 ++++ test/models/test_openai_model_camel_flag.py | 91 +++++ 7 files changed, 628 insertions(+), 19 deletions(-) create mode 100644 camel/models/openai_responses_model.py create mode 100644 examples/models/openai_responses_quickstart_example.py create mode 100644 test/models/test_openai_model_camel_flag.py diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 037d19aa45..55d8502a51 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -84,6 +84,10 @@ ) from camel.prompts import TextPrompt from camel.responses import ChatAgentResponse +from camel.responses.adapters.chat_completions import ( + adapt_chat_to_camel_response, +) +from camel.responses.model_response import CamelModelResponse from camel.storages import JsonStorage from camel.toolkits import FunctionTool, RegisteredAgentToolkit from camel.types import ( @@ -2566,12 +2570,8 @@ def _get_model_response( f"[{current_iteration}]: {sanitized}" ) - if not isinstance(response, ChatCompletion): - raise TypeError( - f"Expected ChatCompletion, got {type(response).__name__}" - ) - - return self._handle_batch_response(response) + camel_resp = self._normalize_to_camel_response(response) + return self._handle_camel_response(camel_resp) @observe() async def _aget_model_response( @@ -2631,12 +2631,8 @@ async def _aget_model_response( f"[{current_iteration}]: {sanitized}" ) - if not isinstance(response, ChatCompletion): - raise TypeError( - f"Expected ChatCompletion, got {type(response).__name__}" - ) - - return self._handle_batch_response(response) + camel_resp = self._normalize_to_camel_response(response) + return self._handle_camel_response(camel_resp) def _sanitize_messages_for_logging( self, messages, prev_num_openai_messages: int @@ -2907,6 +2903,93 @@ def _handle_batch_response( response_id=response.id or "", ) + def _normalize_to_camel_response(self, resp: Any) -> CamelModelResponse: + """Normalize backend response into CamelModelResponse. + + Accepts ChatCompletion (legacy) or already-normalized + CamelModelResponse. + """ + if isinstance(resp, CamelModelResponse): + return resp + # Best-effort detect ChatCompletion without tight import coupling + try: + from camel.types import ( + ChatCompletion as _CC, # local import to avoid cycles + ) + + if isinstance(resp, _CC): + return adapt_chat_to_camel_response(resp) + except Exception: + pass + raise TypeError( + f"Unsupported response type for normalization: {type(resp).__name__}" # noqa:E501 + ) + + def _handle_camel_response( + self, response: CamelModelResponse + ) -> ModelResponse: + """Process a CamelModelResponse and build the legacy ModelResponse. + + Mirrors _handle_batch_response semantics to keep behavior identical. 
+ """ + output_messages: List[BaseMessage] = [] + for msg in response.output_messages: + # Re-wrap to preserve agent role naming convention + chat_message = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict=msg.meta_dict, + content=msg.content, + parsed=msg.parsed, + ) + output_messages.append(chat_message) + + finish_reasons = response.finish_reasons or [] + + usage: Dict[str, Any] = {} + if response.usage and response.usage.raw: + usage = dict(response.usage.raw) + else: + # Synthesize from normalized fields if raw missing + usage = { + "prompt_tokens": response.usage.input_tokens + if response.usage + else 0, + "completion_tokens": response.usage.output_tokens + if response.usage + else 0, + "total_tokens": response.usage.total_tokens + if response.usage + else 0, + } + + tool_call_requests: Optional[List[ToolCallRequest]] = None + if response.tool_call_requests: + tool_call_requests = [] + for tc in response.tool_call_requests: + tool_call_requests.append( + ToolCallRequest( + tool_name=tc.name, + args=tc.args, + tool_call_id=tc.id, + ) + ) + + # For compatibility, return original provider payload when available + provider_payload = getattr(response, "raw", None) + response_id = response.id or "" + + return ModelResponse( + response=provider_payload + if provider_payload is not None + else response, + tool_call_requests=tool_call_requests, + output_messages=output_messages, + finish_reasons=finish_reasons, + usage_dict=usage, + response_id=response_id, + ) + def _step_terminate( self, num_tokens: int, diff --git a/camel/models/model_factory.py b/camel/models/model_factory.py index 1669fce901..4efcdbd0bf 100644 --- a/camel/models/model_factory.py +++ b/camel/models/model_factory.py @@ -40,6 +40,7 @@ from camel.models.ollama_model import OllamaModel from camel.models.openai_compatible_model import OpenAICompatibleModel from camel.models.openai_model import OpenAIModel +from camel.models.openai_responses_model import OpenAIResponsesModel from camel.models.openrouter_model import OpenRouterModel from camel.models.ppio_model import PPIOModel from camel.models.qianfan_model import QianfanModel @@ -84,6 +85,7 @@ class ModelFactory: ModelPlatformType.VOLCANO: VolcanoModel, ModelPlatformType.NETMIND: NetmindModel, ModelPlatformType.OPENAI: OpenAIModel, + ModelPlatformType.OPENAI_RESPONSES: OpenAIResponsesModel, ModelPlatformType.AZURE: AzureOpenAIModel, ModelPlatformType.ANTHROPIC: AnthropicModel, ModelPlatformType.GROQ: GroqModel, diff --git a/camel/models/openai_model.py b/camel/models/openai_model.py index 4c8751787c..9fe109df21 100644 --- a/camel/models/openai_model.py +++ b/camel/models/openai_model.py @@ -26,6 +26,9 @@ from camel.logger import get_logger from camel.messages import OpenAIMessage from camel.models import BaseModelBackend +from camel.responses.adapters.chat_completions import ( + adapt_chat_to_camel_response, +) from camel.types import ( ChatCompletion, ChatCompletionChunk, @@ -336,11 +339,30 @@ def _run( ) else: # Use non-streaming parse for structured output - return self._request_parse(messages, response_format, tools) + result = self._request_parse(messages, response_format, tools) + if ( + os.environ.get("CAMEL_USE_CAMEL_RESPONSE", "false").lower() + == "true" + ): + try: + return adapt_chat_to_camel_response(result) # type: ignore[return-value] + except Exception: + pass + return result else: - result = self._request_chat_completion(messages, tools) + resp_or_stream = self._request_chat_completion(messages, tools) + if ( + 
os.environ.get("CAMEL_USE_CAMEL_RESPONSE", "false").lower() + == "true" + ): + try: + from camel.types import ChatCompletion as _CC - return result + if isinstance(resp_or_stream, _CC): + return adapt_chat_to_camel_response(resp_or_stream) # type: ignore[return-value] + except Exception: + pass + return resp_or_stream @observe() async def _arun( @@ -407,13 +429,34 @@ async def _arun( ) else: # Use non-streaming parse for structured output - return await self._arequest_parse( + result = await self._arequest_parse( messages, response_format, tools ) + if ( + os.environ.get("CAMEL_USE_CAMEL_RESPONSE", "false").lower() + == "true" + ): + try: + return adapt_chat_to_camel_response(result) # type: ignore[return-value] + except Exception: + pass + return result else: - result = await self._arequest_chat_completion(messages, tools) - - return result + resp_or_stream = await self._arequest_chat_completion( + messages, tools + ) + if ( + os.environ.get("CAMEL_USE_CAMEL_RESPONSE", "false").lower() + == "true" + ): + try: + from camel.types import ChatCompletion as _CC + + if isinstance(resp_or_stream, _CC): + return adapt_chat_to_camel_response(resp_or_stream) # type: ignore[return-value] + except Exception: + pass + return resp_or_stream def _request_chat_completion( self, diff --git a/camel/models/openai_responses_model.py b/camel/models/openai_responses_model.py new file mode 100644 index 0000000000..38100a1c3d --- /dev/null +++ b/camel/models/openai_responses_model.py @@ -0,0 +1,320 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""OpenAI Responses API backend in a unified BaseModelBackend interface.""" + +from __future__ import annotations + +import os +from typing import Any, Dict, List, Optional, Type, Union + +from openai import AsyncOpenAI, OpenAI +from pydantic import BaseModel + +from camel.core.messages import ( + camel_messages_to_responses_request, + openai_messages_to_camel, +) +from camel.messages import OpenAIMessage +from camel.models.base_model import BaseModelBackend +from camel.responses.model_response import CamelModelResponse +from camel.types import ChatCompletion, ModelType +from camel.utils import ( + BaseTokenCounter, + OpenAITokenCounter, + get_current_agent_session_id, + is_langfuse_available, + update_langfuse_trace, +) + + +class OpenAIResponsesModel(BaseModelBackend): + r"""OpenAI Responses API backend returning CamelModelResponse. + + This backend is additive and does not alter existing OpenAIModel logic. + It accepts OpenAI-style messages for compatibility, converts them to the + Responses input shape, calls `responses.create`, then maps the provider + result to `CamelModelResponse`. 
+ """ + + def __init__( + self, + model_type: Union[ModelType, str], + model_config_dict: Optional[Dict[str, Any]] = None, + api_key: Optional[str] = None, + url: Optional[str] = None, + token_counter: Optional[BaseTokenCounter] = None, + timeout: Optional[float] = None, + max_retries: int = 3, + **kwargs: Any, + ) -> None: + if model_config_dict is None: + model_config_dict = {} + api_key = api_key or os.environ.get("OPENAI_API_KEY") + url = url or os.environ.get("OPENAI_BASE_URL") + timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180)) + + super().__init__( + model_type, model_config_dict, api_key, url, token_counter, timeout + ) + + # Create clients (Langfuse-aware when available) + if is_langfuse_available(): + from langfuse.openai import AsyncOpenAI as LangfuseAsyncOpenAI + from langfuse.openai import OpenAI as LangfuseOpenAI + + self._client = LangfuseOpenAI( + timeout=self._timeout, + max_retries=max_retries, + base_url=self._url, + api_key=self._api_key, + **kwargs, + ) + self._async_client = LangfuseAsyncOpenAI( + timeout=self._timeout, + max_retries=max_retries, + base_url=self._url, + api_key=self._api_key, + **kwargs, + ) + else: + self._client = OpenAI( + timeout=self._timeout, + max_retries=max_retries, + base_url=self._url, + api_key=self._api_key, + **kwargs, + ) + self._async_client = AsyncOpenAI( + timeout=self._timeout, + max_retries=max_retries, + base_url=self._url, + api_key=self._api_key, + **kwargs, + ) + + @property + def token_counter(self) -> BaseTokenCounter: + if not self._token_counter: + self._token_counter = OpenAITokenCounter(self.model_type) + return self._token_counter + + # ----------------------- helpers ----------------------- + def _to_camel_response_from_responses( + self, resp: Any, expected_parsed_type: Optional[Type[BaseModel]] = None + ) -> CamelModelResponse: + """Map a minimal Responses object to CamelModelResponse. + + This uses duck typing to avoid hard dependencies on a specific + provider SDK version. It handles the common `output_text` and + aggregates text from `output[].content[]` as a fallback. 
+ """ + text = getattr(resp, "output_text", None) + if not text: + # Fallback: concatenate all text parts from output[].content[] + parts: List[str] = [] + output = getattr(resp, "output", None) + if isinstance(output, list): + for item in output: + content = getattr(item, "content", None) or ( + item.get("content") if isinstance(item, dict) else None + ) + if isinstance(content, list): + for c in content: + if isinstance(c, dict) and c.get("type") in ( + "output_text", + "text", + "input_text", + ): + val = ( + c.get("text") or c.get("output_text") or "" + ) + if val: + parts.append(str(val)) + text = "\n".join(parts) if parts else "" + + from camel.messages.base import BaseMessage + from camel.types import RoleType + + parsed_obj = None + if expected_parsed_type is not None: + # Prefer SDK's top-level parsed field + parsed_obj = getattr(resp, "output_parsed", None) + if parsed_obj is None: + parsed_obj = getattr(resp, "parsed", None) + if parsed_obj is None: + output = getattr(resp, "output", None) + if isinstance(output, list) and output: + first = output[0] + # Nested parsed on item or first content element + parsed_obj = getattr(first, "parsed", None) + if parsed_obj is None and isinstance(first, dict): + parsed_obj = first.get("parsed") + if parsed_obj is None: + content = getattr(first, "content", None) or ( + first.get("content") + if isinstance(first, dict) + else None + ) + if isinstance(content, list) and content: + c0 = content[0] + if isinstance(c0, dict): + parsed_obj = c0.get("parsed") + + msg = BaseMessage( + role_name="assistant", + role_type=RoleType.ASSISTANT, + meta_dict={}, + content=text or "", + parsed=parsed_obj if isinstance(parsed_obj, BaseModel) else None, + ) + + # usage is provider-specific; attach raw when present + usage_raw: Optional[Dict[str, Any]] = None + usage_obj = getattr(resp, "usage", None) + try: + if usage_obj is not None: + usage_raw = ( + usage_obj.model_dump() # type: ignore[attr-defined] + if hasattr(usage_obj, "model_dump") + else dict(usage_obj) + if isinstance(usage_obj, dict) + else None + ) + except Exception: + usage_raw = None + + return CamelModelResponse( + id=getattr(resp, "id", ""), + model=getattr(resp, "model", None), + created=getattr(resp, "created", None), + output_messages=[msg], + finish_reasons=["stop"], + usage={ + "raw": usage_raw, + }, # type: ignore[arg-type] + raw=resp, + ) + + # ----------------------- BaseModelBackend API ----------------------- + def _run( + self, + messages: List[OpenAIMessage], + response_format: Optional[Type[BaseModel]] = None, + tools: Optional[List[Dict[str, Any]]] = None, + ) -> Union[ + ChatCompletion, ChatCompletion + ]: # unused legacy types in signature + # Update trace + agent_session_id = get_current_agent_session_id() + if agent_session_id: + update_langfuse_trace( + session_id=agent_session_id, + metadata={ + "agent_id": str(agent_session_id), + "model_type": str(self.model_type), + }, + tags=["CAMEL-AI", str(self.model_type)], + ) + + # Convert OpenAI chat to Camel messages, then to Responses body + camel_msgs = openai_messages_to_camel(messages) + body = camel_messages_to_responses_request(camel_msgs) + + # Merge extra args from model_config_dict + request_dict = dict(self.model_config_dict) + request_dict.update(body) + + # Tools: Responses also accepts `tools`; pass through when provided + if tools: + request_dict["tools"] = tools + + if response_format is not None: + # Structured outputs require Responses.parse with text_format + parse_fn = getattr(self._client.responses, "parse", 
None) + if not callable(parse_fn): + raise RuntimeError( + "responses.parse is not available. " + "Upgrade the openai package to support Responses.parse, " + "or call without response_format." + ) + try: + resp = parse_fn( + model=self.model_type, + text_format=response_format, + **request_dict, + ) + except Exception as e: + raise RuntimeError( + "Failed to perform structured parse via Responses API. " + "Check that your model supports structured outputs." + ) from e + return self._to_camel_response_from_responses( + resp, expected_parsed_type=response_format + ) # type: ignore[return-value] + else: + resp = self._client.responses.create( + model=self.model_type, **request_dict + ) + return self._to_camel_response_from_responses(resp) # type: ignore[return-value] + + async def _arun( + self, + messages: List[OpenAIMessage], + response_format: Optional[Type[BaseModel]] = None, + tools: Optional[List[Dict[str, Any]]] = None, + ) -> Union[ChatCompletion, ChatCompletion]: + agent_session_id = get_current_agent_session_id() + if agent_session_id: + update_langfuse_trace( + session_id=agent_session_id, + metadata={ + "agent_id": str(agent_session_id), + "model_type": str(self.model_type), + }, + tags=["CAMEL-AI", str(self.model_type)], + ) + + camel_msgs = openai_messages_to_camel(messages) + body = camel_messages_to_responses_request(camel_msgs) + request_dict = dict(self.model_config_dict) + request_dict.update(body) + if tools: + request_dict["tools"] = tools + + if response_format is not None: + parse_fn = getattr(self._async_client.responses, "parse", None) + if not callable(parse_fn): + raise RuntimeError( + "responses.parse is not available. " + "Please upgrade the openai package." + ) + try: + resp = await parse_fn( + model=self.model_type, + text_format=response_format, + **request_dict, + ) + except Exception as e: + raise RuntimeError( + "Failed to call structured parse via Responses API. " + "Check model support and SDK version." + ) from e + return self._to_camel_response_from_responses( + resp, expected_parsed_type=response_format + ) # type: ignore[return-value] + else: + resp = await self._async_client.responses.create( + model=self.model_type, **request_dict + ) + return self._to_camel_response_from_responses(resp) # type: ignore[return-value] diff --git a/camel/types/enums.py b/camel/types/enums.py index cec536c547..1a6ebbfd79 100644 --- a/camel/types/enums.py +++ b/camel/types/enums.py @@ -1680,6 +1680,7 @@ class ModelPlatformType(Enum): DEFAULT = os.getenv("DEFAULT_MODEL_PLATFORM_TYPE", "openai") OPENAI = "openai" + OPENAI_RESPONSES = "openai-responses" AWS_BEDROCK = "aws-bedrock" AZURE = "azure" ANTHROPIC = "anthropic" diff --git a/examples/models/openai_responses_quickstart_example.py b/examples/models/openai_responses_quickstart_example.py new file mode 100644 index 0000000000..d792e08846 --- /dev/null +++ b/examples/models/openai_responses_quickstart_example.py @@ -0,0 +1,69 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""OpenAI Responses API — quickstart with ModelFactory. + +This example shows how to: + 1) Create the Responses backend via ModelFactory + 2) Make a basic non-streaming request + 3) Do structured output parsing with a Pydantic schema + +Requirements: + export OPENAI_API_KEY=sk-... +""" + +from __future__ import annotations + +from pydantic import BaseModel + +from camel.models import ModelFactory +from camel.types import ModelPlatformType, ModelType + + +def basic_request() -> None: + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + messages = [ + {"role": "system", "content": "You are a concise assistant."}, + {"role": "user", "content": "Give me one sentence about the ocean."}, + ] + resp = model.run(messages) + # Responses backend returns CamelModelResponse + print("Response ID:", resp.id) + print("Text:\n", resp.output_messages[0].content) + + +class Country(BaseModel): + name: str + capital: str + + +def structured_output() -> None: + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + messages = [ + { + "role": "user", + "content": "Extract country and capital from: 'Paris is the capital of France.'", # noqa:E501 + } + ] + resp = model.run(messages, response_format=Country) + parsed = resp.output_messages[0].parsed + print("Parsed:", parsed) + + +if __name__ == "__main__": + basic_request() + structured_output() diff --git a/test/models/test_openai_model_camel_flag.py b/test/models/test_openai_model_camel_flag.py new file mode 100644 index 0000000000..4c14221e99 --- /dev/null +++ b/test/models/test_openai_model_camel_flag.py @@ -0,0 +1,91 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= + +import os +from contextlib import contextmanager + +from camel.models.openai_model import OpenAIModel +from camel.responses.model_response import CamelModelResponse +from camel.types import ChatCompletion, ChatCompletionMessage, ModelType + + +@contextmanager +def env(var: str, value: str): + old = os.environ.get(var) + os.environ[var] = value + try: + yield + finally: + if old is None: + del os.environ[var] + else: + os.environ[var] = old + + +def _fake_chat_completion(text: str = "ok") -> ChatCompletion: + choice = dict( + index=0, + message=ChatCompletionMessage.construct( + role="assistant", content=text + ), + finish_reason="stop", + ) + return ChatCompletion.construct( + id="chatcmpl-flag-001", + choices=[choice], + created=1730000100, + model="gpt-4o-mini", + object="chat.completion", + usage=None, + ) + + +def test_openai_model_returns_camel_when_flag_on(monkeypatch): + # Satisfy constructor API key check + with ( + env("OPENAI_API_KEY", "test"), + env("CAMEL_USE_CAMEL_RESPONSE", "true"), + ): + model = OpenAIModel(ModelType.GPT_4O_MINI) + + # Avoid network: stub internal request method + monkeypatch.setattr( + model, + "_request_chat_completion", + lambda messages, tools=None: _fake_chat_completion("hello"), + ) + + resp = model.run([{"role": "user", "content": "hi"}]) + assert isinstance(resp, CamelModelResponse) + assert resp.id == "chatcmpl-flag-001" + assert ( + resp.output_messages and resp.output_messages[0].content == "hello" + ) + + +def test_openai_model_returns_chat_when_flag_off(monkeypatch): + with ( + env("OPENAI_API_KEY", "test"), + env("CAMEL_USE_CAMEL_RESPONSE", "false"), + ): + model = OpenAIModel(ModelType.GPT_4O_MINI) + monkeypatch.setattr( + model, + "_request_chat_completion", + lambda messages, tools=None: _fake_chat_completion("hello"), + ) + + resp = model.run([{"role": "user", "content": "hi"}]) + assert isinstance(resp, ChatCompletion) + assert resp.choices[0].message.content == "hello" From 00dc3ffe7445489367cfe7e4a4593814506a2627 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 31 Oct 2025 09:52:17 +0300 Subject: [PATCH 05/17] Update camel/models/openai_responses_model.py Co-authored-by: Tao Sun <168447269+fengju0213@users.noreply.github.com> --- camel/models/openai_responses_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/camel/models/openai_responses_model.py b/camel/models/openai_responses_model.py index 38100a1c3d..3f94f68598 100644 --- a/camel/models/openai_responses_model.py +++ b/camel/models/openai_responses_model.py @@ -61,7 +61,7 @@ def __init__( if model_config_dict is None: model_config_dict = {} api_key = api_key or os.environ.get("OPENAI_API_KEY") - url = url or os.environ.get("OPENAI_BASE_URL") + url = url or os.environ.get("OPENAI_API_BASE_URL") timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180)) super().__init__( From bd013336a6744dddb6235089295e1134e72a6462 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 14 Nov 2025 09:28:46 +0800 Subject: [PATCH 06/17] improve exception handlings --- camel/agents/chat_agent.py | 85 +++---------------- camel/models/openai_model.py | 14 ++- camel/responses/adapters/chat_completions.py | 48 ++++++++--- .../reference/camel.agents.chat_agent.mdx | 19 ----- docs/reference/camel.agents.chat_agent.md | 19 ----- 5 files changed, 57 insertions(+), 128 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 1a00aab744..9fa489c868 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -61,7 +61,6 
@@ convert_to_function_tool, convert_to_schema, get_info_dict, - handle_logprobs, safe_model_dump, ) from camel.agents.base import BaseAgent @@ -93,7 +92,6 @@ from camel.storages import JsonStorage from camel.toolkits import FunctionTool, RegisteredAgentToolkit from camel.types import ( - ChatCompletion, ChatCompletionChunk, ModelPlatformType, ModelType, @@ -3615,70 +3613,6 @@ def _step_get_info( external_tool_call_requests, ) - def _handle_batch_response( - self, response: ChatCompletion - ) -> ModelResponse: - r"""Process a batch response from the model and extract the necessary - information. - - Args: - response (ChatCompletion): Model response. - - Returns: - _ModelResponse: parsed model response. - """ - output_messages: List[BaseMessage] = [] - for choice in response.choices: - # Skip messages with no meaningful content - if ( - choice.message.content is None - or choice.message.content.strip() == "" - ) and not choice.message.tool_calls: - continue - - meta_dict = {} - if logprobs_info := handle_logprobs(choice): - meta_dict["logprobs_info"] = logprobs_info - - chat_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict=meta_dict, - content=choice.message.content or "", - parsed=getattr(choice.message, "parsed", None), - ) - - output_messages.append(chat_message) - - finish_reasons = [ - str(choice.finish_reason) for choice in response.choices - ] - - usage = {} - if response.usage is not None: - usage = safe_model_dump(response.usage) - - tool_call_requests: Optional[List[ToolCallRequest]] = None - if tool_calls := response.choices[0].message.tool_calls: - tool_call_requests = [] - for tool_call in tool_calls: - tool_name = tool_call.function.name # type: ignore[union-attr] - tool_call_id = tool_call.id - args = json.loads(tool_call.function.arguments) # type: ignore[union-attr] - tool_call_request = ToolCallRequest( - tool_name=tool_name, args=args, tool_call_id=tool_call_id - ) - tool_call_requests.append(tool_call_request) - - return ModelResponse( - response=response, - tool_call_requests=tool_call_requests, - output_messages=output_messages, - finish_reasons=finish_reasons, - usage_dict=usage, - response_id=response.id or "", - ) - def _normalize_to_camel_response(self, resp: Any) -> CamelModelResponse: """Normalize backend response into CamelModelResponse. @@ -3695,8 +3629,12 @@ def _normalize_to_camel_response(self, resp: Any) -> CamelModelResponse: if isinstance(resp, _CC): return adapt_chat_to_camel_response(resp) - except Exception: - pass + except Exception as exc: + logger.warning( + "Failed to normalize response of type %s via ChatCompletion adapter: %s", # noqa:E501 + type(resp).__name__, + exc, + ) raise TypeError( f"Unsupported response type for normalization: {type(resp).__name__}" # noqa:E501 ) @@ -3704,10 +3642,7 @@ def _normalize_to_camel_response(self, resp: Any) -> CamelModelResponse: def _handle_camel_response( self, response: CamelModelResponse ) -> ModelResponse: - """Process a CamelModelResponse and build the legacy ModelResponse. - - Mirrors _handle_batch_response semantics to keep behavior identical. 
- """ + """Process a CamelModelResponse and build the legacy ModelResponse.""" output_messages: List[BaseMessage] = [] for msg in response.output_messages: # Re-wrap to preserve agent role naming convention @@ -4198,7 +4133,8 @@ def _stream_response( return else: # Handle non-streaming response (fallback) - model_response = self._handle_batch_response(response) + camel_response = self._normalize_to_camel_response(response) + model_response = self._handle_camel_response(camel_response) yield self._convert_to_chatagent_response( model_response, tool_call_records, @@ -4957,7 +4893,8 @@ async def _astream_response( return else: # Handle non-streaming response (fallback) - model_response = self._handle_batch_response(response) + camel_response = self._normalize_to_camel_response(response) + model_response = self._handle_camel_response(camel_response) yield self._convert_to_chatagent_response( model_response, tool_call_records, diff --git a/camel/models/openai_model.py b/camel/models/openai_model.py index 9fe109df21..657ab8c211 100644 --- a/camel/models/openai_model.py +++ b/camel/models/openai_model.py @@ -346,8 +346,11 @@ def _run( ): try: return adapt_chat_to_camel_response(result) # type: ignore[return-value] - except Exception: - pass + except Exception as exc: + logger.warning( + "Failed to convert ChatCompletion to CamelModelResponse: %s", # noqa:E501 + exc, + ) return result else: resp_or_stream = self._request_chat_completion(messages, tools) @@ -360,8 +363,11 @@ def _run( if isinstance(resp_or_stream, _CC): return adapt_chat_to_camel_response(resp_or_stream) # type: ignore[return-value] - except Exception: - pass + except Exception as exc: + logger.warning( + "Failed to convert streamed ChatCompletion to CamelModelResponse: %s", # noqa:E501 + exc, + ) return resp_or_stream @observe() diff --git a/camel/responses/adapters/chat_completions.py b/camel/responses/adapters/chat_completions.py index 409eb5a624..2880774baa 100644 --- a/camel/responses/adapters/chat_completions.py +++ b/camel/responses/adapters/chat_completions.py @@ -17,6 +17,7 @@ from typing import Any, Dict, List, Optional +from camel.logger import get_logger from camel.messages.base import BaseMessage from camel.responses.model_response import ( CamelModelResponse, @@ -25,6 +26,8 @@ ) from camel.types import ChatCompletion, RoleType +logger = get_logger(__name__) + def _get(obj: Any, key: str, default: Any = None) -> Any: """Get attribute or dict item uniformly.""" @@ -41,8 +44,18 @@ def _json_loads_safe(val: Any) -> Dict[str, Any]: import json return json.loads(val) - except Exception: + except Exception as exc: + snippet = val[:200] + "..." if len(val) > 200 else val + logger.warning( + "Failed to parse tool call arguments as JSON. 
Snippet=%s, error=%s", # noqa:E501 + snippet, + exc, + ) return {} + if val is not None: + logger.debug( + "Unexpected type for tool call arguments: %s", type(val).__name__ + ) return {} @@ -116,21 +129,32 @@ def adapt_chat_to_camel_response( usage_raw: Dict[str, Any] = {} usage_obj: Optional[Any] = getattr(response, "usage", None) if usage_obj is not None: - try: - # Pydantic model -> dict - usage_raw = usage_obj.model_dump() # type: ignore[no-any-return] - except Exception: - try: - import dataclasses + import dataclasses + if hasattr(usage_obj, "model_dump"): + try: + usage_raw = usage_obj.model_dump() # type: ignore[no-any-return] + except Exception as exc: + logger.warning("Failed to dump usage via model_dump: %s", exc) + elif dataclasses.is_dataclass(usage_obj): + try: usage_raw = dataclasses.asdict(usage_obj) # type: ignore[arg-type] - except Exception: - usage_raw = {} + except Exception as exc: + logger.warning( + "Failed to dump usage via dataclasses.asdict: %s", exc + ) + elif isinstance(usage_obj, dict): + usage_raw = dict(usage_obj) + else: + logger.debug( + "Unsupported usage type: %s", type(usage_obj).__name__ + ) + usage_dict = usage_raw or {} usage = CamelUsage( - input_tokens=(usage_raw or {}).get("prompt_tokens"), - output_tokens=(usage_raw or {}).get("completion_tokens"), - total_tokens=(usage_raw or {}).get("total_tokens"), + input_tokens=usage_dict.get("prompt_tokens"), + output_tokens=usage_dict.get("completion_tokens"), + total_tokens=usage_dict.get("total_tokens"), raw=usage_raw or None, ) diff --git a/docs/mintlify/reference/camel.agents.chat_agent.mdx b/docs/mintlify/reference/camel.agents.chat_agent.mdx index 495dbb4208..7a74aa2e2f 100644 --- a/docs/mintlify/reference/camel.agents.chat_agent.mdx +++ b/docs/mintlify/reference/camel.agents.chat_agent.mdx @@ -1286,25 +1286,6 @@ any of them signal termination. If a terminator signals termination, the agent's state is updated accordingly, and the termination reason is recorded. - - -### _handle_batch_response - -```python -def _handle_batch_response(self, response: ChatCompletion): -``` - -Process a batch response from the model and extract the necessary -information. - -**Parameters:** - -- **response** (ChatCompletion): Model response. - -**Returns:** - - _ModelResponse: parsed model response. - ### _step_terminate diff --git a/docs/reference/camel.agents.chat_agent.md b/docs/reference/camel.agents.chat_agent.md index 9d087d29a4..c4935e37ca 100644 --- a/docs/reference/camel.agents.chat_agent.md +++ b/docs/reference/camel.agents.chat_agent.md @@ -910,25 +910,6 @@ any of them signal termination. If a terminator signals termination, the agent's state is updated accordingly, and the termination reason is recorded. - - -### _handle_batch_response - -```python -def _handle_batch_response(self, response: ChatCompletion): -``` - -Process a batch response from the model and extract the necessary -information. - -**Parameters:** - -- **response** (ChatCompletion): Model response. - -**Returns:** - - _ModelResponse: parsed model response. 
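To make the duck-typed usage handling in the adapter above concrete, here is a self-contained sketch that mirrors the same branching; usage_to_camel and FakeUsage are illustrative stand-ins, not part of this patch:

    import dataclasses
    from dataclasses import dataclass

    from camel.responses.model_response import CamelUsage


    def usage_to_camel(usage_obj) -> CamelUsage:
        # Mirrors the branching above: pydantic-style model, dataclass, or dict.
        if hasattr(usage_obj, "model_dump"):
            raw = usage_obj.model_dump()
        elif dataclasses.is_dataclass(usage_obj):
            raw = dataclasses.asdict(usage_obj)
        elif isinstance(usage_obj, dict):
            raw = dict(usage_obj)
        else:
            raw = {}
        return CamelUsage(
            input_tokens=raw.get("prompt_tokens"),
            output_tokens=raw.get("completion_tokens"),
            total_tokens=raw.get("total_tokens"),
            raw=raw or None,
        )


    @dataclass
    class FakeUsage:  # stand-in for a provider usage object
        prompt_tokens: int = 5
        completion_tokens: int = 2
        total_tokens: int = 7


    assert usage_to_camel(FakeUsage()).total_tokens == 7
    assert usage_to_camel({"prompt_tokens": 1}).input_tokens == 1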
- ### _step_terminate From 5b8acca0d54724fa11762732d2f8d7630d8c6cc4 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 14 Nov 2025 10:56:34 +0800 Subject: [PATCH 07/17] add stream support --- camel/agents/chat_agent.py | 459 ++++++++++++------ camel/models/openai_responses_model.py | 129 ++--- .../openai_responses_quickstart_example.py | 25 + 3 files changed, 363 insertions(+), 250 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 9fa489c868..ebc86b399d 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -88,6 +88,9 @@ from camel.responses.adapters.chat_completions import ( adapt_chat_to_camel_response, ) +from camel.responses.adapters.responses_adapter import ( + responses_to_camel_response, +) from camel.responses.model_response import CamelModelResponse from camel.storages import JsonStorage from camel.toolkits import FunctionTool, RegisteredAgentToolkit @@ -4050,87 +4053,99 @@ def _stream_response( ): # Handle structured output stream (ChatCompletionStreamManager) with response as stream: - parsed_object = None - - for event in stream: - if event.type == "content.delta": - if getattr(event, "delta", None): - # Use accumulator for proper content management - partial_response = self._create_streaming_response_with_accumulator( # noqa: E501 - content_accumulator, - getattr(event, "delta", ""), - step_token_usage, - tool_call_records=tool_call_records.copy(), + if hasattr(stream, "get_final_response"): + for item in self._process_responses_stream( + stream, + content_accumulator, + tool_call_records, + step_token_usage, + num_tokens, + response_format, + ): + yield item + break + else: + parsed_object = None + + for event in stream: + if event.type == "content.delta": + if getattr(event, "delta", None): + partial_response = self._create_streaming_response_with_accumulator( # noqa: E501 + content_accumulator, + getattr(event, "delta", ""), + step_token_usage, + tool_call_records=tool_call_records.copy(), + ) + yield partial_response + + elif event.type == "content.done": + parsed_object = getattr(event, "parsed", None) + break + elif event.type == "error": + logger.error( + f"Error in structured stream: " + f"{getattr(event, 'error', '')}" + ) + yield self._create_error_response( + str(getattr(event, 'error', '')), + tool_call_records, ) - yield partial_response + return - elif event.type == "content.done": - parsed_object = getattr(event, "parsed", None) + try: + final_completion = stream.get_final_completion() + final_content = ( + final_completion.choices[0].message.content + or "" + ) + + final_message = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content=final_content, + parsed=cast( + "BaseModel | dict[str, Any] | None", + parsed_object, + ), # type: ignore[arg-type] + ) + + self.record_message(final_message) + + final_response = ChatAgentResponse( + msgs=[final_message], + terminated=False, + info={ + "id": final_completion.id or "", + "usage": safe_model_dump( + final_completion.usage + ) + if final_completion.usage + else {}, + "finish_reasons": [ + choice.finish_reason or "stop" + for choice in final_completion.choices + ], + "num_tokens": self._get_token_count( + final_content + ), + "tool_calls": tool_call_records, + "external_tool_requests": None, + "streaming": False, + "partial": False, + }, + ) + yield final_response break - elif event.type == "error": + + except Exception as e: logger.error( - f"Error in structured stream: " - f"{getattr(event, 'error', '')}" + 
f"Error getting final completion: {e}" ) yield self._create_error_response( - str(getattr(event, 'error', '')), - tool_call_records, + str(e), tool_call_records ) return - - # Get final completion and record final message - try: - final_completion = stream.get_final_completion() - final_content = ( - final_completion.choices[0].message.content or "" - ) - - final_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content=final_content, - parsed=cast( - "BaseModel | dict[str, Any] | None", - parsed_object, - ), # type: ignore[arg-type] - ) - - self.record_message(final_message) - - # Create final response - final_response = ChatAgentResponse( - msgs=[final_message], - terminated=False, - info={ - "id": final_completion.id or "", - "usage": safe_model_dump( - final_completion.usage - ) - if final_completion.usage - else {}, - "finish_reasons": [ - choice.finish_reason or "stop" - for choice in final_completion.choices - ], - "num_tokens": self._get_token_count( - final_content - ), - "tool_calls": tool_call_records, - "external_tool_requests": None, - "streaming": False, - "partial": False, - }, - ) - yield final_response - break - - except Exception as e: - logger.error(f"Error getting final completion: {e}") - yield self._create_error_response( - str(e), tool_call_records - ) - return else: # Handle non-streaming response (fallback) camel_response = self._normalize_to_camel_response(response) @@ -4808,89 +4823,101 @@ async def _astream_response( # Handle structured output stream # (AsyncChatCompletionStreamManager) async with response as stream: - parsed_object = None - - async for event in stream: - if event.type == "content.delta": - if getattr(event, "delta", None): - # Use accumulator for proper content management - partial_response = self._create_streaming_response_with_accumulator( # noqa: E501 - content_accumulator, - getattr(event, "delta", ""), - step_token_usage, - tool_call_records=tool_call_records.copy(), + if hasattr(stream, "get_final_response"): + async for item in self._aprocess_responses_stream( + stream, + content_accumulator, + tool_call_records, + step_token_usage, + num_tokens, + response_format, + ): + yield item + break + else: + parsed_object = None + + async for event in stream: + if event.type == "content.delta": + if getattr(event, "delta", None): + partial_response = self._create_streaming_response_with_accumulator( # noqa: E501 + content_accumulator, + getattr(event, "delta", ""), + step_token_usage, + tool_call_records=tool_call_records.copy(), + ) + yield partial_response + + elif event.type == "content.done": + parsed_object = getattr(event, "parsed", None) + break + elif event.type == "error": + logger.error( + f"Error in async structured stream: " + f"{getattr(event, 'error', '')}" ) - yield partial_response + yield self._create_error_response( + str(getattr(event, 'error', '')), + tool_call_records, + ) + return - elif event.type == "content.done": - parsed_object = getattr(event, "parsed", None) - break - elif event.type == "error": - logger.error( - f"Error in async structured stream: " - f"{getattr(event, 'error', '')}" + try: + final_completion = ( + await stream.get_final_completion() ) - yield self._create_error_response( - str(getattr(event, 'error', '')), - tool_call_records, + final_content = ( + final_completion.choices[0].message.content + or "" ) - return - # Get final completion and record final message - try: - final_completion = await stream.get_final_completion() - final_content = ( - 
final_completion.choices[0].message.content or "" - ) - - final_message = BaseMessage( - role_name=self.role_name, - role_type=self.role_type, - meta_dict={}, - content=final_content, - parsed=cast( - "BaseModel | dict[str, Any] | None", - parsed_object, - ), # type: ignore[arg-type] - ) + final_message = BaseMessage( + role_name=self.role_name, + role_type=self.role_type, + meta_dict={}, + content=final_content, + parsed=cast( + "BaseModel | dict[str, Any] | None", + parsed_object, + ), # type: ignore[arg-type] + ) - self.record_message(final_message) + self.record_message(final_message) - # Create final response - final_response = ChatAgentResponse( - msgs=[final_message], - terminated=False, - info={ - "id": final_completion.id or "", - "usage": safe_model_dump( - final_completion.usage - ) - if final_completion.usage - else {}, - "finish_reasons": [ - choice.finish_reason or "stop" - for choice in final_completion.choices - ], - "num_tokens": self._get_token_count( - final_content - ), - "tool_calls": tool_call_records, - "external_tool_requests": None, - "streaming": False, - "partial": False, - }, - ) - yield final_response - break + final_response = ChatAgentResponse( + msgs=[final_message], + terminated=False, + info={ + "id": final_completion.id or "", + "usage": safe_model_dump( + final_completion.usage + ) + if final_completion.usage + else {}, + "finish_reasons": [ + choice.finish_reason or "stop" + for choice in final_completion.choices + ], + "num_tokens": self._get_token_count( + final_content + ), + "tool_calls": tool_call_records, + "external_tool_requests": None, + "streaming": False, + "partial": False, + }, + ) + yield final_response + break - except Exception as e: - logger.error( - f"Error getting async final completion: {e}" - ) - yield self._create_error_response( - str(e), tool_call_records - ) - return + except Exception as e: + logger.error( + f"Error getting async final completion: {e}" + ) + yield self._create_error_response( + str(e), tool_call_records + ) + return else: # Handle non-streaming response (fallback) camel_response = self._normalize_to_camel_response(response) @@ -5188,6 +5215,142 @@ def _create_streaming_response_with_accumulator( }, ) + def _process_responses_stream( + self, + stream: Any, + content_accumulator: StreamContentAccumulator, + tool_call_records: List[ToolCallingRecord], + step_token_usage: Dict[str, int], + num_tokens: int, + response_format: Optional[Type[BaseModel]] = None, + ) -> Generator[ChatAgentResponse, None, None]: + """Handle streaming events from the Responses API.""" + + try: + for event in stream: + event_type = getattr(event, "type", "") + if event_type == "response.output_text.delta": + delta = getattr(event, "delta", "") + if delta: + yield self._create_streaming_response_with_accumulator( + content_accumulator, + delta, + step_token_usage, + getattr(event, "item_id", ""), + tool_call_records=tool_call_records.copy(), + ) + elif "error" in event_type or event_type.endswith(".failed"): + error_payload = getattr(event, "error", None) + message = getattr(error_payload, "message", None) + logger.error( + "Responses stream reported error (%s): %s", + event_type, + message or error_payload or "unknown", + ) + yield self._create_error_response( + str( + message + or error_payload + or "responses stream error" + ), + tool_call_records, + ) + return + except Exception as exc: + logger.error("Error while iterating Responses stream: %s", exc) + yield self._create_error_response(str(exc), tool_call_records) + return + + try: + 
final_response = stream.get_final_response() + except Exception as exc: + logger.error("Failed to finalize Responses stream: %s", exc) + yield self._create_error_response(str(exc), tool_call_records) + return + + camel_response = responses_to_camel_response( + final_response, response_format + ) + model_response = self._handle_camel_response(camel_response) + yield self._convert_to_chatagent_response( + model_response, + tool_call_records, + num_tokens, + None, + model_response.usage_dict.get("prompt_tokens", 0), + model_response.usage_dict.get("completion_tokens", 0), + model_response.usage_dict.get("total_tokens", 0), + ) + + async def _aprocess_responses_stream( + self, + stream: Any, + content_accumulator: StreamContentAccumulator, + tool_call_records: List[ToolCallingRecord], + step_token_usage: Dict[str, int], + num_tokens: int, + response_format: Optional[Type[BaseModel]] = None, + ) -> AsyncGenerator[ChatAgentResponse, None]: + """Async variant for Responses streaming events.""" + + try: + async for event in stream: + event_type = getattr(event, "type", "") + if event_type == "response.output_text.delta": + delta = getattr(event, "delta", "") + if delta: + yield self._create_streaming_response_with_accumulator( + content_accumulator, + delta, + step_token_usage, + getattr(event, "item_id", ""), + tool_call_records=tool_call_records.copy(), + ) + elif "error" in event_type or event_type.endswith(".failed"): + error_payload = getattr(event, "error", None) + message = getattr(error_payload, "message", None) + logger.error( + "Responses stream reported async error (%s): %s", + event_type, + message or error_payload or "unknown", + ) + yield self._create_error_response( + str( + message + or error_payload + or "responses stream error" + ), + tool_call_records, + ) + return + except Exception as exc: + logger.error( + "Error while iterating async Responses stream: %s", exc + ) + yield self._create_error_response(str(exc), tool_call_records) + return + + try: + final_response = await stream.get_final_response() + except Exception as exc: + logger.error("Failed to finalize async Responses stream: %s", exc) + yield self._create_error_response(str(exc), tool_call_records) + return + + camel_response = responses_to_camel_response( + final_response, response_format + ) + model_response = self._handle_camel_response(camel_response) + yield self._convert_to_chatagent_response( + model_response, + tool_call_records, + num_tokens, + None, + model_response.usage_dict.get("prompt_tokens", 0), + model_response.usage_dict.get("completion_tokens", 0), + model_response.usage_dict.get("total_tokens", 0), + ) + def get_usage_dict( self, output_messages: List[BaseMessage], prompt_tokens: int ) -> Dict[str, int]: diff --git a/camel/models/openai_responses_model.py b/camel/models/openai_responses_model.py index 3f94f68598..da326e6611 100644 --- a/camel/models/openai_responses_model.py +++ b/camel/models/openai_responses_model.py @@ -27,7 +27,9 @@ ) from camel.messages import OpenAIMessage from camel.models.base_model import BaseModelBackend -from camel.responses.model_response import CamelModelResponse +from camel.responses.adapters.responses_adapter import ( + responses_to_camel_response, +) from camel.types import ChatCompletion, ModelType from camel.utils import ( BaseTokenCounter, @@ -109,103 +111,6 @@ def token_counter(self) -> BaseTokenCounter: self._token_counter = OpenAITokenCounter(self.model_type) return self._token_counter - # ----------------------- helpers ----------------------- - def 
_to_camel_response_from_responses( - self, resp: Any, expected_parsed_type: Optional[Type[BaseModel]] = None - ) -> CamelModelResponse: - """Map a minimal Responses object to CamelModelResponse. - - This uses duck typing to avoid hard dependencies on a specific - provider SDK version. It handles the common `output_text` and - aggregates text from `output[].content[]` as a fallback. - """ - text = getattr(resp, "output_text", None) - if not text: - # Fallback: concatenate all text parts from output[].content[] - parts: List[str] = [] - output = getattr(resp, "output", None) - if isinstance(output, list): - for item in output: - content = getattr(item, "content", None) or ( - item.get("content") if isinstance(item, dict) else None - ) - if isinstance(content, list): - for c in content: - if isinstance(c, dict) and c.get("type") in ( - "output_text", - "text", - "input_text", - ): - val = ( - c.get("text") or c.get("output_text") or "" - ) - if val: - parts.append(str(val)) - text = "\n".join(parts) if parts else "" - - from camel.messages.base import BaseMessage - from camel.types import RoleType - - parsed_obj = None - if expected_parsed_type is not None: - # Prefer SDK's top-level parsed field - parsed_obj = getattr(resp, "output_parsed", None) - if parsed_obj is None: - parsed_obj = getattr(resp, "parsed", None) - if parsed_obj is None: - output = getattr(resp, "output", None) - if isinstance(output, list) and output: - first = output[0] - # Nested parsed on item or first content element - parsed_obj = getattr(first, "parsed", None) - if parsed_obj is None and isinstance(first, dict): - parsed_obj = first.get("parsed") - if parsed_obj is None: - content = getattr(first, "content", None) or ( - first.get("content") - if isinstance(first, dict) - else None - ) - if isinstance(content, list) and content: - c0 = content[0] - if isinstance(c0, dict): - parsed_obj = c0.get("parsed") - - msg = BaseMessage( - role_name="assistant", - role_type=RoleType.ASSISTANT, - meta_dict={}, - content=text or "", - parsed=parsed_obj if isinstance(parsed_obj, BaseModel) else None, - ) - - # usage is provider-specific; attach raw when present - usage_raw: Optional[Dict[str, Any]] = None - usage_obj = getattr(resp, "usage", None) - try: - if usage_obj is not None: - usage_raw = ( - usage_obj.model_dump() # type: ignore[attr-defined] - if hasattr(usage_obj, "model_dump") - else dict(usage_obj) - if isinstance(usage_obj, dict) - else None - ) - except Exception: - usage_raw = None - - return CamelModelResponse( - id=getattr(resp, "id", ""), - model=getattr(resp, "model", None), - created=getattr(resp, "created", None), - output_messages=[msg], - finish_reasons=["stop"], - usage={ - "raw": usage_raw, - }, # type: ignore[arg-type] - raw=resp, - ) - # ----------------------- BaseModelBackend API ----------------------- def _run( self, @@ -233,12 +138,22 @@ def _run( # Merge extra args from model_config_dict request_dict = dict(self.model_config_dict) + is_streaming = bool(request_dict.pop("stream", False)) request_dict.update(body) # Tools: Responses also accepts `tools`; pass through when provided if tools: request_dict["tools"] = tools + if is_streaming: + if response_format is not None: + raise NotImplementedError( + "Responses streaming with response_format is not supported yet." 
# noqa:E501 + ) + return self._client.responses.stream( + model=self.model_type, **request_dict + ) + if response_format is not None: # Structured outputs require Responses.parse with text_format parse_fn = getattr(self._client.responses, "parse", None) @@ -259,14 +174,14 @@ def _run( "Failed to perform structured parse via Responses API. " "Check that your model supports structured outputs." ) from e - return self._to_camel_response_from_responses( + return responses_to_camel_response( resp, expected_parsed_type=response_format ) # type: ignore[return-value] else: resp = self._client.responses.create( model=self.model_type, **request_dict ) - return self._to_camel_response_from_responses(resp) # type: ignore[return-value] + return responses_to_camel_response(resp) # type: ignore[return-value] async def _arun( self, @@ -288,10 +203,20 @@ async def _arun( camel_msgs = openai_messages_to_camel(messages) body = camel_messages_to_responses_request(camel_msgs) request_dict = dict(self.model_config_dict) + is_streaming = bool(request_dict.pop("stream", False)) request_dict.update(body) if tools: request_dict["tools"] = tools + if is_streaming: + if response_format is not None: + raise NotImplementedError( + "Responses streaming with response_format is not supported yet." # noqa:E501 + ) + return self._async_client.responses.stream( + model=self.model_type, **request_dict + ) + if response_format is not None: parse_fn = getattr(self._async_client.responses, "parse", None) if not callable(parse_fn): @@ -310,11 +235,11 @@ async def _arun( "Failed to call structured parse via Responses API. " "Check model support and SDK version." ) from e - return self._to_camel_response_from_responses( + return responses_to_camel_response( resp, expected_parsed_type=response_format ) # type: ignore[return-value] else: resp = await self._async_client.responses.create( model=self.model_type, **request_dict ) - return self._to_camel_response_from_responses(resp) # type: ignore[return-value] + return responses_to_camel_response(resp) # type: ignore[return-value] diff --git a/examples/models/openai_responses_quickstart_example.py b/examples/models/openai_responses_quickstart_example.py index d792e08846..be9897db25 100644 --- a/examples/models/openai_responses_quickstart_example.py +++ b/examples/models/openai_responses_quickstart_example.py @@ -64,6 +64,31 @@ def structured_output() -> None: print("Parsed:", parsed) +def streaming_request() -> None: + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, + ModelType.GPT_4_1_MINI, + model_config_dict={"stream": True}, + ) + messages = [ + {"role": "system", "content": "Stream your reply word by word."}, + { + "role": "user", + "content": "Describe a sunrise over mountains in one short paragraph.", # noqa:E501 + }, + ] + + stream = model.run(messages) + print("Streaming response:") + with stream as events: # type: ignore[assignment] + for event in events: + if getattr(event, "type", "") == "response.output_text.delta": + print(getattr(event, "delta", ""), end="", flush=True) + final = events.get_final_response() + print("\n\nFinal text:\n", getattr(final, "output_text", "")) + + if __name__ == "__main__": basic_request() structured_output() + streaming_request() From 858efd11e8c54ed3f459f332f40f62cad71b28fb Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Thu, 20 Nov 2025 10:01:42 +0300 Subject: [PATCH 08/17] implement tool calling of response api --- camel/agents/chat_agent.py | 6 +- camel/core/messages.py | 62 +++ camel/memories/records.py | 3 + 
camel/messages/base.py | 158 +++++--- camel/models/openai_responses_model.py | 18 +- camel/responses/adapters/responses_adapter.py | 356 ++++++++++++++++++ .../openai_responses_chat_agent_example.py | 208 ++++++++++ .../openai_responses_quickstart_example.py | 85 ++++- test/models/test_openai_responses_model.py | 257 +++++++++++++ 9 files changed, 1086 insertions(+), 67 deletions(-) create mode 100644 camel/responses/adapters/responses_adapter.py create mode 100644 examples/agents/openai_responses_chat_agent_example.py create mode 100644 test/models/test_openai_responses_model.py diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index ebc86b399d..85e5db92ad 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -36,9 +36,11 @@ TYPE_CHECKING, Any, AsyncGenerator, + AsyncIterator, Callable, Dict, Generator, + Iterator, List, Optional, Set, @@ -4004,7 +4006,7 @@ def _stream_response( return # Handle streaming response - if isinstance(response, Stream): + if isinstance(response, (Stream, Iterator)): ( stream_completed, tool_calls_complete, @@ -4762,7 +4764,7 @@ async def _astream_response( return # Handle streaming response - if isinstance(response, AsyncStream): + if isinstance(response, (AsyncStream, AsyncIterator)): stream_completed = False tool_calls_complete = False diff --git a/camel/core/messages.py b/camel/core/messages.py index 14245e1c9b..3d45cf97a8 100644 --- a/camel/core/messages.py +++ b/camel/core/messages.py @@ -89,6 +89,13 @@ def openai_messages_to_camel( parts.append( CamelContentPart(type="text", payload={"text": text}) ) + elif item_t == "input_text": + text = item.get("text", "") + parts.append( + CamelContentPart( + type="input_text", payload={"text": text} + ) + ) elif item_t == "image_url": image_url = item.get("image_url", {}) payload = { @@ -98,6 +105,40 @@ def openai_messages_to_camel( parts.append( CamelContentPart(type="image_url", payload=payload) ) + elif item_t == "input_image": + image_url = item.get("image_url", "") + # input_image in Responses API uses "image_url" field + # which is a string URL + # But wait, the user snippet shows: "image_url": "..." + # Chat API "image_url" is a dict {"url": "...", ...} + # Responses API "input_image" has "image_url" string. 
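To make the two accepted image shapes concrete, a small illustration (the URLs are placeholders):

    from camel.core.messages import openai_messages_to_camel

    # Chat Completions style: image_url is a dict with "url" and "detail".
    chat_style = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://example.com/cat.png",
                    "detail": "auto",
                },
            },
        ],
    }

    # Responses style: input_image carries the URL directly as a string.
    responses_style = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "What is in this image?"},
            {"type": "input_image", "image_url": "https://example.com/cat.png"},
        ],
    }

    # Both convert to CamelContentPart lists; only the payload layout differs.
    parts = openai_messages_to_camel([chat_style])[0].content
    assert [p.type for p in parts] == ["text", "image_url"]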
+ + payload = {"url": image_url} + parts.append( + CamelContentPart(type="input_image", payload=payload) + ) + elif item_t == "input_audio": + input_audio = item.get("input_audio", {}) + payload = { + "data": input_audio.get("data"), + "format": input_audio.get("format"), + } + parts.append( + CamelContentPart(type="input_audio", payload=payload) + ) + elif item_t == "input_file": + # input_file can have file_id, file_url, or file_data + payload = {} + if "file_id" in item: + payload["file_id"] = item["file_id"] + if "file_url" in item: + payload["file_url"] = item["file_url"] + if "file_data" in item: + payload["file_data"] = item["file_data"] + + parts.append( + CamelContentPart(type="input_file", payload=payload) + ) name_val = cast(Optional[str], msg.get("name", None)) tool_call_id = ( @@ -157,6 +198,27 @@ def camel_messages_to_openai( "image_url": {"url": url, "detail": detail}, } ) + elif part.type == "input_audio": + payload = part.payload + hybrid.append( + { + "type": "input_audio", + "input_audio": { + "data": payload.get("data"), + "format": payload.get("format"), + }, + } + ) + elif part.type == "input_file": + payload = part.payload + item = {"type": "input_file"} + if "file_id" in payload: + item["file_id"] = payload["file_id"] + if "file_url" in payload: + item["file_url"] = payload["file_url"] + if "file_data" in payload: + item["file_data"] = payload["file_data"] + hybrid.append(item) d = {"role": cmsg.role, "content": hybrid or ""} if cmsg.name and cmsg.role in { diff --git a/camel/memories/records.py b/camel/memories/records.py index c47fe6d03c..88195873d4 100644 --- a/camel/memories/records.py +++ b/camel/memories/records.py @@ -109,6 +109,9 @@ def from_dict(cls, record_dict: Dict[str, Any]) -> "MemoryRecord": if img_item["type"] == "url": # URL string, keep as-is image_objects.append(img_item["data"]) + elif img_item["type"] == "dict": + # Dict spec (e.g., {"url": "...", "detail": "..."}) + image_objects.append(img_item["data"]) else: # type == "base64" # Base64 encoded image, convert to PIL Image img_bytes = base64.b64decode(img_item["data"]) diff --git a/camel/messages/base.py b/camel/messages/base.py index 337c5e3f4c..d28c0cdc52 100644 --- a/camel/messages/base.py +++ b/camel/messages/base.py @@ -16,7 +16,6 @@ from __future__ import annotations import base64 -import io import re from dataclasses import dataclass from typing import ( @@ -71,6 +70,10 @@ class BaseMessage: images associated with the message. (default: :obj:`auto`) video_detail (Literal["auto", "low", "high"]): Detail level of the videos associated with the message. (default: :obj:`auto`) + audio_bytes (Optional[bytes]): Optional bytes of audio associated + with the message. (default: :obj:`None`) + audio_transcript (Optional[str]): Optional transcript of the audio. + (default: :obj:`None`) parsed: Optional[Union[Type[BaseModel], dict]]: Optional object which is parsed from the content. 
(default: :obj:`None`) """ @@ -84,6 +87,9 @@ class BaseMessage: image_list: Optional[List[Union[Image.Image, str]]] = None image_detail: Literal["auto", "low", "high"] = "auto" video_detail: Literal["auto", "low", "high"] = "auto" + audio_bytes: Optional[bytes] = None + audio_transcript: Optional[str] = None + file_list: Optional[List[Dict[str, Any]]] = None parsed: Optional[Union[BaseModel, dict]] = None @classmethod @@ -464,65 +470,67 @@ def to_openai_user_message(self) -> OpenAIUserMessage: } ) if self.image_list and len(self.image_list) > 0: - for image in self.image_list: - # Check if image is a URL string or PIL Image - if isinstance(image, str): - # Image is a URL string + import io + + for img in self.image_list: + # Support str URLs and dict image specs + if isinstance(img, str): hybrid_content.append( { "type": "image_url", "image_url": { - "url": image, + "url": img, "detail": self.image_detail, }, } ) - else: - # Image is a PIL Image object - if image.format is None: - # Set default format to PNG as fallback - image.format = 'PNG' - - image_type: str = image.format.lower() - if image_type not in OpenAIImageType: - raise ValueError( - f"Image type {image.format} " - f"is not supported by OpenAI vision model" - ) - - # Convert RGBA to RGB for formats that don't support - # transparency or when the image has transparency channel - img_to_save = image - if image.mode in ('RGBA', 'LA', 'P') and image_type in ( - 'jpeg', - 'jpg', - ): - # JPEG doesn't support transparency, convert to RGB - img_to_save = image.convert('RGB') - elif ( - image.mode in ('RGBA', 'LA', 'P') - and image_type == 'png' - ): - # For PNG with transparency, convert to RGBA if needed - if image.mode in ('LA', 'P'): - img_to_save = image.convert('RGBA') - # else: RGBA mode, keep as-is - - with io.BytesIO() as buffer: - img_to_save.save(fp=buffer, format=image.format) - encoded_image = base64.b64encode( - buffer.getvalue() - ).decode("utf-8") - image_prefix = f"data:image/{image_type};base64," + continue + elif isinstance(img, dict): hybrid_content.append( { "type": "image_url", - "image_url": { - "url": f"{image_prefix}{encoded_image}", - "detail": self.image_detail, - }, + "image_url": img, } ) + continue + + # PIL Image - process and encode + if img.format is None: + img.format = 'PNG' + + image_type: str = img.format.lower() + if image_type not in OpenAIImageType: + raise ValueError( + f"Image type {img.format} " + f"is not supported by OpenAI vision model" + ) + + # Convert RGBA to RGB for formats that don't support transparency + img_to_save = img + if img.mode in ('RGBA', 'LA', 'P') and image_type in ( + 'jpeg', + 'jpg', + ): + img_to_save = img.convert('RGB') + elif img.mode in ('RGBA', 'LA', 'P') and image_type == 'png': + if img.mode in ('LA', 'P'): + img_to_save = img.convert('RGBA') + + with io.BytesIO() as buffer: + img_to_save.save(fp=buffer, format=img.format) + encoded_image = base64.b64encode(buffer.getvalue()).decode( + "utf-8" + ) + image_prefix = f"data:image/{image_type};base64," + hybrid_content.append( + { + "type": "image_url", + "image_url": { + "url": f"{image_prefix}{encoded_image}", + "detail": self.image_detail, + }, + } + ) if self.video_bytes: import imageio.v3 as iio @@ -530,7 +538,6 @@ def to_openai_user_message(self) -> OpenAIUserMessage: base64Frames: List[str] = [] frame_count = 0 - # read video bytes video = iio.imiter( self.video_bytes, plugin=Constants.VIDEO_DEFAULT_PLUG_PYAV ) @@ -541,20 +548,14 @@ def to_openai_user_message(self) -> OpenAIUserMessage: frame_count % 
Constants.VIDEO_IMAGE_EXTRACTION_INTERVAL == 0 ): - # convert frame to numpy array frame_array = np.asarray(frame) frame_image = Image.fromarray(frame_array) - - # Get the dimensions of the frame width, height = frame_image.size - - # resize the frame to the default image size new_width = Constants.VIDEO_DEFAULT_IMAGE_SIZE aspect_ratio = width / height new_height = int(new_width / aspect_ratio) resized_img = frame_image.resize((new_width, new_height)) - # encode the image to base64 with io.BytesIO() as buffer: image_format = OpenAIImageType.JPEG.value image_format = image_format.upper() @@ -573,7 +574,24 @@ def to_openai_user_message(self) -> OpenAIUserMessage: "detail": self.video_detail, }, } + hybrid_content.append(item) + + if self.audio_bytes: + encoded_audio = base64.b64encode(self.audio_bytes).decode("utf-8") + hybrid_content.append( + { + "type": "input_audio", + "input_audio": { + "data": encoded_audio, + "format": "wav", + }, + } + ) + if self.file_list: + for file_item in self.file_list: + item = {"type": "input_file"} + item.update(file_item) hybrid_content.append(item) if len(hybrid_content) > 1: @@ -595,9 +613,25 @@ def to_openai_assistant_message(self) -> OpenAIAssistantMessage: OpenAIAssistantMessage: The converted :obj:`OpenAIAssistantMessage` object. """ + content: Union[str, List[Dict[str, Any]]] = self.content + if self.audio_bytes: + import base64 + + encoded_audio = base64.b64encode(self.audio_bytes).decode("utf-8") + content = [ + {"type": "text", "text": self.content}, + { + "type": "input_audio", + "input_audio": { + "data": encoded_audio, + "format": "wav", + }, + }, + ] + message_dict: Dict[str, Any] = { "role": "assistant", - "content": self.content, + "content": content, } # Check if meta_dict contains tool_calls @@ -632,6 +666,9 @@ def to_dict(self) -> Dict: if isinstance(img, str): # Image is a URL string, store as-is image_data_list.append({"type": "url", "data": img}) + elif isinstance(img, dict): + # Image spec dict (e.g., {"url": "...", "detail": "..."}) + image_data_list.append({"type": "dict", "data": img}) else: # Image is a PIL Image, convert to base64 # Preserve format, default to PNG if not set @@ -676,4 +713,15 @@ def to_dict(self) -> Dict: if self.video_detail is not None: result["video_detail"] = self.video_detail + if self.audio_bytes is not None: + import base64 + + result["audio_bytes"] = base64.b64encode(self.audio_bytes).decode() + + if self.audio_transcript is not None: + result["audio_transcript"] = self.audio_transcript + + if self.file_list is not None: + result["file_list"] = self.file_list + return result diff --git a/camel/models/openai_responses_model.py b/camel/models/openai_responses_model.py index da326e6611..1199ee48f1 100644 --- a/camel/models/openai_responses_model.py +++ b/camel/models/openai_responses_model.py @@ -26,8 +26,10 @@ openai_messages_to_camel, ) from camel.messages import OpenAIMessage +from camel.models._tool_utils import convert_openai_tools_to_responses_format from camel.models.base_model import BaseModelBackend from camel.responses.adapters.responses_adapter import ( + responses_stream_to_chunks, responses_to_camel_response, ) from camel.types import ChatCompletion, ModelType @@ -143,16 +145,20 @@ def _run( # Tools: Responses also accepts `tools`; pass through when provided if tools: - request_dict["tools"] = tools + request_dict["tools"] = convert_openai_tools_to_responses_format( + tools + ) if is_streaming: if response_format is not None: raise NotImplementedError( "Responses streaming with response_format 
is not supported yet." # noqa:E501 ) - return self._client.responses.stream( + request_dict["stream"] = True + stream = self._client.responses.create( model=self.model_type, **request_dict ) + return responses_stream_to_chunks(stream) # type: ignore[return-value] if response_format is not None: # Structured outputs require Responses.parse with text_format @@ -206,16 +212,20 @@ async def _arun( is_streaming = bool(request_dict.pop("stream", False)) request_dict.update(body) if tools: - request_dict["tools"] = tools + request_dict["tools"] = convert_openai_tools_to_responses_format( + tools + ) if is_streaming: if response_format is not None: raise NotImplementedError( "Responses streaming with response_format is not supported yet." # noqa:E501 ) - return self._async_client.responses.stream( + request_dict["stream"] = True + stream = await self._async_client.responses.create( model=self.model_type, **request_dict ) + return responses_stream_to_chunks(stream) # type: ignore[return-value] if response_format is not None: parse_fn = getattr(self._async_client.responses, "parse", None) diff --git a/camel/responses/adapters/responses_adapter.py b/camel/responses/adapters/responses_adapter.py new file mode 100644 index 0000000000..8c742bdc8a --- /dev/null +++ b/camel/responses/adapters/responses_adapter.py @@ -0,0 +1,356 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Helpers for converting OpenAI Responses API payloads into CAMEL types.""" + +from __future__ import annotations + +import base64 +import json +import time +from typing import Any, Dict, Iterator, List, Optional, Type + +from openai.types.chat import ChatCompletionChunk +from openai.types.chat.chat_completion_chunk import ( + Choice, + ChoiceDelta, + ChoiceDeltaToolCall, + ChoiceDeltaToolCallFunction, +) +from pydantic import BaseModel + +from camel.messages.base import BaseMessage +from camel.responses.model_response import ( + CamelModelResponse, + CamelToolCall, + CamelUsage, +) +from camel.types import RoleType + + +def responses_to_camel_response( + resp: Any, expected_parsed_type: Optional[Type[BaseModel]] = None +) -> CamelModelResponse: + r"""Map a Responses API object to :class:`CamelModelResponse`.""" + + audio_bytes: Optional[bytes] = None + audio_transcript: Optional[str] = None + + text = getattr(resp, "output_text", None) + # If text is present directly, we might still have audio in output? + # But usually output_text is a convenience field. + # Let's check output list anyway for audio if we haven't found it? + # Or just follow the existing pattern: if not text, look in output. + # But audio might be there even if text is there (e.g. multimodal). + + # We'll iterate output to find text (if not present) and audio. 
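For orientation, responses_to_camel_response relies only on duck-typed attribute access, so a hand-written stand-in is enough to exercise it. A minimal sketch (fake_resp is illustrative, not an SDK object):

    from types import SimpleNamespace

    from camel.responses.adapters.responses_adapter import (
        responses_to_camel_response,
    )

    # Hand-written stand-in for a Responses object; the adapter only needs
    # these attributes.
    fake_resp = SimpleNamespace(
        id="resp_demo",
        model="gpt-4.1-mini",
        created=1730000000,
        output_text=None,
        usage=None,
        output=[
            {
                "type": "message",
                "content": [
                    {"type": "output_text", "text": "The answer is 12."}
                ],
            },
            {
                "type": "function_call",
                "call_id": "call_abc123",
                "name": "add",
                "arguments": '{"a": 5, "b": 7}',
            },
        ],
    )

    camel_resp = responses_to_camel_response(fake_resp)
    # Text parts are concatenated into the assistant message; "function_call"
    # items become CamelToolCall entries with parsed arguments.
    assert camel_resp.output_messages[0].content == "The answer is 12."
    assert camel_resp.tool_call_requests[0].name == "add"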
+ parts: List[str] = [] + output = getattr(resp, "output", None) + if isinstance(output, list): + for item in output: + content = getattr(item, "content", None) or ( + item.get("content") if isinstance(item, dict) else None + ) + if isinstance(content, list): + for chunk in content: + chunk_type = None + if isinstance(chunk, dict): + chunk_type = chunk.get("type") + + if chunk_type in ("output_text", "text", "input_text"): + val = chunk.get("text") or chunk.get("output_text") + if val: + parts.append(str(val)) + elif chunk_type == "output_audio": + audio = chunk.get("audio") + if isinstance(audio, dict): + b64_data = audio.get("data") + if b64_data: + try: + audio_bytes = base64.b64decode(b64_data) + except Exception: + pass + transcript = audio.get("transcript") + if transcript: + audio_transcript = transcript + elif chunk_type == "function_call": + # Handle tool calls (Responses API uses 'function_call' type for tools currently) + # It seems they are top-level items in output list, not chunks in content list? + # Wait, the debug output showed output list containing ResponseFunctionToolCall. + # So it's an item in output, NOT in content list of an item. + pass + + # Check for tool calls in the output list directly + tool_call_requests: List[CamelToolCall] = [] + if isinstance(output, list): + for item in output: + # Check if item is a tool call + # The item might be an object or dict + item_type = getattr(item, "type", None) or ( + item.get("type") if isinstance(item, dict) else None + ) + + if item_type == "function_call": + call_id = getattr(item, "call_id", None) or ( + item.get("call_id") if isinstance(item, dict) else None + ) + name = getattr(item, "name", None) or ( + item.get("name") if isinstance(item, dict) else None + ) + arguments = getattr(item, "arguments", None) or ( + item.get("arguments") if isinstance(item, dict) else None + ) + + if call_id and name: + args_dict = {} + if arguments: + try: + args_dict = json.loads(arguments) + except Exception: + pass + + tool_call_requests.append( + CamelToolCall(id=call_id, name=name, args=args_dict) + ) + + if not text: + text = "\n".join(parts) if parts else "" + + parsed_obj = None + if expected_parsed_type is not None: + parsed_obj = getattr(resp, "output_parsed", None) + if parsed_obj is None: + parsed_obj = getattr(resp, "parsed", None) + if parsed_obj is None: + output = getattr(resp, "output", None) + if isinstance(output, list) and output: + first = output[0] + parsed_obj = getattr(first, "parsed", None) + if parsed_obj is None and isinstance(first, dict): + parsed_obj = first.get("parsed") + if parsed_obj is None: + content = getattr(first, "content", None) or ( + first.get("content") + if isinstance(first, dict) + else None + ) + if isinstance(content, list) and content: + first_content = content[0] + if isinstance(first_content, dict): + parsed_obj = first_content.get("parsed") + + message = BaseMessage( + role_name="assistant", + role_type=RoleType.ASSISTANT, + meta_dict={}, + content=text or "", + audio_bytes=audio_bytes, + audio_transcript=audio_transcript, + parsed=parsed_obj if isinstance(parsed_obj, BaseModel) else None, + ) + + # If we have tool calls, we should also attach them to the message meta_dict + # so that message.to_openai_assistant_message() works correctly + if tool_call_requests: + # OpenAI message format for tool_calls + openai_tool_calls = [] + for tc in tool_call_requests: + openai_tool_calls.append( + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": 
json.dumps(tc.args), + }, + } + ) + if message.meta_dict is None: + message.meta_dict = {} + message.meta_dict["tool_calls"] = openai_tool_calls + + usage_raw: Optional[Dict[str, Any]] = None + usage_obj = getattr(resp, "usage", None) + try: + if usage_obj is not None: + if hasattr(usage_obj, "model_dump"): + usage_raw = usage_obj.model_dump() # type: ignore[no-any-return] + elif isinstance(usage_obj, dict): + usage_raw = dict(usage_obj) + except Exception: + usage_raw = None + + usage_dict = usage_raw or {} + usage = CamelUsage( + input_tokens=usage_dict.get("prompt_tokens"), + output_tokens=usage_dict.get("completion_tokens"), + total_tokens=usage_dict.get("total_tokens"), + raw=usage_raw or None, + ) + + return CamelModelResponse( + id=getattr(resp, "id", ""), + model=getattr(resp, "model", None), + created=getattr(resp, "created", None), + output_messages=[message], + tool_call_requests=tool_call_requests if tool_call_requests else None, + finish_reasons=["stop"], + usage=usage, + raw=resp, + ) + + +def responses_stream_to_chunks( + stream: Iterator[Any], +) -> Iterator[ChatCompletionChunk]: + """Convert a Responses API stream into ChatCompletionChunk iterator. + + This allows existing streaming consumers (like ChatAgent) to consume + Responses API streams without modification. + """ + # We need to track state because Responses events are granular + # and we need to emit ChatCompletionChunk which expects certain structure. + + # We'll use a dummy ID and model if not available immediately, + # but usually the first event might not have them. + # We can update them as we go. + + response_id = "" + model = "" + created = int(time.time()) + + # Track tool calls by index + # Responses API output_index seems to correspond to the item index in output list. + # If we have multiple outputs, we need to map them to choices? + # Usually ChatCompletion has one choice for stream unless n>1. + # Responses API 'output' list can have multiple items (e.g. text then tool call). + # We will map all output items to choice index 0 for now, as they are part of the same generation sequence? + # Or should we map output_index to choice_index? + # In ChatCompletion, multiple choices usually mean n>1 (alternative generations). + # In Responses API, output list is the sequence of content (multimodal). + # So they should all belong to choice 0, but with different content parts? + # ChatCompletionChunk delta has 'content' (string) and 'tool_calls' (list). + # It doesn't support multiple content parts in delta easily unless we concatenate text. + + # For tool calls, they are separate fields in delta. + + for chunk in stream: + chunk_type = getattr(chunk, "type", None) + + # Update common fields if available + if hasattr(chunk, "response"): + resp = chunk.response + if hasattr(resp, "id"): + response_id = resp.id + if hasattr(resp, "model"): + model = resp.model + if hasattr(resp, "created_at"): + created = int(resp.created_at) + + if chunk_type == "response.output_item.added": + item = getattr(chunk, "item", None) + item_type = getattr(item, "type", None) + output_index = getattr(chunk, "output_index", 0) + + if item_type == "function_call": + # Start of a tool call + call_id = getattr(item, "call_id", None) + name = getattr(item, "name", None) + + tool_call = ChoiceDeltaToolCall( + index=output_index, # Use output_index as tool_call index? 
+ id=call_id, + type="function", + function=ChoiceDeltaToolCallFunction( + name=name, arguments="" + ), + ) + + yield ChatCompletionChunk( + id=response_id, + choices=[ + Choice( + delta=ChoiceDelta(tool_calls=[tool_call]), + finish_reason=None, + index=0, + logprobs=None, + ) + ], + created=created, + model=model, + object="chat.completion.chunk", + ) + + elif chunk_type == "response.function_call_arguments.delta": + delta_arg = getattr(chunk, "delta", "") + output_index = getattr(chunk, "output_index", 0) + + tool_call = ChoiceDeltaToolCall( + index=output_index, + function=ChoiceDeltaToolCallFunction(arguments=delta_arg), + ) + + yield ChatCompletionChunk( + id=response_id, + choices=[ + Choice( + delta=ChoiceDelta(tool_calls=[tool_call]), + finish_reason=None, + index=0, + logprobs=None, + ) + ], + created=created, + model=model, + object="chat.completion.chunk", + ) + + elif chunk_type == "response.output_text.delta": + delta_text = getattr(chunk, "delta", "") + + yield ChatCompletionChunk( + id=response_id, + choices=[ + Choice( + delta=ChoiceDelta(content=delta_text), + finish_reason=None, + index=0, + logprobs=None, + ) + ], + created=created, + model=model, + object="chat.completion.chunk", + ) + + elif chunk_type == "response.output_item.done": + # Item finished. We might want to signal finish_reason if it's the last one? + # But we don't know if it's the last one until response.completed. + pass + + elif chunk_type == "response.completed": + # Final chunk with finish reason + yield ChatCompletionChunk( + id=response_id, + choices=[ + Choice( + delta=ChoiceDelta(), + finish_reason="stop", + index=0, + logprobs=None, + ) + ], + created=created, + model=model, + object="chat.completion.chunk", + ) diff --git a/examples/agents/openai_responses_chat_agent_example.py b/examples/agents/openai_responses_chat_agent_example.py new file mode 100644 index 0000000000..558b0cfc53 --- /dev/null +++ b/examples/agents/openai_responses_chat_agent_example.py @@ -0,0 +1,208 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +""" +This example demonstrates how to use the CAMEL ChatAgent with the OpenAI +Responses API model. It covers: + + 1) Basic chat + 2) Structured output + 3) Streaming chat + 4) Tool calling + 5) Image analysis + 6) File analysis + +Requirements: + export OPENAI_API_KEY=sk-... +""" + +from pydantic import BaseModel + +from camel.agents import ChatAgent +from camel.messages import BaseMessage +from camel.models import ModelFactory +from camel.toolkits import FunctionTool +from camel.types import ModelPlatformType, ModelType, RoleType + + +def basic_chat(): + print("\n=== Basic Chat ===") + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + agent = ChatAgent( + system_message=BaseMessage.make_assistant_message( + role_name="Assistant", content="You are a helpful assistant." 
+ ), + model=model, + ) + + response = agent.step("Tell me a joke about camels.") + print(response.msgs[0].content) + + +def structured_output_chat(): + print("\n=== Structured Output Chat ===") + + class CountryInfo(BaseModel): + name: str + capital: str + population: int + + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + + # Note: ChatAgent currently doesn't expose response_format in step() directly + # but we can pass it via output_schema if supported, or configure the model? + # Actually ChatAgent.step has response_format argument? No. + # But we can use the model's run method directly? No, user wants ChatAgent. + # ChatAgent supports structured output via `output_schema` in `step`? No. + # ChatAgent supports `response_format` in `step`? Let's check. + # ChatAgent.step(..., response_format=...) + + agent = ChatAgent( + system_message=BaseMessage.make_assistant_message( + role_name="Assistant", content="You provide country info." + ), + model=model, + ) + + response = agent.step("France", response_format=CountryInfo) + print(f"Structured Output: {response.msgs[0].parsed}") + + +def streaming_chat(): + print("\n=== Streaming Chat ===") + # To enable streaming, we configure the model with stream=True + # ChatAgent will consume the stream and return the final response. + # To see the stream, we would need to use agent.step_stream (if it existed) + # or rely on the fact that ChatAgent accumulates it. + + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, + ModelType.GPT_4_1_MINI, + model_config_dict={"stream": True}, + ) + + agent = ChatAgent( + system_message=BaseMessage.make_assistant_message( + role_name="Assistant", content="You are a poet." + ), + model=model, + ) + + # This will block until completion, but internally it uses streaming + response = agent.step("Write a short poem about the sunrise.") + print(f"Streaming Result: {response.msgs[0].content}") + + +def tool_call_chat(): + print("\n=== Tool Call Chat ===") + + def add(a: int, b: int) -> int: + """Adds two numbers.""" + return a + b + + tools = [FunctionTool(add)] + + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + + agent = ChatAgent( + system_message=BaseMessage.make_assistant_message( + role_name="Assistant", content="You are a math helper." + ), + model=model, + tools=tools, + ) + + response = agent.step("What is 5 + 7?") + # ChatAgent automatically executes the tool and gets the result + # But usually it requires a loop or auto_tool_call=True? + # ChatAgent defaults to external_tool_call=False, so it executes tools? + # Let's check ChatAgent defaults. + # It should execute tools if tools are provided. + + print(f"Tool Call Result: {response.msgs[0].content}") + print(f"Tool Calls: {response.info['tool_calls']}") + + +def image_analysis_chat(): + print("\n=== Image Analysis Chat ===") + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + agent = ChatAgent( + system_message=BaseMessage.make_assistant_message( + role_name="Assistant", content="You are an image analyst." 
+ ), + model=model, + ) + + # Construct a message with image + msg = BaseMessage( + role_name="User", + role_type=RoleType.USER, + meta_dict={}, + content="What is in this image?", + image_list=[ + { + "url": "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png", + "detail": "auto", + } + ], + ) + + response = agent.step(msg) + print(f"Image Analysis: {response.msgs[0].content}") + + +def file_analysis_chat(): + print("\n=== File Analysis Chat ===") + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + agent = ChatAgent( + system_message=BaseMessage.make_assistant_message( + role_name="Assistant", content="You are a file analyst." + ), + model=model, + ) + + # Construct a message with file + # We use the newly added file_list field in BaseMessage + msg = BaseMessage( + role_name="User", + role_type=RoleType.USER, + meta_dict={}, + content="What is in this file?", + file_list=[ + { + "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf" + } + ], + ) + + response = agent.step(msg) + print(f"File Analysis: {response.msgs[0].content}") + + +if __name__ == "__main__": + basic_chat() + structured_output_chat() + streaming_chat() + tool_call_chat() + image_analysis_chat() + file_analysis_chat() diff --git a/examples/models/openai_responses_quickstart_example.py b/examples/models/openai_responses_quickstart_example.py index be9897db25..e2ce2d187a 100644 --- a/examples/models/openai_responses_quickstart_example.py +++ b/examples/models/openai_responses_quickstart_example.py @@ -17,6 +17,10 @@ 1) Create the Responses backend via ModelFactory 2) Make a basic non-streaming request 3) Do structured output parsing with a Pydantic schema + 4) Make a tool call request + 5) Stream responses (text and tool calls) + 6) Analyze an image (multimodal input) + 7) Analyze a file (PDF input) Requirements: export OPENAI_API_KEY=sk-... 
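The tool-calling demo added below passes Chat-Completions-style schemas from FunctionTool.get_openai_tool_schema(); internally the backend converts them with convert_openai_tools_to_responses_format, which this patch references but does not show. A rough sketch of what such a conversion presumably does (chat_tool_to_responses_tool is a hypothetical helper, not the actual implementation):

    def chat_tool_to_responses_tool(tool: dict) -> dict:
        # Chat Completions nests the schema under "function"; the Responses
        # API expects it flattened at the top level.
        fn = tool.get("function", {})
        return {
            "type": "function",
            "name": fn.get("name"),
            "description": fn.get("description"),
            "parameters": fn.get("parameters"),
        }


    chat_style_tool = {
        "type": "function",
        "function": {
            "name": "add",
            "description": "Adds two numbers.",
            "parameters": {
                "type": "object",
                "properties": {
                    "a": {"type": "integer"},
                    "b": {"type": "integer"},
                },
                "required": ["a", "b"],
            },
        },
    }

    print(chat_tool_to_responses_tool(chat_style_tool))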
@@ -80,15 +84,84 @@ def streaming_request() -> None: stream = model.run(messages) print("Streaming response:") - with stream as events: # type: ignore[assignment] - for event in events: - if getattr(event, "type", "") == "response.output_text.delta": - print(getattr(event, "delta", ""), end="", flush=True) - final = events.get_final_response() - print("\n\nFinal text:\n", getattr(final, "output_text", "")) + for chunk in stream: + if chunk.choices and chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) + print("\n") + + +def tool_call_request() -> None: + from camel.toolkits import FunctionTool + + def add(a: int, b: int) -> int: + """Adds two numbers.""" + return a + b + + add_tool = FunctionTool(add) + + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + + tools = [add_tool.get_openai_tool_schema()] + messages = [{"role": "user", "content": "What is 5 + 7?"}] + + resp = model.run(messages, tools=tools) + print("Tool Calls:", resp.tool_call_requests) + if resp.tool_call_requests: + for tool_call in resp.tool_call_requests: + print(f"Function: {tool_call.name}, Args: {tool_call.args}") + + +def image_analysis_request() -> None: + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + + # Using Responses API style input types (input_text, input_image) + messages = [ + { + "role": "user", + "content": [ + {"type": "input_text", "text": "what is in this image?"}, + { + "type": "input_image", + "image_url": "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png", + }, + ], + } + ] + + resp = model.run(messages) + print("Image Analysis Result:\n", resp.output_messages[0].content) + + +def file_analysis_request() -> None: + model = ModelFactory.create( + ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI + ) + + messages = [ + { + "role": "user", + "content": [ + {"type": "input_text", "text": "what is in this file?"}, + { + "type": "input_file", + "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf", + }, + ], + } + ] + + resp = model.run(messages) + print("File Analysis Result:\n", resp.output_messages[0].content) if __name__ == "__main__": basic_request() structured_output() streaming_request() + tool_call_request() + image_analysis_request() + file_analysis_request() diff --git a/test/models/test_openai_responses_model.py b/test/models/test_openai_responses_model.py new file mode 100644 index 0000000000..765a5a3ec2 --- /dev/null +++ b/test/models/test_openai_responses_model.py @@ -0,0 +1,257 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= + +import os +import types +from contextlib import contextmanager +from unittest.mock import MagicMock + +from pydantic import BaseModel + +from camel.models.openai_responses_model import OpenAIResponsesModel +from camel.responses.adapters.responses_adapter import ( + responses_to_camel_response, +) +from camel.responses.model_response import CamelModelResponse +from camel.types import ModelType + + +@contextmanager +def env(var: str, value: str): + old = os.environ.get(var) + os.environ[var] = value + try: + yield + finally: + if old is None: + del os.environ[var] + else: + os.environ[var] = old + + +class _StubResp: + def __init__(self, text: str): + self.id = "resp_001" + self.model = "gpt-4.1-mini" + self.created = 1730000200 + self.output_text = text + self.usage = {"input_tokens": 1, "output_tokens": 2} + + +def test_openai_responses_model_non_streaming(monkeypatch): + with env("OPENAI_API_KEY", "test"): + model = OpenAIResponsesModel(ModelType.GPT_4_1_MINI) + + class _StubClient: + def __init__(self): + self.responses = types.SimpleNamespace( + create=lambda **kwargs: _StubResp("ok") + ) + + # Replace clients to avoid network + model._client = _StubClient() + + result = model.run([{"role": "user", "content": "hello"}]) + assert isinstance(result, CamelModelResponse) + assert result.id == "resp_001" + assert ( + result.output_messages + and result.output_messages[0].content == "ok" + ) + + +class _Person(BaseModel): + name: str + + +def test_openai_responses_model_parse(monkeypatch): + with env("OPENAI_API_KEY", "test"): + model = OpenAIResponsesModel(ModelType.GPT_4_1_MINI) + + class _StubParsedResp: + def __init__(self): + self.id = "resp_002" + self.model = "gpt-4.1-mini" + self.created = 1730000300 + self.output_text = "John" + self.parsed = _Person(name="John") + + class _StubClient: + def __init__(self): + self.responses = types.SimpleNamespace( + parse=lambda **kwargs: _StubParsedResp() + ) + + model._client = _StubClient() + result = model.run( + [{"role": "user", "content": "who?"}], response_format=_Person + ) + assert isinstance(result, CamelModelResponse) + assert result.id == "resp_002" + msg = result.output_messages[0] + assert msg.content == "John" + assert isinstance(msg.parsed, _Person) and msg.parsed.name == "John" + + +def test_responses_to_camel_response_with_audio(): + # Mock a response with audio output + mock_response = MagicMock() + mock_response.id = "resp_123" + mock_response.model = "gpt-4o-audio-preview" + mock_response.created = 1234567890 + + mock_audio_chunk = { + "type": "output_audio", + "audio": { + "data": ( + "UklGRgAAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAA" + "AAA=" + ), + "format": "wav", + "transcript": "Hello world", + }, + } + + mock_text_chunk = {"type": "output_text", "text": "Hello text"} + + mock_item = MagicMock() + mock_item.content = [mock_text_chunk, mock_audio_chunk] + + mock_response.output = [mock_item] + mock_response.output_text = None + mock_response.usage = None # Fix validation error + + camel_response = responses_to_camel_response(mock_response) + + assert "Hello text" in camel_response.output_messages[0].content + assert camel_response.output_messages[0].audio_bytes is not None + assert camel_response.output_messages[0].audio_transcript == "Hello world" + + +def test_openai_messages_to_camel_with_audio(): + from camel.core.messages import openai_messages_to_camel + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Listen to this"}, + { + "type": "input_audio", + "input_audio": 
{"data": "base64data", "format": "wav"}, + }, + ], + } + ] + + # Currently this will ignore the audio part because it's not implemented + camel_msgs = openai_messages_to_camel(messages) + assert len(camel_msgs) == 1 + assert len(camel_msgs[0].content) == 2 + assert camel_msgs[0].content[0].type == "text" + assert camel_msgs[0].content[1].type == "input_audio" + assert camel_msgs[0].content[1].payload["data"] == "base64data" + assert camel_msgs[0].content[1].payload["format"] == "wav" + + +def test_openai_messages_to_camel_with_file(): + from camel.core.messages import openai_messages_to_camel + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Analyze this file"}, + {"type": "input_file", "file_id": "file-123"}, + ], + } + ] + + camel_msgs = openai_messages_to_camel(messages) + assert len(camel_msgs) == 1 + assert len(camel_msgs[0].content) == 2 + assert camel_msgs[0].content[1].type == "input_file" + assert camel_msgs[0].content[1].payload["file_id"] == "file-123" + + +def test_responses_stream_tools(): + from openai.types.chat import ChatCompletionChunk + + from camel.models.openai_responses_model import OpenAIResponsesModel + from camel.types import ModelType + + # Mock chunks + class MockChunk: + def __init__(self, type, **kwargs): + self.type = type + for k, v in kwargs.items(): + setattr(self, k, v) + + mock_chunks = [ + MockChunk( + "response.output_item.added", + item=types.SimpleNamespace( + type="function_call", call_id="call_123", name="test_tool" + ), + output_index=0, + ), + MockChunk( + "response.function_call_arguments.delta", + delta='{"arg":', + output_index=0, + ), + MockChunk( + "response.function_call_arguments.delta", + delta='"val"}', + output_index=0, + ), + MockChunk("response.output_item.done"), + MockChunk("response.completed"), + ] + + with env("OPENAI_API_KEY", "test"): + model = OpenAIResponsesModel( + ModelType.GPT_4_1_MINI, model_config_dict={"stream": True} + ) + + class _StubClient: + def __init__(self): + self.responses = types.SimpleNamespace( + create=lambda **kwargs: iter(mock_chunks) + ) + + model._client = _StubClient() + + stream = model.run([{"role": "user", "content": "call tool"}]) + + chunks = list(stream) + assert len(chunks) > 0 + assert isinstance(chunks[0], ChatCompletionChunk) + + # Verify tool call start + assert chunks[0].choices[0].delta.tool_calls[0].id == "call_123" + assert ( + chunks[0].choices[0].delta.tool_calls[0].function.name + == "test_tool" + ) + + # Verify arguments delta + assert ( + chunks[1].choices[0].delta.tool_calls[0].function.arguments + == '{"arg":' + ) + assert ( + chunks[2].choices[0].delta.tool_calls[0].function.arguments + == '"val"}' + ) From cbb386cef03837cc167aad9cd51c1b990cb99ea2 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Thu, 20 Nov 2025 10:04:00 +0300 Subject: [PATCH 09/17] implement tool calling of response api --- .../openai_responses_chat_agent_example.py | 82 ++++++++++++++++--- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/examples/agents/openai_responses_chat_agent_example.py b/examples/agents/openai_responses_chat_agent_example.py index 558b0cfc53..67f2304cc6 100644 --- a/examples/agents/openai_responses_chat_agent_example.py +++ b/examples/agents/openai_responses_chat_agent_example.py @@ -63,14 +63,6 @@ class CountryInfo(BaseModel): ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI ) - # Note: ChatAgent currently doesn't expose response_format in step() directly - # but we can pass it via output_schema if supported, or configure the 
model? - # Actually ChatAgent.step has response_format argument? No. - # But we can use the model's run method directly? No, user wants ChatAgent. - # ChatAgent supports structured output via `output_schema` in `step`? No. - # ChatAgent supports `response_format` in `step`? Let's check. - # ChatAgent.step(..., response_format=...) - agent = ChatAgent( system_message=BaseMessage.make_assistant_message( role_name="Assistant", content="You provide country info." @@ -129,11 +121,6 @@ def add(a: int, b: int) -> int: ) response = agent.step("What is 5 + 7?") - # ChatAgent automatically executes the tool and gets the result - # But usually it requires a loop or auto_tool_call=True? - # ChatAgent defaults to external_tool_call=False, so it executes tools? - # Let's check ChatAgent defaults. - # It should execute tools if tools are provided. print(f"Tool Call Result: {response.msgs[0].content}") print(f"Tool Calls: {response.info['tool_calls']}") @@ -206,3 +193,72 @@ def file_analysis_chat(): tool_call_chat() image_analysis_chat() file_analysis_chat() + + +''' +=== Basic Chat === +Sure! Here's a camel joke for you: + +Why do camels make terrible secret agents? +Because they always let the hump out of the bag! + +=== Structured Output Chat === +Structured Output: name='France' capital='Paris' population=67081000 + +=== Streaming Chat === +Streaming Result: Golden hues ignite the sky, +Whispers of dawn begin to fly. +Night retreats with gentle grace, +Sunrise paints a warm embrace. + +=== Tool Call Chat === +Tool Call Result: Yes, 5 + 7 equals 12. If you have any more math questions, feel free to ask! +Tool Calls: [ToolCallingRecord(tool_name='add', args={'a': 5, 'b': 7}, result=12, tool_call_id='call_z3DX7GyVj6sSYk7e6iEd3nVV', images=None)] + +=== Image Analysis Chat === +Image Analysis: This image shows the Google logo. The logo features the word "Google" written in a stylized font with the letters in different colors: blue for "G," red for the first "o," yellow for the second "o," blue for the "g," green for the "l," and red for the "e." + +=== File Analysis Chat === +File Analysis: This file is a detailed excerpt from the 2024 Berkshire Hathaway Inc. annual shareholder letter written by Warren E. Buffett, the company's Chairman of the Board. The letter covers a wide range of topics including: + +1. **Introduction and Philosophy**: + - Buffett explains the purpose of the annual letter, emphasizing transparency and honest communication with shareholders. + - He acknowledges that Berkshire makes mistakes in business acquisitions and personnel decisions but also shares stories of success. + +2. **Notable Tribute**: + - A specific tribute to Pete Liegl, the founder of Forest River (an RV manufacturer acquired by Berkshire), highlighting his integrity, compensation agreement, and outstanding business performance. + +3. **Company Performance**: + - Summary of Berkshire’s financial performance in 2024, including operating earnings ($47.4 billion) and breakdowns by business segments (insurance underwriting, investment income, railroad, utilities, and other businesses). + - Discussion about the growth and improvements in GEICO and the overall property-casualty insurance sector. + - Commentary on challenges posed by climate change and natural disasters. + +4. **Historical Context**: + - Reflection on Berkshire’s 60-year transformation since Buffett took control, including record corporate income tax payments made to the U.S. Treasury ($26.8 billion in 2024). 
+ - Details on Berkshire’s investment strategy that emphasizes reinvestment over dividends for long-term growth. + +5. **Investment Philosophy and Holdings**: + - Berkshire’s dual strategy of owning controlling interest in many companies (generally 100%) and significant minority stakes in major public companies like Apple, American Express, and Coca-Cola. + - The company’s preference for equities over cash or bonds, focusing on long-term value. + +6. **Property-Casualty Insurance Business**: + - Explanation of the unique challenges of the P/C insurance business model, where premiums are collected upfront but costs may be settled many years later. + - The importance of prudent underwriting and risk management to avoid losses. + - Description of Berkshire's capability to handle large insurance losses without dependence on reinsurers. + +7. **International Investments**: + - Information on Berkshire’s growing investments in five major Japanese trading companies, their shareholder-friendly practices, and plans for long-term involvement. + - Commentary on currency risk management related to yen-denominated debt. + +8. **Annual Meeting in Omaha**: + - Invitation to shareholders for the annual meeting on May 3rd, 2025. + - Details on event activities, merchandise, and featured book selling. + - Personal notes including mention of Buffett’s sister and light-hearted commentary. + +9. **Historical Performance Comparison**: + - Comparative data showing Berkshire Hathaway’s annual percentage change in per-share market value against the S&P 500, spanning from 1965 to 2024. + - Highlights Buffett's surpassed compound annual gain of 19.9% compared to the S&P 500’s 10.4% and extraordinary long-term gains of 5,502,284% vs. 39,054%. + +**Summary:** +This document is a comprehensive, candid, and reflective communication from Warren Buffett about Berkshire Hathaway’s business performance, management philosophy, investment strategies, historical context, and future outlook, intended for the company’s shareholders. It combines financial data, storytelling, and personal insights characteristic of Berkshire Hathaway's annual shareholder letters. 
+''' # noqa: E501, RUF001 From b6b1f008f4b7c4787704f117bd97ffa8f23f674d Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Thu, 20 Nov 2025 10:11:21 +0300 Subject: [PATCH 10/17] fix logging error --- camel/models/base_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/camel/models/base_model.py b/camel/models/base_model.py index 13cd51b8c5..d3c9d1a96d 100644 --- a/camel/models/base_model.py +++ b/camel/models/base_model.py @@ -286,7 +286,7 @@ def _log_request(self, messages: List[OpenAIMessage]) -> Optional[str]: } with open(log_file_path, "w") as f: - json.dump(log_entry, f, indent=4) + json.dump(log_entry, f, indent=4, default=str) return log_file_path @@ -317,7 +317,7 @@ def _log_response(self, log_path: str, response: Any) -> None: log_data["response"] = str(response) f.seek(0) - json.dump(log_data, f, indent=4) + json.dump(log_data, f, indent=4, default=str) f.truncate() @abstractmethod From 3dd201457d759ab87c9ac142af25c223b0e67dbe Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Tue, 25 Nov 2025 16:06:22 +0300 Subject: [PATCH 11/17] remove extra comments and add util function --- camel/models/_utils.py | 48 ++++++++++++++++++- camel/models/openai_responses_model.py | 2 +- camel/responses/adapters/responses_adapter.py | 38 --------------- 3 files changed, 48 insertions(+), 40 deletions(-) diff --git a/camel/models/_utils.py b/camel/models/_utils.py index 462606efb5..252531d850 100644 --- a/camel/models/_utils.py +++ b/camel/models/_utils.py @@ -12,7 +12,7 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= import textwrap -from typing import Optional, Type +from typing import Any, Dict, Iterable, List, Optional, Type from pydantic import BaseModel @@ -55,3 +55,49 @@ def try_modify_message_with_format( """ # noqa: E501 ) message["content"] = updated_prompt + + +def convert_openai_tools_to_responses_format( + tools: Iterable[Dict[str, Any]], +) -> List[Dict[str, Any]]: + """Convert Chat Completions style tool schemas to Responses format. + + Chat Completions expects each tool entry to look like + ``{"type": "function", "function": {...}}``. The Responses API instead + flattens the schema + (``{"type": "function", "name": ..., "parameters": ...}``). + This helper performs the conversion while keeping unrelated tool types + untouched. + """ + + converted: List[Dict[str, Any]] = [] + for tool in tools: + if not isinstance(tool, dict): + continue + + tool_type = tool.get("type") + func_schema = tool.get("function") + + if tool_type == "function" and isinstance(func_schema, dict): + converted_tool: Dict[str, Any] = {"type": "function"} + converted_tool["name"] = func_schema.get("name") + converted_tool["description"] = func_schema.get("description", "") + converted_tool["parameters"] = func_schema.get("parameters", {}) + # Strict mode defaults to False if unspecified. + if "strict" in tool: + converted_tool["strict"] = tool["strict"] + elif "strict" in func_schema: + converted_tool["strict"] = func_schema["strict"] + # Some schemas store metadata on the outer object + # (e.g. cache control). + # Preserve remaining top-level keys that are not part of + # Chat schema. 
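+            # Illustrative (hypothetical "cache_control" extra key):
+            #   {"type": "function",
+            #    "function": {"name": "add", "parameters": {...}},
+            #    "cache_control": {...}}
+            #   -> {"type": "function", "name": "add",
+            #      "parameters": {...}, "cache_control": {...}}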
+ for key, value in tool.items(): + if key in {"type", "function"}: + continue + converted_tool[key] = value + converted.append(converted_tool) + else: + converted.append(tool) + + return converted diff --git a/camel/models/openai_responses_model.py b/camel/models/openai_responses_model.py index 1199ee48f1..9b130bf2b4 100644 --- a/camel/models/openai_responses_model.py +++ b/camel/models/openai_responses_model.py @@ -26,7 +26,7 @@ openai_messages_to_camel, ) from camel.messages import OpenAIMessage -from camel.models._tool_utils import convert_openai_tools_to_responses_format +from camel.models._utils import convert_openai_tools_to_responses_format from camel.models.base_model import BaseModelBackend from camel.responses.adapters.responses_adapter import ( responses_stream_to_chunks, diff --git a/camel/responses/adapters/responses_adapter.py b/camel/responses/adapters/responses_adapter.py index 8c742bdc8a..e7101c1ea3 100644 --- a/camel/responses/adapters/responses_adapter.py +++ b/camel/responses/adapters/responses_adapter.py @@ -47,13 +47,6 @@ def responses_to_camel_response( audio_transcript: Optional[str] = None text = getattr(resp, "output_text", None) - # If text is present directly, we might still have audio in output? - # But usually output_text is a convenience field. - # Let's check output list anyway for audio if we haven't found it? - # Or just follow the existing pattern: if not text, look in output. - # But audio might be there even if text is there (e.g. multimodal). - - # We'll iterate output to find text (if not present) and audio. parts: List[str] = [] output = getattr(resp, "output", None) if isinstance(output, list): @@ -84,10 +77,6 @@ def responses_to_camel_response( if transcript: audio_transcript = transcript elif chunk_type == "function_call": - # Handle tool calls (Responses API uses 'function_call' type for tools currently) - # It seems they are top-level items in output list, not chunks in content list? - # Wait, the debug output showed output list containing ResponseFunctionToolCall. - # So it's an item in output, NOT in content list of an item. pass # Check for tool calls in the output list directly @@ -159,8 +148,6 @@ def responses_to_camel_response( parsed=parsed_obj if isinstance(parsed_obj, BaseModel) else None, ) - # If we have tool calls, we should also attach them to the message meta_dict - # so that message.to_openai_assistant_message() works correctly if tool_call_requests: # OpenAI message format for tool_calls openai_tool_calls = [] @@ -218,32 +205,10 @@ def responses_stream_to_chunks( This allows existing streaming consumers (like ChatAgent) to consume Responses API streams without modification. """ - # We need to track state because Responses events are granular - # and we need to emit ChatCompletionChunk which expects certain structure. - - # We'll use a dummy ID and model if not available immediately, - # but usually the first event might not have them. - # We can update them as we go. - response_id = "" model = "" created = int(time.time()) - # Track tool calls by index - # Responses API output_index seems to correspond to the item index in output list. - # If we have multiple outputs, we need to map them to choices? - # Usually ChatCompletion has one choice for stream unless n>1. - # Responses API 'output' list can have multiple items (e.g. text then tool call). - # We will map all output items to choice index 0 for now, as they are part of the same generation sequence? - # Or should we map output_index to choice_index? 
- # In ChatCompletion, multiple choices usually mean n>1 (alternative generations). - # In Responses API, output list is the sequence of content (multimodal). - # So they should all belong to choice 0, but with different content parts? - # ChatCompletionChunk delta has 'content' (string) and 'tool_calls' (list). - # It doesn't support multiple content parts in delta easily unless we concatenate text. - - # For tool calls, they are separate fields in delta. - for chunk in stream: chunk_type = getattr(chunk, "type", None) @@ -334,12 +299,9 @@ def responses_stream_to_chunks( ) elif chunk_type == "response.output_item.done": - # Item finished. We might want to signal finish_reason if it's the last one? - # But we don't know if it's the last one until response.completed. pass elif chunk_type == "response.completed": - # Final chunk with finish reason yield ChatCompletionChunk( id=response_id, choices=[ From 0473402a161e24b8d8e226995ed720d42b10a554 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 28 Nov 2025 16:56:21 +0300 Subject: [PATCH 12/17] minor fixs --- camel/agents/chat_agent.py | 8 +++----- camel/core/messages.py | 16 ++++------------ camel/models/openai_model.py | 6 ++++++ 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index df3e8d25a4..9a653b832b 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -29,7 +29,7 @@ import time import uuid import warnings -from dataclasses import dataclass +from dataclasses import dataclass, replace from datetime import datetime from pathlib import Path from typing import ( @@ -3654,12 +3654,10 @@ def _handle_camel_response( output_messages: List[BaseMessage] = [] for msg in response.output_messages: # Re-wrap to preserve agent role naming convention - chat_message = BaseMessage( + chat_message = replace( + msg, role_name=self.role_name, role_type=self.role_type, - meta_dict=msg.meta_dict, - content=msg.content, - parsed=msg.parsed, ) output_messages.append(chat_message) diff --git a/camel/core/messages.py b/camel/core/messages.py index 3d45cf97a8..67bc768c6a 100644 --- a/camel/core/messages.py +++ b/camel/core/messages.py @@ -13,7 +13,7 @@ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= """Model-agnostic message abstractions and converters. -Phase 1 introduces `CamelMessage` to decouple CAMEL from the legacy +Introduces `CamelMessage` to decouple CAMEL from the legacy OpenAI Chat Completions message schema while keeping behaviour identical via adapter conversion. """ @@ -30,7 +30,7 @@ class CamelContentPart(BaseModel): """A single content fragment. - Phase 1.5: Extend to cover Responses API inputs while staying compatible + Extend to cover Responses API inputs while staying compatible with Chat Completions. Supported types: - text, image_url (Chat-compatible) - input_text, input_image, input_file, input_audio (Responses-compatible) @@ -64,12 +64,7 @@ class CamelMessage(BaseModel): def openai_messages_to_camel( messages: List[OpenAIMessage], ) -> List[CamelMessage]: - """Convert OpenAI ChatCompletion-style messages to `CamelMessage`. - - Notes: - - Only text and image_url items are converted in Phase 1. - - Other fields are carried over when present (name, tool_call_id). 
- """ + """Convert OpenAI ChatCompletion-style messages to `CamelMessage`.""" result: List[CamelMessage] = [] for msg in messages: role = msg.get("role", "user") # type: ignore[assignment] @@ -162,10 +157,7 @@ def openai_messages_to_camel( def camel_messages_to_openai( messages: List[CamelMessage], ) -> List[OpenAIMessage]: - """Convert `CamelMessage` back to OpenAI ChatCompletion-style messages. - - This is lossless for the text/image_url subset used in Phase 1. - """ + """Convert `CamelMessage` back to OpenAI ChatCompletion-style messages.""" result: List[OpenAIMessage] = [] for cmsg in messages: if cmsg.role == "tool": diff --git a/camel/models/openai_model.py b/camel/models/openai_model.py index 657ab8c211..b73a2f47a8 100644 --- a/camel/models/openai_model.py +++ b/camel/models/openai_model.py @@ -108,6 +108,12 @@ class OpenAIModel(BaseModelBackend): OpenAI client initialization. These can include parameters like 'organization', 'default_headers', 'http_client', etc. Ignored if custom clients are provided. + + Environment Variables: + CAMEL_USE_CAMEL_RESPONSE (str): If set to "true", the model's response + will be automatically converted to :obj:`CamelModelResponse`. This + is useful for standardizing responses across different models. + (default: "false") """ @api_keys_required( From abdb22a545f9e0a6a90bef94a7a82f6889e30821 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Fri, 5 Dec 2025 15:54:22 +0300 Subject: [PATCH 13/17] add log probs --- camel/models/openai_responses_model.py | 6 ++-- camel/responses/adapters/chat_completions.py | 8 +++++ camel/responses/adapters/responses_adapter.py | 10 ++++++ camel/responses/model_response.py | 2 ++ test/models/test_openai_responses_model.py | 31 ++++++++++++++++ test/responses/test_chat_adapter.py | 36 +++++++++++++++++++ 6 files changed, 89 insertions(+), 4 deletions(-) diff --git a/camel/models/openai_responses_model.py b/camel/models/openai_responses_model.py index 9b130bf2b4..54294c4677 100644 --- a/camel/models/openai_responses_model.py +++ b/camel/models/openai_responses_model.py @@ -119,9 +119,7 @@ def _run( messages: List[OpenAIMessage], response_format: Optional[Type[BaseModel]] = None, tools: Optional[List[Dict[str, Any]]] = None, - ) -> Union[ - ChatCompletion, ChatCompletion - ]: # unused legacy types in signature + ) -> ChatCompletion: # unused legacy types in signature # Update trace agent_session_id = get_current_agent_session_id() if agent_session_id: @@ -194,7 +192,7 @@ async def _arun( messages: List[OpenAIMessage], response_format: Optional[Type[BaseModel]] = None, tools: Optional[List[Dict[str, Any]]] = None, - ) -> Union[ChatCompletion, ChatCompletion]: + ) -> ChatCompletion: agent_session_id = get_current_agent_session_id() if agent_session_id: update_langfuse_trace( diff --git a/camel/responses/adapters/chat_completions.py b/camel/responses/adapters/chat_completions.py index 2880774baa..bb49cb3314 100644 --- a/camel/responses/adapters/chat_completions.py +++ b/camel/responses/adapters/chat_completions.py @@ -100,9 +100,11 @@ def adapt_chat_to_camel_response( output_messages: List[BaseMessage] = [] finish_reasons: List[str] = [] tool_call_requests: Optional[List[CamelToolCall]] = None + logprobs_list: List[Any] = [] for _, choice in enumerate(response.choices): finish_reasons.append(str(choice.finish_reason)) + logprobs_list.append(getattr(choice, "logprobs", None)) msg = choice.message # Skip empty (no content and no tool calls) @@ -126,6 +128,11 @@ def adapt_chat_to_camel_response( if tool_call_requests is None: 
tool_call_requests = _choice_tool_calls_to_camel(msg) + # Preserve logprobs if caller requested them + logprobs: Optional[List[Any]] = ( + logprobs_list if any(lp is not None for lp in logprobs_list) else None + ) + usage_raw: Dict[str, Any] = {} usage_obj: Optional[Any] = getattr(response, "usage", None) if usage_obj is not None: @@ -166,5 +173,6 @@ def adapt_chat_to_camel_response( tool_call_requests=tool_call_requests, finish_reasons=finish_reasons, usage=usage, + logprobs=logprobs, raw=response, ) diff --git a/camel/responses/adapters/responses_adapter.py b/camel/responses/adapters/responses_adapter.py index e7101c1ea3..5c6694fc37 100644 --- a/camel/responses/adapters/responses_adapter.py +++ b/camel/responses/adapters/responses_adapter.py @@ -45,6 +45,7 @@ def responses_to_camel_response( audio_bytes: Optional[bytes] = None audio_transcript: Optional[str] = None + logprobs_list: List[Any] = [] text = getattr(resp, "output_text", None) parts: List[str] = [] @@ -64,6 +65,13 @@ def responses_to_camel_response( val = chunk.get("text") or chunk.get("output_text") if val: parts.append(str(val)) + lp = ( + chunk.get("logprobs") + if isinstance(chunk, dict) + else getattr(chunk, "logprobs", None) + ) + if lp is not None: + logprobs_list.append(lp) elif chunk_type == "output_audio": audio = chunk.get("audio") if isinstance(audio, dict): @@ -184,6 +192,7 @@ def responses_to_camel_response( total_tokens=usage_dict.get("total_tokens"), raw=usage_raw or None, ) + logprobs: Optional[List[Any]] = logprobs_list if logprobs_list else None return CamelModelResponse( id=getattr(resp, "id", ""), @@ -193,6 +202,7 @@ def responses_to_camel_response( tool_call_requests=tool_call_requests if tool_call_requests else None, finish_reasons=["stop"], usage=usage, + logprobs=logprobs, raw=resp, ) diff --git a/camel/responses/model_response.py b/camel/responses/model_response.py index bc3ecc5758..d74ee27307 100644 --- a/camel/responses/model_response.py +++ b/camel/responses/model_response.py @@ -59,6 +59,8 @@ class CamelModelResponse(BaseModel): tool_call_requests: Optional[List[CamelToolCall]] = None finish_reasons: List[str] = Field(default_factory=list) usage: CamelUsage = Field(default_factory=CamelUsage) + # Optional logprobs aligned to provider choices (if requested) + logprobs: Optional[List[Any]] = None # Keep a handle to the original provider response for debugging/tests raw: Any = None diff --git a/test/models/test_openai_responses_model.py b/test/models/test_openai_responses_model.py index 765a5a3ec2..6913033546 100644 --- a/test/models/test_openai_responses_model.py +++ b/test/models/test_openai_responses_model.py @@ -139,6 +139,37 @@ def test_responses_to_camel_response_with_audio(): assert camel_response.output_messages[0].audio_transcript == "Hello world" +def test_responses_to_camel_response_keeps_logprobs(): + mock_response = MagicMock() + mock_response.id = "resp_124" + mock_response.model = "gpt-4o-mini" + mock_response.created = 1234567891 + + logprob_entry = { + "token": "Hi", + "bytes": [72, 105], + "logprob": -0.1, + "top_logprobs": [], + } + mock_text_chunk = { + "type": "output_text", + "text": "Hi there", + "logprobs": [logprob_entry], + } + + mock_item = MagicMock() + mock_item.content = [mock_text_chunk] + + mock_response.output = [mock_item] + mock_response.output_text = None + mock_response.usage = None + + camel_response = responses_to_camel_response(mock_response) + + assert camel_response.logprobs is not None + assert camel_response.logprobs[0][0]["token"] == "Hi" + + def 
test_openai_messages_to_camel_with_audio(): from camel.core.messages import openai_messages_to_camel diff --git a/test/responses/test_chat_adapter.py b/test/responses/test_chat_adapter.py index ec2039b1d3..b99543dead 100644 --- a/test/responses/test_chat_adapter.py +++ b/test/responses/test_chat_adapter.py @@ -14,6 +14,12 @@ import json +from openai.types.chat.chat_completion import ChoiceLogprobs +from openai.types.chat.chat_completion_token_logprob import ( + ChatCompletionTokenLogprob, + TopLogprob, +) + from camel.responses.adapters.chat_completions import ( adapt_chat_to_camel_response, ) @@ -96,3 +102,33 @@ def test_adapt_tool_calls_if_present(): ) tc = cmr.tool_call_requests[0] assert tc.id == "call_1" and tc.name == "search" and tc.args == {"q": "x"} + + +def test_adapt_chat_to_camel_response_preserves_logprobs(): + top_lp = TopLogprob.construct(token="Hi", bytes=None, logprob=-0.1) + token_lp = ChatCompletionTokenLogprob.construct( + token="Hi", bytes=None, logprob=-0.1, top_logprobs=[top_lp] + ) + logprobs = ChoiceLogprobs.construct(content=[token_lp]) + + choice = dict( + index=0, + message=ChatCompletionMessage.construct( + role="assistant", content="Hi", tool_calls=None + ), + finish_reason="stop", + logprobs=logprobs, + ) + + cc = ChatCompletion.construct( + id="chatcmpl-test-003", + choices=[choice], + created=1730000002, + model="gpt-4o-mini", + object="chat.completion", + usage=None, + ) + + cmr = adapt_chat_to_camel_response(cc) + assert cmr.logprobs is not None + assert cmr.logprobs[0].content[0].token == "Hi" From 8ea658ff97ef555b6da552e9ce04ff3582975473 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Tue, 9 Dec 2025 09:44:25 +0300 Subject: [PATCH 14/17] fix input_tokens and output_tokens extraction --- camel/responses/adapters/responses_adapter.py | 19 ++++++++++++++++--- test/models/test_openai_responses_model.py | 5 ++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/camel/responses/adapters/responses_adapter.py b/camel/responses/adapters/responses_adapter.py index 5c6694fc37..c023ed105b 100644 --- a/camel/responses/adapters/responses_adapter.py +++ b/camel/responses/adapters/responses_adapter.py @@ -186,10 +186,23 @@ def responses_to_camel_response( usage_raw = None usage_dict = usage_raw or {} + input_tokens = usage_dict.get("input_tokens") + if input_tokens is None: + input_tokens = usage_dict.get("prompt_tokens") + + output_tokens = usage_dict.get("output_tokens") + if output_tokens is None: + output_tokens = usage_dict.get("completion_tokens") + + total_tokens = usage_dict.get("total_tokens") + if total_tokens is None and input_tokens is not None: + if output_tokens is not None: + total_tokens = input_tokens + output_tokens + usage = CamelUsage( - input_tokens=usage_dict.get("prompt_tokens"), - output_tokens=usage_dict.get("completion_tokens"), - total_tokens=usage_dict.get("total_tokens"), + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, raw=usage_raw or None, ) logprobs: Optional[List[Any]] = logprobs_list if logprobs_list else None diff --git a/test/models/test_openai_responses_model.py b/test/models/test_openai_responses_model.py index 6913033546..aa7d66599f 100644 --- a/test/models/test_openai_responses_model.py +++ b/test/models/test_openai_responses_model.py @@ -162,12 +162,15 @@ def test_responses_to_camel_response_keeps_logprobs(): mock_response.output = [mock_item] mock_response.output_text = None - mock_response.usage = None + mock_response.usage = {"input_tokens": 10, "output_tokens": 5} 
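+    # total_tokens is intentionally omitted; the adapter should derive 10 + 5.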
camel_response = responses_to_camel_response(mock_response) assert camel_response.logprobs is not None assert camel_response.logprobs[0][0]["token"] == "Hi" + assert camel_response.usage.input_tokens == 10 + assert camel_response.usage.output_tokens == 5 + assert camel_response.usage.total_tokens == 15 def test_openai_messages_to_camel_with_audio(): From a3ccb856f1d6b6182baa8df22ba8c2cd6dc95400 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Tue, 9 Dec 2025 10:07:25 +0300 Subject: [PATCH 15/17] update message --- camel/core/messages.py | 30 ++++++++--- test/core/test_messages.py | 100 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 8 deletions(-) create mode 100644 test/core/test_messages.py diff --git a/camel/core/messages.py b/camel/core/messages.py index 67bc768c6a..7affd831c9 100644 --- a/camel/core/messages.py +++ b/camel/core/messages.py @@ -102,12 +102,8 @@ def openai_messages_to_camel( ) elif item_t == "input_image": image_url = item.get("image_url", "") - # input_image in Responses API uses "image_url" field - # which is a string URL - # But wait, the user snippet shows: "image_url": "..." - # Chat API "image_url" is a dict {"url": "...", ...} - # Responses API "input_image" has "image_url" string. - + if isinstance(image_url, dict): + image_url = image_url.get("url", "") payload = {"url": image_url} parts.append( CamelContentPart(type="input_image", payload=payload) @@ -181,6 +177,14 @@ def camel_messages_to_openai( hybrid.append( {"type": "text", "text": part.payload.get("text", "")} ) + elif part.type == "input_text": + # Responses-style input_text -> Chat text for compatibility + hybrid.append( + { + "type": "text", + "text": part.payload.get("text", ""), + } + ) elif part.type == "image_url": url = part.payload.get("url") detail = part.payload.get("detail") or "auto" @@ -190,6 +194,16 @@ def camel_messages_to_openai( "image_url": {"url": url, "detail": detail}, } ) + elif part.type == "input_image": + # Responses-style input_image -> Chat image_url schema + url = part.payload.get("url") or part.payload.get("image_url") + detail = part.payload.get("detail") or "auto" + hybrid.append( + { + "type": "image_url", + "image_url": {"url": url, "detail": detail}, + } + ) elif part.type == "input_audio": payload = part.payload hybrid.append( @@ -296,8 +310,8 @@ def camel_messages_to_responses_request( instructions_parts.append(str(txt)) continue - # Map other roles to a user message for broad compat in Responses - role = "user" + # Map other roles to Responses-supported roles (user/assistant) + role = "assistant" if msg.role == "assistant" else "user" content_frags = [_part_to_responses_fragment(p) for p in msg.content] input_messages.append( { diff --git a/test/core/test_messages.py b/test/core/test_messages.py new file mode 100644 index 0000000000..134da7259b --- /dev/null +++ b/test/core/test_messages.py @@ -0,0 +1,100 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +from typing import cast + +from camel.core.messages import ( + CamelContentPart, + CamelMessage, + OpenAIMessage, + camel_messages_to_openai, +) + + +def test_camel_messages_to_openai_preserves_input_text_and_image() -> None: + msg = CamelMessage( + role="user", + content=[ + CamelContentPart(type="input_text", payload={"text": "hello"}), + CamelContentPart( + type="input_image", + payload={"url": "https://example.com/img.png"}, + ), + ], + ) + + converted = camel_messages_to_openai([msg]) + + assert len(converted) == 1 + content = converted[0]["content"] + assert isinstance(content, list) and len(content) == 2 + + text_part = content[0] + image_part = content[1] + + assert text_part["type"] == "text" + assert text_part["text"] == "hello" + + assert image_part["type"] == "image_url" + assert image_part["image_url"]["url"] == "https://example.com/img.png" + # default detail should be present to mirror Chat schema + assert image_part["image_url"]["detail"] == "auto" + + +def test_openai_messages_to_camel_handles_input_image_dict() -> None: + from camel.core.messages import openai_messages_to_camel + + messages = cast( + list[OpenAIMessage], + [ + { + "role": "user", + "content": [ + { + "type": "input_image", + "image_url": {"url": "https://example.com/x.png"}, + }, + ], + } + ], + ) + + camel_msgs = openai_messages_to_camel(messages) + + assert len(camel_msgs) == 1 + parts = camel_msgs[0].content + assert len(parts) == 1 + assert parts[0].type == "input_image" + assert parts[0].payload["url"] == "https://example.com/x.png" + + +def test_camel_messages_to_responses_request_preserves_assistant_role() -> ( + None +): + from camel.core.messages import camel_messages_to_responses_request + + msg_user = CamelMessage( + role="user", + content=[CamelContentPart(type="input_text", payload={"text": "hi"})], + ) + msg_assistant = CamelMessage( + role="assistant", + content=[ + CamelContentPart(type="input_text", payload={"text": "hello"}) + ], + ) + + body = camel_messages_to_responses_request([msg_user, msg_assistant]) + assert "input" in body + assert body["input"][0]["role"] == "user" + assert body["input"][1]["role"] == "assistant" From fd78ecf4854ba7085b1e18d214cc3438a5660a90 Mon Sep 17 00:00:00 2001 From: Xiaotian Jin Date: Tue, 9 Dec 2025 10:17:21 +0300 Subject: [PATCH 16/17] handle role==tool --- camel/core/messages.py | 38 +++++++++++++++++++ camel/responses/adapters/responses_adapter.py | 12 ++++++ test/core/test_messages.py | 19 ++++++++++ test/models/test_openai_responses_model.py | 25 ++++++++++++ 4 files changed, 94 insertions(+) diff --git a/camel/core/messages.py b/camel/core/messages.py index 7affd831c9..ceac695082 100644 --- a/camel/core/messages.py +++ b/camel/core/messages.py @@ -43,6 +43,7 @@ class CamelContentPart(BaseModel): "input_image", "input_file", "input_audio", + "function_call_output", ] payload: Dict[str, Any] = Field(default_factory=dict) @@ -249,6 +250,7 @@ def _part_to_responses_fragment(part: CamelContentPart) -> Dict[str, Any]: - input_image -> passthrough as {type: input_image, image_url} - input_file -> {type: input_file, file_id|file_url} - input_audio -> {type: input_audio, input_audio: {data, format}} + - function_call_output -> {type: function_call_output, call_id, output} """ t = part.type p = part.payload or {} @@ -278,6 +280,13 @@ def _part_to_responses_fragment(part: CamelContentPart) -> Dict[str, Any]: } return {"type": "input_audio", "input_audio": audio} + if t == 
"function_call_output": + return { + "type": "function_call_output", + "call_id": p.get("call_id"), + "output": p.get("output"), + } + # Default safe fallback: treat as text return {"type": "input_text", "text": str(p.get("text", ""))} @@ -310,6 +319,35 @@ def camel_messages_to_responses_request( instructions_parts.append(str(txt)) continue + if msg.role == "tool": + # Convert tool outputs into function_call_output content + call_id = msg.tool_call_id + output_texts: List[str] = [] + for part in msg.content: + if part.type == "function_call_output": + output_val = part.payload.get("output") + if output_val is not None: + output_texts.append(str(output_val)) + elif part.type in {"text", "input_text"}: + txt = part.payload.get("text") + if txt: + output_texts.append(str(txt)) + + content_frags = [ + { + "type": "function_call_output", + "call_id": call_id, + "output": "\n".join(output_texts) if output_texts else "", + } + ] + input_messages.append( + { + "role": "assistant", # function outputs sit in assistant slot + "content": content_frags, + } + ) + continue + # Map other roles to Responses-supported roles (user/assistant) role = "assistant" if msg.role == "assistant" else "user" content_frags = [_part_to_responses_fragment(p) for p in msg.content] diff --git a/camel/responses/adapters/responses_adapter.py b/camel/responses/adapters/responses_adapter.py index c023ed105b..76958ea7b4 100644 --- a/camel/responses/adapters/responses_adapter.py +++ b/camel/responses/adapters/responses_adapter.py @@ -86,6 +86,18 @@ def responses_to_camel_response( audio_transcript = transcript elif chunk_type == "function_call": pass + elif chunk_type == "output_image": + image_url = chunk.get("image_url") or chunk.get("url") + if image_url: + parts.append(f"[image]: {image_url}") + elif chunk_type == "output_file": + file_url = ( + chunk.get("file_url") + or chunk.get("url") + or chunk.get("file_id") + ) + if file_url: + parts.append(f"[file]: {file_url}") # Check for tool calls in the output list directly tool_call_requests: List[CamelToolCall] = [] diff --git a/test/core/test_messages.py b/test/core/test_messages.py index 134da7259b..680bec7cd4 100644 --- a/test/core/test_messages.py +++ b/test/core/test_messages.py @@ -98,3 +98,22 @@ def test_camel_messages_to_responses_request_preserves_assistant_role() -> ( assert "input" in body assert body["input"][0]["role"] == "user" assert body["input"][1]["role"] == "assistant" + + +def test_tool_message_converted_to_function_call_output() -> None: + from camel.core.messages import camel_messages_to_responses_request + + msg_tool = CamelMessage( + role="tool", + tool_call_id="call_123", + content=[ + CamelContentPart(type="text", payload={"text": "result value"}), + ], + ) + + body = camel_messages_to_responses_request([msg_tool]) + assert body["input"][0]["role"] == "assistant" + frag = body["input"][0]["content"][0] + assert frag["type"] == "function_call_output" + assert frag["call_id"] == "call_123" + assert frag["output"] == "result value" diff --git a/test/models/test_openai_responses_model.py b/test/models/test_openai_responses_model.py index aa7d66599f..04301eb11b 100644 --- a/test/models/test_openai_responses_model.py +++ b/test/models/test_openai_responses_model.py @@ -173,6 +173,31 @@ def test_responses_to_camel_response_keeps_logprobs(): assert camel_response.usage.total_tokens == 15 +def test_responses_to_camel_response_keeps_output_image(): + mock_response = MagicMock() + mock_response.id = "resp_125" + mock_response.model = "gpt-4o-mini" + 
mock_response.created = 1234567892 + + mock_image_chunk = { + "type": "output_image", + "image_url": "https://example.com/img.png", + } + mock_item = MagicMock() + mock_item.content = [mock_image_chunk] + + mock_response.output = [mock_item] + mock_response.output_text = None + mock_response.usage = None + + camel_response = responses_to_camel_response(mock_response) + + assert ( + "[image]: https://example.com/img.png" + in camel_response.output_messages[0].content + ) + + def test_openai_messages_to_camel_with_audio(): from camel.core.messages import openai_messages_to_camel From 2fd72a02a6acfb41e6decd893d1a8d3b0a40904c Mon Sep 17 00:00:00 2001 From: Hesam Sheikh <41022652+hesamsheikh@users.noreply.github.com> Date: Wed, 10 Dec 2025 18:02:59 +0100 Subject: [PATCH 17/17] [fix] Response API Phase 0.1 Minor change (#3560) --- camel/core/messages.py | 2 +- camel/models/__init__.py | 70 +++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/camel/core/messages.py b/camel/core/messages.py index ceac695082..0240e172bc 100644 --- a/camel/core/messages.py +++ b/camel/core/messages.py @@ -342,7 +342,7 @@ def camel_messages_to_responses_request( ] input_messages.append( { - "role": "assistant", # function outputs sit in assistant slot + "role": "assistant", # function outputs go here "content": content_frags, } ) diff --git a/camel/models/__init__.py b/camel/models/__init__.py index 55f728a40d..c6e22a0cb5 100644 --- a/camel/models/__init__.py +++ b/camel/models/__init__.py @@ -44,6 +44,7 @@ from .openai_audio_models import OpenAIAudioModels from .openai_compatible_model import OpenAICompatibleModel from .openai_model import OpenAIModel +from .openai_responses_model import OpenAIResponsesModel from .openrouter_model import OpenRouterModel from .ppio_model import PPIOModel from .qianfan_model import QianfanModel @@ -61,53 +62,54 @@ from .zhipuai_model import ZhipuAIModel __all__ = [ - 'BaseModelBackend', - 'OpenAIModel', - 'OpenRouterModel', - 'AzureOpenAIModel', - 'AnthropicModel', + 'AIMLModel', 'AMDModel', - 'MistralModel', - 'GroqModel', - 'StubModel', - 'ZhipuAIModel', + 'AWSBedrockModel', + 'AihubMixModel', + 'AnthropicModel', + 'AzureOpenAIModel', + 'BaseAudioModel', + 'BaseModelBackend', 'CohereModel', 'CometAPIModel', + 'CrynuxModel', + 'DeepSeekModel', + 'FishAudioModel', + 'GeminiModel', + 'GroqModel', + 'InternLMModel', + 'LMStudioModel', + 'LiteLLMModel', + 'MinimaxModel', + 'MistralModel', 'ModelFactory', 'ModelManager', - 'LiteLLMModel', - 'OpenAIAudioModels', - 'NetmindModel', + 'ModelProcessingError', + 'ModelScopeModel', + 'MoonshotModel', + 'NebiusModel', 'NemotronModel', + 'NetmindModel', 'NovitaModel', 'NvidiaModel', 'OllamaModel', - 'VLLMModel', - 'SGLangModel', - 'GeminiModel', + 'OpenAIAudioModels', 'OpenAICompatibleModel', - 'RekaModel', - 'SambaModel', - 'TogetherAIModel', + 'OpenAIModel', + 'OpenAIResponsesModel', + 'OpenRouterModel', 'PPIOModel', - 'YiModel', + 'QianfanModel', 'QwenModel', - 'AWSBedrockModel', - 'ModelProcessingError', - 'NebiusModel', - 'DeepSeekModel', - 'FishAudioModel', - 'InternLMModel', - 'ModelScopeModel', - 'MoonshotModel', - 'AIMLModel', - 'BaseAudioModel', + 'RekaModel', + 'SGLangModel', + 'SambaModel', 'SiliconFlowModel', + 'StubModel', + 'TogetherAIModel', + 'VLLMModel', 'VolcanoModel', - 'LMStudioModel', - 'MinimaxModel', 'WatsonXModel', - 'QianfanModel', - 'CrynuxModel', - 'AihubMixModel', + 'YiModel', + 'ZhipuAIModel', ]
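
Taken together, the series routes the OpenAI Responses backend through the same
ModelFactory entry point as the Chat Completions backends and hands callers the
unified CamelModelResponse. A minimal usage sketch, assuming OPENAI_API_KEY is
set and reusing the model alias from the examples above; the calls shown are
the ones exercised by the tests in this series, not a finalized public API:

    from camel.models import ModelFactory
    from camel.responses.model_response import CamelModelResponse
    from camel.types import ModelPlatformType, ModelType

    model = ModelFactory.create(
        ModelPlatformType.OPENAI_RESPONSES, ModelType.GPT_4_1_MINI
    )

    # Non-streaming call: the backend adapts the provider payload into the
    # model-agnostic CamelModelResponse.
    resp = model.run([{"role": "user", "content": "Say hi"}])
    assert isinstance(resp, CamelModelResponse)
    print(resp.output_messages[0].content)
    print(resp.usage.total_tokens)

When the backend is created with model_config_dict={"stream": True}, run()
instead yields ChatCompletionChunk objects, as the streaming test above
demonstrates.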