camel-ai
diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py
Lines changed: 112 additions & 14 deletions
diff --git a/examples/agents/chatagent_stream.py b/examples/agents/chatagent_stream.py
Lines changed: 40 additions & 37 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 2 deletions
@@ -448,10 +448,12 @@ class ChatAgent(BaseAgent):
         step_timeout (Optional[float], optional): Timeout in seconds for the
             entire step operation. If None, no timeout is applied.
             (default: :obj:`None`)
-        stream_accumulate (bool, optional): When True, partial streaming
-            updates return accumulated content (current behavior). When False,
-            partial updates return only the incremental delta. (default:
-            :obj:`True`)
+        stream_accumulate (Optional[bool], optional): When True, partial
+            streaming updates return accumulated content. When False, partial
+            updates return only the incremental delta (recommended).
+            If None, behaves as False and emits a DeprecationWarning the
+            first time a streaming response is produced (old default: True).
+            (default: :obj:`None`, which behaves as :obj:`False`)
         summary_window_ratio (float, optional): Maximum fraction of the total
             context window that can be occupied by summary information. Used
             to limit how much of the model's context is reserved for
@@ -501,7 +503,7 @@ def __init__(
         retry_attempts: int = 3,
         retry_delay: float = 1.0,
         step_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
-        stream_accumulate: bool = True,
+        stream_accumulate: Optional[bool] = None,
         summary_window_ratio: float = 0.6,
     ) -> None:
         if isinstance(model, ModelManager):
@@ -615,7 +617,13 @@ def __init__(
         self.step_timeout = step_timeout
         self._context_utility: Optional[ContextUtility] = None
         self._context_summary_agent: Optional["ChatAgent"] = None
-        self.stream_accumulate = stream_accumulate
+
+        # Store whether user explicitly set stream_accumulate
+        # Warning will be issued only when streaming is actually used
+        self._stream_accumulate_explicit = stream_accumulate is not None
+        self.stream_accumulate = (
+            stream_accumulate if stream_accumulate is not None else False
+        )
         self._last_tool_call_record: Optional[ToolCallingRecord] = None
         self._last_tool_call_signature: Optional[str] = None
         self.summary_window_ratio = summary_window_ratio
@@ -4020,6 +4028,28 @@ def _get_token_count(self, content: str) -> int:
         # Conservative estimate: ~3 chars per token
         return len(content) // 3
 
+    def _warn_stream_accumulate_deprecation(self) -> None:
+        r"""Issue deprecation warning for stream_accumulate default change.
+
+        Only warns once per agent instance, and only if the user didn't
+        explicitly set stream_accumulate.
+        """
+        if not self._stream_accumulate_explicit:
+            import warnings
+
+            warnings.warn(
+                "The default value of 'stream_accumulate' has changed from "
+                "True to False. In streaming mode, each chunk now returns "
+                "only the incremental delta instead of accumulated content. "
+                "To suppress this warning, explicitly set "
+                "stream_accumulate=False (recommended) or stream_accumulate="
+                "True if you need the old behavior.",
+                DeprecationWarning,
+                stacklevel=5,
+            )
+            # Flip the "explicit" flag so this instance does not warn again
+            self._stream_accumulate_explicit = True
+
     def _stream_response(
         self,
         openai_messages: List[OpenAIMessage],
@@ -4028,6 +4058,8 @@ def _stream_response(
     ) -> Generator[ChatAgentResponse, None, None]:
         r"""Internal method to handle streaming responses with tool calls."""
 
+        self._warn_stream_accumulate_deprecation()
+
         tool_call_records: List[ToolCallingRecord] = []
         accumulated_tool_calls: Dict[str, Any] = {}
         step_token_usage = self._create_token_usage_tracker()
@@ -4346,12 +4378,20 @@ def _process_stream_chunks_with_accumulator(
                             content_accumulator.get_full_reasoning_content()
                             or None
                         )
+                        # In delta mode, final response content should be empty
+                        # since all content was already yielded incrementally
+                        display_content = (
+                            final_content if self.stream_accumulate else ""
+                        )
+                        display_reasoning = (
+                            final_reasoning if self.stream_accumulate else None
+                        )
                         final_message = BaseMessage(
                             role_name=self.role_name,
                             role_type=self.role_type,
                             meta_dict={},
-                            content=final_content,
-                            reasoning_content=final_reasoning,
+                            content=display_content,
+                            reasoning_content=display_reasoning,
                         )
 
                         if response_format:
@@ -4402,21 +4442,60 @@ def _accumulate_tool_calls(
             bool: True if any tool call is complete, False otherwise.
         """
 
+        index_map_key = '_index_to_key_map'
+        if index_map_key not in accumulated_tool_calls:
+            accumulated_tool_calls[index_map_key] = {}
+        index_map = accumulated_tool_calls[index_map_key]
+
         for delta_tool_call in tool_call_deltas:
-            index = delta_tool_call.index
+            index = getattr(delta_tool_call, 'index', None)
             tool_call_id = getattr(delta_tool_call, 'id', None)
 
+            # Determine entry key
+            if index is not None:
+                index_str = str(index)
+                if tool_call_id:
+                    # New ID provided: check if it differs from current mapping
+                    current_key = index_map.get(index_str)
+                    if current_key is None:
+                        # First time seeing this index, use tool_call_id as key
+                        entry_key = tool_call_id
+                    elif current_key in accumulated_tool_calls:
+                        existing_id = accumulated_tool_calls[current_key].get(
+                            'id'
+                        )
+                        if existing_id and existing_id != tool_call_id:
+                            # ID changed: use new ID as key
+                            entry_key = tool_call_id
+                        else:
+                            # No existing ID or same ID: keep current key
+                            entry_key = current_key
+                    else:
+                        entry_key = current_key
+                    # Update mapping
+                    index_map[index_str] = entry_key
+                else:
+                    # No ID in this chunk: use existing mapping or index as
+                    # string
+                    entry_key = index_map.get(index_str, index_str)
+                    if index_str not in index_map:
+                        index_map[index_str] = entry_key
+            elif tool_call_id is not None:
+                entry_key = tool_call_id
+            else:
+                entry_key = '0'  # Default fallback as string
+
             # Initialize tool call entry if not exists
-            if index not in accumulated_tool_calls:
-                accumulated_tool_calls[index] = {
+            if entry_key not in accumulated_tool_calls:
+                accumulated_tool_calls[entry_key] = {
                     'id': '',
                     'type': 'function',
                     'function': {'name': '', 'arguments': ''},
                     'extra_content': None,
                     'complete': False,
                 }
 
-            tool_call_entry = accumulated_tool_calls[index]
+            tool_call_entry = accumulated_tool_calls[entry_key]
 
             # Accumulate tool call data
             if tool_call_id:
@@ -4448,6 +4527,9 @@ def _accumulate_tool_calls(
         # Check if any tool calls are complete
         any_complete = False
         for _index, tool_call_entry in accumulated_tool_calls.items():
+            # Skip internal mapping key
+            if _index == '_index_to_key_map':
+                continue
             if (
                 tool_call_entry['id']
                 and tool_call_entry['function']['name']
@@ -4475,6 +4557,9 @@ def _execute_tools_sync_with_status_accumulator(
 
         tool_calls_to_execute = []
         for _tool_call_index, tool_call_data in accumulated_tool_calls.items():
+            # Skip internal mapping key
+            if _tool_call_index == '_index_to_key_map':
+                continue
             if tool_call_data.get('complete', False):
                 tool_calls_to_execute.append(tool_call_data)
 
@@ -4936,6 +5021,8 @@ async def _astream_response(
     ) -> AsyncGenerator[ChatAgentResponse, None]:
         r"""Async method to handle streaming responses with tool calls."""
 
+        self._warn_stream_accumulate_deprecation()
+
         tool_call_records: List[ToolCallingRecord] = []
         accumulated_tool_calls: Dict[str, Any] = {}
         step_token_usage = self._create_token_usage_tracker()
@@ -5310,12 +5397,20 @@ async def _aprocess_stream_chunks_with_accumulator(
                             content_accumulator.get_full_reasoning_content()
                             or None
                         )
+                        # In delta mode, final response content should be empty
+                        # since all content was already yielded incrementally
+                        display_content = (
+                            final_content if self.stream_accumulate else ""
+                        )
+                        display_reasoning = (
+                            final_reasoning if self.stream_accumulate else None
+                        )
                         final_message = BaseMessage(
                             role_name=self.role_name,
                             role_type=self.role_type,
                             meta_dict={},
-                            content=final_content,
-                            reasoning_content=final_reasoning,
+                            content=display_content,
+                            reasoning_content=display_reasoning,
                         )
 
                         if response_format:
@@ -5363,6 +5458,9 @@ async def _execute_tools_async_with_status_accumulator(
         # statuses immediately
         tool_tasks = []
         for _tool_call_index, tool_call_data in accumulated_tool_calls.items():
+            # Skip internal mapping key
+            if _tool_call_index == '_index_to_key_map':
+                continue
             if tool_call_data.get('complete', False):
                 function_name = tool_call_data['function']['name']
                 try:
 
@@ -11,70 +11,73 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
+
 from camel.agents import ChatAgent
 from camel.models import ModelFactory
+from camel.toolkits import MathToolkit
 from camel.types import ModelPlatformType, ModelType
 
 # Create a streaming-capable model backend
 streaming_model = ModelFactory.create(
     model_platform=ModelPlatformType.DEFAULT,
-    model_type=ModelType.GPT_4O_MINI,
+    model_type=ModelType.DEFAULT,
     model_config_dict={
         "stream": True,
         "stream_options": {"include_usage": True},
     },
 )
 
-agent_accumulated = ChatAgent(
-    system_message="You are a helpful assistant that provides detailed "
-    "and informative responses.",
+# Initialize MathToolkit for parallel calculation demo
+math_toolkit = MathToolkit()
+
+# Create agent with math tools for parallel tool call demonstration
+# stream_accumulate=False means each chunk returns only delta (incremental)
+# content
+agent_with_tools = ChatAgent(
+    system_message="You are a helpful math assistant. When asked to perform "
+    "multiple calculations, use the math tools to compute each one. "
+    "Always use the tools for calculations.",
     model=streaming_model,
+    tools=math_toolkit.get_tools(),
+    stream_accumulate=False,  # Recommended: get delta content per chunk
 )
 
-# Example user message
-user_message = "How many Rs are there in the word 'strawberry'?"
+# User message that triggers parallel tool calls
+user_message = (
+    "Please calculate the following three operations simultaneously:\n"
+    "1. 123.45 + 678.90\n"
+    "2. 100 * 3.14159\n"
+    "3. 1000 / 7"
+)
 
-# Accumulated streaming mode (default)
-streaming_response = agent_accumulated.step(user_message)
+# Stream the response with tool calls
+streaming_response = agent_with_tools.step(user_message)
 
 for chunk_response in streaming_response:
     message = chunk_response.msgs[0]
-    reasoning_text = message.reasoning_content
-    if reasoning_text:
-        print(reasoning_text, end="", flush=True)
 
-    content_text = message.content
-    if content_text:
-        print(content_text, end="", flush=True)
-usage = streaming_response.info.get("usage", {})
-print(
-    f"\n\nUsage: prompt={usage.get('prompt_tokens')}, "
-    f"completion={usage.get('completion_tokens')}, "
-    f"total={usage.get('total_tokens')}"
-)
-print("\n\n---\nDelta streaming mode (stream_accumulate=False):\n")
-
-# Delta streaming mode (only new content per chunk)
-agent_delta = ChatAgent(
-    system_message="You are a helpful assistant that provides concise "
-    "and informative responses.",
-    model=streaming_model,
-    stream_accumulate=False,
-)
-
-streaming_response_delta = agent_delta.step(user_message)
-
-for chunk_response in streaming_response_delta:
-    message = chunk_response.msgs[0]
-    reasoning_delta = message.reasoning_content or ""
-    if reasoning_delta:
-        print(reasoning_delta, end="", flush=True)
+    # Print reasoning content if available (for models that support it)
+    if message.reasoning_content:
+        print(message.reasoning_content, end="", flush=True)
 
+    # Print main content (delta mode - each chunk contains only new content)
     if message.content:
         print(message.content, end="", flush=True)
+
+# Print usage statistics
 usage = streaming_response.info.get("usage", {})
 print(
     f"\n\nUsage: prompt={usage.get('prompt_tokens')}, "
     f"completion={usage.get('completion_tokens')}, "
     f"total={usage.get('total_tokens')}"
 )
+
+# Print tool call records if any
+tool_calls = streaming_response.info.get("tool_calls", [])
+if tool_calls:
+    print(f"\nTool calls made: {len(tool_calls)}")
+    for i, tool_call in enumerate(tool_calls, 1):
+        print(
+            f"  {i}. {tool_call.tool_name}({tool_call.args}) = "
+            f"{tool_call.result}"
+        )
@@ -188,7 +188,7 @@ dev_tools = [
     "langfuse>=2.60.5",
 ]
 model_platforms = [
-    "litellm>=1.38.1,<2",
+    "litellm>=1.38.1,<1.80.12",
     "mistralai>=1.1.0,<2",
     "reka-api>=3.0.8,<4",
     "anthropic>=0.47.0,<0.50.0",
@@ -383,7 +383,7 @@ all = [
     "markitdown>=0.1.1; python_version >= '3.13'",
     "nebula3-python==3.8.2",
     "rank-bm25>=0.2.2,<0.3",
-    "litellm>=1.38.1,<2",
+    "litellm>=1.38.1,<1.80.12",
     "mistralai>=1.1.0,<2",
     "fish-audio-sdk>=1.0.0",
     "anthropic>=0.47.0,<0.50.0",