@@ -398,6 +398,10 @@ class ChatAgent(BaseAgent):
             window that triggers summarization. If `None`, will trigger
             summarization when the context window is full.
             (default: :obj:`None`)
+        token_limit (int, optional): The maximum number of tokens allowed for
+            the context window. If `None`, uses the model's default token
+            limit. This can be used to restrict the context size below the
+            model's maximum capacity. (default: :obj:`None`)
         output_language (str, optional): The language to be output by the
             agent. (default: :obj:`None`)
         tools (Optional[List[Union[FunctionTool, Callable]]], optional): List
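
A minimal usage sketch for the new parameter (illustrative only: the system-message string and the 4096 figure are placeholders, and the import path follows the library's usual convention):

    from camel.agents import ChatAgent

    # Cap the agent's context at 4,096 tokens even if the backing model
    # allows more; values above the model's capacity are clamped (see the
    # __init__ hunk below).
    agent = ChatAgent(
        "You are a helpful assistant.",  # placeholder system message
        token_limit=4096,
    )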
@@ -529,10 +533,24 @@ def __init__(
         self._tool_output_history: List[_ToolOutputHistoryEntry] = []

         # Set up memory
+        # Use user-provided token_limit if valid, otherwise use model's default
+        model_token_limit = self.model_backend.token_limit
+        if token_limit is not None and token_limit > model_token_limit:
+            logger.warning(
+                f"Provided token_limit ({token_limit}) exceeds model's "
+                f"maximum capacity ({model_token_limit}). "
+                f"Using model's token_limit instead."
+            )
+        effective_token_limit = (
+            min(token_limit, model_token_limit)
+            if token_limit is not None
+            else model_token_limit
+        )
         context_creator = ScoreBasedContextCreator(
             self.model_backend.token_counter,
-            self.model_backend.token_limit,
+            effective_token_limit,
         )
+        self._token_limit = effective_token_limit

         self._memory: AgentMemory = memory or ChatHistoryMemory(
             context_creator,
@@ -775,6 +793,11 @@ def tool_dict(self) -> Dict[str, FunctionTool]:
         r"""Returns a dictionary of internal tools."""
         return self._internal_tools

+    @property
+    def token_limit(self) -> int:
+        r"""Returns the token limit for the agent's context window."""
+        return self._token_limit
+
     @property
     def output_language(self) -> Optional[str]:
         r"""Returns the output language for the agent."""
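
Because an oversized request is clamped at construction time, the new read-only property is the way to see which limit is actually in effect. A small sketch (the 10**9 value is deliberately absurd to force the clamp; the printed number depends on the model backend's own limit):

    agent = ChatAgent("You are a helpful assistant.", token_limit=10**9)
    # The warning from __init__ fires and the value is clamped, so this
    # prints the backend's own token limit rather than 10**9.
    print(agent.token_limit)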
@@ -1095,14 +1118,19 @@ def _update_memory_with_summary(
         summary_content: str = summary

         existing_summaries = []
-        if not include_summaries:
-            messages, _ = self.memory.get_context()
-            for msg in messages:
-                content = msg.get('content', '')
-                if isinstance(content, str) and content.startswith(
-                    '[CONTEXT_SUMMARY]'
-                ):
-                    existing_summaries.append(msg)
+        last_user_message: Optional[str] = None
+        messages, _ = self.memory.get_context()
+        for msg in messages:
+            content = msg.get('content', '')
+            role = msg.get('role', '')
+            if role == 'user' and isinstance(content, str) and content:
+                last_user_message = content
+            if (
+                not include_summaries
+                and isinstance(content, str)
+                and content.startswith('[CONTEXT_SUMMARY]')
+            ):
+                existing_summaries.append(msg)

         # Clear memory
         self.clear_memory()
@@ -1122,16 +1150,27 @@ def _update_memory_with_summary(
             role_name="assistant", content=summary_content
         )
         self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
-        input_message = BaseMessage.make_assistant_message(
-            role_name="assistant",
-            content=(
-                "Please continue the conversation from "
-                "where we left it off without asking the user any further "
-                "questions. Continue with the last task that you were "
-                "asked to work on."
-            ),
-        )
-        self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
+
+        # Restore last user message to maintain conversation structure
+        if last_user_message:
+            if not last_user_message.startswith("[Previous Request]"):
+                last_user_message = f"[Previous Request] {last_user_message}"
+            user_msg = BaseMessage.make_user_message(
+                role_name="user",
+                content=last_user_message,
+            )
+            self.update_memory(user_msg, OpenAIBackendRole.USER)
+
+        # Add continuation prompt to guide the model
+        continue_msg = BaseMessage.make_assistant_message(
+            role_name="assistant",
+            content=(
+                "I'll continue working on this request based on the "
+                "context summary above."
+            ),
+        )
+        self.update_memory(continue_msg, OpenAIBackendRole.ASSISTANT)
+
         # Update token count
         try:
             summary_tokens = (
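
For orientation, after this change a summarization pass should leave the context in roughly the following shape; the sketch below is illustrative only, and the concrete contents depend on the generated summary and the conversation so far:

    # Hypothetical shape of the rebuilt context after summarization:
    expected_context = [
        {"role": "assistant", "content": "[CONTEXT_SUMMARY] ..."},
        {"role": "user", "content": "[Previous Request] <last user message>"},
        {
            "role": "assistant",
            "content": "I'll continue working on this request based on "
            "the context summary above.",
        },
    ]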
@@ -1179,6 +1218,45 @@ def _serialize_tool_result(self, result: Any) -> str:
         except (TypeError, ValueError):
             return str(result)

+    def _truncate_tool_result(
+        self, func_name: str, result: Any
+    ) -> Tuple[Any, bool]:
+        r"""Truncate tool result if it exceeds the maximum token limit.
+
+        Args:
+            func_name (str): The name of the tool function called.
+            result (Any): The result returned by the tool execution.
+
+        Returns:
+            Tuple[Any, bool]: A tuple containing:
+                - The (possibly truncated) result
+                - A boolean indicating whether truncation occurred
+        """
+        serialized = self._serialize_tool_result(result)
+        # Leave 10% room for system message, user input, and model response
+        max_tokens = int(self.model_backend.token_limit * 0.9)
+        result_tokens = self._get_token_count(serialized)
+
+        if result_tokens <= max_tokens:
+            return result, False
+
+        # Reserve ~100 tokens for notice, use char-based truncation directly
+        target_tokens = max(max_tokens - 100, 100)
+        truncated = serialized[: target_tokens * 3]
+
+        notice = (
+            f"\n\n[TRUNCATED] Tool '{func_name}' output truncated "
+            f"({result_tokens} > {max_tokens} tokens). "
+            f"Tool executed successfully."
+        )
+
+        logger.warning(
+            f"Tool '{func_name}' result truncated: "
+            f"{result_tokens} -> ~{target_tokens} tokens"
+        )
+
+        return notice + truncated, True
+
     def _clean_snapshot_line(self, line: str) -> str:
         r"""Clean a single snapshot line by removing prefixes and references.

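To make the truncation budget concrete, a quick back-of-the-envelope calculation under an assumed 128,000-token model limit (the figure is illustrative, not taken from the PR):

    token_limit = 128_000                       # assumed model limit
    max_tokens = int(token_limit * 0.9)         # 115,200 tokens of headroom
    target_tokens = max(max_tokens - 100, 100)  # 115,100 after notice reserve
    max_chars = target_tokens * 3               # 345,300 characters are kept
    print(max_tokens, target_tokens, max_chars)
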
@@ -3985,6 +4063,13 @@ def _record_tool_calling(
             ToolCallingRecord: A struct containing information about
                 this tool call.
         """
+        # Truncate the tool result if it exceeds the maximum token limit;
+        # this keeps a single tool call from overflowing the context window
+        truncated_result, was_truncated = self._truncate_tool_result(
+            func_name, result
+        )
+        result_for_memory = truncated_result if was_truncated else result
+
         assist_msg = FunctionCallingMessage(
             role_name=self.role_name,
             role_type=self.role_type,
@@ -4001,7 +4086,7 @@ def _record_tool_calling(
             meta_dict=None,
             content="",
             func_name=func_name,
-            result=result,
+            result=result_for_memory,
             tool_call_id=tool_call_id,
             mask_output=mask_output,
             extra_content=extra_content,
@@ -4031,7 +4116,7 @@ def _record_tool_calling(

         # Register tool output for snapshot cleaning if enabled
         if self._enable_snapshot_clean and not mask_output and func_records:
-            serialized_result = self._serialize_tool_result(result)
+            serialized_result = self._serialize_tool_result(result_for_memory)
             self._register_tool_output_for_cache(
                 func_name,
                 tool_call_id,
@@ -4099,13 +4184,14 @@ def _record_tool_calling(
         )

         # Record information about this tool call
+        # Note: tool_record contains the original result for the caller,
+        # while result_for_memory (possibly truncated) is stored in memory
         tool_record = ToolCallingRecord(
             tool_name=func_name,
             args=args,
             result=result,
             tool_call_id=tool_call_id,
         )
-
         self._update_last_tool_call_state(tool_record)
         return tool_record

@@ -4152,9 +4238,14 @@ def _stream(
     def _get_token_count(self, content: str) -> int:
         r"""Get token count for content with fallback."""
         if hasattr(self.model_backend, 'token_counter'):
-            return len(self.model_backend.token_counter.encode(content))
-        else:
-            return len(content.split())
+            try:
+                return len(self.model_backend.token_counter.encode(content))
+            except BaseException as e:
+                logger.debug(
+                    f"Token counting failed, using char fallback: {e}"
+                )
+        # Conservative estimate: ~3 chars per token
+        return len(content) // 3

     def _stream_response(
         self,
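
The character-based fallback can be sanity-checked in isolation; a 3,000-character string is estimated at 1,000 tokens, deliberately more conservative than the common ~4 chars/token rule of thumb:

    content = "x" * 3_000
    print(len(content) // 3)  # 1000, the estimate used when the counter fails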