Commit 17d863c

enhance: truncate long toolkit responses (#3604)
1 parent d449281 commit 17d863c

2 files changed: +117 -29 lines changed

camel/agents/chat_agent.py

Lines changed: 116 additions & 25 deletions
@@ -398,6 +398,10 @@ class ChatAgent(BaseAgent):
             window that triggers summarization. If `None`, will trigger
             summarization when the context window is full.
             (default: :obj:`None`)
+        token_limit (int, optional): The maximum number of tokens allowed for
+            the context window. If `None`, uses the model's default token
+            limit. This can be used to restrict the context size below the
+            model's maximum capacity. (default: :obj:`None`)
         output_language (str, optional): The language to be output by the
             agent. (default: :obj:`None`)
         tools (Optional[List[Union[FunctionTool, Callable]]], optional): List
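
Aside (not part of the commit): the new docstring entry corresponds to a `token_limit` keyword on `ChatAgent.__init__` and a matching read-only property (added further down). A hypothetical usage sketch; it assumes a default model backend can be constructed in your environment (e.g. an OpenAI key is configured), and the printed value may be the model's own limit if that is smaller:

from camel.agents import ChatAgent

agent = ChatAgent(
    system_message="You are a helpful assistant.",
    token_limit=8_000,  # restrict the context window below the model maximum
)
print(agent.token_limit)  # 8000, or the model's limit if that is smaller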
@@ -529,10 +533,24 @@ def __init__(
         self._tool_output_history: List[_ToolOutputHistoryEntry] = []

         # Set up memory
+        # Use user-provided token_limit if valid, otherwise use model's default
+        model_token_limit = self.model_backend.token_limit
+        if token_limit is not None and token_limit > model_token_limit:
+            logger.warning(
+                f"Provided token_limit ({token_limit}) exceeds model's "
+                f"maximum capacity ({model_token_limit}). "
+                f"Using model's token_limit instead."
+            )
+        effective_token_limit = (
+            min(token_limit, model_token_limit)
+            if token_limit is not None
+            else model_token_limit
+        )
         context_creator = ScoreBasedContextCreator(
             self.model_backend.token_counter,
-            self.model_backend.token_limit,
+            effective_token_limit,
         )
+        self._token_limit = effective_token_limit

         self._memory: AgentMemory = memory or ChatHistoryMemory(
             context_creator,
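
The capping rule above is easy to check in isolation: a caller-supplied limit only takes effect when it does not exceed what the backend reports, otherwise the model's own limit wins. A minimal standalone sketch of that rule; `resolve_token_limit`, `requested_limit`, and `model_token_limit` are illustrative names, not CAMEL API:

import logging
from typing import Optional

logger = logging.getLogger(__name__)


def resolve_token_limit(
    requested_limit: Optional[int], model_token_limit: int
) -> int:
    """Return the effective context window size.

    Mirrors the __init__ logic above: an oversized request is logged and
    capped at the model limit; otherwise the smaller of the two values wins.
    """
    if requested_limit is not None and requested_limit > model_token_limit:
        logger.warning(
            "Provided token_limit (%s) exceeds model capacity (%s); "
            "using the model limit instead.",
            requested_limit,
            model_token_limit,
        )
    if requested_limit is None:
        return model_token_limit
    return min(requested_limit, model_token_limit)


# A 4k request against an 8k model keeps 4k; a 1M request is capped at 8k.
assert resolve_token_limit(4_000, 8_000) == 4_000
assert resolve_token_limit(1_000_000, 8_000) == 8_000
assert resolve_token_limit(None, 8_000) == 8_000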
@@ -775,6 +793,11 @@ def tool_dict(self) -> Dict[str, FunctionTool]:
         r"""Returns a dictionary of internal tools."""
         return self._internal_tools

+    @property
+    def token_limit(self) -> int:
+        r"""Returns the token limit for the agent's context window."""
+        return self._token_limit
+
     @property
     def output_language(self) -> Optional[str]:
         r"""Returns the output language for the agent."""
@@ -1095,14 +1118,19 @@ def _update_memory_with_summary(
         summary_content: str = summary

         existing_summaries = []
-        if not include_summaries:
-            messages, _ = self.memory.get_context()
-            for msg in messages:
-                content = msg.get('content', '')
-                if isinstance(content, str) and content.startswith(
-                    '[CONTEXT_SUMMARY]'
-                ):
-                    existing_summaries.append(msg)
+        last_user_message: Optional[str] = None
+        messages, _ = self.memory.get_context()
+        for msg in messages:
+            content = msg.get('content', '')
+            role = msg.get('role', '')
+            if role == 'user' and isinstance(content, str) and content:
+                last_user_message = content
+            if (
+                not include_summaries
+                and isinstance(content, str)
+                and content.startswith('[CONTEXT_SUMMARY]')
+            ):
+                existing_summaries.append(msg)

         # Clear memory
         self.clear_memory()
@@ -1122,16 +1150,27 @@ def _update_memory_with_summary(
             role_name="assistant", content=summary_content
         )
         self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
-        input_message = BaseMessage.make_assistant_message(
-            role_name="assistant",
-            content=(
-                "Please continue the conversation from "
-                "where we left it off without asking the user any further "
-                "questions. Continue with the last task that you were "
-                "asked to work on."
-            ),
-        )
-        self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
+
+        # Restore last user message to maintain conversation structure
+        if last_user_message:
+            if not last_user_message.startswith("[Previous Request]"):
+                last_user_message = f"[Previous Request] {last_user_message}"
+            user_msg = BaseMessage.make_user_message(
+                role_name="user",
+                content=last_user_message,
+            )
+            self.update_memory(user_msg, OpenAIBackendRole.USER)
+
+        # Add continuation prompt to guide the model
+        continue_msg = BaseMessage.make_assistant_message(
+            role_name="assistant",
+            content=(
+                "I'll continue working on this request based on the "
+                "context summary above."
+            ),
+        )
+        self.update_memory(continue_msg, OpenAIBackendRole.ASSISTANT)
+
         # Update token count
         try:
             summary_tokens = (
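
For illustration only: the restore step above scans the pre-summary context for the newest user turn and re-inserts it with a "[Previous Request]" prefix, so the rebuilt history keeps a user/assistant shape after memory is cleared. A minimal sketch of that scan over plain OpenAI-style dicts; `find_last_user_message` and `tag_previous_request` are hypothetical helpers standing in for CAMEL's BaseMessage handling:

from typing import Dict, List, Optional


def find_last_user_message(messages: List[Dict[str, str]]) -> Optional[str]:
    """Return the newest non-empty user content from an OpenAI-style list."""
    last_user_message: Optional[str] = None
    for msg in messages:
        if msg.get('role') == 'user' and msg.get('content'):
            last_user_message = msg['content']
    return last_user_message


def tag_previous_request(content: str) -> str:
    """Prefix the restored message exactly once, as the diff does."""
    if not content.startswith("[Previous Request]"):
        return f"[Previous Request] {content}"
    return content


history = [
    {"role": "user", "content": "Summarize the report."},
    {"role": "assistant", "content": "Working on it..."},
    {"role": "user", "content": "Also list the action items."},
]
restored = find_last_user_message(history)
assert restored == "Also list the action items."
assert tag_previous_request(restored) == (
    "[Previous Request] Also list the action items."
)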
@@ -1179,6 +1218,45 @@ def _serialize_tool_result(self, result: Any) -> str:
         except (TypeError, ValueError):
             return str(result)

+    def _truncate_tool_result(
+        self, func_name: str, result: Any
+    ) -> Tuple[Any, bool]:
+        r"""Truncate tool result if it exceeds the maximum token limit.
+
+        Args:
+            func_name (str): The name of the tool function called.
+            result (Any): The result returned by the tool execution.
+
+        Returns:
+            Tuple[Any, bool]: A tuple containing:
+                - The (possibly truncated) result
+                - A boolean indicating whether truncation occurred
+        """
+        serialized = self._serialize_tool_result(result)
+        # Leave 10% room for system message, user input, and model response
+        max_tokens = int(self.model_backend.token_limit * 0.9)
+        result_tokens = self._get_token_count(serialized)
+
+        if result_tokens <= max_tokens:
+            return result, False
+
+        # Reserve ~100 tokens for notice, use char-based truncation directly
+        target_tokens = max(max_tokens - 100, 100)
+        truncated = serialized[: target_tokens * 3]
+
+        notice = (
+            f"\n\n[TRUNCATED] Tool '{func_name}' output truncated "
+            f"({result_tokens} > {max_tokens} tokens). "
+            f"Tool executed successfully."
+        )
+
+        logger.warning(
+            f"Tool '{func_name}' result truncated: "
+            f"{result_tokens} -> ~{target_tokens} tokens"
+        )
+
+        return notice + truncated, True
+
     def _clean_snapshot_line(self, line: str) -> str:
         r"""Clean a single snapshot line by removing prefixes and references.

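
The arithmetic in the new _truncate_tool_result is worth seeing on its own: keep roughly 90% of the context window for the tool output, reserve about 100 tokens for the notice, then cut by characters assuming ~3 characters per token. A self-contained sketch under those same assumptions; `truncate_output` and its `count_tokens` callback are illustrative stand-ins, not CAMEL functions:

from typing import Callable, Tuple


def truncate_output(
    func_name: str,
    serialized: str,
    context_limit: int,
    count_tokens: Callable[[str], int],
) -> Tuple[str, bool]:
    """Cap a serialized tool output at ~90% of the context window."""
    max_tokens = int(context_limit * 0.9)
    result_tokens = count_tokens(serialized)
    if result_tokens <= max_tokens:
        return serialized, False

    # Keep ~100 tokens of headroom for the notice; ~3 chars per token.
    target_tokens = max(max_tokens - 100, 100)
    truncated = serialized[: target_tokens * 3]
    notice = (
        f"\n\n[TRUNCATED] Tool '{func_name}' output truncated "
        f"({result_tokens} > {max_tokens} tokens). "
        f"Tool executed successfully."
    )
    return notice + truncated, True


# Example with a crude whitespace token counter and a tiny 200-token window.
text, was_truncated = truncate_output(
    "read_file", "word " * 500, 200, lambda s: len(s.split())
)
assert was_truncated and text.startswith("\n\n[TRUNCATED]")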
@@ -3985,6 +4063,13 @@ def _record_tool_calling(
             ToolCallingRecord: A struct containing information about
                 this tool call.
         """
+        # Truncate tool result if it exceeds the maximum token limit
+        # This prevents single tool calls from exceeding context window
+        truncated_result, was_truncated = self._truncate_tool_result(
+            func_name, result
+        )
+        result_for_memory = truncated_result if was_truncated else result
+
         assist_msg = FunctionCallingMessage(
             role_name=self.role_name,
             role_type=self.role_type,
@@ -4001,7 +4086,7 @@ def _record_tool_calling(
             meta_dict=None,
             content="",
             func_name=func_name,
-            result=result,
+            result=result_for_memory,
             tool_call_id=tool_call_id,
             mask_output=mask_output,
             extra_content=extra_content,
@@ -4031,7 +4116,7 @@ def _record_tool_calling(

         # Register tool output for snapshot cleaning if enabled
         if self._enable_snapshot_clean and not mask_output and func_records:
-            serialized_result = self._serialize_tool_result(result)
+            serialized_result = self._serialize_tool_result(result_for_memory)
             self._register_tool_output_for_cache(
                 func_name,
                 tool_call_id,
@@ -4099,13 +4184,14 @@ def _record_tool_calling(
         )

         # Record information about this tool call
+        # Note: tool_record contains the original result for the caller,
+        # while result_for_memory (possibly truncated) is stored in memory
         tool_record = ToolCallingRecord(
             tool_name=func_name,
             args=args,
             result=result,
             tool_call_id=tool_call_id,
         )
-
         self._update_last_tool_call_state(tool_record)
         return tool_record

@@ -4152,9 +4238,14 @@ def _stream(
     def _get_token_count(self, content: str) -> int:
         r"""Get token count for content with fallback."""
         if hasattr(self.model_backend, 'token_counter'):
-            return len(self.model_backend.token_counter.encode(content))
-        else:
-            return len(content.split())
+            try:
+                return len(self.model_backend.token_counter.encode(content))
+            except BaseException as e:
+                logger.debug(
+                    f"Token counting failed, using char fallback: {e}"
+                )
+        # Conservative estimate: ~3 chars per token
+        return len(content) // 3

     def _stream_response(
         self,
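
A standalone reading of the fallback above, assuming only that an encoder returns a sequence of token ids: if no encoder is available, or encoding raises, fall back to the same conservative ~3 characters-per-token estimate. The name `count_tokens` and the `encode` parameter are illustrative:

import logging
from typing import Callable, Optional, Sequence

logger = logging.getLogger(__name__)


def count_tokens(
    content: str, encode: Optional[Callable[[str], Sequence]] = None
) -> int:
    """Count tokens, falling back to a character-based estimate."""
    if encode is not None:
        try:
            return len(encode(content))
        except Exception as e:  # the diff catches BaseException
            logger.debug("Token counting failed, using char fallback: %s", e)
    # Conservative estimate: ~3 chars per token
    return len(content) // 3


assert count_tokens("abcdef" * 10) == 20          # 60 chars // 3
assert count_tokens("a b c", encode=str.split) == 3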

camel/models/base_model.py

Lines changed: 1 addition & 4 deletions
@@ -536,10 +536,7 @@ def token_limit(self) -> int:
         Returns:
             int: The maximum token limit for the given model.
         """
-        return (
-            self.model_config_dict.get("max_tokens")
-            or self.model_type.token_limit
-        )
+        return self.model_type.token_limit

     @property
     def stream(self) -> bool:
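
Reading of this change (not text from the commit): `max_tokens` in an OpenAI-style model config usually caps the completion length, so the removed fallback could report a context window as small as the completion cap; after the change, `token_limit` always reflects the model type's context size. A hypothetical before/after comparison with made-up numbers:

# Hypothetical backend with a 128k-token model and a 1k completion cap.
model_config_dict = {"max_tokens": 1_024}
model_type_token_limit = 128_000

# Old behaviour: max_tokens shadowed the real context window.
old_token_limit = model_config_dict.get("max_tokens") or model_type_token_limit
assert old_token_limit == 1_024

# New behaviour: token_limit reflects the model's context window.
new_token_limit = model_type_token_limit
assert new_token_limit == 128_000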
