@@ -398,6 +398,10 @@ class ChatAgent(BaseAgent):
             window that triggers summarization. If `None`, will trigger
             summarization when the context window is full.
             (default: :obj:`None`)
+        token_limit (int, optional): The maximum number of tokens allowed for
+            the context window. If `None`, uses the model's default token
+            limit. This can be used to restrict the context size below the
+            model's maximum capacity. (default: :obj:`None`)
         output_language (str, optional): The language to be output by the
             agent. (default: :obj:`None`)
         tools (Optional[List[Union[FunctionTool, Callable]]], optional): List
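
A minimal usage sketch for the new parameter (illustrative only: the system-message string and the 4096 figure are placeholders, and the import path follows the library's usual convention):

    from camel.agents import ChatAgent

    # Cap the agent's context at 4,096 tokens even if the backing model
    # allows more; values above the model's capacity are clamped (see the
    # __init__ hunk below).
    agent = ChatAgent(
        "You are a helpful assistant.",  # placeholder system message
        token_limit=4096,
    )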
@@ -529,10 +533,24 @@ def __init__(
         self._tool_output_history: List[_ToolOutputHistoryEntry] = []

         # Set up memory
+        # Use user-provided token_limit if valid, otherwise use model's default
+        model_token_limit = self.model_backend.token_limit
+        if token_limit is not None and token_limit > model_token_limit:
+            logger.warning(
+                f"Provided token_limit ({token_limit}) exceeds model's "
+                f"maximum capacity ({model_token_limit}). "
+                f"Using model's token_limit instead."
+            )
+        effective_token_limit = (
+            min(token_limit, model_token_limit)
+            if token_limit is not None
+            else model_token_limit
+        )
         context_creator = ScoreBasedContextCreator(
             self.model_backend.token_counter,
-            self.model_backend.token_limit,
+            effective_token_limit,
         )
+        self._token_limit = effective_token_limit

         self._memory: AgentMemory = memory or ChatHistoryMemory(
             context_creator,
@@ -775,6 +793,11 @@ def tool_dict(self) -> Dict[str, FunctionTool]:
         r"""Returns a dictionary of internal tools."""
         return self._internal_tools

+    @property
+    def token_limit(self) -> int:
+        r"""Returns the token limit for the agent's context window."""
+        return self._token_limit
+
     @property
     def output_language(self) -> Optional[str]:
         r"""Returns the output language for the agent."""
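
Because an oversized request is clamped at construction time, the new read-only property is the way to see which limit is actually in effect. A small sketch (the 10**9 value is deliberately absurd to force the clamp; the printed number depends on the model backend's own limit):

    agent = ChatAgent("You are a helpful assistant.", token_limit=10**9)
    # The warning from __init__ fires and the value is clamped, so this
    # prints the backend's own token limit rather than 10**9.
    print(agent.token_limit)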
@@ -1095,14 +1118,19 @@ def _update_memory_with_summary(
         summary_content: str = summary

         existing_summaries = []
-        if not include_summaries:
-            messages, _ = self.memory.get_context()
-            for msg in messages:
-                content = msg.get('content', '')
-                if isinstance(content, str) and content.startswith(
-                    '[CONTEXT_SUMMARY]'
-                ):
-                    existing_summaries.append(msg)
+        last_user_message: Optional[str] = None
+        messages, _ = self.memory.get_context()
+        for msg in messages:
+            content = msg.get('content', '')
+            role = msg.get('role', '')
+            if role == 'user' and isinstance(content, str) and content:
+                last_user_message = content
+            if (
+                not include_summaries
+                and isinstance(content, str)
+                and content.startswith('[CONTEXT_SUMMARY]')
+            ):
+                existing_summaries.append(msg)

         # Clear memory
         self.clear_memory()
@@ -1122,16 +1150,27 @@ def _update_memory_with_summary(
             role_name="assistant", content=summary_content
         )
         self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
-        input_message = BaseMessage.make_assistant_message(
-            role_name="assistant",
-            content=(
-                "Please continue the conversation from "
-                "where we left it off without asking the user any further "
-                "questions. Continue with the last task that you were "
-                "asked to work on."
-            ),
-        )
-        self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
+
+        # Restore last user message to maintain conversation structure
+        if last_user_message:
+            if not last_user_message.startswith("[Previous Request]"):
+                last_user_message = f"[Previous Request] {last_user_message}"
+            user_msg = BaseMessage.make_user_message(
+                role_name="user",
+                content=last_user_message,
+            )
+            self.update_memory(user_msg, OpenAIBackendRole.USER)
+
+        # Add continuation prompt to guide the model
+        continue_msg = BaseMessage.make_assistant_message(
+            role_name="assistant",
+            content=(
+                "I'll continue working on this request based on the "
+                "context summary above."
+            ),
+        )
+        self.update_memory(continue_msg, OpenAIBackendRole.ASSISTANT)
+
         # Update token count
         try:
             summary_tokens = (
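
For orientation, after this change a summarization pass should leave the context in roughly the following shape; the sketch below is illustrative only, and the concrete contents depend on the generated summary and the conversation so far:

    # Hypothetical shape of the rebuilt context after summarization:
    expected_context = [
        {"role": "assistant", "content": "[CONTEXT_SUMMARY] ..."},
        {"role": "user", "content": "[Previous Request] <last user message>"},
        {
            "role": "assistant",
            "content": "I'll continue working on this request based on "
            "the context summary above.",
        },
    ]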
@@ -1179,6 +1218,45 @@ def _serialize_tool_result(self, result: Any) -> str:
         except (TypeError, ValueError):
             return str(result)

+    def _truncate_tool_result(
+        self, func_name: str, result: Any
+    ) -> Tuple[Any, bool]:
+        r"""Truncate tool result if it exceeds the maximum token limit.
+
+        Args:
+            func_name (str): The name of the tool function called.
+            result (Any): The result returned by the tool execution.
+
+        Returns:
+            Tuple[Any, bool]: A tuple containing:
+                - The (possibly truncated) result
+                - A boolean indicating whether truncation occurred
+        """
+        serialized = self._serialize_tool_result(result)
+        # Leave 10% room for system message, user input, and model response
+        max_tokens = int(self.model_backend.token_limit * 0.9)
+        result_tokens = self._get_token_count(serialized)
+
+        if result_tokens <= max_tokens:
+            return result, False
+
+        # Reserve ~100 tokens for notice, use char-based truncation directly
+        target_tokens = max(max_tokens - 100, 100)
+        truncated = serialized[: target_tokens * 3]
+
+        notice = (
+            f"\n\n[TRUNCATED] Tool '{func_name}' output truncated "
+            f"({result_tokens} > {max_tokens} tokens). "
+            f"Tool executed successfully."
+        )
+
+        logger.warning(
+            f"Tool '{func_name}' result truncated: "
+            f"{result_tokens} -> ~{target_tokens} tokens"
+        )
+
+        return notice + truncated, True
+
     def _clean_snapshot_line(self, line: str) -> str:
         r"""Clean a single snapshot line by removing prefixes and references.

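To make the truncation budget concrete, a quick back-of-the-envelope calculation under an assumed 128,000-token model limit (the figure is illustrative, not taken from the PR):

    token_limit = 128_000                       # assumed model limit
    max_tokens = int(token_limit * 0.9)         # 115,200 tokens of headroom
    target_tokens = max(max_tokens - 100, 100)  # 115,100 after notice reserve
    max_chars = target_tokens * 3               # 345,300 characters are kept
    print(max_tokens, target_tokens, max_chars)
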
@@ -3985,6 +4063,13 @@ def _record_tool_calling(
             ToolCallingRecord: A struct containing information about
                 this tool call.
         """
+        # Truncate the tool result if it exceeds the maximum token limit;
+        # this keeps a single tool call from overflowing the context window
+        truncated_result, was_truncated = self._truncate_tool_result(
+            func_name, result
+        )
+        result_for_memory = truncated_result if was_truncated else result
+
         assist_msg = FunctionCallingMessage(
             role_name=self.role_name,
             role_type=self.role_type,
@@ -4001,7 +4086,7 @@ def _record_tool_calling(
             meta_dict=None,
             content="",
             func_name=func_name,
-            result=result,
+            result=result_for_memory,
             tool_call_id=tool_call_id,
             mask_output=mask_output,
             extra_content=extra_content,
@@ -4031,7 +4116,7 @@ def _record_tool_calling(

         # Register tool output for snapshot cleaning if enabled
         if self._enable_snapshot_clean and not mask_output and func_records:
-            serialized_result = self._serialize_tool_result(result)
+            serialized_result = self._serialize_tool_result(result_for_memory)
             self._register_tool_output_for_cache(
                 func_name,
                 tool_call_id,
@@ -4099,13 +4184,14 @@ def _record_tool_calling(
         )

         # Record information about this tool call
+        # Note: tool_record contains the original result for the caller,
+        # while result_for_memory (possibly truncated) is stored in memory
         tool_record = ToolCallingRecord(
             tool_name=func_name,
             args=args,
             result=result,
             tool_call_id=tool_call_id,
         )
-
         self._update_last_tool_call_state(tool_record)
         return tool_record

@@ -4152,9 +4238,14 @@ def _stream(
     def _get_token_count(self, content: str) -> int:
         r"""Get token count for content with fallback."""
         if hasattr(self.model_backend, 'token_counter'):
-            return len(self.model_backend.token_counter.encode(content))
-        else:
-            return len(content.split())
+            try:
+                return len(self.model_backend.token_counter.encode(content))
+            except BaseException as e:
+                logger.debug(
+                    f"Token counting failed, using char fallback: {e}"
+                )
+        # Conservative estimate: ~3 chars per token
+        return len(content) // 3

     def _stream_response(
         self,
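
The character-based fallback can be sanity-checked in isolation; a 3,000-character string is estimated at 1,000 tokens, deliberately more conservative than the common ~4 chars/token rule of thumb:

    content = "x" * 3_000
    print(len(content) // 3)  # 1000, the estimate used when the counter fails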