@@ -448,10 +448,12 @@ class ChatAgent(BaseAgent):
448448 step_timeout (Optional[float], optional): Timeout in seconds for the
449449 entire step operation. If None, no timeout is applied.
450450 (default: :obj:`None`)
451- stream_accumulate (bool, optional): When True, partial streaming
452- updates return accumulated content (current behavior). When False,
453- partial updates return only the incremental delta. (default:
454- :obj:`True`)
451+ stream_accumulate (Optional[bool], optional): When True, partial
452+ streaming updates return accumulated content. When False, partial
453+ updates return only the incremental delta (recommended).
454+ If None, False is used and a deprecation warning is issued once
455+ per agent when streaming first runs (the old default was True).
456+ (default: :obj:`None`, which behaves as :obj:`False`)
455457 summary_window_ratio (float, optional): Maximum fraction of the total
456458 context window that can be occupied by summary information. Used
457459 to limit how much of the model's context is reserved for
@@ -501,7 +503,7 @@ def __init__(
501503 retry_attempts : int = 3 ,
502504 retry_delay : float = 1.0 ,
503505 step_timeout : Optional [float ] = Constants .TIMEOUT_THRESHOLD ,
504- stream_accumulate : bool = True ,
506+ stream_accumulate : Optional [ bool ] = None ,
505507 summary_window_ratio : float = 0.6 ,
506508 ) -> None :
507509 if isinstance (model , ModelManager ):
@@ -615,7 +617,13 @@ def __init__(
615617 self .step_timeout = step_timeout
616618 self ._context_utility : Optional [ContextUtility ] = None
617619 self ._context_summary_agent : Optional ["ChatAgent" ] = None
618- self .stream_accumulate = stream_accumulate
620+
621+ # Store whether user explicitly set stream_accumulate
622+ # Warning will be issued only when streaming is actually used
623+ self ._stream_accumulate_explicit = stream_accumulate is not None
624+ self .stream_accumulate = (
625+ stream_accumulate if stream_accumulate is not None else False
626+ )
619627 self ._last_tool_call_record : Optional [ToolCallingRecord ] = None
620628 self ._last_tool_call_signature : Optional [str ] = None
621629 self .summary_window_ratio = summary_window_ratio
@@ -4020,6 +4028,28 @@ def _get_token_count(self, content: str) -> int:
40204028 # Conservative estimate: ~3 chars per token
40214029 return len (content ) // 3
40224030
def _warn_stream_accumulate_deprecation(self) -> None:
    r"""Warn that the implicit ``stream_accumulate`` default has changed.

    Returns immediately when ``self._stream_accumulate_explicit`` is
    truthy. Otherwise a :obj:`DeprecationWarning` is emitted and the flag
    is then set to :obj:`True`, so any later call on the same instance
    returns without warning again.
    """
    if self._stream_accumulate_explicit:
        return

    import warnings

    message = (
        "The default value of 'stream_accumulate' has changed from "
        "True to False. In streaming mode, each chunk now returns "
        "only the incremental delta instead of accumulated content. "
        "To suppress this warning, explicitly set "
        "stream_accumulate=False (recommended) or stream_accumulate="
        "True if you need the old behavior."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=5)

    # The "explicit" flag doubles as the already-warned latch: setting it
    # here makes every subsequent call take the early return above.
    self._stream_accumulate_explicit = True
4052+
40234053 def _stream_response (
40244054 self ,
40254055 openai_messages : List [OpenAIMessage ],
@@ -4028,6 +4058,8 @@ def _stream_response(
40284058 ) -> Generator [ChatAgentResponse , None , None ]:
40294059 r"""Internal method to handle streaming responses with tool calls."""
40304060
4061+ self ._warn_stream_accumulate_deprecation ()
4062+
40314063 tool_call_records : List [ToolCallingRecord ] = []
40324064 accumulated_tool_calls : Dict [str , Any ] = {}
40334065 step_token_usage = self ._create_token_usage_tracker ()
@@ -4346,12 +4378,20 @@ def _process_stream_chunks_with_accumulator(
43464378 content_accumulator .get_full_reasoning_content ()
43474379 or None
43484380 )
4381+ # In delta mode, final response content should be empty
4382+ # since all content was already yielded incrementally
4383+ display_content = (
4384+ final_content if self .stream_accumulate else ""
4385+ )
4386+ display_reasoning = (
4387+ final_reasoning if self .stream_accumulate else None
4388+ )
43494389 final_message = BaseMessage (
43504390 role_name = self .role_name ,
43514391 role_type = self .role_type ,
43524392 meta_dict = {},
4353- content = final_content ,
4354- reasoning_content = final_reasoning ,
4393+ content = display_content ,
4394+ reasoning_content = display_reasoning ,
43554395 )
43564396
43574397 if response_format :
@@ -4402,21 +4442,60 @@ def _accumulate_tool_calls(
44024442 bool: True if any tool call is complete, False otherwise.
44034443 """
44044444
4445+ index_map_key = '_index_to_key_map'
4446+ if index_map_key not in accumulated_tool_calls :
4447+ accumulated_tool_calls [index_map_key ] = {}
4448+ index_map = accumulated_tool_calls [index_map_key ]
4449+
44054450 for delta_tool_call in tool_call_deltas :
4406- index = delta_tool_call . index
4451+ index = getattr ( delta_tool_call , ' index' , None )
44074452 tool_call_id = getattr (delta_tool_call , 'id' , None )
44084453
4454+ # Determine entry key
4455+ if index is not None :
4456+ index_str = str (index )
4457+ if tool_call_id :
4458+ # New ID provided: check if it differs from current mapping
4459+ current_key = index_map .get (index_str )
4460+ if current_key is None :
4461+ # First time seeing this index, use tool_call_id as key
4462+ entry_key = tool_call_id
4463+ elif current_key in accumulated_tool_calls :
4464+ existing_id = accumulated_tool_calls [current_key ].get (
4465+ 'id'
4466+ )
4467+ if existing_id and existing_id != tool_call_id :
4468+ # ID changed: use new ID as key
4469+ entry_key = tool_call_id
4470+ else :
4471+ # No existing ID or same ID: keep current key
4472+ entry_key = current_key
4473+ else :
4474+ entry_key = current_key
4475+ # Update mapping
4476+ index_map [index_str ] = entry_key
4477+ else :
4478+ # No ID in this chunk: use existing mapping or index as
4479+ # string
4480+ entry_key = index_map .get (index_str , index_str )
4481+ if index_str not in index_map :
4482+ index_map [index_str ] = entry_key
4483+ elif tool_call_id is not None :
4484+ entry_key = tool_call_id
4485+ else :
4486+ entry_key = '0' # Default fallback as string
4487+
44094488 # Initialize tool call entry if not exists
4410- if index not in accumulated_tool_calls :
4411- accumulated_tool_calls [index ] = {
4489+ if entry_key not in accumulated_tool_calls :
4490+ accumulated_tool_calls [entry_key ] = {
44124491 'id' : '' ,
44134492 'type' : 'function' ,
44144493 'function' : {'name' : '' , 'arguments' : '' },
44154494 'extra_content' : None ,
44164495 'complete' : False ,
44174496 }
44184497
4419- tool_call_entry = accumulated_tool_calls [index ]
4498+ tool_call_entry = accumulated_tool_calls [entry_key ]
44204499
44214500 # Accumulate tool call data
44224501 if tool_call_id :
@@ -4448,6 +4527,9 @@ def _accumulate_tool_calls(
44484527 # Check if any tool calls are complete
44494528 any_complete = False
44504529 for _index , tool_call_entry in accumulated_tool_calls .items ():
4530+ # Skip internal mapping key
4531+ if _index == '_index_to_key_map' :
4532+ continue
44514533 if (
44524534 tool_call_entry ['id' ]
44534535 and tool_call_entry ['function' ]['name' ]
@@ -4475,6 +4557,9 @@ def _execute_tools_sync_with_status_accumulator(
44754557
44764558 tool_calls_to_execute = []
44774559 for _tool_call_index , tool_call_data in accumulated_tool_calls .items ():
4560+ # Skip internal mapping key
4561+ if _tool_call_index == '_index_to_key_map' :
4562+ continue
44784563 if tool_call_data .get ('complete' , False ):
44794564 tool_calls_to_execute .append (tool_call_data )
44804565
@@ -4936,6 +5021,8 @@ async def _astream_response(
49365021 ) -> AsyncGenerator [ChatAgentResponse , None ]:
49375022 r"""Async method to handle streaming responses with tool calls."""
49385023
5024+ self ._warn_stream_accumulate_deprecation ()
5025+
49395026 tool_call_records : List [ToolCallingRecord ] = []
49405027 accumulated_tool_calls : Dict [str , Any ] = {}
49415028 step_token_usage = self ._create_token_usage_tracker ()
@@ -5310,12 +5397,20 @@ async def _aprocess_stream_chunks_with_accumulator(
53105397 content_accumulator .get_full_reasoning_content ()
53115398 or None
53125399 )
5400+ # In delta mode, final response content should be empty
5401+ # since all content was already yielded incrementally
5402+ display_content = (
5403+ final_content if self .stream_accumulate else ""
5404+ )
5405+ display_reasoning = (
5406+ final_reasoning if self .stream_accumulate else None
5407+ )
53135408 final_message = BaseMessage (
53145409 role_name = self .role_name ,
53155410 role_type = self .role_type ,
53165411 meta_dict = {},
5317- content = final_content ,
5318- reasoning_content = final_reasoning ,
5412+ content = display_content ,
5413+ reasoning_content = display_reasoning ,
53195414 )
53205415
53215416 if response_format :
@@ -5363,6 +5458,9 @@ async def _execute_tools_async_with_status_accumulator(
53635458 # statuses immediately
53645459 tool_tasks = []
53655460 for _tool_call_index , tool_call_data in accumulated_tool_calls .items ():
5461+ # Skip internal mapping key
5462+ if _tool_call_index == '_index_to_key_map' :
5463+ continue
53665464 if tool_call_data .get ('complete' , False ):
53675465 function_name = tool_call_data ['function' ]['name' ]
53685466 try :
0 commit comments