11import asyncio
22import json
33import logging
4- import sys
54from collections .abc import Callable
65from datetime import datetime
76from pathlib import Path
1817 load_test_case_by_name ,
1918)
2019from deepset_mcp .benchmark .runner .models import AgentConfig , TestCaseConfig
20+ from deepset_mcp .benchmark .runner .streaming import StreamingCallbackManager
2121from deepset_mcp .benchmark .runner .teardown_actions import teardown_test_case_async
2222from deepset_mcp .benchmark .runner .tracing import enable_tracing
2323
@@ -31,7 +31,7 @@ def __init__(
3131 self ,
3232 agent_config : AgentConfig ,
3333 benchmark_config : BenchmarkConfig ,
34- streaming : bool = False ,
34+ streaming : bool = True ,
3535 ):
3636 """
3737 Initialize the benchmark runner.
@@ -66,7 +66,6 @@ def __init__(
6666 f"{ self .agent_config .display_name } -{ self .commit_hash } _{ self .run_timestamp .strftime ('%Y%m%d_%H%M%S' )} "
6767 )
6868
69- # TODO: streaming is WIP; wait until https://github.com/deepset-ai/haystack-core-integrations/issues/1947 is fixed
def _create_streaming_callback(self, test_case_name: str) -> Callable[[StreamingChunk], Any]:
    """
    Create a streaming callback function for a specific test case.

    Args:
        test_case_name: Name of the test case. NOTE(review): no longer used by
            the new implementation — retained for interface compatibility with
            existing callers; StreamingCallbackManager appears to handle output
            formatting internally (confirm).

    Returns:
        An async callback accepting a ``StreamingChunk``, suitable for passing
        to the agent's streaming hook.
    """
    # Delegate chunk handling to the shared StreamingCallbackManager instead
    # of the previous ad-hoc print-based handler (removed by this change once
    # upstream streaming support was fixed).
    manager = StreamingCallbackManager()

    async def streaming_callback(chunk: StreamingChunk) -> Any:
        # Forward each chunk to the manager; it owns formatting/flushing.
        return await manager(chunk)

    return streaming_callback
10985
@@ -477,8 +453,10 @@ def _extract_assistant_message_stats(messages: list[ChatMessage]) -> dict[str, s
477453 meta = message .meta
478454 if "usage" in meta :
479455 usage = meta ["usage" ]
480- total_prompt_tokens += usage .get ("prompt_tokens" , 0 )
481- total_completion_tokens += usage .get ("completion_tokens" , 0 )
456+ prompt_tokens = usage .get ("prompt_tokens" )
457+ total_prompt_tokens += prompt_tokens if prompt_tokens is not None else 0
458+ completion_tokens = usage .get ("completion_tokens" )
459+ total_completion_tokens += completion_tokens if completion_tokens is not None else 0
482460
483461 # Extract model (should be consistent across messages)
484462 if "model" in meta and model is None :
0 commit comments