@@ -34,7 +34,7 @@ def safe_float(value: Any) -> float:
3434 try :
3535 return float (value )
3636 except (TypeError , ValueError ):
37- return 0
37+ return 0.0
3838
3939def summarize (items : List [float ], percentiles : List [float ]) -> Optional [dict [str , float ]]:
4040 if len (items ) == 0 :
@@ -92,10 +92,10 @@ def calculate_slo_metrics(
9292 ttft_results = []
9393 tpot_results = []
9494
95- # Iterate through metrics and their corresponding calculated values
96- for i , m in enumerate ( metrics ):
97- ttft = ttft_values [ i ]
98- tpot = tpot_values [ i ]
95+ # Iterate through metrics and their corresponding calculated values.
96+ # zip(..., strict=True) raises ValueError if the sequences differ in length,
97+ # so a mismatch fails loudly instead of silently pairing the wrong values.
98+ for m , ttft , tpot in zip ( metrics , ttft_values , tpot_values , strict = True ):
9999
100100 # Check TTFT SLO (Only if streamable / ttft exists)
101101 ttft_met = None
@@ -243,7 +243,7 @@ def summarize_requests(
243243 total_time = max (x .end_time for x in metrics ) - min (x .start_time for x in metrics )
244244
245245 schedule_deltas = [x .start_time - x .scheduled_time for x in metrics ]
246- send_duration = max ([ x .start_time for x in metrics ] ) - min ([ x .start_time for x in metrics ] )
246+ send_duration = max (x .start_time for x in metrics ) - min (x .start_time for x in metrics )
247247
248248 load_summary : dict [Any , Any ] = {
249249 "count" : len (metrics ),
@@ -325,13 +325,29 @@ def summarize_requests(
325325 "inter_token_latency" : summarize (inter_token_latencies , percentiles ),
326326 },
327327 "throughput" : {
328- "input_tokens_per_sec" : sum (safe_float (x .info .input_tokens ) for x in all_successful ) / total_time ,
329- "output_tokens_per_sec" : sum (safe_float (x .info .output_tokens ) for x in all_successful ) / total_time ,
330- "total_tokens_per_sec" : sum (
331- safe_float (x .info .input_tokens ) + safe_float (x .info .output_tokens )
332- for x in all_successful
333- ) / total_time ,
334- "requests_per_sec" : len (all_successful ) / total_time ,
328+ "input_tokens_per_sec" : (
329+ sum (safe_float (x .info .input_tokens ) for x in all_successful ) / total_time
330+ if total_time > 0
331+ else 0.0
332+ ),
333+ "output_tokens_per_sec" : (
334+ sum (safe_float (x .info .output_tokens ) for x in all_successful ) / total_time
335+ if total_time > 0
336+ else 0.0
337+ ),
338+ "total_tokens_per_sec" : (
339+ sum (
340+ safe_float (x .info .input_tokens ) + safe_float (x .info .output_tokens )
341+ for x in all_successful
342+ ) / total_time
343+ if total_time > 0
344+ else 0.0
345+ ),
346+ "requests_per_sec" : (
347+ len (all_successful ) / total_time
348+ if total_time > 0
349+ else 0.0
350+ ),
335351 },
336352 "prompt_len" : summarize ([safe_float (success .info .input_tokens ) for success in all_successful ], percentiles ),
337353 "output_len" : summarize ([float (v ) for success in all_successful if (v := success .info .output_tokens ) is not None ], percentiles ),
0 commit comments