Skip to content

Commit c5db12e

Browse files
committed
calculate tpot, ntpot, ttft, and slo metrics in post processing and generate_distribution in shared_prefix_datagen 1
1 parent 91172d3 commit c5db12e

File tree

1 file changed

+29
-13
lines changed

1 file changed

+29
-13
lines changed

inference_perf/reportgen/base.py

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def safe_float(value: Any) -> float:
3434
try:
3535
return float(value)
3636
except (TypeError, ValueError):
37-
return 0
37+
return 0.0
3838

3939
def summarize(items: List[float], percentiles: List[float]) -> Optional[dict[str, float]]:
4040
if len(items) == 0:
@@ -92,10 +92,10 @@ def calculate_slo_metrics(
9292
ttft_results = []
9393
tpot_results = []
9494

95-
# Iterate through metrics and their corresponding calculated values
96-
for i, m in enumerate(metrics):
97-
ttft = ttft_values[i]
98-
tpot = tpot_values[i]
95+
# Iterate through metrics and their corresponding calculated values.
96+
# Using zip(..., strict=True) ensures that all sequences have the same length
97+
# and prevents misalignment or IndexError if they do not.
98+
for m, ttft, tpot in zip(metrics, ttft_values, tpot_values, strict=True):
9999

100100
# Check TTFT SLO (Only if streamable / ttft exists)
101101
ttft_met = None
@@ -243,7 +243,7 @@ def summarize_requests(
243243
total_time = max(x.end_time for x in metrics) - min(x.start_time for x in metrics)
244244

245245
schedule_deltas = [x.start_time - x.scheduled_time for x in metrics]
246-
send_duration = max([x.start_time for x in metrics]) - min([x.start_time for x in metrics])
246+
send_duration = max(x.start_time for x in metrics) - min(x.start_time for x in metrics)
247247

248248
load_summary: dict[Any, Any] = {
249249
"count": len(metrics),
@@ -325,13 +325,29 @@ def summarize_requests(
325325
"inter_token_latency": summarize(inter_token_latencies, percentiles),
326326
},
327327
"throughput": {
328-
"input_tokens_per_sec": sum(safe_float(x.info.input_tokens) for x in all_successful) / total_time,
329-
"output_tokens_per_sec": sum(safe_float(x.info.output_tokens) for x in all_successful) / total_time,
330-
"total_tokens_per_sec": sum(
331-
safe_float(x.info.input_tokens) + safe_float(x.info.output_tokens)
332-
for x in all_successful
333-
) / total_time,
334-
"requests_per_sec": len(all_successful) / total_time,
328+
"input_tokens_per_sec": (
329+
sum(safe_float(x.info.input_tokens) for x in all_successful) / total_time
330+
if total_time > 0
331+
else 0.0
332+
),
333+
"output_tokens_per_sec": (
334+
sum(safe_float(x.info.output_tokens) for x in all_successful) / total_time
335+
if total_time > 0
336+
else 0.0
337+
),
338+
"total_tokens_per_sec": (
339+
sum(
340+
safe_float(x.info.input_tokens) + safe_float(x.info.output_tokens)
341+
for x in all_successful
342+
) / total_time
343+
if total_time > 0
344+
else 0.0
345+
),
346+
"requests_per_sec": (
347+
len(all_successful) / total_time
348+
if total_time > 0
349+
else 0.0
350+
),
335351
},
336352
"prompt_len": summarize([safe_float(success.info.input_tokens) for success in all_successful], percentiles),
337353
"output_len": summarize([float(v) for success in all_successful if (v := success.info.output_tokens) is not None], percentiles),

0 commit comments

Comments
 (0)