-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_metrics.py
More file actions
121 lines (100 loc) · 4.15 KB
/
extract_metrics.py
File metadata and controls
121 lines (100 loc) · 4.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import csv
import sys
import os
def parse_percentiles_max(value_str):
    """Return the maximum value from a percentiles string like '[1.0, 2.0, ...]'.

    Args:
        value_str: A bracketed, comma-separated list of numbers as produced
            in the CSV's "Percentiles" cells.

    Returns:
        The largest parsed float, or None when the input is None, empty,
        or not parseable as a list of numbers.
    """
    try:
        # Drop the surrounding brackets; float() tolerates the leftover whitespace.
        inner = value_str.strip('[]')
        if not inner:
            return None
        return max(float(part) for part in inner.split(','))
    except (AttributeError, ValueError):
        # AttributeError: value_str was not a string (e.g. None).
        # ValueError: a list element was not numeric.
        return None
def _get_cell(row, col_map, key):
    """Return the stripped cell for 3-level header *key*, or None if the
    column is missing, out of range for this row, or empty."""
    idx = col_map.get(key)
    if idx is None or idx >= len(row):
        return None
    val = row[idx].strip()
    return val or None


def _fmt_float(raw):
    """Format a raw cell string to 3 decimals, or 'N/A' if absent/invalid."""
    if raw is None:
        return "N/A"
    try:
        return f"{float(raw):.3f}"
    except ValueError:
        # A malformed numeric cell should not abort the whole report.
        return "N/A"


def extract_metrics(csv_path):
    """Print an aligned summary table of latency/throughput metrics from a CSV.

    The CSV is expected to have three header rows forming a hierarchical
    (Level1, Level2, Level3) column key, followed by one data row per run.
    Selected metrics (throughput mean; TTFT/TPOT/ITL median, mean, and max
    derived from the percentiles column) are printed as a comma-separated,
    column-aligned table on stdout.

    Args:
        csv_path: Path to the metrics CSV file.

    Returns:
        None. Prints a message and returns early if the file is missing or
        has fewer than three header rows.
    """
    if not os.path.exists(csv_path):
        print(f"File not found: {csv_path}")
        return

    # utf-8-sig transparently strips a BOM if present.
    with open(csv_path, 'r', encoding='utf-8-sig') as f:
        reader = csv.reader(f)
        try:
            header1 = next(reader)
            header2 = next(reader)
            header3 = next(reader)
        except StopIteration:
            print(f"Error: CSV file {csv_path} is empty or malformed.")
            return
        # Map each normalized (Level1, Level2, Level3) header to its column index.
        col_map = {
            (h1.strip(), h2.strip(), h3.strip()): i
            for i, (h1, h2, h3) in enumerate(zip(header1, header2, header3))
        }
        data_rows = list(reader)

    # (display name, Level1 header, Level2 header) for each metric of interest.
    metrics_config = [
        ("Output Throughput", "Token Throughput", "Successful Output Tokens/Sec"),
        ("TTFT", "Time to First Token", "Successful ms"),
        ("TPOT", "Time per Output Token", "Successful ms"),
        ("ITL", "Inter Token Latency", "Successful ms")
    ]

    # Output headers: throughput gets Mean only; the rest get Median/Mean/Max.
    headers = ["Output Throughput (Mean)"]
    for name, _, _ in metrics_config[1:]:
        headers.append(f"{name} (Median)")
        headers.append(f"{name} (Mean)")
        headers.append(f"{name} (Max)")

    all_output_rows = []
    for row in data_rows:
        if not row:
            continue  # skip blank lines

        # 1. Output Throughput (Mean only).
        _, l1, l2 = metrics_config[0]
        row_values = [_fmt_float(_get_cell(row, col_map, (l1, l2, "Mean")))]

        # 2. Remaining metrics: Median, Mean, and Max (from percentiles).
        for _, l1, l2 in metrics_config[1:]:
            median = _get_cell(row, col_map, (l1, l2, "Median"))
            avg = _get_cell(row, col_map, (l1, l2, "Mean"))
            percentiles = _get_cell(row, col_map, (l1, l2, "Percentiles"))

            max_val = "N/A"
            if percentiles is not None:
                m = parse_percentiles_max(percentiles)
                if m is not None:
                    max_val = f"{m:.3f}"

            row_values.append(_fmt_float(median))
            row_values.append(_fmt_float(avg))
            row_values.append(max_val)
        all_output_rows.append(row_values)

    # Column widths: wide enough for the header and every value beneath it.
    col_widths = [len(h) for h in headers]
    for r in all_output_rows:
        for i, val in enumerate(r):
            col_widths[i] = max(col_widths[i], len(val))

    # Print header, then each data row, left-aligned per column.
    header_fmt = [f"{h:<{col_widths[i]}}" for i, h in enumerate(headers)]
    print(" , ".join(header_fmt))
    for r in all_output_rows:
        row_fmt = [f"{val:<{col_widths[i]}}" for i, val in enumerate(r)]
        print(" , ".join(row_fmt))
if __name__ == "__main__":
    # Require exactly one CLI argument: the path to the metrics CSV.
    args = sys.argv[1:]
    if not args:
        print("Usage: python extract_metrics.py <csv_file>")
        sys.exit(1)
    extract_metrics(args[0])