|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +plot_benchmark_results.py: Generates comparison plots across executor |
| 4 | +strategies (serial, unbounded, pool) for each parallel benchmark test. |
| 5 | +
|
| 6 | +Reads benchmark_results.csv produced by run_benchmarks.py and outputs |
| 7 | +a PDF with two plots per test: |
| 8 | + Left: TPS vs worker count, one line per executor strategy |
| 9 | + Right: TPS vs mean latency (with std error bars), one series per |
| 10 | + executor × worker combination |
| 11 | +""" |
| 12 | + |
1 | 13 | import pandas as pd |
2 | 14 | import matplotlib.pyplot as plt |
3 | 15 | from matplotlib.backends.backend_pdf import PdfPages |
|
6 | 18 | from pathlib import Path |
7 | 19 |
|
# CLI: a single optional positional argument naming the results CSV.
# The output PDF is written next to it, with the extension swapped.
parser = argparse.ArgumentParser()
parser.add_argument(
    "results_file",
    nargs="?",
    default="benchmark_results.csv",
    help="Path to the results CSV file (default: benchmark_results.csv)",
)
args = parser.parse_args()

csv_path = args.results_file
source = Path(csv_path)
pdf_path = str(source.with_suffix(".pdf"))
|
|
29 | 37 | "TestParallelBenchmarkValidatorTransfer", |
30 | 38 | ] |
31 | 39 |
|
# Executor strategies under comparison, with a fixed color and marker per
# strategy so every plot in the PDF uses a consistent visual encoding.
executors = ["serial", "unbounded", "pool"]
executor_colors = dict(zip(executors, ("tab:blue", "tab:orange", "tab:green")))
executor_markers = dict(zip(executors, ("o", "s", "^")))
| 51 | + |
# Load the benchmark results and plot only the most recent run, i.e. the
# last row of the CSV (run_benchmarks.py appends one row per run).
df = pd.read_csv(csv_path)
last_row = df.iloc[-1]
timestamp = last_row["timestamp"]
|
40 | | - for test_name in test_names: |
| 56 | +# Column pattern: TestParallelBenchmarkSender[pool]/8 tps |
| 57 | +col_re = re.compile( |
| 58 | + r"^(.+?)\[(\w+)\]/(\d+)\s+(tps|lat-p95|lat-avg|lat-std|goroutines)$" |
| 59 | +) |
41 | 60 |
|
42 | | - pattern = re.compile(rf"{re.escape(test_name)}/(\d+)\s+tps") |
| 61 | +def get_value(row, test, executor, cpu, metric): |
| 62 | + col = f"{test}[{executor}]/{cpu} {metric}" |
| 63 | + return row.get(col, None) |
43 | 64 |
|
# One PDF page per benchmark test, two panels each:
#   left  — TPS vs worker count, one line per executor strategy
#   right — throughput/latency scatter, one point per (executor, workers)
p95_marker = "X"  # shared marker style for all p95 latency points

with PdfPages(pdf_path) as pdf:
    for test_name in test_names:

        # Discover which worker counts are present for this test.
        worker_set = set()
        for col in df.columns:
            m = col_re.match(col)
            if m and m.group(1) == test_name:
                worker_set.add(int(m.group(3)))
        if not worker_set:
            # Fall back to old-style columns without an executor tag.
            old_re = re.compile(rf"^{re.escape(test_name)}/(\d+)\s+tps$")
            for col in df.columns:
                om = old_re.match(col)
                if om:
                    worker_set.add(int(om.group(1)))
        if not worker_set:
            # No columns for this test at all — skip the page.
            continue

        workers_sorted = sorted(worker_set)

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7))
        fig.suptitle(f"{test_name}\n(last run: {timestamp})", fontsize=12)

        # ---- Left plot: TPS vs workers, one line per executor ----
        for executor in executors:
            tps_vals = []
            w_vals = []
            for cpu in workers_sorted:
                v = get_value(last_row, test_name, executor, cpu, "tps")
                if v is not None and not pd.isna(v):
                    tps_vals.append(float(v))
                    w_vals.append(cpu)
            if tps_vals:
                ax1.plot(
                    w_vals, tps_vals,
                    marker=executor_markers[executor],
                    linestyle="-",
                    color=executor_colors[executor],
                    label=executor,
                )

        ax1.set_xlabel("Worker count")
        ax1.set_ylabel("TPS")
        ax1.set_title("TPS vs Worker Count")
        ax1.legend(title="Executor")
        ax1.grid(True)

        # ---- Right plot: TPS vs mean latency with error bars ----
        # One point per (executor, worker) combination.
        plotted_executors = set()
        for executor in executors:
            for cpu in workers_sorted:
                tps = get_value(last_row, test_name, executor, cpu, "tps")
                avg = get_value(last_row, test_name, executor, cpu, "lat-avg")
                std = get_value(last_row, test_name, executor, cpu, "lat-std")
                p95 = get_value(last_row, test_name, executor, cpu, "lat-p95")

                # Skip combinations with any missing metric.
                if any(v is None or pd.isna(v) for v in [tps, avg, std, p95]):
                    continue

                # Label each executor only once to keep the legend compact.
                label = executor if executor not in plotted_executors else None
                ax2.errorbar(
                    float(tps), float(avg),
                    yerr=float(std),
                    marker=executor_markers[executor],
                    color=executor_colors[executor],
                    label=label,
                    capsize=4,
                    capthick=1.5,
                    linestyle="None",
                )
                plotted_executors.add(executor)
                ax2.scatter(
                    float(tps), float(p95),
                    marker=p95_marker,
                    color=executor_colors[executor],
                    s=80,
                    zorder=3,
                )
                # Annotate each point with its worker count.
                ax2.annotate(
                    str(cpu),
                    (float(tps), float(avg)),
                    textcoords="offset points",
                    xytext=(4, 4),
                    fontsize=7,
                    color=executor_colors[executor],
                )

        # Dummy entry so the p95 marker gets one legend line.
        ax2.scatter([], [], marker=p95_marker, color="black", label="p95")

        ax2.set_xlabel("Throughput (TPS)")
        ax2.set_ylabel("Mean Latency [ms]")
        ax2.set_title("TPS vs Latency (dot=mean±std, X=p95, label=workers)")
        ax2.legend(title="Executor")
        ax2.grid(True)

        # rect reserves headroom for the suptitle; plain tight_layout()
        # does not account for suptitles and lets them overlap the axes.
        fig.tight_layout(rect=(0, 0, 1, 0.94))
        pdf.savefig(fig)
        plt.close(fig)
124 | 168 |
|
|
0 commit comments