22"""Compare benchmark results across PR, main, and tag and output a markdown table."""
33
44import json
5+ import logging
6+ import re
57import statistics
68from pathlib import Path
79from typing import Literal , NamedTuple
810
11+ _logger = logging .getLogger (__name__ )
12+
13+
14+ ALERT = 250 # Threshold in ms (arbitrary): a PR-vs-reference difference beyond this is flagged as slower/faster
15+
916
1017class BenchmarkResult (NamedTuple ):
1118 fullname : str
@@ -64,9 +71,9 @@ def _delta(pr: BenchmarkResult, ref: BenchmarkResult) -> str:
6471 if ref == 0 :
6572 return "N/A"
6673 diff = _scale (pr .median - ref .median )
67- pct = ( pr . median / ref . median - 1 ) * 100
68- icon = "🔴" if pct > 5 else "🟢" if pct < - 5 else "⚪"
69- return f"{ icon } { diff :+.3f} ms ( { pct :+.1f } %) "
74+ # Icon flags an absolute difference beyond ALERT ms (arbitrary threshold)
75+ icon = "🔴" if diff > ALERT else "🟢" if diff < - ALERT else "⚪"
76+ return f"{ icon } { diff :+.3f} ms "
7077
7178
7279def _label (result : BenchmarkResult ) -> str :
@@ -83,10 +90,13 @@ def _label(result: BenchmarkResult) -> str:
8390def build_table (
8491 pr : dict [str , BenchmarkResult ],
8592 main : dict [str , BenchmarkResult ],
86- tag : dict [str , BenchmarkResult ],
87- tag_name : str ,
93+ tag : dict [str , BenchmarkResult ] = {} ,
94+ tag_name : str | None = None ,
8895) -> str :
8996 all_keys = set (pr ) | set (main ) | set (tag )
97+ all_keys = sorted (
98+ all_keys , key = lambda x : (0 if "index" in x else 1 if "query" in x else 2 , x )
99+ )
90100 labels = [_label ((pr .get (k ) or main .get (k ) or tag .get (k ))) for k in all_keys ]
91101
92102 col_sep = " | "
@@ -110,14 +120,14 @@ def delta_row(label: str, ref: dict[str, BenchmarkResult]) -> str:
110120 divider ,
111121 row ("PR" , pr ),
112122 row ("main" , main ),
113- row (tag_name , tag ),
123+ # row(tag_name, tag),
114124 divider .replace ("-" , "" ),
115125 delta_row ("PR vs main" , main ),
116- delta_row (f"PR vs { tag_name } " , tag ),
126+ # delta_row(f"PR vs {tag_name}", tag),
117127 "" ,
118128 "> `median (mean ± std)`" ,
119129 "> " ,
120- " 🔴 >5% slower ⚪ within 5% 🟢 >5% faster" ,
130+ f"> 🔴 >{ ALERT } ms slower ⚪ within { ALERT } ms 🟢 >{ ALERT } ms faster" ,
121131 ]
122132 return "\n " .join (lines )
123133
@@ -134,27 +144,33 @@ def main():
134144 parser .add_argument (
135145 "-o" ,
136146 "--output" ,
147+ type = Path ,
137148 help = "Output markdown filepath containing benchmark comparisons" ,
138149 )
139150 args = parser .parse_args ()
140151
141152 files = sorted (Path ("." ).glob (args .pattern ))
142- assert len (files ) == 3 , f "Expected 3 files, found { len ( files ) } : { files } "
153+ assert len (files ) > 1 , "Expected more than 1 file for benchmark comparison. "
143154
144155 # Infer pr/main/tag from the second "-"-separated field of the file name
145156 parsed : dict [str , BenchmarkResult ] = {}
146157 tag = None
147158 for f in files :
148- stem = f .parent .name # e.g. "benchmark-pr"
149- key = stem .split ("-" )[- 1 ] # "pr", "main", tag
150- if key not in ("pr" , "main" ):
159+ stem = f .name # e.g. "benchmark-pr-PR-#"
160+ key = stem .split ("-" )[1 ] # commit-sha, "main", tag
161+
162+ # Special cases
163+ if re .match (r"^v\d+\.\d+.\d+$" , key ):
151164 tag = key
165+ elif key != "main" :
166+ key = "pr"
167+
152168 parsed [key ] = parse_file (f )
153169 if tag is None :
154- raise ValueError ( "Unknown tag " )
155- table = build_table (parsed ["pr" ], parsed ["main" ], parsed [ tag ] , tag_name = tag )
170+ _logger . warning ( "Tag not found " )
171+ table = build_table (parsed ["pr" ], parsed ["main" ], parsed . get ( tag , {}) , tag_name = tag )
156172 args .output .write_text (table )
157- print (table )
173+ _logger . info (table )
158174
159175
160176if __name__ == "__main__" :
0 commit comments