@@ -189,99 +189,105 @@ def write_markdown(impl_names, result_maps, all_keys, output_path, reports, syst
189189 f .write (f'| { display } | { a_val :.2f} | { b_val :.2f} | { sign } { change :.1f} |\n ' )
190190 f .write ('\n ' )
191191
192- # --- Summary ---
193- regressions = 0
194- improvements = 0
195- same_count = 0
196- cat_regressions = {}
197- cat_improvements = {}
198-
192+ # --- Build comparison rows (include all results, not just verified) ---
199193 comparison_rows = []
200194 for key in all_keys :
201195 name , mode , resolution = key
202196 r_a = result_maps [0 ].get (key )
203197 r_b = result_maps [1 ].get (key )
204198
205- if not (r_a and r_b ):
206- continue
207- if not (r_a .get ('supported' , False ) and r_b .get ('supported' , False )):
208- continue
209- if not (r_a .get ('verified' , True ) and r_b .get ('verified' , True )):
199+ if not r_a and not r_b :
210200 continue
211201
212- wc_a = r_a .get ('wall_clock' , {})
213- wc_b = r_b .get ('wall_clock' , {})
214- median_a = wc_a .get ('median_ms' , 0 )
215- median_b = wc_b .get ('median_ms' , 0 )
216- if median_a <= 0 or median_b <= 0 :
217- continue
218-
219- mps_a = r_a .get ('megapixels_per_sec' , 0 )
220- mps_b = r_b .get ('megapixels_per_sec' , 0 )
221- cv_a = wc_a .get ('cv_percent' , 0 )
222- cv_b = wc_b .get ('cv_percent' , 0 )
223- category = r_a .get ('category' , '' )
224- change_pct = ((median_b - median_a ) / median_a ) * 100
225-
226- if change_pct > 5.0 :
227- status = 'REGRESSION'
228- regressions += 1
229- cat_regressions [category ] = cat_regressions .get (category , 0 ) + 1
230- elif change_pct < - 5.0 :
231- status = 'IMPROVEMENT'
232- improvements += 1
233- cat_improvements [category ] = cat_improvements .get (category , 0 ) + 1
202+ row = {'name' : name , 'mode' : mode , 'resolution' : resolution }
203+
204+ for side , r in [('a' , r_a ), ('b' , r_b )]:
205+ if r :
206+ row [f'supported_{ side } ' ] = r .get ('supported' , False )
207+ row [f'verified_{ side } ' ] = r .get ('verified' , True ) if r .get ('supported' , False ) else False
208+ wc = r .get ('wall_clock' , {})
209+ row [f'median_{ side } ' ] = wc .get ('median_ms' , 0 )
210+ row [f'mps_{ side } ' ] = r .get ('megapixels_per_sec' , 0 )
211+ row [f'cv_{ side } ' ] = wc .get ('cv_percent' , 0 )
212+ row [f'category' ] = r .get ('category' , '' )
213+ else :
214+ row [f'supported_{ side } ' ] = False
215+ row [f'verified_{ side } ' ] = False
216+ row [f'median_{ side } ' ] = 0
217+ row [f'mps_{ side } ' ] = 0
218+ row [f'cv_{ side } ' ] = 0
219+
220+ if (row ['median_a' ] > 0 and row ['median_b' ] > 0
221+ and row ['verified_a' ] and row ['verified_b' ]):
222+ row ['speedup' ] = row ['mps_b' ] / row ['mps_a' ] if row ['mps_a' ] > 0 else 0
234223 else :
235- status = 'same'
236- same_count += 1
224+ row ['speedup' ] = 0
225+
226+ comparison_rows .append (row )
237227
238- comparison_rows .append ({
239- 'name' : name , 'category' : category , 'mode' : mode , 'resolution' : resolution ,
240- 'median_a' : median_a , 'median_b' : median_b ,
241- 'mps_a' : mps_a , 'mps_b' : mps_b ,
242- 'cv_a' : cv_a , 'cv_b' : cv_b ,
243- 'change_pct' : change_pct , 'status' : status
244- })
228+ comparison_rows .sort (key = lambda r : r .get ('speedup' , 0 ))
245229
246- comparison_rows .sort (key = lambda r : r ['change_pct' ], reverse = True )
230+ # --- Summary ---
231+ both_verified = sum (1 for r in comparison_rows if r ['verified_a' ] and r ['verified_b' ])
232+ a_only = sum (1 for r in comparison_rows if r ['verified_a' ] and not r ['verified_b' ])
233+ b_only = sum (1 for r in comparison_rows if not r ['verified_a' ] and r ['verified_b' ])
234+
235+ keys_a = set (result_maps [0 ].keys ())
236+ keys_b = set (result_maps [1 ].keys ())
237+ only_a_keys = sorted (keys_a - keys_b )
238+ only_b_keys = sorted (keys_b - keys_a )
247239
248240 f .write ('## Summary\n \n ' )
249241 f .write ('| Metric | Count |\n ' )
250242 f .write ('|:---|---:|\n ' )
251- f .write (f'| Total compared | { len (comparison_rows )} |\n ' )
252- f .write (f'| Regressions (>5% slower) | { regressions } |\n ' )
253- f .write (f'| Improvements (>5% faster) | { improvements } |\n ' )
254- f .write (f'| Unchanged | { same_count } |\n \n ' )
255-
256- if cat_regressions or cat_improvements :
257- f .write ('### By Category\n \n ' )
258- f .write ('| Category | Regressions | Improvements |\n ' )
259- f .write ('|:---|---:|---:|\n ' )
260- all_summary_cats = sorted (set (list (cat_regressions .keys ()) + list (cat_improvements .keys ())))
261- for cat in all_summary_cats :
262- reg = cat_regressions .get (cat , 0 )
263- imp = cat_improvements .get (cat , 0 )
264- f .write (f'| { cat } | { reg } | { imp } |\n ' )
265- f .write ('\n ' )
243+ f .write (f'| Total benchmarks compared | { len (comparison_rows )} |\n ' )
244+ f .write (f'| Both verified | { both_verified } |\n ' )
245+ if a_only > 0 :
246+ f .write (f'| Verified only in { impl_names [0 ]} | { a_only } |\n ' )
247+ if b_only > 0 :
248+ f .write (f'| Verified only in { impl_names [1 ]} | { b_only } |\n ' )
249+ if only_a_keys :
250+ f .write (f'| Only in { impl_names [0 ]} | { len (only_a_keys )} |\n ' )
251+ if only_b_keys :
252+ f .write (f'| Only in { impl_names [1 ]} | { len (only_b_keys )} |\n ' )
253+ f .write ('\n ' )
266254
267255 # --- Detailed Results ---
268256 f .write ('## Detailed Comparison\n \n ' )
269- f .write (f'> Change % is based on median latency. Positive = slower (regression), negative = faster (improvement).\n \n ' )
270- f .write (f'| Benchmark | Mode | Resolution | { impl_names [0 ]} (ms) | { impl_names [0 ]} (MP/s) | '
271- f'{ impl_names [1 ]} (ms) | { impl_names [1 ]} (MP/s) | Change % | Status |\n ' )
272- f .write ('|:---|:---|:---|---:|---:|---:|---:|---:|:---|\n ' )
257+ f .write (f'> Speedup = { impl_names [1 ]} throughput / { impl_names [0 ]} throughput. '
258+ f'Values >1.00 mean { impl_names [1 ]} is faster.\n \n ' )
259+ f .write (f'| Benchmark | Mode | Resolution '
260+ f'| { impl_names [0 ]} (ms) | { impl_names [0 ]} (MP/s) | { impl_names [0 ]} Verified '
261+ f'| { impl_names [1 ]} (ms) | { impl_names [1 ]} (MP/s) | { impl_names [1 ]} Verified '
262+ f'| Speedup |\n ' )
263+ f .write ('|:---|:---|:---|---:|---:|:---:|---:|---:|:---:|---:|\n ' )
273264
274265 has_unstable = False
275266 for row in comparison_rows :
276267 flag = ''
277268 if row ['cv_a' ] > 15 or row ['cv_b' ] > 15 :
278269 flag = ' *'
279270 has_unstable = True
280- sign = '+' if row ['change_pct' ] >= 0 else ''
281- f .write (f'| { row ["name" ]} | { row ["mode" ]} | { row ["resolution" ]} '
282- f'| { row ["median_a" ]:.3f} | { row ["mps_a" ]:.1f} '
283- f'| { row ["median_b" ]:.3f} | { row ["mps_b" ]:.1f} '
284- f'| { sign } { row ["change_pct" ]:.1f} | { row ["status" ]} { flag } |\n ' )
271+
272+ f .write (f'| { row ["name" ]} | { row ["mode" ]} | { row ["resolution" ]} | ' )
273+
274+ if not row ['supported_a' ]:
275+ f .write ('N/A | N/A | N/A | ' )
276+ else :
277+ v = 'PASS' if row ['verified_a' ] else 'FAIL'
278+ f .write (f'{ row ["median_a" ]:.3f} | { row ["mps_a" ]:.1f} | { v } | ' )
279+
280+ if not row ['supported_b' ]:
281+ f .write ('N/A | N/A | N/A | ' )
282+ else :
283+ v = 'PASS' if row ['verified_b' ] else 'FAIL'
284+ f .write (f'{ row ["median_b" ]:.3f} | { row ["mps_b" ]:.1f} | { v } | ' )
285+
286+ if row ['speedup' ] > 0 :
287+ f .write (f'{ row ["speedup" ]:.2f} x{ flag } ' )
288+ else :
289+ f .write ('N/A' )
290+ f .write (' |\n ' )
285291 f .write ('\n ' )
286292
287293 if has_unstable :
@@ -330,46 +336,49 @@ def write_markdown(impl_names, result_maps, all_keys, output_path, reports, syst
def _csv_side_cells(result):
    """Extract the CSV cells for one implementation's result of a benchmark.

    Args:
        result: The result dict for one benchmark key, or ``None`` when the
            implementation has no entry for that key.

    Returns:
        A ``(category, cells, verified, mps)`` tuple. ``cells`` is always a
        three-element list (median ms, MP/s, PASS/FAIL). For a missing or
        unsupported result the cells are empty strings, ``verified`` is
        ``False`` and ``mps`` is ``0`` so the caller never computes a speedup
        from it.
    """
    if not result or not result.get('supported', False):
        return '', ['', '', ''], False, 0

    wall_clock = result.get('wall_clock', {})
    median = wall_clock.get('median_ms', 0)
    mps = result.get('megapixels_per_sec', 0)
    # A supported result without an explicit 'verified' flag counts as verified.
    verified = result.get('verified', True)
    cells = [f'{median:.4f}', f'{mps:.2f}', 'PASS' if verified else 'FAIL']
    return result.get('category', ''), cells, verified, mps


def write_csv(impl_names, result_maps, all_keys, output_path, reports):
    """Write a side-by-side comparison of two implementations to a CSV file.

    One row is written per benchmark key (in sorted key order) that has a
    result in at least one of the two result maps. Each side contributes its
    median latency, throughput and verification status; an unsupported or
    missing side contributes three empty cells. ``speedup`` is
    ``mps_b / mps_a`` and is only filled in when both sides are supported and
    verified and the first side's throughput is positive.

    Fields are emitted through ``csv.writer`` so that names containing commas,
    quotes or newlines are quoted instead of shifting columns.

    Args:
        impl_names: Sequence whose first two items label the implementations;
            they are used as column-name prefixes.
        result_maps: Sequence whose first two items map
            ``(name, mode, resolution)`` keys to result dicts.
        all_keys: Iterable of ``(name, mode, resolution)`` keys to consider.
        output_path: Output path without extension; ``.csv`` is appended.
        reports: Unused here; kept for interface compatibility with callers.
    """
    import csv  # local import: the file's top-level import block is not visible here

    header = ['benchmark', 'category', 'mode', 'resolution']
    for impl in (impl_names[0], impl_names[1]):
        header += [f'{impl}_median_ms', f'{impl}_mp_per_sec', f'{impl}_verified']
    header.append('speedup')

    # newline='' is what the csv module expects of its file object; the explicit
    # '\n' terminator keeps the line endings the hand-written version produced.
    with open(output_path + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(header)

        for key in sorted(all_keys):
            name, mode, resolution = key
            r_a = result_maps[0].get(key)
            r_b = result_maps[1].get(key)

            if not r_a and not r_b:
                continue

            category_a, cells_a, verified_a, mps_a = _csv_side_cells(r_a)
            category_b, cells_b, verified_b, mps_b = _csv_side_cells(r_b)
            # Prefer the first implementation's category, fall back to the second.
            category = category_a or category_b

            speedup = ''
            if verified_a and verified_b and mps_a > 0:
                speedup = f'{mps_b / mps_a:.4f}'

            writer.writerow([name, category, mode, resolution,
                             *cells_a, *cells_b, speedup])

    print(f' Comparison CSV: {output_path}.csv')
375384
0 commit comments