@@ -152,17 +152,100 @@ def find_varying_parameters(all_configs: List[Dict[str, Any]]) -> Set[str]:
152152 return varying
153153
154154
def find_multirun_dir(index: int = 0, sort_by_date: bool = True) -> str:
    """Return the Nth most recent experiment directory under ``multirun``.

    A Hydra-style layout is assumed: ``multirun/YYYY-MM-DD/HH-MM-SS``.

    Args:
        index: Which run to pick: 0 = most recent, 1 = previous, etc.
        sort_by_date: If True, order runs by the date/time encoded in the
            directory names, falling back to modification time for paths
            that do not match the expected format. If False, order by
            modification time only.

    Returns:
        Path to the selected run directory (e.g. ``multirun/2025-08-27/16-49-08``).

    Raises:
        FileNotFoundError: If ``multirun`` is missing or contains no runs.
        IndexError: If fewer than ``index + 1`` run directories exist.
    """
    from datetime import datetime

    multirun_path = 'multirun'
    if not os.path.exists(multirun_path):
        raise FileNotFoundError("multirun directory not found")

    # Collect (sort_key, path) pairs. Keys are always floats (POSIX
    # timestamps), so entries whose names parse as dates and entries that
    # fall back to mtime still compare cleanly in sorted(). (The previous
    # version mixed str and float keys, which would raise TypeError.)
    all_subdirs = []
    for item in os.listdir(multirun_path):
        item_path = os.path.join(multirun_path, item)
        if not os.path.isdir(item_path):
            continue
        # Each date directory contains one subdirectory per run time.
        for subitem in os.listdir(item_path):
            subitem_path = os.path.join(item_path, subitem)
            if not os.path.isdir(subitem_path):
                continue
            sort_val = None
            if sort_by_date:
                # Parse the timestamp encoded in the path components,
                # e.g. item="2025-08-27", subitem="16-49-08".
                try:
                    sort_val = datetime.strptime(
                        f"{item} {subitem}", "%Y-%m-%d %H-%M-%S"
                    ).timestamp()
                except ValueError:
                    sort_val = None  # not date-shaped; fall back to mtime
            if sort_val is None:
                sort_val = os.path.getmtime(subitem_path)
            all_subdirs.append((sort_val, subitem_path))

    if not all_subdirs:
        raise FileNotFoundError("No experiment directories found in multirun")

    # Most recent first; ties broken arbitrarily but deterministically.
    sorted_subdirs = sorted(all_subdirs, key=lambda x: x[0], reverse=True)

    # NOTE: negative indices are not rejected here (matching the original
    # behavior); they index from the oldest end via Python list semantics.
    if index >= len(sorted_subdirs):
        raise IndexError(f"Index {index} out of range, only {len(sorted_subdirs)} directories found")

    return sorted_subdirs[index][1]
197+
198+
155199def main () -> None :
156200 parser = argparse .ArgumentParser (description = 'Print benchmark throughput data automatically grouped' )
157- parser .add_argument ('--base-dir' , required = True , help = 'Base directory containing benchmark results' )
201+
202+ # Positional argument for base directory (optional)
203+ parser .add_argument ('base_dir' , nargs = '?' , help = 'Base directory containing benchmark results' )
204+
205+ # Options for automatic latest directory selection
206+ parser .add_argument (
207+ '--latest' ,
208+ type = int ,
209+ nargs = '?' ,
210+ const = 0 ,
211+ metavar = 'N' ,
212+ help = 'Use the Nth latest multirun directory (0=most recent, 1=previous, etc.)' ,
213+ )
214+ parser .add_argument (
215+ '--latest-order' ,
216+ choices = ['date' , 'mod_time' ],
217+ default = 'date' ,
218+ help = 'Sort latest directories by date in path or modification time' ,
219+ )
220+
158221 parser .add_argument ('--csv-output' , help = 'Optional CSV file to write the results to' )
222+ parser .add_argument ('--runs' , choices = ['tri' , 'all' ], help = 'Show run numbers in results (tri=min/median/max, all=all runs)' )
159223 args = parser .parse_args ()
160224
225+ # Validate conflicting arguments
226+ if args .base_dir and args .latest is not None :
227+ parser .error ("Cannot specify both a base directory and --latest option" )
228+
229+ # Determine the base directory to use
230+ # Priority: positional arg > --latest > default to latest=0
231+ if args .base_dir :
232+ base_dir = args .base_dir
233+ else :
234+ latest_index = args .latest if args .latest is not None else 0
235+ try :
236+ base_dir = find_multirun_dir (index = latest_index , sort_by_date = (args .latest_order == 'date' ))
237+ ordinal = ["latest" , "2nd latest" , "3rd latest" ]
238+ index_desc = ordinal [latest_index ] if latest_index < len (ordinal ) else f"{ latest_index + 1 } th latest"
239+ print (f"Using { index_desc } multirun directory (sorted by { args .latest_order } ): { base_dir } " )
240+ except (FileNotFoundError , IndexError ) as e :
241+ print (f"Error: { e } " )
242+ return
243+
161244 # List to store all results
162245 all_results = []
163246
164247 # Process all iteration directories
165- for root , dirs , files in os .walk (args . base_dir ):
248+ for root , dirs , files in os .walk (base_dir ):
166249 for dir_name in dirs :
167250 if dir_name .isdigit ():
168251 iteration_path = os .path .join (root , dir_name )
@@ -184,30 +267,68 @@ def main() -> None:
184267 grouped_results = defaultdict (list )
185268 for config , throughput , iter_num in all_results :
186269 key = tuple ((param , str (config .get (param , 'N/A' ))) for param in sorted (varying_params ))
187- grouped_results [key ].append (throughput )
188-
189- # Aggregated results table
190- aggregated_headers = varying_params + [
191- "Count" ,
192- "Median (Gbps)" ,
193- "Std Dev (Gbps)" ,
194- "Min (Gbps)" ,
195- "Max (Gbps)" ,
196- ]
197- aggregated_rows = []
198- for config_key , throughputs in grouped_results .items ():
270+ grouped_results [key ].append ((throughput , iter_num ))
271+
272+ # Generate aggregated results table with optional Run Numbers column
273+ if args .runs :
274+ results_headers = varying_params + [
275+ "Run Numbers" ,
276+ "Count" ,
277+ "Max (Gbps)" ,
278+ "Median (Gbps)" ,
279+ "Min (Gbps)" ,
280+ "Std Dev (Gbps)" ,
281+ ]
282+ else :
283+ results_headers = varying_params + [
284+ "Count" ,
285+ "Max (Gbps)" ,
286+ "Median (Gbps)" ,
287+ "Min (Gbps)" ,
288+ "Std Dev (Gbps)" ,
289+ ]
290+
291+ results_rows = []
292+ for config_key , throughput_data in grouped_results .items ():
293+ throughputs = [t for t , _ in throughput_data ]
294+ run_numbers = [r for _ , r in throughput_data ]
295+
199296 row = []
200297 for _ , value in config_key :
201298 row .append (value )
299+
300+ # Add run numbers column if requested
301+ if args .runs :
302+ if args .runs == "tri" :
303+ # Find min, max, and median run numbers based on throughput
304+ sorted_by_throughput = sorted (zip (throughputs , run_numbers ))
305+ min_run = sorted_by_throughput [0 ][1 ]
306+ max_run = sorted_by_throughput [- 1 ][1 ]
307+ median_idx = len (sorted_by_throughput ) // 2
308+ median_run = sorted_by_throughput [median_idx ][1 ]
309+
310+ selected_runs = [max_run , median_run , min_run ]
311+ # Remove duplicates while preserving order
312+ unique_runs = []
313+ for run in selected_runs :
314+ if run not in unique_runs :
315+ unique_runs .append (run )
316+
317+ row .append ("," .join (unique_runs ))
318+ else :
319+ sorted_by_throughput = sorted (zip (throughputs , run_numbers ), reverse = True )
320+ all_runs = [r for _ , r in sorted_by_throughput ]
321+ row .append ("," .join (all_runs ))
322+
202323 row .append (len (throughputs ))
324+ row .append (f"{ max (throughputs ):.2f} " )
203325 row .append (f"{ statistics .median (throughputs ):.2f} " )
326+ row .append (f"{ min (throughputs ):.2f} " )
204327 if len (throughputs ) > 1 :
205328 row .append (f"{ statistics .stdev (throughputs ):.2f} " )
206329 else :
207330 row .append ("N/A" )
208- row .append (f"{ min (throughputs ):.2f} " )
209- row .append (f"{ max (throughputs ):.2f} " )
210- aggregated_rows .append (row )
331+ results_rows .append (row )
211332
212333 # Custom sorting function for benchmark types
213334 def benchmark_type_sort_key (value : str ) -> int :
@@ -217,7 +338,7 @@ def benchmark_type_sort_key(value: str) -> int:
217338 # Sort rows by all columns
218339 def sort_key (row : List [str ]) -> List [Union [int , float , str ]]:
219340 key_parts = []
220- for value , header in zip (row , aggregated_headers ):
341+ for value , header in zip (row , results_headers ):
221342 if header == 'benchmark_type' :
222343 # Use custom ordering for benchmark type
223344 key_parts .append (benchmark_type_sort_key (value ))
@@ -231,17 +352,24 @@ def sort_key(row: List[str]) -> List[Union[int, float, str]]:
231352 key_parts .append (str (value ))
232353 return key_parts
233354
234- aggregated_rows .sort (key = sort_key )
355+ results_rows .sort (key = sort_key )
356+
357+ # Display results
358+ if args .runs == "all" :
359+ print ("\n Results Summary (with all run numbers):" )
360+ elif args .runs == "tri" :
361+ print ("\n Results Summary (with representative run numbers):" )
362+ else :
363+ print ("\n Results Summary:" )
235364
236- print ("\n Results Summary:" )
237- print (tabulate (aggregated_rows , headers = aggregated_headers , tablefmt = "grid" ))
365+ print (tabulate (results_rows , headers = results_headers , tablefmt = "grid" ))
238366
239367 # Write to CSV if requested
240368 if args .csv_output :
241369 with open (args .csv_output , 'w' , newline = '' ) as csvfile :
242370 writer = csv .writer (csvfile )
243- writer .writerow (aggregated_headers )
244- writer .writerows (aggregated_rows )
371+ writer .writerow (results_headers )
372+ writer .writerows (results_rows )
245373 print (f"\n Results written to CSV: { args .csv_output } " )
246374
247375
0 commit comments