@@ -152,17 +152,102 @@ def find_varying_parameters(all_configs: List[Dict[str, Any]]) -> Set[str]:
152152 return varying
153153
154154
def find_multirun_dir(index: int = 0, sort_by_date: bool = True) -> str:
    """Find the Nth latest run directory under ``multirun``.

    The expected layout is ``multirun/<YYYY-MM-DD>/<HH-MM-SS>``. Every
    second-level directory is a candidate; candidates are ordered from most
    recent to oldest and the one at position ``index`` is returned.

    Args:
        index: Position in the most-recent-first ordering (0 = most recent,
            1 = previous, etc.).
        sort_by_date: If True, order by the date/time encoded in the directory
            names, falling back to the directory's modification time when the
            names do not match the expected format. If False, order purely by
            modification time.

    Returns:
        Path of the selected directory, relative to the current working
        directory (e.g. ``multirun/2025-08-27/16-49-08``).

    Raises:
        FileNotFoundError: If ``multirun`` is missing (or is not a directory),
            or if it contains no candidate directories.
        IndexError: If ``index`` is beyond the number of candidates.
    """
    # Local import keeps this function self-contained regardless of what the
    # module imports at the top of the file.
    from datetime import datetime

    multirun_path = 'multirun'
    if not os.path.isdir(multirun_path):
        raise FileNotFoundError("multirun directory not found")

    # Collect (sort_key, path) pairs for every multirun/<dir>/<subdir>.
    all_subdirs = []
    for item in os.listdir(multirun_path):
        item_path = os.path.join(multirun_path, item)
        if not os.path.isdir(item_path):
            continue
        for subitem in os.listdir(item_path):
            subitem_path = os.path.join(item_path, subitem)
            if not os.path.isdir(subitem_path):
                continue

            if sort_by_date:
                try:
                    # Actually validate the names; a plain string key would let
                    # arbitrary directory names sort above real dates.
                    sort_key = datetime.strptime(f"{item} {subitem}", "%Y-%m-%d %H-%M-%S")
                except ValueError:
                    # Convert mtime to datetime so every key in this mode has
                    # the same type and sorted() can compare them.
                    sort_key = datetime.fromtimestamp(os.path.getmtime(subitem_path))
            else:
                sort_key = os.path.getmtime(subitem_path)

            all_subdirs.append((sort_key, subitem_path))

    if not all_subdirs:
        raise FileNotFoundError("No experiment directories found in multirun")

    # Most recent first; compare on the key only so paths never break ties.
    sorted_subdirs = sorted(all_subdirs, key=lambda entry: entry[0], reverse=True)

    if index >= len(sorted_subdirs):
        raise IndexError(f"Index {index} out of range, only {len(sorted_subdirs)} directories found")

    return sorted_subdirs[index][1]
197+
198+
155199def main () -> None :
156200 parser = argparse .ArgumentParser (description = 'Print benchmark throughput data automatically grouped' )
157- parser .add_argument ('--base-dir' , required = True , help = 'Base directory containing benchmark results' )
201+
202+ # Positional argument for base directory (optional)
203+ parser .add_argument ('base_dir' , nargs = '?' , help = 'Base directory containing benchmark results' )
204+
205+ # Options for automatic latest directory selection
206+ parser .add_argument (
207+ '--latest' ,
208+ type = int ,
209+ nargs = '?' ,
210+ const = 0 ,
211+ metavar = 'N' ,
212+ help = 'Use the Nth latest multirun directory (0=most recent, 1=previous, etc.)' ,
213+ )
214+ parser .add_argument (
215+ '--latest-order' ,
216+ choices = ['date' , 'mod_time' ],
217+ default = 'date' ,
218+ help = 'Sort latest directories by date in path or modification time' ,
219+ )
220+
158221 parser .add_argument ('--csv-output' , help = 'Optional CSV file to write the results to' )
222+ parser .add_argument (
223+ '--runs' , choices = ['tri' , 'all' ], help = 'Show run numbers in results (tri=min/median/max, all=all runs)'
224+ )
159225 args = parser .parse_args ()
160226
227+ # Validate conflicting arguments
228+ if args .base_dir and args .latest is not None :
229+ parser .error ("Cannot specify both a base directory and --latest option" )
230+
231+ # Determine the base directory to use
232+ # Priority: positional arg > --latest > default to latest=0
233+ if args .base_dir :
234+ base_dir = args .base_dir
235+ else :
236+ latest_index = args .latest if args .latest is not None else 0
237+ try :
238+ base_dir = find_multirun_dir (index = latest_index , sort_by_date = (args .latest_order == 'date' ))
239+ ordinal = ["latest" , "2nd latest" , "3rd latest" ]
240+ index_desc = ordinal [latest_index ] if latest_index < len (ordinal ) else f"{ latest_index + 1 } th latest"
241+ print (f"Using { index_desc } multirun directory (sorted by { args .latest_order } ): { base_dir } " )
242+ except (FileNotFoundError , IndexError ) as e :
243+ print (f"Error: { e } " )
244+ return
245+
161246 # List to store all results
162247 all_results = []
163248
164249 # Process all iteration directories
165- for root , dirs , files in os .walk (args . base_dir ):
250+ for root , dirs , files in os .walk (base_dir ):
166251 for dir_name in dirs :
167252 if dir_name .isdigit ():
168253 iteration_path = os .path .join (root , dir_name )
@@ -184,30 +269,68 @@ def main() -> None:
184269 grouped_results = defaultdict (list )
185270 for config , throughput , iter_num in all_results :
186271 key = tuple ((param , str (config .get (param , 'N/A' ))) for param in sorted (varying_params ))
187- grouped_results [key ].append (throughput )
188-
189- # Aggregated results table
190- aggregated_headers = varying_params + [
191- "Count" ,
192- "Median (Gbps)" ,
193- "Std Dev (Gbps)" ,
194- "Min (Gbps)" ,
195- "Max (Gbps)" ,
196- ]
197- aggregated_rows = []
198- for config_key , throughputs in grouped_results .items ():
272+ grouped_results [key ].append ((throughput , iter_num ))
273+
274+ # Generate aggregated results table with optional Run Numbers column
275+ if args .runs :
276+ results_headers = varying_params + [
277+ "Run Numbers" ,
278+ "Count" ,
279+ "Max (Gbps)" ,
280+ "Median (Gbps)" ,
281+ "Min (Gbps)" ,
282+ "Std Dev (Gbps)" ,
283+ ]
284+ else :
285+ results_headers = varying_params + [
286+ "Count" ,
287+ "Max (Gbps)" ,
288+ "Median (Gbps)" ,
289+ "Min (Gbps)" ,
290+ "Std Dev (Gbps)" ,
291+ ]
292+
293+ results_rows = []
294+ for config_key , throughput_data in grouped_results .items ():
295+ throughputs = [t for t , _ in throughput_data ]
296+ run_numbers = [r for _ , r in throughput_data ]
297+
199298 row = []
200299 for _ , value in config_key :
201300 row .append (value )
301+
302+ # Add run numbers column if requested
303+ if args .runs :
304+ if args .runs == "tri" :
305+ # Find min, max, and median run numbers based on throughput
306+ sorted_by_throughput = sorted (zip (throughputs , run_numbers ))
307+ min_run = sorted_by_throughput [0 ][1 ]
308+ max_run = sorted_by_throughput [- 1 ][1 ]
309+ median_idx = len (sorted_by_throughput ) // 2
310+ median_run = sorted_by_throughput [median_idx ][1 ]
311+
312+ selected_runs = [max_run , median_run , min_run ]
313+ # Remove duplicates while preserving order
314+ unique_runs = []
315+ for run in selected_runs :
316+ if run not in unique_runs :
317+ unique_runs .append (run )
318+
319+ row .append ("," .join (unique_runs ))
320+ else :
321+ sorted_by_throughput = sorted (zip (throughputs , run_numbers ), reverse = True )
322+ all_runs = [r for _ , r in sorted_by_throughput ]
323+ row .append ("," .join (all_runs ))
324+
202325 row .append (len (throughputs ))
326+ row .append (f"{ max (throughputs ):.2f} " )
203327 row .append (f"{ statistics .median (throughputs ):.2f} " )
328+ row .append (f"{ min (throughputs ):.2f} " )
204329 if len (throughputs ) > 1 :
205330 row .append (f"{ statistics .stdev (throughputs ):.2f} " )
206331 else :
207332 row .append ("N/A" )
208- row .append (f"{ min (throughputs ):.2f} " )
209- row .append (f"{ max (throughputs ):.2f} " )
210- aggregated_rows .append (row )
333+ results_rows .append (row )
211334
212335 # Custom sorting function for benchmark types
213336 def benchmark_type_sort_key (value : str ) -> int :
@@ -217,7 +340,7 @@ def benchmark_type_sort_key(value: str) -> int:
217340 # Sort rows by all columns
218341 def sort_key (row : List [str ]) -> List [Union [int , float , str ]]:
219342 key_parts = []
220- for value , header in zip (row , aggregated_headers ):
343+ for value , header in zip (row , results_headers ):
221344 if header == 'benchmark_type' :
222345 # Use custom ordering for benchmark type
223346 key_parts .append (benchmark_type_sort_key (value ))
@@ -231,17 +354,24 @@ def sort_key(row: List[str]) -> List[Union[int, float, str]]:
231354 key_parts .append (str (value ))
232355 return key_parts
233356
234- aggregated_rows .sort (key = sort_key )
357+ results_rows .sort (key = sort_key )
358+
359+ # Display results
360+ if args .runs == "all" :
361+ print ("\n Results Summary (with all run numbers):" )
362+ elif args .runs == "tri" :
363+ print ("\n Results Summary (with representative run numbers):" )
364+ else :
365+ print ("\n Results Summary:" )
235366
236- print ("\n Results Summary:" )
237- print (tabulate (aggregated_rows , headers = aggregated_headers , tablefmt = "grid" ))
367+ print (tabulate (results_rows , headers = results_headers , tablefmt = "grid" ))
238368
239369 # Write to CSV if requested
240370 if args .csv_output :
241371 with open (args .csv_output , 'w' , newline = '' ) as csvfile :
242372 writer = csv .writer (csvfile )
243- writer .writerow (aggregated_headers )
244- writer .writerows (aggregated_rows )
373+ writer .writerow (results_headers )
374+ writer .writerows (results_rows )
245375 print (f"\n Results written to CSV: { args .csv_output } " )
246376
247377
0 commit comments