1010from rich .console import Console
1111from rich .table import Table
1212
13- def df_IO_data (file_path , mod ):
13+ def df_IO_data (file_path , mod , filter_patterns , filter_mode ):
1414 """
1515 Save relevant file statistics from a single Darshan log file to a DataFrame.
1616
@@ -24,10 +24,14 @@ def df_IO_data(file_path, mod):
2424 a single DataFrame.
2525
2626 """
27+ extra_options = {}
28+ if filter_patterns :
29+ extra_options ["filter_patterns" ] = filter_patterns
30+ extra_options ["filter_mode" ] = filter_mode
2731 report = darshan .DarshanReport (file_path , read_all = False )
2832 if mod not in report .modules :
2933 return pd .DataFrame ()
30- report .mod_read_all_records (mod )
34+ report .mod_read_all_records (mod , ** extra_options )
3135 recs = report .records [mod ].to_df ()
3236 if mod != 'MPI-IO' :
3337 rec_cols = ['id' , f'{ mod } _BYTES_READ' , f'{ mod } _BYTES_WRITTEN' , f'{ mod } _READS' , f'{ mod } _WRITES' ]
@@ -178,20 +182,17 @@ def setup_parser(parser: argparse.ArgumentParser):
178182
179183 parser .add_argument (
180184 "log_paths" ,
181- type = str ,
182185 nargs = '+' ,
183186 help = "specify the paths to Darshan log files"
184187 )
185188 parser .add_argument (
186189 "--module" , "-m" ,
187- type = str ,
188190 nargs = '?' , default = 'POSIX' ,
189191 choices = ['POSIX' , 'MPI-IO' , 'STDIO' ],
190192 help = "specify the Darshan module to generate file stats for (default: %(default)s)"
191193 )
192194 parser .add_argument (
193195 "--order_by" , "-o" ,
194- type = str ,
195196 nargs = '?' , default = 'bytes_read' ,
196197 choices = ['bytes_read' , 'bytes_written' , 'reads' , 'writes' , 'total_jobs' ],
197198 help = "specify the I/O metric to order files by (default: %(default)s)"
@@ -205,7 +206,17 @@ def setup_parser(parser: argparse.ArgumentParser):
205206 parser .add_argument (
206207 "--csv" , "-c" ,
207208 action = 'store_true' ,
208- help = "output job stats in CSV format"
209+ help = "output file stats in CSV format"
210+ )
211+ parser .add_argument (
212+ "--exclude_names" , "-e" ,
213+ action = 'append' ,
214+ help = "regex patterns for file record names to exclude in stats"
215+ )
216+ parser .add_argument (
217+ "--include_names" , "-i" ,
218+ action = 'append' ,
219+ help = "regex patterns for file record names to include in stats"
209220 )
210221
211222def main (args : Union [Any , None ] = None ):
@@ -225,9 +236,20 @@ def main(args: Union[Any, None] = None):
225236 order_by = args .order_by
226237 limit = args .limit
227238 log_paths = args .log_paths
239+ filter_patterns = None
240+ filter_mode = None
241+ if args .exclude_names and args .include_names :
242+ print ('file_stats error: only one of --exclude-names and --include-names may be used.' )
243+ sys .exit (1 )
244+ elif args .exclude_names :
245+ filter_patterns = args .exclude_names
246+ filter_mode = "exclude"
247+ elif args .include_names :
248+ filter_patterns = args .include_names
249+ filter_mode = "include"
228250 list_dfs = []
229251 for log_path in log_paths :
230- df_i = df_IO_data (log_path , mod )
252+ df_i = df_IO_data (log_path , mod , filter_patterns , filter_mode )
231253 if not df_i .empty :
232254 list_dfs .append (df_i )
233255 if len (list_dfs ) == 0 :
0 commit comments