Skip to content

Commit 15d39d9

Browse files
author
Shane Snyder
committed
add support for pydarshan record name filters
1 parent 8069a15 commit 15d39d9

File tree

2 files changed

+58
-13
lines changed

2 files changed

+58
-13
lines changed

darshan-util/pydarshan/darshan/cli/file_stats.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from rich.console import Console
1111
from rich.table import Table
1212

13-
def df_IO_data(file_path, mod):
13+
def df_IO_data(file_path, mod, filter_patterns, filter_mode):
1414
"""
1515
Save relevant file statistics from a single Darshan log file to a DataFrame.
1616
@@ -24,10 +24,14 @@ def df_IO_data(file_path, mod):
2424
a single DataFrame.
2525
2626
"""
27+
extra_options = {}
28+
if filter_patterns:
29+
extra_options["filter_patterns"] = filter_patterns
30+
extra_options["filter_mode"] = filter_mode
2731
report = darshan.DarshanReport(file_path, read_all=False)
2832
if mod not in report.modules:
2933
return pd.DataFrame()
30-
report.mod_read_all_records(mod)
34+
report.mod_read_all_records(mod, **extra_options)
3135
recs = report.records[mod].to_df()
3236
if mod != 'MPI-IO':
3337
rec_cols = ['id', f'{mod}_BYTES_READ', f'{mod}_BYTES_WRITTEN', f'{mod}_READS', f'{mod}_WRITES']
@@ -178,20 +182,17 @@ def setup_parser(parser: argparse.ArgumentParser):
178182

179183
parser.add_argument(
180184
"log_paths",
181-
type=str,
182185
nargs='+',
183186
help="specify the paths to Darshan log files"
184187
)
185188
parser.add_argument(
186189
"--module", "-m",
187-
type=str,
188190
nargs='?', default='POSIX',
189191
choices=['POSIX', 'MPI-IO', 'STDIO'],
190192
help="specify the Darshan module to generate file stats for (default: %(default)s)"
191193
)
192194
parser.add_argument(
193195
"--order_by", "-o",
194-
type=str,
195196
nargs='?', default='bytes_read',
196197
choices=['bytes_read', 'bytes_written', 'reads', 'writes', 'total_jobs'],
197198
help="specify the I/O metric to order files by (default: %(default)s)"
@@ -205,7 +206,17 @@ def setup_parser(parser: argparse.ArgumentParser):
205206
parser.add_argument(
206207
"--csv", "-c",
207208
action='store_true',
208-
help="output job stats in CSV format"
209+
help="output file stats in CSV format"
210+
)
211+
parser.add_argument(
212+
"--exclude_names", "-e",
213+
action='append',
214+
help="regex patterns for file record names to exclude in stats"
215+
)
216+
parser.add_argument(
217+
"--include_names", "-i",
218+
action='append',
219+
help="regex patterns for file record names to include in stats"
209220
)
210221

211222
def main(args: Union[Any, None] = None):
@@ -225,9 +236,20 @@ def main(args: Union[Any, None] = None):
225236
order_by = args.order_by
226237
limit = args.limit
227238
log_paths = args.log_paths
239+
filter_patterns=None
240+
filter_mode=None
241+
if args.exclude_names and args.include_names:
242+
print('file_stats error: only one of --exclude_names and --include_names may be used.')
243+
sys.exit(1)
244+
elif args.exclude_names:
245+
filter_patterns = args.exclude_names
246+
filter_mode = "exclude"
247+
elif args.include_names:
248+
filter_patterns = args.include_names
249+
filter_mode = "include"
228250
list_dfs = []
229251
for log_path in log_paths:
230-
df_i = df_IO_data(log_path, mod)
252+
df_i = df_IO_data(log_path, mod, filter_patterns, filter_mode)
231253
if not df_i.empty:
232254
list_dfs.append(df_i)
233255
if len(list_dfs) == 0:

darshan-util/pydarshan/darshan/cli/job_stats.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from rich.console import Console
1212
from rich.table import Table
1313

14-
def df_IO_data(file_path, mod):
14+
def df_IO_data(file_path, mod, filter_patterns, filter_mode):
1515
"""
1616
Save the statistical data from a single Darshan log file to a DataFrame.
1717
@@ -25,10 +25,14 @@ def df_IO_data(file_path, mod):
2525
a single DataFrame of job statistics.
2626
2727
"""
28+
extra_options = {}
29+
if filter_patterns:
30+
extra_options["filter_patterns"] = filter_patterns
31+
extra_options["filter_mode"] = filter_mode
2832
report = darshan.DarshanReport(file_path, read_all=False)
2933
if mod not in report.modules:
3034
return pd.DataFrame()
31-
report.mod_read_all_records(mod)
35+
report.mod_read_all_records(mod, **extra_options)
3236
recs = report.records[mod].to_df()
3337
acc_rec = accumulate_records(recs, mod, report.metadata['job']['nprocs'])
3438
dict_acc_rec = {}
@@ -155,20 +159,17 @@ def setup_parser(parser: argparse.ArgumentParser):
155159

156160
parser.add_argument(
157161
"log_paths",
158-
type=str,
159162
nargs='+',
160163
help="specify the paths to Darshan log files"
161164
)
162165
parser.add_argument(
163166
"--module", "-m",
164-
type=str,
165167
nargs='?', default='POSIX',
166168
choices=['POSIX', 'MPI-IO', 'STDIO'],
167169
help="specify the Darshan module to generate job stats for (default: %(default)s)"
168170
)
169171
parser.add_argument(
170172
"--order_by", "-o",
171-
type=str,
172173
nargs='?', default='total_bytes',
173174
choices=['perf_by_slowest', 'time_by_slowest', 'total_bytes', 'total_files'],
174175
help="specify the I/O metric to order jobs by (default: %(default)s)"
@@ -184,6 +185,17 @@ def setup_parser(parser: argparse.ArgumentParser):
184185
action='store_true',
185186
help="output job stats in CSV format"
186187
)
188+
parser.add_argument(
189+
"--exclude_names", "-e",
190+
action='append',
191+
help="regex patterns for file record names to exclude in stats"
192+
)
193+
parser.add_argument(
194+
"--include_names", "-i",
195+
action='append',
196+
help="regex patterns for file record names to include in stats"
197+
)
198+
187199

188200
def main(args: Union[Any, None] = None):
189201
"""
@@ -202,9 +214,20 @@ def main(args: Union[Any, None] = None):
202214
order_by = args.order_by
203215
limit = args.limit
204216
log_paths = args.log_paths
217+
filter_patterns=None
218+
filter_mode=None
219+
if args.exclude_names and args.include_names:
220+
print('job_stats error: only one of --exclude_names and --include_names may be used.')
221+
sys.exit(1)
222+
elif args.exclude_names:
223+
filter_patterns = args.exclude_names
224+
filter_mode = "exclude"
225+
elif args.include_names:
226+
filter_patterns = args.include_names
227+
filter_mode = "include"
205228
list_dfs = []
206229
for log_path in log_paths:
207-
df_i = df_IO_data(log_path, mod)
230+
df_i = df_IO_data(log_path, mod, filter_patterns, filter_mode)
208231
if not df_i.empty:
209232
list_dfs.append(df_i)
210233
if len(list_dfs) == 0:

0 commit comments

Comments
 (0)