Skip to content

Commit 3d5d197

Browse files
author
Shane Snyder
committed
add optional manifest file for large inputs
1 parent 3b744a0 commit 3d5d197

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

darshan-util/pydarshan/darshan/cli/job_stats.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
import sys
22
import pandas as pd
33
import argparse
4+
from pathlib import Path
45
import darshan
56
import darshan.cli
67
from darshan.backend.cffi_backend import accumulate_records
78
from typing import Any, Union, Callable
89
from datetime import datetime
910
from humanize import naturalsize
10-
1111
import concurrent.futures
1212
from functools import partial
1313

@@ -173,9 +173,14 @@ def setup_parser(parser: argparse.ArgumentParser):
173173

174174
parser.add_argument(
175175
"log_paths",
176-
nargs='+',
176+
nargs='*',
177177
help="specify the paths to Darshan log files"
178178
)
179+
parser.add_argument(
180+
"--log_paths_file",
181+
type=str,
182+
help="specify the path to a manifest file listing Darshan log files"
183+
)
179184
parser.add_argument(
180185
"--module", "-m",
181186
nargs='?', default='POSIX',
@@ -210,6 +215,17 @@ def setup_parser(parser: argparse.ArgumentParser):
210215
help="regex patterns for file record names to include in stats"
211216
)
212217

218+
def get_input_logs(args):
    """
    Resolve the list of Darshan log paths to process.

    A manifest file given via ``--log_paths_file`` takes precedence over
    positional ``log_paths``; the two sources are not merged.

    Parameters
    ----------
    args : object exposing ``log_paths_file`` (path to a manifest file, or
        a falsy value when unset) and ``log_paths`` (a possibly empty
        sequence of log paths).

    Returns
    -------
    The log paths read from the manifest (one per non-blank line, with
    surrounding whitespace stripped), or ``args.log_paths`` unchanged when
    no manifest is given.

    Raises
    ------
    ValueError
        If the manifest path is not an existing file, if the manifest
        contains no entries, or if neither input source was provided.
    """
    if args.log_paths_file:
        manifest_path = Path(args.log_paths_file)
        if not manifest_path.is_file():
            raise ValueError(f"Input manifest file {manifest_path} not found.")
        with open(manifest_path) as f:
            # One path per line; blank lines and surrounding whitespace
            # are ignored.
            stripped_lines = (line.strip() for line in f)
            manifest_logs = [path for path in stripped_lines if path]
        # An empty manifest is as unusable as no input at all, so fail
        # here rather than handing an empty list to the caller.
        if not manifest_logs:
            raise ValueError(
                f"Input manifest file {manifest_path} lists no Darshan logs."
            )
        return manifest_logs
    if args.log_paths:
        return args.log_paths
    raise ValueError("No input Darshan logs provided.")
213229

214230
def main(args: Union[Any, None] = None):
215231
"""
@@ -227,12 +243,11 @@ def main(args: Union[Any, None] = None):
227243
mod = args.module
228244
order_by = args.order_by
229245
limit = args.limit
230-
log_paths = args.log_paths
246+
log_paths = get_input_logs(args)
231247
filter_patterns=None
232248
filter_mode=None
233249
if args.exclude_names and args.include_names:
234-
print('job_stats error: only one of --exclude-names and --include-names may be used.')
235-
sys.exit(1)
250+
raise ValueError('Only one of --exclude_names and --include_names may be used.')
236251
elif args.exclude_names:
237252
filter_patterns = args.exclude_names
238253
filter_mode = "exclude"

0 commit comments

Comments
 (0)