Skip to content

Commit a266ae5

Browse files
author
Shane Snyder
committed
add more in-depth testing for job_stats/file_stats
1 parent fe2e7b4 commit a266ae5

File tree

3 files changed

+149
-8
lines changed

3 files changed

+149
-8
lines changed

darshan-util/pydarshan/darshan/tests/test_file_stats.py

Lines changed: 77 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -2,17 +2,17 @@
22
from unittest import mock
33
from darshan.log_utils import get_log_path
44
from darshan.cli import file_stats
5+
from darshan.log_utils import _provide_logs_repo_filepaths
56
import pandas as pd
67
import io
78
import pytest
89

910
@pytest.mark.parametrize(
1011
"argv", [
11-
[get_log_path("shane_macsio_id29959_5-22-32552-7035573431850780836_1590156158.darshan"),
12-
"--csv",
12+
["--csv",
1313
"--module=POSIX",
1414
"--order_by=bytes_written",
15-
"--limit=5"],
15+
get_log_path("shane_macsio_id29959_5-22-32552-7035573431850780836_1590156158.darshan")],
1616
]
1717
)
1818
def test_file_stats(argv, capsys):
@@ -58,3 +58,77 @@ def test_file_stats(argv, capsys):
5858
# run again to ensure default Rich print mode runs successfully
5959
args.csv = False
6060
file_stats.main(args=args)
61+
assert not captured.err
62+
63+
def _provide_logs_repo_filepaths_filtered():
64+
return [
65+
path for path in _provide_logs_repo_filepaths()
66+
if 'dlio_logs' in path
67+
]
68+
@pytest.mark.parametrize(
69+
("argv", "expected"),
70+
[
71+
(
72+
["--csv",
73+
"--module=POSIX",
74+
"--order_by=bytes_read",
75+
*_provide_logs_repo_filepaths_filtered()],
76+
{'len': 194,
77+
'bytes_read': 129953991223,
78+
'bytes_written': 523946754,
79+
'reads': 35762,
80+
'writes': 168,
81+
'total_jobs': 670}
82+
),
83+
(
84+
["--csv",
85+
"--module=POSIX",
86+
"--order_by=bytes_read",
87+
"--limit=5",
88+
*_provide_logs_repo_filepaths_filtered()],
89+
{'len': 5,
90+
'bytes_read': 7214542900,
91+
'bytes_written': 0,
92+
'reads': 1830,
93+
'writes': 0,
94+
'total_jobs': 5}
95+
),
96+
(
97+
["--csv",
98+
"--module=POSIX",
99+
"--order_by=bytes_read",
100+
"--include_names=\\.npz$",
101+
*_provide_logs_repo_filepaths_filtered()],
102+
{'len': 168,
103+
'bytes_read': 129953701195,
104+
'bytes_written': 0,
105+
'reads': 34770,
106+
'writes': 0,
107+
'total_jobs': 172}
108+
)
109+
]
110+
)
111+
def test_file_stats_multi(argv, expected, capsys):
112+
with mock.patch("sys.argv", argv):
113+
# initialize the parser
114+
parser = argparse.ArgumentParser(description="")
115+
# run through setup_parser()
116+
file_stats.setup_parser(parser=parser)
117+
# parse the input arguments
118+
args = parser.parse_args(argv)
119+
# run once with CSV output and spot check some of the output
120+
file_stats.main(args=args)
121+
captured = capsys.readouterr()
122+
assert not captured.err
123+
assert captured.out
124+
df = pd.read_csv(io.StringIO(captured.out))
125+
assert len(df) == expected['len']
126+
assert df['bytes_read'].sum() == expected['bytes_read']
127+
assert df['bytes_written'].sum() == expected['bytes_written']
128+
assert df['reads'].sum() == expected['reads']
129+
assert df['writes'].sum() == expected['writes']
130+
assert df['total_jobs'].sum() == expected['total_jobs']
131+
# run again to ensure default Rich print mode runs successfully
132+
args.csv = False
133+
file_stats.main(args=args)
134+
assert not captured.err

darshan-util/pydarshan/darshan/tests/test_job_stats.py

Lines changed: 70 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -2,18 +2,18 @@
22
from unittest import mock
33
from darshan.log_utils import get_log_path
44
from darshan.cli import job_stats
5+
from darshan.log_utils import _provide_logs_repo_filepaths
56
from numpy.testing import assert_allclose
67
import pandas as pd
78
import io
89
import pytest
910

1011
@pytest.mark.parametrize(
1112
"argv", [
12-
[get_log_path("sample-badost.darshan"),
13-
"--csv",
13+
["--csv",
1414
"--module=STDIO",
1515
"--order_by=total_bytes",
16-
"--limit=5"],
16+
get_log_path("sample-badost.darshan")],
1717
]
1818
)
1919
def test_job_stats(argv, capsys):
@@ -51,3 +51,70 @@ def test_job_stats(argv, capsys):
5151
# run again to ensure default Rich print mode runs successfully
5252
args.csv = False
5353
job_stats.main(args=args)
54+
assert not captured.err
55+
56+
def _provide_logs_repo_filepaths_filtered():
57+
return [
58+
path for path in _provide_logs_repo_filepaths()
59+
if 'dlio_logs' in path
60+
]
61+
@pytest.mark.parametrize(
62+
("argv", "expected"),
63+
[
64+
(
65+
["--csv",
66+
"--module=POSIX",
67+
"--order_by=perf_by_slowest",
68+
*_provide_logs_repo_filepaths_filtered()],
69+
{'perf_by_slowest': 1818543162.0558,
70+
'time_by_slowest': 89.185973,
71+
'total_bytes': 130477937977,
72+
'total_files': 670}
73+
),
74+
(
75+
["--csv",
76+
"--module=POSIX",
77+
"--order_by=perf_by_slowest",
78+
"--limit=5",
79+
*_provide_logs_repo_filepaths_filtered()],
80+
{'perf_by_slowest': 1818543162.0558,
81+
'time_by_slowest': 30.823626,
82+
'total_bytes': 54299532365,
83+
'total_files': 190}
84+
)
85+
]
86+
)
87+
def test_job_stats_multi(argv, expected, capsys):
88+
# this case tests job_stats with multiple input logs
89+
# and ensures that aggregate statistics are as expected
90+
with mock.patch("sys.argv", argv):
91+
# initialize the parser
92+
parser = argparse.ArgumentParser(description="")
93+
# run through setup_parser()
94+
job_stats.setup_parser(parser=parser)
95+
# parse the input arguments
96+
args = parser.parse_args(argv)
97+
# run once with CSV output and spot check some of the output
98+
job_stats.main(args=args)
99+
captured = capsys.readouterr()
100+
assert not captured.err
101+
assert captured.out
102+
df = pd.read_csv(io.StringIO(captured.out))
103+
# verify max perf is first row and min perf is last row
104+
max_perf = df['perf_by_slowest'].max()
105+
min_perf = df['perf_by_slowest'].min()
106+
assert df.iloc[0]['perf_by_slowest'] == max_perf
107+
assert df.iloc[-1]['perf_by_slowest'] == min_perf
108+
# verify values against expected
109+
assert_allclose(max_perf, expected['perf_by_slowest'], rtol=1e-5, atol=1e-8)
110+
assert max_perf == expected['perf_by_slowest']
111+
total_time = df['time_by_slowest'].sum()
112+
assert_allclose(total_time, expected['time_by_slowest'], rtol=1e-5, atol=1e-8)
113+
total_bytes = df['total_bytes'].sum()
114+
assert total_bytes == expected['total_bytes']
115+
total_files = df['total_files'].sum()
116+
assert total_files == expected['total_files']
117+
# run again to ensure default Rich print mode runs successfully
118+
args.csv = False
119+
job_stats.main(args=args)
120+
assert not captured.err

darshan-util/pydarshan/darshan/tests/test_report.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -78,10 +78,10 @@ def test_load_records():
7878
def test_load_records_filtered():
7979
"""Sample for an expected number of records after filtering."""
8080
logfile = get_log_path("shane_macsio_id29959_5-22-32552-7035573431850780836_1590156158.darshan")
81-
with darshan.DarshanReport(logfile, filter_patterns=["\.h5$"], filter_mode="exclude") as report:
81+
with darshan.DarshanReport(logfile, filter_patterns=[r"\.h5$"], filter_mode="exclude") as report:
8282
assert 2 == len(report.data['records']['POSIX'])
8383
assert 0 == len(report.data['records']['MPI-IO'])
84-
with darshan.DarshanReport(logfile, filter_patterns=["\.h5$"], filter_mode="include") as report:
84+
with darshan.DarshanReport(logfile, filter_patterns=[r"\.h5$"], filter_mode="include") as report:
8585
assert 1 == len(report.data['records']['POSIX'])
8686
assert 1 == len(report.data['records']['MPI-IO'])
8787

0 commit comments

Comments
 (0)