|
2 | 2 | from unittest import mock |
3 | 3 | from darshan.log_utils import get_log_path |
4 | 4 | from darshan.cli import job_stats |
| 5 | +from darshan.log_utils import _provide_logs_repo_filepaths |
5 | 6 | from numpy.testing import assert_allclose |
6 | 7 | import pandas as pd |
7 | 8 | import io |
8 | 9 | import pytest |
9 | 10 |
|
10 | 11 | @pytest.mark.parametrize( |
11 | 12 | "argv", [ |
12 | | - [get_log_path("sample-badost.darshan"), |
13 | | - "--csv", |
| 13 | + ["--csv", |
14 | 14 | "--module=STDIO", |
15 | 15 | "--order_by=total_bytes", |
16 | | - "--limit=5"], |
| 16 | + get_log_path("sample-badost.darshan")], |
17 | 17 | ] |
18 | 18 | ) |
19 | 19 | def test_job_stats(argv, capsys): |
@@ -51,3 +51,70 @@ def test_job_stats(argv, capsys): |
51 | 51 | # run again to ensure default Rich print mode runs successfully |
52 | 52 | args.csv = False |
53 | 53 | job_stats.main(args=args) |
| 54 | + assert not captured.err |
| 55 | + |
def _provide_logs_repo_filepaths_filtered():
    """Return the logs-repo file paths restricted to the 'dlio_logs' dataset."""
    filtered = []
    for log_path in _provide_logs_repo_filepaths():
        # only the DLIO multi-log sample set is relevant to the multi-log tests
        if 'dlio_logs' in log_path:
            filtered.append(log_path)
    return filtered
@pytest.mark.parametrize(
    ("argv", "expected"),
    [
        (
            ["--csv",
             "--module=POSIX",
             "--order_by=perf_by_slowest",
             *_provide_logs_repo_filepaths_filtered()],
            {'perf_by_slowest': 1818543162.0558,
             'time_by_slowest': 89.185973,
             'total_bytes': 130477937977,
             'total_files': 670}
        ),
        (
            ["--csv",
             "--module=POSIX",
             "--order_by=perf_by_slowest",
             "--limit=5",
             *_provide_logs_repo_filepaths_filtered()],
            {'perf_by_slowest': 1818543162.0558,
             'time_by_slowest': 30.823626,
             'total_bytes': 54299532365,
             'total_files': 190}
        )
    ]
)
def test_job_stats_multi(argv, expected, capsys):
    # Exercise job_stats with multiple input logs and check that the
    # aggregate statistics match the known totals for the dlio_logs set.
    # NOTE(review): relies on `argparse` being imported at the top of the
    # file (file line 1 is not visible in this hunk) -- confirm.
    with mock.patch("sys.argv", argv):
        # initialize the parser and run through setup_parser()
        parser = argparse.ArgumentParser(description="")
        job_stats.setup_parser(parser=parser)
        args = parser.parse_args(argv)
        # first pass: CSV output; spot check aggregate values
        job_stats.main(args=args)
        captured = capsys.readouterr()
        assert not captured.err
        assert captured.out
        df = pd.read_csv(io.StringIO(captured.out))
        # rows must be ordered by perf_by_slowest: max first, min last
        max_perf = df['perf_by_slowest'].max()
        min_perf = df['perf_by_slowest'].min()
        assert df.iloc[0]['perf_by_slowest'] == max_perf
        assert df.iloc[-1]['perf_by_slowest'] == min_perf
        # compare floats with tolerances only; the original follow-up
        # exact `==` on max_perf was brittle and redundant, so it is dropped
        assert_allclose(max_perf, expected['perf_by_slowest'], rtol=1e-5, atol=1e-8)
        total_time = df['time_by_slowest'].sum()
        assert_allclose(total_time, expected['time_by_slowest'], rtol=1e-5, atol=1e-8)
        total_bytes = df['total_bytes'].sum()
        assert total_bytes == expected['total_bytes']
        total_files = df['total_files'].sum()
        assert total_files == expected['total_files']
        # second pass: default Rich print mode must also run cleanly
        args.csv = False
        job_stats.main(args=args)
        # re-read the capture: the earlier readouterr() snapshot is stale,
        # so asserting on it would not catch stderr from this second run
        captured = capsys.readouterr()
        assert not captured.err
0 commit comments