22from unittest import mock
33from darshan .log_utils import get_log_path
44from darshan .cli import file_stats
5+ import pandas as pd
6+ import io
57import pytest
8+
69@pytest .mark .parametrize (
710 "argv" , [
8- [get_log_path ("e3sm_io_heatmap_only.darshan" ),
9- "-mSTDIO" ,
10- "-oSTDIO_BYTES_READ" ,
11- "-n5" ],
11+ [get_log_path ("shane_macsio_id29959_5-22-32552-7035573431850780836_1590156158.darshan" ),
12+ "--csv" ,
13+ "--module=POSIX" ,
14+ "--order_by=bytes_written" ,
15+ "--limit=5" ],
1216 ]
1317)
1418def test_file_stats (argv , capsys ):
@@ -19,7 +23,38 @@ def test_file_stats(argv, capsys):
1923 file_stats .setup_parser (parser = parser )
2024 # parse the input arguments
2125 args = parser .parse_args (argv )
26+ # run once with CSV output and spot check some of the output
2227 file_stats .main (args = args )
2328 captured = capsys .readouterr ()
24- assert "15920181672442173319" in captured .out
25-
29+ assert not captured .err
30+ assert captured .out
31+ df = pd .read_csv (io .StringIO (captured .out ))
32+ assert len (df ) == 3
33+ # check the first file (most bytes written)
34+ expected_first = {
35+ 'file' : '/tmp/test/macsio_hdf5_000.h5' ,
36+ 'bytes_read' : 39816960 ,
37+ 'bytes_written' : 54579416 ,
38+ 'reads' : 6 ,
39+ 'writes' : 7699 ,
40+ 'total_jobs' : 1
41+ }
42+ row = df .iloc [0 ]
43+ for key , value in expected_first .items ():
44+ assert row [key ] == value
45+ # check the last file (least bytes written)
46+ expected_last = {
47+ 'file' : '/tmp/test/macsio-timings.log' ,
48+ 'bytes_read' : 0 ,
49+ 'bytes_written' : 12460 ,
50+ 'reads' : 0 ,
51+ 'writes' : 51 ,
52+ 'total_jobs' : 1
53+ }
54+ row = df .iloc [- 1 ]
55+ for key , value in expected_last .items ():
56+ assert row [key ] == value
57+ assert expected_first ['bytes_written' ] > expected_last ['bytes_written' ]
58+ # run again to ensure default Rich print mode runs successfully
59+ args .csv = False
60+ file_stats .main (args = args )
0 commit comments