testing additions

Shane Snyder · Shane Snyder · commit a9f1986614a1 · 2025-04-26T12:17:25.000-05:00
diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py
@@ -713,7 +713,7 @@ def main(args: Union[Any, None] = None):
     filter_patterns=None
     filter_mode="exclude"
     if args.exclude_names and args.include_names:
-        print('Error: only one of --exclude-names and --include-names may be used.')
+        print('Error: only one of --exclude_names and --include_names may be used.')
         sys.exit(1)
     elif args.exclude_names:
         filter_patterns = args.exclude_names
diff --git a/darshan-util/pydarshan/darshan/report.py b/darshan-util/pydarshan/darshan/report.py
@@ -317,6 +317,8 @@ def __init__(self,
             dtype (str): default dtype for internal structures
             automatic_summary (bool): automatically generate summary after loading
             read_all (bool): whether to read all records for log
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -411,6 +413,8 @@ def open(self, filename, read_all=False, filter_patterns=None, filter_mode="excl
         Args:
             filename (str): filename to open (optional)
             read_all (bool): whether to read all records for log
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -521,7 +525,8 @@ def read_all(self, dtype=None, filter_patterns=None, filter_mode="exclude"):
         Read all available records from darshan log and return as dictionary.
 
         Args:
-            None
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -547,7 +552,8 @@ def read_all_generic_records(self, counters=True, fcounters=True, dtype=None,
         Read all generic records from darshan log and return as dictionary.
 
         Args:
-            None
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -566,7 +572,8 @@ def read_all_dxt_records(self, reads=True, writes=True, dtype=None,
         Read all dxt records from darshan log and return as dictionary.
 
         Args:
-            None
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -636,6 +643,8 @@ def mod_read_all_records(self, mod, dtype=None, warnings=True,
         Args:
             mod (str): Identifier of module to fetch all records
             dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary, 'pandas'
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -819,6 +828,8 @@ def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, w
         Args:
             mod (str): Identifier of module to fetch all records
             dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
@@ -872,6 +883,8 @@ def mod_read_all_lustre_records(self, dtype=None, warnings=True,
 
         Args:
             dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
+            filter_patterns (list of strings): list of Python regex strings to match against
+            filter_mode (str): filter mode to use (either "exclude" or "include")
 
         Return:
             None
diff --git a/darshan-util/pydarshan/darshan/tests/test_data_access_by_filesystem.py b/darshan-util/pydarshan/darshan/tests/test_data_access_by_filesystem.py
@@ -679,3 +679,32 @@ def test_stdio_basic_inclusion(logname,
     assert_series_equal(file_wr_series, expected_file_wr_series)
     assert_series_equal(bytes_rd_series, expected_bytes_rd_series)
     assert_series_equal(bytes_wr_series, expected_bytes_wr_series)
+
+def test_plot_with_empty_data():
+    # generate a report object that filters out all contained records
+    # to ensure data access by filesystem plot properly returns None instead of failing
+    logpath = get_log_path("ior_hdf5_example.darshan")
+    # use a bogus regex with the "include" filter mode to ensure no records are included
+    with darshan.DarshanReport(logpath, filter_patterns=["bogus-regex"], filter_mode="include") as report:
+        fig = data_access_by_filesystem.plot_with_report(report=report)
+        assert fig is None
+
+def test_with_filtered_data():
+    # ensure rec_to_rw_counter_dfs_with_cols only returns rows for records that survive filtering
+    logpath = get_log_path("sample-badost.darshan")
+    # generate a report object with all STDIO module records filtered out
+    # POSIX records should still remain
+    with darshan.DarshanReport(logpath, filter_patterns=["ior-posix"], filter_mode="include") as report:
+        file_id_dict = report.data["name_records"]
+        actual_df_reads, actual_df_writes = data_access_by_filesystem.rec_to_rw_counter_dfs_with_cols(report=report,
+                                                                                                      file_id_dict=file_id_dict)
+        assert len(actual_df_reads) == 0
+        assert len(actual_df_writes) == 2048
+    # generate a report object with all POSIX module records filtered out
+    # STDIO records should still remain
+    with darshan.DarshanReport(logpath, filter_patterns=["ior-posix"], filter_mode="exclude") as report:
+        file_id_dict = report.data["name_records"]
+        actual_df_reads, actual_df_writes = data_access_by_filesystem.rec_to_rw_counter_dfs_with_cols(report=report,
+                                                                                                      file_id_dict=file_id_dict)
+        assert len(actual_df_reads) == 1
+        assert len(actual_df_writes) == 2
diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py
@@ -315,3 +315,22 @@ def test_plot_io_cost_x_ticks_and_labels(logname,
         expected_rotations = 90
         x_rotations = [tl.get_rotation() for tl in xticklabels]
         assert_allclose(x_rotations, expected_rotations)
+
+def test_plot_io_cost_empty_data():
+    # generate a report object that filters out all contained records
+    # to ensure plot_io_cost properly returns None instead of failing
+    logpath = get_log_path("ior_hdf5_example.darshan")
+    # use a bogus regex with the "include" filter mode to ensure no records are included
+    with darshan.DarshanReport(logpath, filter_patterns=["bogus-regex"], filter_mode="include") as report:
+        fig = plot_io_cost(report=report)
+        assert fig is None
+
+def test_plot_io_cost_filtered_data():
+    # a module left without any records after filtering must not
+    # appear in the dataframe built by get_io_cost_df
+    log_file = get_log_path("sample-badost.darshan")
+    # exclude everything matching "ior-posix": POSIX records are expected to go, STDIO ones to stay
+    with darshan.DarshanReport(log_file, filter_patterns=["ior-posix"], filter_mode="exclude") as filtered_report:
+        cost_index = get_io_cost_df(report=filtered_report).index
+        assert "POSIX" not in cost_index
+        assert "STDIO" in cost_index
diff --git a/darshan-util/pydarshan/darshan/tests/test_report.py b/darshan-util/pydarshan/darshan/tests/test_report.py
@@ -75,6 +75,15 @@ def test_load_records():
         report.mod_read_all_records("POSIX")
         assert 1 == len(report.data['records']['POSIX'])
 
+def test_load_records_filtered():
+    """Sample for an expected number of records after filtering (raw-string regexes avoid invalid-escape warnings)."""
+    logfile = get_log_path("shane_macsio_id29959_5-22-32552-7035573431850780836_1590156158.darshan")
+    with darshan.DarshanReport(logfile, filter_patterns=[r"\.h5$"], filter_mode="exclude") as report:
+        assert 2 == len(report.data['records']['POSIX'])
+        assert 0 == len(report.data['records']['MPI-IO'])
+    with darshan.DarshanReport(logfile, filter_patterns=[r"\.h5$"], filter_mode="include") as report:
+        assert 1 == len(report.data['records']['POSIX'])
+        assert 1 == len(report.data['records']['MPI-IO'])
 
 @pytest.mark.parametrize("unsupported_record",
         ["DXT_POSIX", "DXT_MPIIO", "LUSTRE", "APMPI", "APXC"]