Skip to content

Commit 1f0532b

Browse files
author
Shane Snyder
committed
pydarshan updates for DAOS module
1 parent c83718f commit 1f0532b

File tree

8 files changed

+84
-25
lines changed

8 files changed

+84
-25
lines changed

darshan-util/pydarshan/darshan/cli/summary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ def register_figures(self):
484484

485485
# for the operation counts, since the `H5D` variant contains
486486
# both modules' data, we either want `H5F` or `H5D`, not both
487-
opcounts_mods = ["POSIX", "MPI-IO", "STDIO", "DFS"]
487+
opcounts_mods = ["POSIX", "MPI-IO", "STDIO", "DFS", "DAOS"]
488488
if "H5D" in self.report.modules:
489489
opcounts_mods.append("H5D")
490490
elif "H5F" in self.report.modules:
@@ -560,7 +560,7 @@ def register_figures(self):
560560
# repo
561561
pass
562562

563-
if mod in ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR", "DFS"]:
563+
if mod in ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR", "DFS", "DAOS"]:
564564
access_hist_description = (
565565
"Histogram of read and write access sizes. The specific values "
566566
"of the most frequently occurring access sizes can be found in "

darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def agg_ioops(self, mode='append'):
1616
ctx = {}
1717

1818
# aggregate
19-
mods = ['MPI-IO', 'POSIX', 'STDIO', "H5F", "H5D", "PNETCDF_VAR", "PNETCDF_FILE", "DFS"]
19+
mods = ['MPI-IO', 'POSIX', 'STDIO', "H5F", "H5D", "PNETCDF_VAR", "PNETCDF_FILE", "DFS", "DAOS"]
2020
for mod in mods:
2121

2222
# check records for module are present
@@ -121,6 +121,21 @@ def agg_ioops(self, mode='append'):
121121
ctx[mod] = agg
122122
ctx[mod + '_simple'] = tmp
123123

124+
elif mod == "DAOS":
125+
tmp = {
126+
'Obj Fetches': agg[mod + '_OBJ_FETCHES'],
127+
'Obj Updates': agg[mod + '_OBJ_UPDATES'],
128+
'Obj Opens': agg[mod + '_OBJ_OPENS'],
129+
'Array Reads': agg[mod + '_ARRAY_READS'],
130+
'Array Writes': agg[mod + '_ARRAY_WRITES'],
131+
'Array Opens': agg[mod + '_ARRAY_OPENS'],
132+
'KV Gets': agg[mod + '_KV_GETS'],
133+
'KV Puts': agg[mod + '_KV_PUTS'],
134+
'KV Opens': agg[mod + '_KV_OPENS'],
135+
}
136+
ctx[mod] = agg
137+
ctx[mod + '_simple'] = tmp
138+
124139
else:
125140
# POSIX and STDIO share most counter names and are handled
126141
# together for this reason, except for metadata/sync counter

darshan-util/pydarshan/darshan/experimental/aggregators/mod_agg_iohist.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def mod_agg_iohist(self, mod, mode='append'):
1212
"""
1313

1414
# sanitation and guards
15-
supported = ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR", "DFS"]
15+
supported = ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR", "DFS", "DAOS"]
1616
if mod not in supported:
1717
raise Exception("Unsupported mod_name for aggregated iohist.")
1818

darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def get_io_cost_df(report: darshan.DarshanReport) -> Any:
109109
110110
"""
111111
io_cost_dict = {}
112-
supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR", "DFS"]
112+
supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR", "DFS", "DAOS"]
113113
for mod_key in report.modules:
114114
if mod_key in supported_modules:
115115
# collect the records in dataframe form

darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,20 @@ def gather_count_data(report, mod):
170170
mod_data['DFS_STATS'],
171171
]
172172

173+
elif mod == 'DAOS':
174+
labels = ['ObjFetch', 'ObjUpdate', 'ObjOpen', 'ArrRead', 'ArrWrite', 'ArrOpen', 'KVGet', 'KVPut', 'KVOpen']
175+
counts = [
176+
mod_data['DAOS_OBJ_FETCHES'],
177+
mod_data['DAOS_OBJ_UPDATES'],
178+
mod_data['DAOS_OBJ_OPENS'],
179+
mod_data['DAOS_ARRAY_READS'],
180+
mod_data['DAOS_ARRAY_WRITES'],
181+
mod_data['DAOS_ARRAY_OPENS'],
182+
mod_data['DAOS_KV_GETS'],
183+
mod_data['DAOS_KV_PUTS'],
184+
mod_data['DAOS_KV_OPENS'],
185+
]
186+
173187
return labels, counts
174188

175189
def plot_opcounts(report, mod, ax=None):

darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@
2020
["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M",
2121
"1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"]
2222
),
23+
(
24+
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
25+
"DAOS",
26+
plot_access_histogram,
27+
["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M",
28+
"1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"]
29+
),
2330
(
2431
"dxt.darshan",
2532
"POSIX",
@@ -77,6 +84,12 @@
7784
plot_opcounts,
7885
['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'Get Size', 'Punch', 'Remove', 'Stat'],
7986
),
87+
(
88+
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
89+
"DAOS",
90+
plot_opcounts,
91+
['ObjFetch', 'ObjUpdate', 'ObjOpen', 'ArrRead', 'ArrWrite', 'ArrOpen', 'KVGet', 'KVPut', 'KVOpen'],
92+
),
8093
(
8194
"dxt.darshan",
8295
"POSIX",

darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@
6363
[0.0, 0.0, 0.0, 0.0],
6464
[0.0, 4.515051841e-06, 0.0, 0.0],
6565
[0.001456562, 0.002266062, 0.007923812, 0.0],
66+
[0.001492562, 0.002273217, 0.007910812, 0.0],
6667
]),
67-
["POSIX", "STDIO", "DFS"],
68+
["POSIX", "STDIO", "DFS", "DAOS"],
6869
["Read", "Write", "Meta", "Wait"],
6970
),
7071
),

darshan-util/pydarshan/darshan/tests/test_report.py

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -84,45 +84,61 @@ def test_dfs_daos_posix_match():
8484
dfs_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan"))
8585
posix_ior_report.mod_read_all_records("POSIX")
8686
dfs_ior_report.mod_read_all_records("DFS")
87-
posix_data_dict = posix_ior_report.data['records']["POSIX"].to_df()
88-
dfs_data_dict = dfs_ior_report.data['records']["DFS"].to_df()
87+
dfs_ior_report.mod_read_all_records("DAOS")
88+
posix_data_dict = posix_ior_report.data['records']["POSIX"].to_df()["counters"]
89+
dfs_data_dict = dfs_ior_report.data['records']["DFS"].to_df()["counters"]
90+
daos_data_dict = dfs_ior_report.data['records']["DAOS"].to_df()["counters"]
8991
dfs_ior_name_recs = dfs_ior_report.data["name_records"]
9092

91-
for column_name in dfs_data_dict["counters"].columns:
93+
# also gather counters for the underlying DAOS record for the DFS record
94+
# (they have the same record ID, simplifying this a bit)
95+
dfs_hash = dfs_data_dict["id"][0]
96+
daos_data_dict = daos_data_dict[daos_data_dict["id"] == dfs_hash]
97+
for column_name in dfs_data_dict.columns:
9298
# for some columns we can't reasonably expect a match
9399
# or we need to handle the data differently between POSIX
94100
# and DAOS DFS
95101
if column_name in ["id", "DFS_LOOKUPS", "DFS_DUPS", "DFS_NB_READS", "DFS_NB_WRITES",
96102
"DFS_GET_SIZES", "DFS_PUNCHES", "DFS_REMOVES", "DFS_STATS",
97-
"DFS_CHUNK_SIZE",
98-
"DFS_FASTEST_RANK", "DFS_SLOWEST_RANK"]:
103+
"DFS_CHUNK_SIZE", "DFS_FASTEST_RANK", "DFS_SLOWEST_RANK",
104+
"DFS_FASTEST_RANK_BYTES", "DFS_SLOWEST_RANK_BYTES",
105+
"DFS_MAX_READ_TIME_SIZE", "DFS_MAX_WRITE_TIME_SIZE",
106+
"DFS_GLOBAL_OPENS", "DFS_READXS", "DFS_WRITEXS"]:
99107
continue
100-
elif column_name in ["DFS_GLOBAL_OPENS", "DFS_OPENS"]:
108+
elif column_name == "DFS_OPENS":
101109
# sum these together to match the POSIX version
102-
column_name = "DFS_OPENS"
103-
dfs_data = (dfs_data_dict["counters"]["DFS_GLOBAL_OPENS"] +
104-
dfs_data_dict["counters"]["DFS_OPENS"])
105-
elif column_name in ["DFS_READS", "DFS_READXS"]:
106-
column_name = "DFS_READS"
107-
dfs_data = (dfs_data_dict["counters"]["DFS_READS"] +
108-
dfs_data_dict["counters"]["DFS_READXS"])
110+
dfs_data = (dfs_data_dict["DFS_GLOBAL_OPENS"] +
111+
dfs_data_dict["DFS_OPENS"])
112+
elif column_name == "DFS_READS":
113+
# sum these together to match the POSIX version
114+
dfs_data = (dfs_data_dict["DFS_READS"] +
115+
dfs_data_dict["DFS_READXS"])
109116
# we know the hardcoded value for certain
110117
assert dfs_data.values == 64
111-
elif column_name in ["DFS_WRITES", "DFS_WRITEXS"]:
112-
column_name = "DFS_WRITES"
113-
dfs_data = (dfs_data_dict["counters"]["DFS_WRITES"] +
114-
dfs_data_dict["counters"]["DFS_WRITEXS"])
118+
elif column_name == "DFS_WRITES":
119+
# sum these together to match the POSIX version
120+
dfs_data = (dfs_data_dict["DFS_WRITES"] +
121+
dfs_data_dict["DFS_WRITEXS"])
115122
# we know the hardcoded value for certain
116123
assert dfs_data.values == 64
117124
else:
118-
dfs_data = dfs_data_dict["counters"][column_name]
125+
dfs_data = dfs_data_dict[column_name]
119126
posix_column_name = column_name.replace("DFS", "POSIX")
120-
posix_data = posix_data_dict["counters"][posix_column_name]
127+
posix_data = posix_data_dict[posix_column_name]
121128
assert_allclose(dfs_data.values, posix_data.values)
129+
# also check the DAOS-level data
130+
daos_column_name = column_name.replace("DFS", "DAOS")
131+
if daos_column_name == "DAOS_OPENS":
132+
# this won't match exactly
133+
continue
134+
elif daos_column_name in ["DAOS_READS", "DAOS_WRITES"]:
135+
daos_column_name = daos_column_name.replace("DAOS", "DAOS_ARRAY")
136+
daos_data = daos_data_dict[daos_column_name]
122137
if column_name.endswith("BYTES_WRITTEN"):
123138
# we know the hardcoded value for certain
124139
# 256 KiB * 16
125140
assert dfs_data.values == 16777216
141+
assert daos_data.values == 16777216
126142

127143

128144
@pytest.mark.parametrize("unsupported_record",

0 commit comments

Comments
 (0)