Skip to content

Commit d5a7843

Browse files
author
Shane Snyder
committed
PyDarshan changes for new job timestamps
* new logutils function (`darshan_log_get_job_runtime`) to calculate job runtime as a floating point value * for pre 3.41 logs, this is end-start+1 (i.e., runtime rounded up to nearest integer) * for 3.41+ logs, this is exact runtime to nsec precision * CFFI definition updates of job types, type sizes * CFFI bindings updates to extract new job timers and runtime * updates to summary and report interfaces to use new job timers and runtime values * updates to plotting code to use new job timers and runtime values * `plot_dxt_heatmap.py` in particular had some code removed that tried to correct for a calculated runtime of 0 * updates to tests to accommodate new changes to various interfaces and record formats * `test_plot_dxt_heatmap.py` has changes to avoid need for rounding up calculated runtime * new ior-PNETCDF/ior-HDF5 log files are included as previous ones are no longer valid due to log format changes
1 parent 302909b commit d5a7843

16 files changed

+72
-87
lines changed

darshan-util/pydarshan/darshan/backend/api_def_c.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
header = """/* from darshan-logutils.h */
1111
struct darshan_mnt_info
1212
{
13-
char mnt_type[3031];
14-
char mnt_path[3031];
13+
char mnt_type[3015];
14+
char mnt_path[3015];
1515
};
1616
1717
struct darshan_mod_info
@@ -29,8 +29,10 @@
2929
struct darshan_job
3030
{
3131
int64_t uid;
32-
int64_t start_time;
33-
int64_t end_time;
32+
int64_t start_time_sec;
33+
int64_t start_time_nsec;
34+
int64_t end_time_sec;
35+
int64_t end_time_nsec;
3436
int64_t nprocs;
3537
int64_t jobid;
3638
char metadata[1024];
@@ -167,6 +169,7 @@
167169
void darshan_log_get_modules(void*, struct darshan_mod_info **, int*);
168170
int darshan_log_get_record(void*, int, void **);
169171
char* darshan_log_get_lib_version(void);
172+
int darshan_log_get_job_runtime(void *, struct darshan_job job, double *runtime);
170173
void darshan_free(void *);
171174
172175
int darshan_log_get_namehash(void*, struct darshan_name_record_ref **hash);

darshan-util/pydarshan/darshan/backend/cffi_backend.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,17 @@ def log_get_job(log):
126126
libdutil.darshan_log_get_job(log['handle'], jobrec)
127127

128128
job['uid'] = jobrec[0].uid
129-
job['start_time'] = jobrec[0].start_time
130-
job['end_time'] = jobrec[0].end_time
129+
job['start_time_sec'] = jobrec[0].start_time_sec
130+
job['start_time_nsec'] = jobrec[0].start_time_nsec
131+
job['end_time_sec'] = jobrec[0].end_time_sec
132+
job['end_time_nsec'] = jobrec[0].end_time_nsec
131133
job['nprocs'] = jobrec[0].nprocs
132134
job['jobid'] = jobrec[0].jobid
133135

136+
runtime = ffi.new("double *")
137+
libdutil.darshan_log_get_job_runtime(log['handle'], jobrec[0], runtime)
138+
job['run_time'] = runtime[0]
139+
134140
# dirty hack to get log format version -- we know it's currently stored at the
135141
# very beginning of the log handle structure, so we just cast the struct
136142
# pointer as a string...

darshan-util/pydarshan/darshan/cli/summary.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -178,16 +178,9 @@ def get_runtime(report: darshan.report.DarshanReport) -> str:
178178
runtime : the calculated executable run time.
179179
180180
"""
181-
# calculate the run time
182-
runtime_val = int(
183-
report.metadata["job"]["end_time"] - report.metadata["job"]["start_time"]
184-
)
185-
if runtime_val < 1:
186-
# to prevent the displayed run time from being 0 seconds
187-
# label anything under 1 second as less than 1
188-
runtime = "< 1"
189-
else:
190-
runtime = str(runtime_val)
181+
# get the run time string
182+
runtime_val = report.metadata["job"]["run_time"]
183+
runtime = f'{runtime_val:.4f}'
191184
return runtime
192185

193186
def get_header(self):
@@ -200,7 +193,7 @@ def get_header(self):
200193
else:
201194
app_name = os.path.basename(command.split()[0])
202195
# collect the date from the time stamp
203-
date = datetime.date.fromtimestamp(self.report.metadata["job"]["start_time"])
196+
date = datetime.date.fromtimestamp(self.report.metadata["job"]["start_time_sec"])
204197
# the header is the application name and the log date
205198
self.header = f"{app_name} ({date})"
206199

@@ -222,9 +215,9 @@ def get_metadata_table(self):
222215
"Job ID": job_data["jobid"],
223216
"User ID": job_data["uid"],
224217
"# Processes": job_data["nprocs"],
225-
"Runtime (s)": self.get_runtime(report=self.report),
226-
"Start Time": datetime.datetime.fromtimestamp(job_data["start_time"]),
227-
"End Time": datetime.datetime.fromtimestamp(job_data["end_time"]),
218+
"Run time (s)": self.get_runtime(report=self.report),
219+
"Start Time": datetime.datetime.fromtimestamp(job_data["start_time_sec"]),
220+
"End Time": datetime.datetime.fromtimestamp(job_data["end_time_sec"]),
228221
"Command Line": self.get_full_command(report=self.report),
229222
}
230223
# convert the dictionary into a dataframe

darshan-util/pydarshan/darshan/experimental/aggregators/create_dxttimeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def create_dxttimeline(self, group_by='rank', mode="append"):
2222
items = ctx['items']
2323

2424

25-
start_time = datetime.datetime.fromtimestamp( self.data['metadata']['job']['start_time'] )
25+
start_time = datetime.datetime.fromtimestamp( self.data['metadata']['job']['start_time_sec'] )
2626

2727

2828

darshan-util/pydarshan/darshan/experimental/plots/plot_dxt_heatmap.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,19 @@ def determine_hmap_runtime(report: darshan.DarshanReport) -> Tuple[float, float]
3838
or both module types are available, to achieve a common
3939
max displayed runtime.
4040
41-
In some cases, this may mean that the time value is
42-
rounded up from the actual runtime.
43-
44-
Paramaters
41+
Parameters
4542
----------
4643
4744
report: a ``darshan.DarshanReport``
4845
4946
Returns
5047
-------
5148
52-
A tuple containing `tmax`, (rounded) `runtime` floats.
49+
A tuple containing `tmax`, `runtime` floats.
5350
5451
"""
55-
# calculate the elapsed runtime
56-
runtime = report.metadata["job"]["end_time"] - report.metadata["job"]["start_time"]
57-
# ensure a minimum runtime value of 1 second
58-
runtime = max(runtime, 1)
52+
# get the elapsed runtime
53+
runtime = report.metadata["job"]["run_time"]
5954
# leverage higher resolution DXT timing
6055
# data if available
6156
if ("DXT_POSIX" in report.modules or
@@ -69,10 +64,6 @@ def determine_hmap_runtime(report: darshan.DarshanReport) -> Tuple[float, float]
6964
tmax_dxt = float(agg_df["end_time"].max())
7065
if tmax_dxt > tmax:
7166
tmax = tmax_dxt
72-
# if the data max time exceeds the runtime, buffer by 1 second
73-
# until our timer precision improves to prevent truncation
74-
if tmax > runtime:
75-
runtime += 1
7667
else:
7768
tmax = runtime
7869
return tmax, runtime

darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,8 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
148148
stacked bar graph of the average read, write, and metadata times.
149149
150150
"""
151-
# calculate the run time from the report metadata
152-
runtime = report.metadata["job"]["end_time"] - report.metadata["job"]["start_time"]
153-
if runtime == 0:
154-
# for cases where runtime is < 1, just set it
155-
# to 1 like the original perl code
156-
runtime = 1
151+
# get the run time from the report metadata
152+
runtime = report.metadata["job"]["run_time"]
157153
# get the I/O cost dataframe
158154
io_cost_df = get_io_cost_df(report=report)
159155
# generate a figure with 2 y axes

0 commit comments

Comments
 (0)