Skip to content

Commit d5a7843

Browse files
author
Shane Snyder
committed
PyDarshan changes for new job timestamps
* new logutils function (`darshan_log_get_job_runtime`) to calculate job runtime as a floating point value * for pre 3.41 logs, this is end-start+1 (i.e., runtime rounded up to nearest integer) * for 3.41+ logs, this is exact runtime to nsec precision * CFFI definition updates of job types, type sizes * CFFI bindings updates to extract new job timers and runtime * updates to summary and report interfaces to use new job timers and runtime values * updates to plotting code to use new job timers and runtime values * `plot_dxt_heatmap.py` in particular had some code removed that tried to correct for a calculated runtime of 0 * updates to tests to accommodate new changes to various interfaces and record formats * `test_plot_dxt_heatmap.py` has changes to avoid need for rounding up calculated runtime * new ior-PNETCDF/ior-HDF5 log files are included as previous ones are no longer valid due to log format changes
1 parent 302909b commit d5a7843

16 files changed

+72
-87
lines changed

darshan-util/pydarshan/darshan/backend/api_def_c.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
header = """/* from darshan-logutils.h */
1111
struct darshan_mnt_info
1212
{
13-
char mnt_type[3031];
14-
char mnt_path[3031];
13+
char mnt_type[3015];
14+
char mnt_path[3015];
1515
};
1616
1717
struct darshan_mod_info
@@ -29,8 +29,10 @@
2929
struct darshan_job
3030
{
3131
int64_t uid;
32-
int64_t start_time;
33-
int64_t end_time;
32+
int64_t start_time_sec;
33+
int64_t start_time_nsec;
34+
int64_t end_time_sec;
35+
int64_t end_time_nsec;
3436
int64_t nprocs;
3537
int64_t jobid;
3638
char metadata[1024];
@@ -167,6 +169,7 @@
167169
void darshan_log_get_modules(void*, struct darshan_mod_info **, int*);
168170
int darshan_log_get_record(void*, int, void **);
169171
char* darshan_log_get_lib_version(void);
172+
int darshan_log_get_job_runtime(void *, struct darshan_job job, double *runtime);
170173
void darshan_free(void *);
171174
172175
int darshan_log_get_namehash(void*, struct darshan_name_record_ref **hash);

darshan-util/pydarshan/darshan/backend/cffi_backend.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,17 @@ def log_get_job(log):
126126
libdutil.darshan_log_get_job(log['handle'], jobrec)
127127

128128
job['uid'] = jobrec[0].uid
129-
job['start_time'] = jobrec[0].start_time
130-
job['end_time'] = jobrec[0].end_time
129+
job['start_time_sec'] = jobrec[0].start_time_sec
130+
job['start_time_nsec'] = jobrec[0].start_time_nsec
131+
job['end_time_sec'] = jobrec[0].end_time_sec
132+
job['end_time_nsec'] = jobrec[0].end_time_nsec
131133
job['nprocs'] = jobrec[0].nprocs
132134
job['jobid'] = jobrec[0].jobid
133135

136+
runtime = ffi.new("double *")
137+
libdutil.darshan_log_get_job_runtime(log['handle'], jobrec[0], runtime)
138+
job['run_time'] = runtime[0]
139+
134140
# dirty hack to get log format version -- we know it's currently stored at the
135141
# very beginning of the log handle structure, so we just cast the struct
136142
# pointer as a string...

darshan-util/pydarshan/darshan/cli/summary.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -178,16 +178,9 @@ def get_runtime(report: darshan.report.DarshanReport) -> str:
178178
runtime : the calculated executable run time.
179179
180180
"""
181-
# calculate the run time
182-
runtime_val = int(
183-
report.metadata["job"]["end_time"] - report.metadata["job"]["start_time"]
184-
)
185-
if runtime_val < 1:
186-
# to prevent the displayed run time from being 0 seconds
187-
# label anything under 1 second as less than 1
188-
runtime = "< 1"
189-
else:
190-
runtime = str(runtime_val)
181+
# get the run time string
182+
runtime_val = report.metadata["job"]["run_time"]
183+
runtime = f'{runtime_val:.4f}'
191184
return runtime
192185

193186
def get_header(self):
@@ -200,7 +193,7 @@ def get_header(self):
200193
else:
201194
app_name = os.path.basename(command.split()[0])
202195
# collect the date from the time stamp
203-
date = datetime.date.fromtimestamp(self.report.metadata["job"]["start_time"])
196+
date = datetime.date.fromtimestamp(self.report.metadata["job"]["start_time_sec"])
204197
# the header is the application name and the log date
205198
self.header = f"{app_name} ({date})"
206199

@@ -222,9 +215,9 @@ def get_metadata_table(self):
222215
"Job ID": job_data["jobid"],
223216
"User ID": job_data["uid"],
224217
"# Processes": job_data["nprocs"],
225-
"Runtime (s)": self.get_runtime(report=self.report),
226-
"Start Time": datetime.datetime.fromtimestamp(job_data["start_time"]),
227-
"End Time": datetime.datetime.fromtimestamp(job_data["end_time"]),
218+
"Run time (s)": self.get_runtime(report=self.report),
219+
"Start Time": datetime.datetime.fromtimestamp(job_data["start_time_sec"]),
220+
"End Time": datetime.datetime.fromtimestamp(job_data["end_time_sec"]),
228221
"Command Line": self.get_full_command(report=self.report),
229222
}
230223
# convert the dictionary into a dataframe

darshan-util/pydarshan/darshan/experimental/aggregators/create_dxttimeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def create_dxttimeline(self, group_by='rank', mode="append"):
2222
items = ctx['items']
2323

2424

25-
start_time = datetime.datetime.fromtimestamp( self.data['metadata']['job']['start_time'] )
25+
start_time = datetime.datetime.fromtimestamp( self.data['metadata']['job']['start_time_sec'] )
2626

2727

2828

darshan-util/pydarshan/darshan/experimental/plots/plot_dxt_heatmap.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,19 @@ def determine_hmap_runtime(report: darshan.DarshanReport) -> Tuple[float, float]
3838
or both module types are available, to achieve a common
3939
max displayed runtime.
4040
41-
In some cases, this may mean that the time value is
42-
rounded up from the actual runtime.
43-
44-
Paramaters
41+
Parameters
4542
----------
4643
4744
report: a ``darshan.DarshanReport``
4845
4946
Returns
5047
-------
5148
52-
A tuple containing `tmax`, (rounded) `runtime` floats.
49+
A tuple containing `tmax`, `runtime` floats.
5350
5451
"""
55-
# calculate the elapsed runtime
56-
runtime = report.metadata["job"]["end_time"] - report.metadata["job"]["start_time"]
57-
# ensure a minimum runtime value of 1 second
58-
runtime = max(runtime, 1)
52+
# get the elapsed runtime
53+
runtime = report.metadata["job"]["run_time"]
5954
# leverage higher resolution DXT timing
6055
# data if available
6156
if ("DXT_POSIX" in report.modules or
@@ -69,10 +64,6 @@ def determine_hmap_runtime(report: darshan.DarshanReport) -> Tuple[float, float]
6964
tmax_dxt = float(agg_df["end_time"].max())
7065
if tmax_dxt > tmax:
7166
tmax = tmax_dxt
72-
# if the data max time exceeds the runtime, buffer by 1 second
73-
# until our timer precision improves to prevent truncation
74-
if tmax > runtime:
75-
runtime += 1
7667
else:
7768
tmax = runtime
7869
return tmax, runtime

darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,8 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
148148
stacked bar graph of the average read, write, and metadata times.
149149
150150
"""
151-
# calculate the run time from the report metadata
152-
runtime = report.metadata["job"]["end_time"] - report.metadata["job"]["start_time"]
153-
if runtime == 0:
154-
# for cases where runtime is < 1, just set it
155-
# to 1 like the original perl code
156-
runtime = 1
151+
# get the run time from the report metadata
152+
runtime = report.metadata["job"]["run_time"]
157153
# get the I/O cost dataframe
158154
io_cost_df = get_io_cost_df(report=report)
159155
# generate a figure with 2 y axes

0 commit comments

Comments
 (0)