Skip to content

Commit 141d97b

Browse files
committed
ENH: WAIT_TIME in Python summary
Fixes #860

* include `PNETCDF_FILE_F_WAIT_TIME` in the I/O cost plot in the Python summary reports, along with regression testing and caption changes
* I would have felt more comfortable if the issue pointed me to a log file with prominent non-zero wait times, because all-zero values will also be the default for the `Wait` category when it isn't valid (even when `PNETCDF` is not used); I mostly decided not to exclude the category when `PNETCDF` is absent because it was easier to code + accounted for in the caption adjustment anyway
* I'll provide some samples of the new I/O cost plots--note that the x label squishing is handled separately in gh-883.
1 parent 457b72e commit 141d97b

File tree

3 files changed

+30
-24
lines changed

3 files changed

+30
-24
lines changed

darshan-util/pydarshan/darshan/cli/summary.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,8 @@ def register_figures(self):
405405
"Average (across all ranks) amount of run time that each process "
406406
"spent performing I/O, broken down by access type. See the right "
407407
"edge bar graph on heat maps in preceding section to indicate if "
408-
"I/O activity was balanced across processes."
408+
"I/O activity was balanced across processes. The 'Wait' category "
409+
"is only meaningful for PNETCDF asynchronous I/O operations."
409410
)
410411
io_cost_params = {
411412
"section_title": "Cross-Module Comparisons",

darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def get_by_avg_series(df: Any, mod_key: str, nprocs: int) -> Any:
3030
Returns
3131
-------
3232
by_avg_series: a ``pd.Series`` containing the
33-
average read, write, and meta times.
33+
average read, write, meta, and wait times.
3434
3535
"""
3636
# filter out all except the following columns
@@ -39,11 +39,16 @@ def get_by_avg_series(df: Any, mod_key: str, nprocs: int) -> Any:
3939
f"{mod_key}_F_WRITE_TIME",
4040
f"{mod_key}_F_META_TIME",
4141
]
42+
if "PNETCDF_FILE" in mod_key:
43+
cols.append("PNETCDF_FILE_F_WAIT_TIME")
44+
else:
45+
cols.append("Wait")
4246
by_avg_series = df.filter(cols, axis=1).sum(axis=0) / nprocs
4347
# reindex to ensure all 4 rows (read, write, meta, wait) are always created
4448
by_avg_series = by_avg_series.reindex(cols, fill_value=0.0)
4549
# rename the columns so the labels are automatically generated when plotting
4650
name_dict = {cols[0]: "Read", cols[1]: "Write", cols[2]: "Meta"}
51+
name_dict[cols[3]] = "Wait"
4752
by_avg_series.rename(index=name_dict, inplace=True)
4853
return by_avg_series
4954

darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,37 +21,37 @@
2121
"ior_hdf5_example.darshan",
2222
pd.DataFrame(
2323
np.array([
24-
[0.0196126699, 0.1342029571533203, 0.0074423551],
25-
[0.0196372866, 0.13425052165985107, 0.0475],
26-
[0.016869, 0.086689, 0.097160],
27-
[0.0, 2.5570392608642578e-05, 0.0],
24+
[0.0196126699, 0.1342029571533203, 0.0074423551, 0.0],
25+
[0.0196372866, 0.13425052165985107, 0.0475, 0.0],
26+
[0.016869, 0.086689, 0.097160, 0.0],
27+
[0.0, 2.5570392608642578e-05, 0.0, 0.0],
2828
]),
2929
["POSIX", "MPIIO", "HDF5", "STDIO"],
30-
["Read", "Write", "Meta"],
30+
["Read", "Write", "Meta", "Wait"],
3131
),
3232
),
3333
(
3434
"sample-badost.darshan",
3535
pd.DataFrame(
3636
np.array([
37-
[0.0, 33.48587587394286, 0.5547398688504472],
38-
[0.011203573201783001, 4.632166e-07, 0.135187],
37+
[0.0, 33.48587587394286, 0.5547398688504472, 0.0],
38+
[0.011203573201783001, 4.632166e-07, 0.135187, 0.0],
3939
]),
4040
["POSIX", "STDIO"],
41-
["Read", "Write", "Meta"],
41+
["Read", "Write", "Meta", "Wait"],
4242
),
4343
),
4444
(
4545
"shane_ior-PNETCDF_id438100-438100_11-9-41525-10280033558448664385_1.darshan",
4646
pd.DataFrame(
4747
np.array([
48-
[0.000378787518, 0.002514898777, 0.000068306923],
49-
[0.000397562981, 0.002540826797, 0.001559376717],
50-
[0.000402510166, 0.002579867840, 0.001994967461],
51-
[0.000000000000, 0.000120997429, 0.000000000000],
48+
[0.000378787518, 0.002514898777, 0.000068306923, 0.0],
49+
[0.000397562981, 0.002540826797, 0.001559376717, 0.0],
50+
[0.000402510166, 0.002579867840, 0.001994967461, 0.0],
51+
[0.000000000000, 0.000120997429, 0.000000000000, 0.0],
5252
]),
5353
["POSIX", "MPIIO", "PNETCDF", "STDIO"],
54-
["Read", "Write", "Meta"],
54+
["Read", "Write", "Meta", "Wait"],
5555
),
5656
),
5757
],
@@ -158,8 +158,8 @@ def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks):
158158
],
159159
),
160160
pd.Series(
161-
data=[1.2, .6, 3.0],
162-
index=["Read", "Write", "Meta"],
161+
data=[1.2, .6, 3.0, 0.0],
162+
index=["Read", "Write", "Meta", "Wait"],
163163
),
164164
),
165165
(
@@ -177,8 +177,8 @@ def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks):
177177
],
178178
),
179179
pd.Series(
180-
data=[3000.0, 300.0, 30.0],
181-
index=["Read", "Write", "Meta"],
180+
data=[3000.0, 300.0, 30.0, 0.0],
181+
index=["Read", "Write", "Meta", "Wait"],
182182
),
183183
),
184184
(
@@ -197,8 +197,8 @@ def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks):
197197
],
198198
),
199199
pd.Series(
200-
data=[3001.2, 300.6, 33.0],
201-
index=["Read", "Write", "Meta"],
200+
data=[3001.2, 300.6, 33.0, 0.0],
201+
index=["Read", "Write", "Meta", "Wait"],
202202
),
203203
)
204204
])
@@ -215,11 +215,11 @@ def test_get_by_avg_series(mod_key, input_df, expected_series):
215215
"nonmpi_dxt_anonymized.darshan",
216216
pd.DataFrame(
217217
np.array([
218-
[0.281718, 0.504260, 0.170138],
219-
[0.232386, 0.165982, 0.072751],
218+
[0.281718, 0.504260, 0.170138, 0.0],
219+
[0.232386, 0.165982, 0.072751, 0.0],
220220
]),
221221
["POSIX", "STDIO"],
222-
["Read", "Write", "Meta"],
222+
["Read", "Write", "Meta", "Wait"],
223223
),
224224
),
225225
])

0 commit comments

Comments
 (0)