Skip to content

Commit 7d72488

Browse files
authored
Merge pull request #799 from tylerjereddy/treddy_faster_dxt_interpolate
ENH: faster DXT interpolation
2 parents fa10aa7 + 9d11b15 commit 7d72488

File tree

1 file changed

+20
-8
lines changed

1 file changed

+20
-8
lines changed

darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -360,14 +360,26 @@ def get_heatmap_df(agg_df: pd.DataFrame, xbins: int, nprocs: int) -> pd.DataFram
360360
# and stop events
361361
# interpolation is pointless when there is
362362
# a single non-null value in a row
363-
if sys.version_info.minor < 7:
364-
cats.interpolate(method="linear", limit_area="inside", axis=1, inplace=True)
365-
else:
366-
null_mask = cats.notna().sum(axis=1) > 1
367-
null_mask = null_mask.loc[null_mask == True].index
368-
cats_vals_to_interp = pd.DataFrame(cats.iloc[null_mask].values)
369-
cats_vals_to_interp.interpolate(method="nearest", axis=1, inplace=True)
370-
cats.iloc[null_mask] = cats_vals_to_interp
363+
null_mask = cats.notna().sum(axis=1) > 1
364+
null_mask = null_mask.loc[null_mask == True].index
365+
cats_vals = cats.iloc[null_mask].values
366+
nan_mask = ~np.isnan(cats_vals)
367+
fill_locs = np.where(nan_mask)
368+
num_rows = cats_vals.shape[0]
369+
start_index = 0
370+
row = 0
371+
372+
# TODO: this is already much faster than
373+
# pandas per gh-798, but consider moving
374+
# to, e.g., Cython or numba?
375+
while start_index < (2 * num_rows):
376+
start_bound = fill_locs[1][start_index]
377+
end_bound = fill_locs[1][start_index + 1]
378+
cats_vals[row, start_bound:end_bound] = 1
379+
start_index += 2
380+
row += 1
381+
382+
cats.iloc[null_mask] = cats_vals
371383
# each time bin containing an event has a 1 in it, otherwise NaN
372384
# store mask for restoring fully occupied bins
373385
mask_occ = cats == 2

0 commit comments

Comments
 (0)