Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve efficiency of local exceedance intensitry/impact and local return periods functions #1012

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
165 changes: 89 additions & 76 deletions climada/engine/impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import pandas as pd
import xlsxwriter
from deprecation import deprecated
from matplotlib.colors import Normalize
from pyproj import CRS as pyprojCRS
from rasterio.crs import CRS as rasterioCRS # pylint: disable=no-name-in-module
from scipy import sparse
Expand Down Expand Up @@ -107,8 +108,8 @@ def __init__(
crs=DEF_CRS,
eai_exp=None,
at_event=None,
tot_value=0.,
aai_agg=0.,
tot_value=0.0,
aai_agg=0.0,
unit="",
imp_mat=None,
haz_type="",
Expand Down Expand Up @@ -515,7 +516,10 @@ def local_exceedance_impact(
return periods larger than the Impact object's observed local return periods will be
assigned the largest local impact, and return periods smaller than the Impact object's
observed local return periods will be assigned 0. If set to "extrapolate", local
exceedance impacts will be extrapolated (and interpolated). Defauls to "interpolate".
exceedance impacts will be extrapolated (and interpolated). The extrapolation to
large return periods uses the two highest impacts of the centroid and their return
periods and extends the interpolation between these points to the given return period
(similar for small return periods). Defauls to "interpolate".
min_impact : float, optional
Minimum threshold to filter the impact. Defaults to 0.
log_frequency : bool, optional
Expand Down Expand Up @@ -564,23 +568,33 @@ def local_exceedance_impact(

# calculate local exceedance impact
test_frequency = 1 / np.array(return_periods)
exceedance_impact = np.array(
[
u_interp.preprocess_and_interpolate_ev(
test_frequency,
None,
self.frequency,
self.imp_mat.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_impact,
value_threshold=min_impact,
method=method,
y_asymptotic=0.0,
)
for i_centroid in range(self.imp_mat.shape[1])
]

exceedance_impact = np.full(
(self.imp_mat.shape[1], len(test_frequency)),
np.nan if method == "interpolate" else 0.0,
)

nonzero_centroids = np.where(self.imp_mat.getnnz(axis=0) > 0)[0]

if not len(nonzero_centroids) == 0:
exceedance_impact[nonzero_centroids, :] = np.array(
[
u_interp.preprocess_and_interpolate_ev(
test_frequency,
None,
self.frequency,
self.imp_mat.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_impact,
value_threshold=min_impact,
method=method,
y_asymptotic=0.0,
n_sig_dig=3,
)
for i_centroid in nonzero_centroids
]
)

# create the output GeoDataFrame
gdf = gpd.GeoDataFrame(
geometry=gpd.points_from_xy(self.coord_exp[:, 1], self.coord_exp[:, 0]),
Expand Down Expand Up @@ -637,6 +651,9 @@ def local_return_period(
impacts will be assigned NaN, and threshold impacts smaller than the Impact
object's local impacts will be assigned the smallest observed local return period.
If set to "extrapolate", local return periods will be extrapolated (and interpolated).
The extrapolation to large threshold impacts uses the two highest impacts of
the centroid and their return periods and extends the interpolation between these
points to the given threshold imapct (similar for small large threshold impacts).
Defaults to "interpolate".
min_impacts : float, optional
Minimum threshold to filter the impact. Defaults to 0.
Expand Down Expand Up @@ -683,23 +700,29 @@ def local_return_period(
]:
raise ValueError(f"Unknown method: {method}")

return_periods = np.full((self.imp_mat.shape[1], len(threshold_impact)), np.nan)

nonzero_centroids = np.where(self.imp_mat.getnnz(axis=0) > 0)[0]

# calculate local return periods
return_periods = np.array(
[
u_interp.preprocess_and_interpolate_ev(
None,
np.array(threshold_impact),
self.frequency,
self.imp_mat.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_impact,
value_threshold=min_impact,
method=method,
y_asymptotic=np.nan,
)
for i_centroid in range(self.imp_mat.shape[1])
]
)
if not len(nonzero_centroids) == 0:
return_periods[nonzero_centroids, :] = np.array(
[
u_interp.preprocess_and_interpolate_ev(
None,
np.array(threshold_impact),
self.frequency,
self.imp_mat.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_impact,
value_threshold=min_impact,
method=method,
y_asymptotic=np.nan,
n_sig_dig=3,
)
for i_centroid in nonzero_centroids
]
)
return_periods = safe_divide(1.0, return_periods)

# create the output GeoDataFrame
Expand Down Expand Up @@ -1109,24 +1132,17 @@ def plot_basemap_impact_exposure(

return axis

@deprecated(
details="The use of Impact.plot_rp_imp is deprecated."
"Use Impact.local_exceedance_impact and util.plot.plot_from_gdf instead."
)
def plot_rp_imp(
self,
return_periods=(25, 50, 100, 250),
log10_scale=True,
smooth=True,
axis=None,
**kwargs,
):
"""
This function is deprecated, use Impact.local_exceedance_impact and
util.plot.plot_from_gdf instead.

Compute and plot exceedance impact maps for different return periods.
Calls local_exceedance_imp.
Calls local_exceedance_impact. For handling large data sets and for further options,
see Notes.

Parameters
----------
Expand All @@ -1145,43 +1161,40 @@ def plot_rp_imp(
axis : matplotlib.axes.Axes
imp_stats : np.array
return_periods.size x num_centroids

Notes
-----
For handling large data, and for more flexible options in the exceedance
impact computation and in the plotting, we recommend to use
gdf, title, labels = impact.local_exceedance_impact() and
util.plot.plot_from_gdf(gdf, title, labels) instead.
"""
imp_stats = (
self.local_exceedance_impact(np.array(return_periods))[0].values[:, 1:].T

LOGGER.info(
"Some errors in the previous calculation of local exceedance impacts have been corrected,"
" see Impact.local_exceedance_impact. To reproduce data with the "
"previous calculation, use CLIMADA v5.0.0 or less."
Comment on lines +1173 to +1176
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool idea, I am not sure though that this should be an info logging. @emanuel-schmid : what would be the most elegant way to inform the users that something changed, without having logs all the time?

)
imp_stats = imp_stats.astype(float)
if imp_stats.size == 0:
raise ValueError(
"Error: Attribute imp_mat is empty. Recalculate Impact"
"instance with parameter save_mat=True"
)
if log10_scale:
if np.min(imp_stats) < 0:
imp_stats_log = np.log10(abs(imp_stats) + 1)
colbar_name = "Log10(abs(Impact)+1) (" + self.unit + ")"
elif np.min(imp_stats) < 1:
imp_stats_log = np.log10(imp_stats + 1)
colbar_name = "Log10(Impact+1) (" + self.unit + ")"
else:
imp_stats_log = np.log10(imp_stats)
colbar_name = "Log10(Impact) (" + self.unit + ")"
else:
imp_stats_log = imp_stats
colbar_name = "Impact (" + self.unit + ")"
title = list()
for ret in return_periods:
title.append("Return period: " + str(ret) + " years")
axis = u_plot.geo_im_from_array(
imp_stats_log,
self.coord_exp,
colbar_name,
title,
smooth=smooth,
axes=axis,
**kwargs,

impacts_stats, title, column_labels = self.local_exceedance_impact(
return_periods
Comment on lines +1179 to +1180
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you make this calculous optional? Or what would be the way to refine the plot without having to recalculate these values (which can take quite long)? Is it using the util method instead?

Copy link
Collaborator Author

@ValentinGebhart ValentinGebhart Feb 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the best way is to do gdf, title, columns = hazard.local_exceedance_intensity() and u_plot.plot_from_gdf(gdf, title, columns). Then you can just change the plot options by repeating the second step, without recalculating the gdf. The plot_rp_intensity is just a wrapper around these two functions, which is why I asked if we want to keep it in the first place.

One way of not repeating the calculation here if one calls the function plot_rp_intensity twice would be to save the local exceedance gdf as an attribute of the Hazard object, but I don't think we want to do that?

Copy link
Collaborator Author

@ValentinGebhart ValentinGebhart Feb 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about writing the info to better use the other two functions in the docstring, as you suggested in the issue? Is this enough?

Copy link
Member

@chahank chahank Feb 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that would be a solution I think. For small hazards I guess the method is rather fast, and for lager ones there would the solution in the docstring.

)

return axis, imp_stats
impacts_stats_vals = impacts_stats.values[:, 1:].T.astype(float)
if not log10_scale:
min_impact, max_impact = np.nanmin(impacts_stats_vals), np.nanmax(
impacts_stats_vals
)
kwargs.update(
{
"norm": Normalize(vmin=min_impact, vmax=max_impact),
}
)

axis = u_plot.plot_from_gdf(
impacts_stats, title, column_labels, axis=axis, **kwargs
)
return axis, impacts_stats_vals

def write_csv(self, file_name):
"""Write data into csv file. imp_mat is not saved.
Expand Down
88 changes: 55 additions & 33 deletions climada/hazard/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,10 @@ def local_exceedance_intensity(
periods larger than the Hazard object's observed local return periods will be assigned
the largest local intensity, and return periods smaller than the Hazard object's
observed local return periods will be assigned 0. If set to "extrapolate", local
exceedance intensities will be extrapolated (and interpolated).
Defauls to "interpolate".
exceedance intensities will be extrapolated (and interpolated). The extrapolation to
large return periods uses the two highest intensites of the centroid and their return
periods and extends the interpolation between these points to the given return period
(similar for small return periods). Defauls to "interpolate".
min_intensity : float, optional
Minimum threshold to filter the hazard intensity. If set to None, self.intensity_thres
will be used. Defaults to None.
Expand Down Expand Up @@ -553,23 +555,32 @@ def local_exceedance_intensity(

# calculate local exceedance intensity
test_frequency = 1 / np.array(return_periods)
exceedance_intensity = np.array(
[
u_interp.preprocess_and_interpolate_ev(
test_frequency,
None,
self.frequency,
self.intensity.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_intensity,
value_threshold=min_intensity,
method=method,
y_asymptotic=0.0,
)
for i_centroid in range(self.intensity.shape[1])
]

exceedance_intensity = np.full(
(self.intensity.shape[1], len(test_frequency)),
np.nan if method == "interpolate" else 0.0,
)

nonzero_centroids = np.where(self.intensity.getnnz(axis=0) > 0)[0]
if not len(nonzero_centroids) == 0:
exceedance_intensity[nonzero_centroids, :] = np.array(
[
u_interp.preprocess_and_interpolate_ev(
test_frequency,
None,
self.frequency,
self.intensity.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_intensity,
value_threshold=min_intensity,
method=method,
y_asymptotic=0.0,
n_sig_dig=3,
)
for i_centroid in nonzero_centroids
]
)

# create the output GeoDataFrame
gdf = gpd.GeoDataFrame(
geometry=self.centroids.gdf["geometry"], crs=self.centroids.gdf.crs
Expand Down Expand Up @@ -631,6 +642,9 @@ def local_return_period(
intensities will be assigned NaN, and threshold intensities smaller than the Hazard
object's local intensities will be assigned the smallest observed local return period.
If set to "extrapolate", local return periods will be extrapolated (and interpolated).
The extrapolation to large threshold intensities uses the two highest intensites of
the centroid and their return periods and extends the interpolation between these
points to the given threshold intensity (similar for small threshold intensites).
Defaults to "interpolate".
min_intensity : float, optional
Minimum threshold to filter the hazard intensity. If set to None, self.intensity_thres
Expand Down Expand Up @@ -672,23 +686,31 @@ def local_return_period(
]:
raise ValueError(f"Unknown method: {method}")

# calculate local return periods
return_periods = np.array(
[
u_interp.preprocess_and_interpolate_ev(
None,
np.array(threshold_intensities),
self.frequency,
self.intensity.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_intensity,
value_threshold=min_intensity,
method=method,
y_asymptotic=np.nan,
)
for i_centroid in range(self.intensity.shape[1])
]
return_periods = np.full(
(self.intensity.shape[1], len(threshold_intensities)), np.nan
)

nonzero_centroids = np.where(self.intensity.getnnz(axis=0) > 0)[0]

if not len(nonzero_centroids) == 0:
return_periods[nonzero_centroids, :] = np.array(
[
u_interp.preprocess_and_interpolate_ev(
None,
np.array(threshold_intensities),
self.frequency,
self.intensity.getcol(i_centroid).toarray().flatten(),
log_frequency=log_frequency,
log_values=log_intensity,
value_threshold=min_intensity,
method=method,
y_asymptotic=np.nan,
n_sig_dig=3,
)
for i_centroid in nonzero_centroids
]
)

return_periods = safe_divide(1.0, return_periods)

# create the output GeoDataFrame
Expand Down
Loading
Loading