Skip to content

Commit 53e9e9b

Browse files
authored
Merge pull request #136 from viccollado/feature/extremes_hessian_optimizer
Merge Feature/extremes latest updates
2 parents 496b122 + 6137212 commit 53e9e9b

File tree

9 files changed

+821
-575
lines changed

9 files changed

+821
-575
lines changed

bluemath_tk/distributions/_base_distributions.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ def summary(self):
399399
print(f"Negative Log-Likelihood value: {self.nll:.4f}")
400400
print(f"{self.message}")
401401

402-
def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.axes]:
402+
def plot(self, ax: plt.axes = None, plot_type="all", npy=1) -> Tuple[plt.figure, plt.axes]:
403403
"""
404404
Plots of fitting results: PP-plot, QQ-plot, histogram with fitted distribution, and return period plot.
405405
Parameters
@@ -410,6 +410,8 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax
410410
Type of plot to create. Options are "hist" for histogram, "pp" for P-P plot,
411411
"qq" for Q-Q plot, "return_period" for return period plot, or "all" for all plots.
412412
Default is "all".
413+
npy : int, optional
414+
Number of observations per year. Default is 1.
413415
414416
Returns
415417
-------
@@ -426,7 +428,7 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax
426428
self.pp(ax=axs[0, 0])
427429
self.qq(ax=axs[0, 1])
428430
self.hist(ax=axs[1, 0])
429-
self.return_period(ax=axs[1, 1])
431+
self.return_period(ax=axs[1, 1], npy=npy)
430432
plt.tight_layout()
431433
return fig, axs
432434
elif plot_type == "hist":
@@ -436,7 +438,7 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax
436438
elif plot_type == "qq":
437439
return self.qq()
438440
elif plot_type == "return_period":
439-
return self.return_period()
441+
return self.return_period(npy=npy)
440442
else:
441443
raise ValueError(
442444
"Invalid plot type. Use 'hist', 'pp', 'qq', 'return_period', or 'all'."
@@ -541,34 +543,36 @@ def hist(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]:
541543

542544
return fig, ax
543545

544-
def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]:
546+
def return_period(self, ax: plt.axes = None, npy=1) -> Tuple[plt.figure, plt.axes]:
545547
"""
546548
Return period plot of the fitted distribution.
547549
548550
Parameters
549551
----------
550552
ax : matplotlib.axes.Axes, optional
551553
Axes to plot on. If None, a new figure and axes will be created.
554+
npy : int, optional
555+
Number of observations per year. Default is 1.
552556
"""
553557
if ax is None:
554558
fig, ax = plt.subplots(figsize=(8, 6))
555559
else:
556560
fig = None
557561

558-
return_years = np.asarray([1.001, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 7.5, 10, 15, 20, 25, 50, 100, 250, 500, 1000])
559-
ecdf_fitted = 1 - 1/return_years
562+
return_years = np.asarray([1.001, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 7.5, 10, 15, 20, 25, 50, 100, 250, 500, 1000, 10000])
563+
ecdf_fitted = 1 - 1/(return_years)
560564
sorted_data = np.sort(self.data)
561565
exceedance_prob = 1 - self.ecdf
562-
return_period = 1 / exceedance_prob
566+
return_period = 1 / (exceedance_prob)
563567

564568
ax.plot(
565-
return_years,
569+
return_years / npy,
566570
self.dist.qf(ecdf_fitted, *self.params),
567571
color="tab:red",
568572
label="Fitted Distribution",
569573
)
570574
ax.plot(
571-
return_period,
575+
return_period / npy,
572576
sorted_data,
573577
marker="o",
574578
linestyle="",
@@ -580,7 +584,7 @@ def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]:
580584
ax.set_xticks([1, 2, 5, 10, 25, 50, 100, 250, 1000, 10000])
581585
ax.set_xticklabels([1, 2, 5, 10, 25, 50, 100, 500, 1000, 10000])
582586
# ax.set_xlim(right=np.max(return_period) * 1.2)
583-
ax.set_xlabel("Return Period")
587+
ax.set_xlabel("Return Period (Years)")
584588
ax.set_ylabel("Data Values")
585589
ax.set_title("Return Period Plot")
586590
ax.legend()

bluemath_tk/distributions/extreme_correction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def fit(
221221
self.threshold, self.pot_data, pot_idx = opt_threshold.fit()
222222
self.poiss_parameter = self.pot_data.size / self.am_data.size
223223

224-
fit_result = GPD.fit(self.pot_data, f0=self.threshold)
224+
fit_result = GPD.fit(self.pot_data, threshold=self.threshold)
225225

226226
# If Annual Maxima used in fitting step
227227
if self.method == "am":

bluemath_tk/distributions/gev.py

Lines changed: 16 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Dict, List
22

33
import numpy as np
4-
from scipy.special import gamma
4+
from scipy.stats import genextreme
55

66
from ._base_distributions import BaseDistribution, FitResult, fit_dist
77

@@ -117,22 +117,7 @@ def pdf(
117117
if scale <= 0:
118118
raise ValueError("Scale parameter must be > 0")
119119

120-
y = (x - loc) / scale
121-
122-
# Gumbel case (shape = 0)
123-
if shape == 0.0:
124-
pdf = (1 / scale) * (np.exp(-y) * np.exp(-np.exp(-y)))
125-
126-
# General case (Weibull and Frechet, shape != 0)
127-
else:
128-
pdf = np.full_like(x, 0, dtype=float) # 0
129-
yy = 1 + shape * y
130-
yymask = yy > 0
131-
pdf[yymask] = (1 / scale) * (
132-
yy[yymask] ** (-1 - (1 / shape)) * np.exp(-(yy[yymask] ** (-1 / shape)))
133-
)
134-
135-
return pdf
120+
return genextreme.pdf(x, -shape, loc=loc, scale=scale)
136121

137122
@staticmethod
138123
def cdf(
@@ -167,17 +152,7 @@ def cdf(
167152
if scale <= 0:
168153
raise ValueError("Scale parameter must be > 0")
169154

170-
y = (x - loc) / scale
171-
172-
# Gumbel case (shape = 0)
173-
if shape == 0.0:
174-
p = np.exp(-np.exp(-y))
175-
176-
# General case (Weibull and Frechet, shape != 0)
177-
else:
178-
p = np.exp(-(np.maximum(1 + shape * y, 0) ** (-1 / shape)))
179-
180-
return p
155+
return genextreme.cdf(x, -shape, loc=loc, scale=scale)
181156

182157
@staticmethod
183158
def sf(
@@ -212,9 +187,7 @@ def sf(
212187
if scale <= 0:
213188
raise ValueError("Scale parameter must be > 0")
214189

215-
sp = 1 - GEV.cdf(x, loc=loc, scale=scale, shape=shape)
216-
217-
return sp
190+
return genextreme.sf(x, -shape, loc=loc, scale=scale)
218191

219192
@staticmethod
220193
def qf(
@@ -255,15 +228,7 @@ def qf(
255228
if scale <= 0:
256229
raise ValueError("Scale parameter must be > 0")
257230

258-
# Gumbel case (shape = 0)
259-
if shape == 0.0:
260-
q = loc - scale * np.log(-np.log(p))
261-
262-
# General case (Weibull and Frechet, shape != 0)
263-
else:
264-
q = loc + scale * ((-np.log(p)) ** (-shape) - 1) / shape
265-
266-
return q
231+
return genextreme.ppf(p, -shape, loc=loc, scale=scale)
267232

268233
@staticmethod
269234
def nll(
@@ -291,35 +256,12 @@ def nll(
291256
"""
292257

293258
if scale <= 0:
294-
nll = np.inf # Return a large value for invalid scale
259+
return np.inf # Return a large value for invalid scale
295260

296261
else:
297-
y = (data - loc) / scale
298-
299-
# # Gumbel case (shape = 0)
300-
# if shape == 0.0:
301-
# pass
302-
# nll = data.shape[0] * np.log(scale) + np.sum(
303-
# np.exp(-y) + np.sum(-y)
304-
# ) # Gumbel case
305-
306-
# # General case (Weibull and Frechet, shape != 0)
307-
# else:
308-
309-
shape = (
310-
np.maximum(shape, 1e-8) if shape > 0 else np.minimum(shape, -1e-8)
311-
) # Avoid division by zero
312-
y = 1 + shape * y
313-
if any(y <= 0):
314-
nll = np.inf # Return a large value for invalid y
315-
else:
316-
nll = (
317-
data.shape[0] * np.log(scale)
318-
+ np.sum(y ** (-1 / shape))
319-
+ (1 / shape + 1) * np.sum(np.log(y))
320-
)
321-
322-
return nll
262+
return -np.sum(
263+
genextreme.logpdf(data, -shape, loc=loc, scale=scale), axis=0
264+
)
323265

324266
@staticmethod
325267
def fit(data: np.ndarray, **kwargs) -> FitResult:
@@ -385,22 +327,9 @@ def random(
385327
if scale <= 0:
386328
raise ValueError("Scale parameter must be > 0")
387329

388-
# Set random state if provided
389-
if random_state is not None:
390-
np.random.seed(random_state)
391-
392-
# Generate uniform random numbers
393-
u = np.random.uniform(0, 1, size)
394-
395-
# Gumbel case (shape = 0)
396-
if shape == 0.0:
397-
x = loc - scale * np.log(-np.log(u))
398-
399-
# General case (Weibull and Frechet, shape != 0)
400-
else:
401-
x = loc + scale * ((-np.log(u)) ** (-shape) - 1) / shape
402-
403-
return x
330+
return genextreme.rvs(
331+
-shape, loc=loc, scale=scale, size=size, random_state=random_state
332+
)
404333

405334
@staticmethod
406335
def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
@@ -431,21 +360,7 @@ def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
431360
if scale <= 0:
432361
raise ValueError("Scale parameter must be > 0")
433362

434-
eu_cons = np.euler_gamma # Euler-Mascheroni constant
435-
436-
# Gumbel case (shape = 0)
437-
if shape == 0.0:
438-
mean = loc + scale * eu_cons
439-
440-
# General case (Weibull and Frechet, shape != 0 and shape < 1)
441-
elif shape != 0.0 and shape < 1:
442-
mean = loc + scale * (gamma(1 - shape) - 1) / shape
443-
444-
# Shape >= 1 case
445-
else:
446-
mean = np.inf
447-
448-
return mean
363+
return genextreme.mean(-shape, loc=loc, scale=scale)
449364

450365
@staticmethod
451366
def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
@@ -476,13 +391,7 @@ def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
476391
if scale <= 0:
477392
raise ValueError("Scale parameter must be > 0")
478393

479-
if shape == 0.0:
480-
median = loc - scale * np.log(np.log(2))
481-
482-
else:
483-
median = loc + scale * ((np.log(2)) ** (-shape) - 1) / shape
484-
485-
return median
394+
return genextreme.median(-shape, loc=loc, scale=scale)
486395

487396
@staticmethod
488397
def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
@@ -513,21 +422,7 @@ def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
513422
if scale <= 0:
514423
raise ValueError("Scale parameter must be > 0")
515424

516-
# Gumbel case (shape = 0)
517-
if shape == 0.0:
518-
var = (np.pi**2 / 6) * scale**2
519-
520-
elif shape != 0.0 and shape < 0.5:
521-
var = (
522-
(scale**2)
523-
* (gamma(1 - 2 * shape) - (gamma(1 - shape) ** 2))
524-
/ (shape**2)
525-
)
526-
527-
else:
528-
var = np.inf
529-
530-
return var
425+
return genextreme.var(-shape, loc=loc, scale=scale)
531426

532427
@staticmethod
533428
def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
@@ -559,9 +454,7 @@ def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
559454
if scale <= 0:
560455
raise ValueError("Scale parameter must be > 0")
561456

562-
std = np.sqrt(GEV.variance(loc, scale, shape))
563-
564-
return std
457+
return genextreme.std(-shape, loc=loc, scale=scale)
565458

566459
@staticmethod
567460
def stats(

0 commit comments

Comments
 (0)