diff --git a/bluemath_tk/distributions/_base_distributions.py b/bluemath_tk/distributions/_base_distributions.py index 50c66e2..8cbc4c8 100644 --- a/bluemath_tk/distributions/_base_distributions.py +++ b/bluemath_tk/distributions/_base_distributions.py @@ -1,431 +1,137 @@ +from __future__ import annotations from abc import abstractmethod -from typing import Dict, List +from typing import Dict, List, Tuple import matplotlib.pyplot as plt import numpy as np -from scipy.optimize import minimize +from scipy.optimize import minimize, OptimizeResult from ..core.models import BlueMathModel -class FitResult(BlueMathModel): - """ - Class used for the results of fitting a distribution - - Attributes - ---------- - dist : BaseDistribution - The distribution that was fitted. - data : np.ndarray - The data used for fitting the distribution. - params : np.ndarray - Fitted parameters of the distribution. - success : bool - Indicates whether the fitting was successful. - message : str - Message from the optimization result. - nll : float - Negative log-likelihood of the fitted distribution. - res : OptimizeResult - The result of the optimization process, containing additional information. - - Methods - ------- - summary() -> dict - Returns a summary of the fitting results, including parameters, negative log-likelihood, - success status, message, and the optimization result. - plot(ax=None, plot_type="hist") - Plots of fitting results (NOT IMPLEMENTED). - Notes - ------- - - This class is used to encapsulate the results of fitting a distribution to data. - - It provides a method to summarize the fitting results and a placeholder for plotting the results. +class BaseDistribution(BlueMathModel): + """ + Base class for all extreme distributions. """ - def __init__(self, dist, data, res): + @abstractmethod + def __init__(self) -> None: + """ + Initialize the base distribution class + """ super().__init__() - self.dist = dist - self.data = data - self.params = res.x - self.success = res.success - self.message = res.message - self.nll = res.fun - self.res = res + @staticmethod + @abstractmethod + def name() -> str: + pass - # Auxiliar for diagnostics plots - self.n = self.data.shape[0] - self.ecdf = np.arange(1, self.n + 1) / (self.n + 1) + @staticmethod + @abstractmethod + def nparams() -> int: + pass - def summary(self): - """ - Print a summary of the fitting results - """ - print(f"Fitting results for {self.dist.name()}:") - print("--------------------------------------") - print("Parameters:") - for i, param in enumerate(self.params): - print(f" - {self.dist.param_names()[i]}: {param:.4f}") - # print("\n") - print(f"Negative Log-Likelihood value: {self.nll:.4f}") - print(f"{self.message}") + @staticmethod + @abstractmethod + def param_names() -> List[str]: + pass - def plot(self, ax=None, plot_type="all"): + @staticmethod + @abstractmethod + def pdf(x: np.ndarray, **kwargs) -> np.ndarray: """ - Plots of fitting results: PP-plot, QQ-plot, histogram with fitted distribution, and return period plot. + Probability density function + Parameters ---------- - ax : matplotlib.axes.Axes, optional - Axes to plot on. If None, a new figure and axes will be created. - plot_type : str, optional - Type of plot to create. Options are "hist" for histogram, "pp" for P-P plot, - "qq" for Q-Q plot, "return_period" for return period plot, or "all" for all plots. - Default is "all". + x : np.ndarray + Values to compute the probability density value + **kwargs : + Distribution specific parameters as keyword arguments. + Common parameters include: + - loc: Location parameter + - scale: Scale parameter + - shape: Shape parameter (for some distributions) Returns - ------- - fig : matplotlib.figure.Figure - Figure object containing the plots. If `ax` is provided, returns None. - - Raises - ------- - ValueError - If `plot_type` is not one of the valid options ("hist", "pp", "qq", "return_period", "all"). + ---------- + pdf : np.ndarray + Probability density function values """ - if plot_type == "all": - fig, axs = plt.subplots(2, 2, figsize=(12, 10)) - self.pp(ax=axs[0, 0]) - self.qq(ax=axs[0, 1]) - self.hist(ax=axs[1, 0]) - self.return_period(ax=axs[1, 1]) - plt.tight_layout() - return fig - elif plot_type == "hist": - return self.hist() - elif plot_type == "pp": - return self.pp() - elif plot_type == "qq": - return self.qq() - elif plot_type == "return_period": - return self.return_period() - else: - raise ValueError( - "Invalid plot type. Use 'hist', 'pp', 'qq', 'return_period', or 'all'." - ) + pass - def pp(self, ax=None): + @staticmethod + @abstractmethod + def cdf(x: np.ndarray, **kwargs) -> np.ndarray: """ - Probability plot of the fitted distribution. + Cumulative distribution function Parameters ---------- - ax : matplotlib.axes.Axes, optional - Axes to plot on. If None, a new figure and axes will be created. - """ - if ax is None: - fig, ax = plt.subplots(figsize=(8, 6)) - else: - fig = None - - probabilities = self.dist.cdf(np.sort(self.data), *self.params) - ax.plot([0, 1], [0, 1], color="tab:red", linestyle="--") - ax.plot( - probabilities, - self.ecdf, - color="tab:blue", - marker="o", - linestyle="", - alpha=0.7, - ) - ax.set_xlabel("Fitted Probability") - ax.set_ylabel("Empirical Probability") - ax.set_title("PP Plot") - ax.grid() + x : np.ndarray + Values to compute their probability + **kwargs : + Distribution specific parameters as keyword arguments. + Common parameters include: + - loc: Location parameter + - scale: Scale parameter + - shape: Shape parameter (for some distributions) - return fig + Returns + ---------- + p : np.ndarray + Probability + """ + pass - def qq(self, ax=None): + @staticmethod + @abstractmethod + def sf(x: np.ndarray, **kwargs) -> np.ndarray: """ - Quantile-Quantile plot of the fitted distribution. + Survival function (1-Cumulative Distribution Function) Parameters ---------- - ax : matplotlib.axes.Axes, optional - Axes to plot on. If None, a new figure and axes will be created. - """ - if ax is None: - fig, ax = plt.subplots(figsize=(8, 6)) - else: - fig = None - - quantiles = self.dist.qf(self.ecdf, *self.params) - ax.plot( - [np.min(self.data), np.max(self.data)], - [np.min(self.data), np.max(self.data)], - color="tab:red", - linestyle="--", - ) - ax.plot( - quantiles, - np.sort(self.data), - color="tab:blue", - marker="o", - linestyle="", - alpha=0.7, - ) - ax.set_xlabel("Theoretical Quantiles") - ax.set_ylabel("Sample Quantiles") - ax.set_title("QQ Plot") - ax.grid() + x : np.ndarray + Values to compute their survival function value + **kwargs : + Distribution specific parameters as keyword arguments. + Common parameters include: + - loc: Location parameter + - scale: Scale parameter + - shape: Shape parameter (for some distributions) - return fig + Returns + ---------- + sp : np.ndarray + Survival function value + """ + pass - def hist(self, ax=None): + @staticmethod + @abstractmethod + def qf(p: np.ndarray, **kwargs) -> np.ndarray: """ - Histogram of the data with the fitted distribution overlayed. + Quantile function Parameters ---------- - ax : matplotlib.axes.Axes, optional - Axes to plot on. If None, a new figure and axes will be created. - """ - if ax is None: - fig, ax = plt.subplots(figsize=(8, 6)) - else: - fig = None - - ax.hist( - self.data, - bins=30, - density=True, - alpha=0.7, - color="tab:blue", - label="Data Histogram", - ) - x = np.linspace(np.min(self.data), np.max(self.data), 1000) - ax.plot(x, self.dist.pdf(x, *self.params), color="tab:red", label="Fitted PDF") - ax.set_xlabel("Data Values") - ax.set_ylabel("Density") - ax.set_title("Histogram and Fitted PDF") - ax.legend() - ax.grid() - - return fig + p : np.ndarray + Probabilities to compute their quantile + **kwargs : + Distribution specific parameters as keyword arguments. + Common parameters include: + - loc: Location parameter + - scale: Scale parameter + - shape: Shape parameter (for some distributions) - def return_period(self, ax=None): + Returns + ---------- + q : np.ndarray + Quantile value """ - Return period plot of the fitted distribution. - - Parameters - ---------- - ax : matplotlib.axes.Axes, optional - Axes to plot on. If None, a new figure and axes will be created. - """ - if ax is None: - fig, ax = plt.subplots(figsize=(8, 6)) - else: - fig = None - - sorted_data = np.sort(self.data) - exceedance_prob = 1 - self.ecdf - return_period = 1 / exceedance_prob - - ax.plot( - return_period, - self.dist.qf(self.ecdf, *self.params), - color="tab:red", - label="Fitted Distribution", - ) - ax.plot( - return_period, - sorted_data, - marker="o", - linestyle="", - color="tab:blue", - alpha=0.7, - label="Empirical Data", - ) - ax.set_xscale("log") - ax.set_xticks([1, 2, 5, 10, 25, 50, 100, 250, 1000, 10000]) - ax.set_xticklabels([1, 2, 5, 10, 25, 50, 100, 500, 1000, 10000]) - ax.set_xlim(right=np.max(return_period) * 1.2) - ax.set_xlabel("Return Period") - ax.set_ylabel("Data Values") - ax.set_title("Return Period Plot") - ax.legend() - ax.grid() - - return fig - - -def fit_dist(dist, data: np.ndarray, **kwargs) -> FitResult: - """ - Fit a distribution to data using Maximum Likelihood Estimation (MLE). - - Parameters - ---------- - dist : BaseDistribution - Distribution to fit. - data : np.ndarray - Data to use for fitting the distribution. - **kwargs : dict, optional - Additional options for fitting: - - 'x0': Initial guess for distribution parameters (default: [mean, std, 0.0]). - - 'method': Optimization method (default: 'Nelder-Mead'). - - 'bounds': Bounds for optimization parameters (default: [(None, None), (0, None), ...]). - - 'options': Options for the optimizer (default: {'disp': False}). - - Returns - ------- - FitResult - The fitting results, including parameters, success status, and negative log-likelihood. - """ - nparams = dist.nparams() - - # Default optimization settings - x0 = kwargs.get( - "x0", np.asarray([np.mean(data), np.std(data)] + [0.0] * (nparams - 2)) - ) - method = kwargs.get("method", "Nelder-Mead").lower() - bounds = kwargs.get( - "bounds", [(None, None), (0, None)] + [(None, None)] * (nparams - 2) - ) - options = kwargs.get("options", {"disp": False}) - - # Objective function: Negative Log-Likelihood - def obj(params): - return dist.nll(data, *params) - - # Perform optimization - result = minimize(fun=obj, x0=x0, method=method, bounds=bounds, options=options) - - # Return the fitting result as a FitResult object - return FitResult(dist, data, result) - - -class BaseDistribution(BlueMathModel): - """ - Base class for all extreme distributions. - """ - - @abstractmethod - def __init__(self) -> None: - """ - Initialize the base distribution class - """ - super().__init__() - - @staticmethod - @abstractmethod - def name() -> str: - pass - - @staticmethod - @abstractmethod - def nparams() -> int: - pass - - @staticmethod - @abstractmethod - def param_names() -> List[str]: - pass - - @staticmethod - @abstractmethod - def pdf(x: np.ndarray, **kwargs) -> np.ndarray: - """ - Probability density function - - Parameters - ---------- - x : np.ndarray - Values to compute the probability density value - **kwargs : - Distribution specific parameters as keyword arguments. - Common parameters include: - - loc: Location parameter - - scale: Scale parameter - - shape: Shape parameter (for some distributions) - - Returns - ---------- - pdf : np.ndarray - Probability density function values - """ - pass - - @staticmethod - @abstractmethod - def cdf(x: np.ndarray, **kwargs) -> np.ndarray: - """ - Cumulative distribution function - - Parameters - ---------- - x : np.ndarray - Values to compute their probability - **kwargs : - Distribution specific parameters as keyword arguments. - Common parameters include: - - loc: Location parameter - - scale: Scale parameter - - shape: Shape parameter (for some distributions) - - Returns - ---------- - p : np.ndarray - Probability - """ - pass - - @staticmethod - @abstractmethod - def sf(x: np.ndarray, **kwargs) -> np.ndarray: - """ - Survival function (1-Cumulative Distribution Function) - - Parameters - ---------- - x : np.ndarray - Values to compute their survival function value - **kwargs : - Distribution specific parameters as keyword arguments. - Common parameters include: - - loc: Location parameter - - scale: Scale parameter - - shape: Shape parameter (for some distributions) - - Returns - ---------- - sp : np.ndarray - Survival function value - """ - pass - - @staticmethod - @abstractmethod - def qf(p: np.ndarray, **kwargs) -> np.ndarray: - """ - Quantile function - - Parameters - ---------- - p : np.ndarray - Probabilities to compute their quantile - **kwargs : - Distribution specific parameters as keyword arguments. - Common parameters include: - - loc: Location parameter - - scale: Scale parameter - - shape: Shape parameter (for some distributions) - - Returns - ---------- - q : np.ndarray - Quantile value - """ - pass + pass @staticmethod @abstractmethod @@ -616,3 +322,357 @@ def fit(dist, data: np.ndarray, **kwargs) -> FitResult: success status, and negative log-likelihood value. """ pass + + +class FitResult(BlueMathModel): + """ + Class used for the results of fitting a distribution + + Attributes + ---------- + dist : BaseDistribution + The distribution that was fitted. + data : np.ndarray + The data used for fitting the distribution. + params : np.ndarray + Fitted parameters of the distribution. + success : bool + Indicates whether the fitting was successful. + message : str + Message from the optimization result. + nll : float + Negative log-likelihood of the fitted distribution. + res : OptimizeResult + The result of the optimization process, containing additional information. + + Methods + ------- + summary() -> dict + Returns a summary of the fitting results, including parameters, negative log-likelihood, + success status, message, and the optimization result. + plot(ax=None, plot_type="hist") + Plots of fitting results (NOT IMPLEMENTED). + + Notes + ------- + - This class is used to encapsulate the results of fitting a distribution to data. + - It provides a method to summarize the fitting results and a placeholder for plotting the results. + """ + + def __init__(self, dist: BaseDistribution, data: np.ndarray, res: OptimizeResult): + """ + Initialize the FitResult object. + + Parameters + ---------- + dist : BaseDistribution + The distribution that was fitted. + data : np.ndarray + The data used for fitting the distribution. + res : OptimizeResult + The result of the optimization process. + """ + super().__init__() + self.dist = dist + self.data = data + + self.params = res.x + self.success = res.success + self.message = res.message + self.nll = res.fun + self.res = res + + # Auxiliar for diagnostics plots + self.n = self.data.shape[0] + self.ecdf = np.arange(1, self.n + 1) / (self.n + 1) + + def summary(self): + """ + Print a summary of the fitting results + """ + print(f"Fitting results for {self.dist.name()}:") + print("--------------------------------------") + print("Parameters:") + for i, param in enumerate(self.params): + print(f" - {self.dist.param_names()[i]}: {param:.4f}") + # print("\n") + print(f"Negative Log-Likelihood value: {self.nll:.4f}") + print(f"{self.message}") + + def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.axes]: + """ + Plots of fitting results: PP-plot, QQ-plot, histogram with fitted distribution, and return period plot. + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure and axes will be created. + plot_type : str, optional + Type of plot to create. Options are "hist" for histogram, "pp" for P-P plot, + "qq" for Q-Q plot, "return_period" for return period plot, or "all" for all plots. + Default is "all". + + Returns + ------- + fig : matplotlib.figure.Figure + Figure object containing the plots. If `ax` is provided, returns None. + + Raises + ------- + ValueError + If `plot_type` is not one of the valid options ("hist", "pp", "qq", "return_period", "all"). + """ + if plot_type == "all": + fig, axs = plt.subplots(2, 2, figsize=(12, 10)) + self.pp(ax=axs[0, 0]) + self.qq(ax=axs[0, 1]) + self.hist(ax=axs[1, 0]) + self.return_period(ax=axs[1, 1]) + plt.tight_layout() + return fig, axs + elif plot_type == "hist": + return self.hist() + elif plot_type == "pp": + return self.pp() + elif plot_type == "qq": + return self.qq() + elif plot_type == "return_period": + return self.return_period() + else: + raise ValueError( + "Invalid plot type. Use 'hist', 'pp', 'qq', 'return_period', or 'all'." + ) + + def pp(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: + """ + Probability plot of the fitted distribution. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure and axes will be created. + """ + if ax is None: + fig, ax = plt.subplots(figsize=(8, 6)) + else: + fig = None + + probabilities = self.dist.cdf(np.sort(self.data), *self.params) + ax.plot([0, 1], [0, 1], color="tab:red", linestyle="--") + ax.plot( + probabilities, + self.ecdf, + color="tab:blue", + marker="o", + linestyle="", + alpha=0.7, + ) + ax.set_xlabel("Fitted Probability") + ax.set_ylabel("Empirical Probability") + ax.set_title("PP Plot") + ax.grid() + + return fig, ax + + def qq(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: + """ + Quantile-Quantile plot of the fitted distribution. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure and axes will be created. + """ + if ax is None: + fig, ax = plt.subplots(figsize=(8, 6)) + else: + fig = None + + quantiles = self.dist.qf(self.ecdf, *self.params) + ax.plot( + [np.min(self.data), np.max(self.data)], + [np.min(self.data), np.max(self.data)], + color="tab:red", + linestyle="--", + ) + ax.plot( + quantiles, + np.sort(self.data), + color="tab:blue", + marker="o", + linestyle="", + alpha=0.7, + ) + ax.set_xlabel("Theoretical Quantiles") + ax.set_ylabel("Sample Quantiles") + ax.set_title("QQ Plot") + ax.grid() + + return fig, ax + + def hist(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: + """ + Histogram of the data with the fitted distribution overlayed. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure and axes will be created. + """ + if ax is None: + fig, ax = plt.subplots(figsize=(8, 6)) + else: + fig = None + + ax.hist( + self.data, + bins=30, + density=True, + alpha=0.7, + color="tab:blue", + label="Data Histogram", + ) + x = np.linspace(np.min(self.data), np.max(self.data), 1000) + ax.plot(x, self.dist.pdf(x, *self.params), color="tab:red", label="Fitted PDF") + ax.set_xlabel("Data Values") + ax.set_ylabel("Density") + ax.set_title("Histogram and Fitted PDF") + ax.legend() + ax.grid() + + return fig, ax + + def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: + """ + Return period plot of the fitted distribution. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure and axes will be created. + """ + if ax is None: + fig, ax = plt.subplots(figsize=(8, 6)) + else: + fig = None + + sorted_data = np.sort(self.data) + exceedance_prob = 1 - self.ecdf + return_period = 1 / exceedance_prob + + ax.plot( + return_period, + self.dist.qf(self.ecdf, *self.params), + color="tab:red", + label="Fitted Distribution", + ) + ax.plot( + return_period, + sorted_data, + marker="o", + linestyle="", + color="tab:blue", + alpha=0.7, + label="Empirical Data", + ) + ax.set_xscale("log") + ax.set_xticks([1, 2, 5, 10, 25, 50, 100, 250, 1000, 10000]) + ax.set_xticklabels([1, 2, 5, 10, 25, 50, 100, 500, 1000, 10000]) + ax.set_xlim(right=np.max(return_period) * 1.2) + ax.set_xlabel("Return Period") + ax.set_ylabel("Data Values") + ax.set_title("Return Period Plot") + ax.legend() + ax.grid() + + return fig, ax + + +def fit_dist(dist: BaseDistribution, data: np.ndarray, **kwargs) -> FitResult: + """ + Fit a distribution to data using Maximum Likelihood Estimation (MLE). + + Parameters + ---------- + dist : BaseDistribution + Distribution to fit. + data : np.ndarray + Data to use for fitting the distribution. + **kwargs : dict, optional + Additional options for fitting: + - 'x0': Initial guess for distribution parameters (default: [mean, std, 0.0]). + - 'method': Optimization method (default: 'Nelder-Mead'). + - 'bounds': Bounds for optimization parameters (default: [(None, None), (0, None), ...]). + - 'options': Options for the optimizer (default: {'disp': False}). + - 'f0', 'floc': Fix location parameter (first parameter). + - 'f1', 'fscale': Fix scale parameter (second parameter). + - 'f2', 'fshape': Fix shape parameter (third parameter). + + Returns + ------- + FitResult + The fitting results, including parameters, success status, and negative log-likelihood. + """ + nparams = dist.nparams() + param_names = dist.param_names() + + # Handle fixed parameters + fixed_params = {} + for i, name in enumerate(param_names): + # Check both numerical (f0, f1, f2) and named (floc, fscale, fshape) fixed parameters + num_key = f'f{i}' + if num_key in kwargs: + fixed_params[i] = kwargs[num_key] + + + # Create list of free parameters (those not fixed) + free_params = [i for i in range(nparams) if i not in fixed_params] + # n_free = len(free_params) + + # Default optimization settings for free parameters only + default_x0 = np.asarray([np.mean(data), np.std(data)] + [0.1] * (nparams - 2)) + x0 = kwargs.get("x0", default_x0) + x0_free = np.array([x0[i] for i in free_params]) + + method = kwargs.get("method", "Nelder-Mead").lower() + default_bounds = [(None, None), (1e-6, None)] + [(None, None)] * (nparams - 2) + bounds = kwargs.get("bounds", default_bounds) + bounds_free = [bounds[i] for i in free_params] + options = kwargs.get("options", {"disp": False}) + + # Objective function that handles fixed parameters + def obj(free_values): + # Reconstruct full parameter vector with fixed values + full_params = np.zeros(nparams) + free_idx = 0 + for i in range(nparams): + if i in fixed_params: + full_params[i] = fixed_params[i] + else: + full_params[i] = free_values[free_idx] + free_idx += 1 + return dist.nll(data, *full_params) + + # Perform optimization only on free parameters + result = minimize(fun=obj, x0=x0_free, method=method, + bounds=bounds_free, options=options) + + # Reconstruct full parameter vector for final result + full_params = np.zeros(nparams) + free_idx = 0 + for i in range(nparams): + if i in fixed_params: + full_params[i] = fixed_params[i] + else: + full_params[i] = result.x[free_idx] + free_idx += 1 + + # Create modified result with full parameters + modified_result = OptimizeResult( + x=full_params, + success=result.success, + fun=result.fun, + message=result.message + ) + + # Return the fitting result as a FitResult object + return FitResult(dist, data, modified_result) \ No newline at end of file diff --git a/bluemath_tk/distributions/gev.py b/bluemath_tk/distributions/gev.py index 6e3b5a9..f0fd0eb 100644 --- a/bluemath_tk/distributions/gev.py +++ b/bluemath_tk/distributions/gev.py @@ -6,7 +6,7 @@ from ._base_distributions import BaseDistribution, FitResult, fit_dist -class gev(BaseDistribution): +class GEV(BaseDistribution): """ Generalized Extreme Value (GEV) distribution class. @@ -18,6 +18,8 @@ class gev(BaseDistribution): The complete name of the distribution (GEV). nparams : int Number of GEV parameters. + param_names : List[str] + Names of GEV parameters (location, scale, shape). Methods ------- @@ -52,10 +54,10 @@ class gev(BaseDistribution): Examples -------- - >>> from bluemath_tk.distributions.gev import gev - >>> gev_pdf = gev.pdf(x, loc=0, scale=1, shape=0.1) - >>> gev_cdf = gev.cdf(x, loc=0, scale=1, shape=0.1) - >>> gev_qf = gev.qf(p, loc=0, scale=1, shape=0.1) + >>> from bluemath_tk.distributions.gev import GEV + >>> gev_pdf = GEV.pdf(x, loc=0, scale=1, shape=0.1) + >>> gev_cdf = GEV.cdf(x, loc=0, scale=1, shape=0.1) + >>> gev_qf = GEV.qf(p, loc=0, scale=1, shape=0.1) """ def __init__(self) -> None: @@ -80,7 +82,7 @@ def param_names() -> List[str]: """ Name of parameters of GEV """ - return ["location", "scale", "shape"] + return ["loc", "scale", "shape"] @staticmethod def pdf( @@ -210,7 +212,7 @@ def sf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - sp = 1 - gev.cdf(x, loc=loc, scale=scale, shape=shape) + sp = 1 - GEV.cdf(x, loc=loc, scale=scale, shape=shape) return sp @@ -286,26 +288,25 @@ def nll( ---------- nll : float Negative Log-Likelihood value - - Raises - ------ - ValueError - If scale is not greater than 0. """ if scale <= 0: nll = np.inf # Return a large value for invalid scale + + else: + y = (data - loc) / scale - y = (data - loc) / scale + # # Gumbel case (shape = 0) + # if shape == 0.0: + # pass + # nll = data.shape[0] * np.log(scale) + np.sum( + # np.exp(-y) + np.sum(-y) + # ) # Gumbel case - # Gumbel case (shape = 0) - if shape == 0.0: - nll = data.shape[0] * np.log(scale) + np.sum( - np.exp(-y) + np.sum(-y) - ) # Gumbel case + # # General case (Weibull and Frechet, shape != 0) + # else: - # General case (Weibull and Frechet, shape != 0) - else: + shape = np.maximum(shape, 1e-8) if shape > 0 else np.minimum(shape, -1e-8) # Avoid division by zero y = 1 + shape * y if any(y <= 0): nll = np.inf # Return a large value for invalid y @@ -340,7 +341,7 @@ def fit(data: np.ndarray, **kwargs) -> FitResult: success status, and negative log-likelihood value. """ # Fit the GEV distribution to the data using the fit_dist function - return fit_dist(gev, data, **kwargs) + return fit_dist(GEV, data, **kwargs) @staticmethod def random( @@ -556,7 +557,7 @@ def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - std = np.sqrt(gev.variance(loc, scale, shape)) + std = np.sqrt(GEV.variance(loc, scale, shape)) return std @@ -595,10 +596,10 @@ def stats( raise ValueError("Scale parameter must be > 0") stats = { - "mean": float(gev.mean(loc, scale, shape)), - "median": float(gev.median(loc, scale, shape)), - "variance": float(gev.variance(loc, scale, shape)), - "std": float(gev.std(loc, scale, shape)), + "mean": float(GEV.mean(loc, scale, shape)), + "median": float(GEV.median(loc, scale, shape)), + "variance": float(GEV.variance(loc, scale, shape)), + "std": float(GEV.std(loc, scale, shape)), } return stats diff --git a/bluemath_tk/distributions/gpd.py b/bluemath_tk/distributions/gpd.py index e69de29..7603c9b 100644 --- a/bluemath_tk/distributions/gpd.py +++ b/bluemath_tk/distributions/gpd.py @@ -0,0 +1,597 @@ +from typing import Dict, List + +import numpy as np + +from ._base_distributions import BaseDistribution, FitResult, fit_dist + + +class GPD(BaseDistribution): + """ + Generalized Pareto Distribution (GPD) class. + + This class contains all the methods assocaited to the GPD distribution. + + Attributes + ---------- + name : str + The complete name of the distribution (GPD). + nparams : int + Number of GPD parameters. + param_names : List[str] + Names of the GPD parameters (threshold, scale, shape). + + Methods + ------- + pdf(x, loc, scale, shape) + Probability density function. + cdf(x, loc, scale, shape) + Cumulative distribution function + qf(p, loc, scale, shape) + Quantile function + sf(x, loc, scale, shape) + Survival function + nll(data, loc, scale, shape) + Negative Log-Likelihood function + fit(data) + Fit distribution to data (NOT IMPLEMENTED). + random(size, loc, scale, shape) + Generates random values from GPD distribution. + mean(loc, scale, shape) + Mean of GPD distribution. + median(loc, scale, shape) + Median of GPD distribution. + variance(loc, scale, shape) + Variance of GPD distribution. + std(loc, scale, shape) + Standard deviation of GPD distribution. + stats(loc, scale, shape) + Summary statistics of GPD distribution. + + Notes + ----- + - This class is designed to obtain all the properties associated to the GPD distribution. + + Examples + -------- + >>> from bluemath_tk.distributions.gpd import GPD + >>> gpd_pdf = GPD.pdf(x, loc=0, scale=1, shape=0.1) + >>> gpd_cdf = GPD.cdf(x, loc=0, scale=1, shape=0.1) + >>> gpd_qf = GPD.qf(p, loc=0, scale=1, shape=0.1) + """ + + def __init__(self) -> None: + """ + Initialize the GPD distribution class + """ + super().__init__() + + @staticmethod + def name() -> str: + return "Generalized Pareto Distribution (GPD)" + + @staticmethod + def nparams() -> int: + """ + Number of parameters of GPD + """ + return int(3) + + @staticmethod + def param_names() -> List[str]: + """ + Name of parameters of GPD + """ + return ["loc", "scale", "shape"] + + @staticmethod + def pdf( + x: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> np.ndarray: + """ + Probability density function + + Parameters + ---------- + x : np.ndarray + Values to compute the probability density value + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + pdf : np.ndarray + Probability density function values + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + y = np.maximum(x - loc, 0) / scale + + # Gumbel case (shape = 0) + if shape == 0.0: + pdf = (1 / scale) * (np.exp(-y)) + + # General case (Weibull and Frechet, shape != 0) + else: + pdf = np.full_like(x, 0, dtype=float) + yy = 1 + shape * y + yymask = yy > 0 + pdf[yymask] = (1 / scale) * ( + yy[yymask] ** (-1 - (1 / shape)) + ) + + return pdf + + @staticmethod + def cdf( + x: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> np.ndarray: + """ + Cumulative distribution function + + Parameters + ---------- + x : np.ndarray + Values to compute their probability + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + p : np.ndarray + Probability + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + y = np.maximum(x - loc, 0) / scale + + # Gumbel case (shape = 0) + if shape == 0.0: + p = 1 - np.exp(-y) + + # General case (Weibull and Frechet, shape != 0) + else: + p = 1 - np.maximum(1 + shape * y, 0) ** (-1 / shape) + + return p + + @staticmethod + def sf( + x: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> np.ndarray: + """ + Survival function (1-Cumulative Distribution Function) + + Parameters + ---------- + x : np.ndarray + Values to compute their survival function value + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + sp : np.ndarray + Survival function value + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + sp = 1 - GPD.cdf(x, loc=loc, scale=scale, shape=shape) + + return sp + + @staticmethod + def qf( + p: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> np.ndarray: + """ + Quantile function (Inverse of Cumulative Distribution Function) + + Parameters + ---------- + p : np.ndarray + Probabilities to compute their quantile + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + q : np.ndarray + Quantile value + + Raises + ------ + ValueError + If probabilities are not in the range (0, 1). + + ValueError + If scale is not greater than 0. + """ + + if np.min(p) <= 0 or np.max(p) >= 1: + raise ValueError("Probabilities must be in the range (0, 1)") + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + # Gumbel case (shape = 0) + if shape == 0.0: + q = loc - scale * np.log(1 - p) + + # General case (Weibull and Frechet, shape != 0) + else: + q = loc + scale * ((1 - p) ** (-shape) - 1) / shape + + return q + + + @staticmethod + def nll( + data: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> float: + """ + Negative Log-Likelihood function + + Parameters + ---------- + data : np.ndarray + Data to compute the Negative Log-Likelihood value + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + nll : float + Negative Log-Likelihood value + """ + + if scale <= 0: + nll = np.inf # Return a large value for invalid scale + + else: + y = (data - loc) / scale + + # # Gumbel case (shape = 0) + # if shape == 0.0: + # nll = data.shape[0] * np.log(scale) + np.sum(y) + + # General case (Weibull and Frechet, shape != 0) + # else: + shape = np.maximum(shape, 1e-8) if shape > 0 else np.minimum(shape, -1e-8) # Avoid division by zero + y = 1 + shape * y + if np.min(y <= 0): + nll = np.inf # Return a large value for invalid y + else: + nll = ( + data.shape[0] * np.log(scale) + + (1 / shape + 1) * np.sum(np.log(y)) + ) + + return nll + + + + @staticmethod + def fit(data: np.ndarray, **kwargs) -> FitResult: + """ + Fit GEV distribution + + Parameters + ---------- + data : np.ndarray + Data to fit the GEV distribution + **kwargs : dict, optional + Additional keyword arguments for the fitting function. + These can include options like method, bounds, etc. + See fit_dist for more details. + If not provided, default fitting options will be used. + + Returns + ---------- + FitResult + Result of the fit containing the parameters loc, scale, shape, + success status, and negative log-likelihood value. + """ + # Fit the GEV distribution to the data using the fit_dist function + return fit_dist(GPD, data, **kwargs) + + @staticmethod + def random( + size: int, + loc: float = 0.0, + scale: float = 1.0, + shape: float = 0.0, + random_state: int = None, + ) -> np.ndarray: + """ + Generates random values from GPD distribution + + Parameters + ---------- + size : int + Number of random values to generate + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + random_state : np.random.RandomState, optional + Random state for reproducibility. + If None, do not use random stat. + + Returns + ---------- + x : np.ndarray + Random values from GEV distribution + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + # Set random state if provided + if random_state is not None: + np.random.seed(random_state) + + # Generate uniform random numbers + u = np.random.uniform(0, 1, size) + + # Gumbel case (shape = 0) + if shape == 0.0: + x = loc - scale * np.log(u) + + # General case (Weibull and Frechet, shape != 0) + else: + x = loc + scale * (u ** (-shape) - 1) / shape + + return x + + + @staticmethod + def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + """ + Mean + + Parameters + ---------- + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + mean : np.ndarray + Mean value of GEV with the given parameters + + Raises + ------ + ValueError + If scale is not greater than 0. + + Warning + If shape is greater than or equal to 1, mean is not defined. + In this case, it returns infinity. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + if shape >= 1: + Warning("Shape parameter must be < 1 for mean to be defined") + mean = np.inf + + # Shape < 1 case + else: + mean = scale / (1 - shape) + + return mean + + @staticmethod + def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + """ + Median + + Parameters + ---------- + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + median : np.ndarray + Median value of GEV with the given parameters + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + if shape == 0: + median = np.inf + else: + median = loc + scale * (2 ** shape - 1) / shape + + return median + + @staticmethod + def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + """ + Variance + + Parameters + ---------- + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + var : np.ndarray + Variance of GEV with the given parameters + + Raises + ------ + ValueError + If scale is not greater than 0. + Warning + If shape is greater than or equal to 172, mean is not defined. + In this case, it returns infinity. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + # Gumbel case (shape = 0) + if shape >= 1/2: + Warning("Shape parameter must be < 1/2 for variance to be defined") + var = np.inf + + else: + var = scale**2 / ((1 - shape) ** 2 * (1 - 2 * shape)) + + return var + + @staticmethod + def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + """ + Standard deviation + + Parameters + ---------- + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + std : np.ndarray + Standard Deviation of GEV with the given + parameters + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + std = np.sqrt(GPD.variance(loc, scale, shape)) + + return std + + @staticmethod + def stats( + loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> Dict[str, float]: + """ + Summary statistics + + Return summary statistics including mean, std, variance, etc. + + Parameters + ---------- + loc : float, default=0.0 + Location parameter + scale : float, default = 1.0 + Scale parameter. + Must be greater than 0. + shape : float, default = 0.0 + Shape parameter. + + Returns + ---------- + stats : dict + Summary statistics of GEV distribution with the given + parameters + + Raises + ------ + ValueError + If scale is not greater than 0. + """ + + if scale <= 0: + raise ValueError("Scale parameter must be > 0") + + stats = { + "mean": float(GPD.mean(loc, scale, shape)), + "median": float(GPD.median(loc, scale, shape)), + "variance": float(GPD.variance(loc, scale, shape)), + "std": float(GPD.std(loc, scale, shape)), + } + + return stats diff --git a/tests/distributions/test_gev.py b/tests/distributions/test_gev.py index c4cabde..665b822 100644 --- a/tests/distributions/test_gev.py +++ b/tests/distributions/test_gev.py @@ -3,7 +3,7 @@ import numpy as np from bluemath_tk.distributions._base_distributions import FitResult -from bluemath_tk.distributions.gev import gev +from bluemath_tk.distributions.gev import GEV class TestGEV(unittest.TestCase): @@ -18,62 +18,62 @@ def setUp(self): def test_pdf(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - custom_pdf = gev.pdf(self.x, self.loc, self.scale, shape) + custom_pdf = GEV.pdf(self.x, self.loc, self.scale, shape) self.assertIsInstance(custom_pdf, np.ndarray) self.assertEqual(custom_pdf.shape[0], 1000) def test_cdf(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - custom_cdf = gev.cdf(self.x, self.loc, self.scale, shape) + custom_cdf = GEV.cdf(self.x, self.loc, self.scale, shape) self.assertIsInstance(custom_cdf, np.ndarray) self.assertEqual(custom_cdf.shape[0], 1000) def test_sf(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - custom_sf = gev.sf(self.x, self.loc, self.scale, shape) + custom_sf = GEV.sf(self.x, self.loc, self.scale, shape) self.assertIsInstance(custom_sf, np.ndarray) self.assertEqual(custom_sf.shape[0], 1000) def test_qf(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - custom_qf = gev.qf(self.p, self.loc, self.scale, shape) + custom_qf = GEV.qf(self.p, self.loc, self.scale, shape) self.assertIsInstance(custom_qf, np.ndarray) self.assertEqual(custom_qf.shape[0], 1000) def test_nll(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - nll = gev.nll(self.x, self.loc, self.scale, shape) + nll = GEV.nll(self.x, self.loc, self.scale, shape) self.assertIsInstance(nll, float) def test_random(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - random_values = gev.random(1000, self.loc, self.scale, shape) + random_values = GEV.random(1000, self.loc, self.scale, shape) self.assertIsInstance(random_values, np.ndarray) self.assertEqual(random_values.shape[0], 1000) def test_mean(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - mean = gev.mean(self.loc, self.scale, shape) + mean = GEV.mean(self.loc, self.scale, shape) self.assertIsInstance(mean, float) def test_median(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - median = gev.median(self.loc, self.scale, shape) + median = GEV.median(self.loc, self.scale, shape) self.assertIsInstance(median, float) def test_variance(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - variance = gev.variance(self.loc, self.scale, shape) + variance = GEV.variance(self.loc, self.scale, shape) self.assertIsInstance(variance, float) def test_std(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - std = gev.std(self.loc, self.scale, shape) + std = GEV.std(self.loc, self.scale, shape) self.assertIsInstance(std, float) def test_stats(self): for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: - stats = gev.stats(self.loc, self.scale, shape) + stats = GEV.stats(self.loc, self.scale, shape) self.assertIsInstance(stats, dict) self.assertIn("mean", stats) self.assertIn("median", stats) @@ -82,33 +82,33 @@ def test_stats(self): def test_invalid_scale(self): with self.assertRaises(ValueError): - gev.pdf(self.x, self.loc, 0.0, self.shape_frechet) + GEV.pdf(self.x, self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.cdf(self.x, self.loc, 0.0, self.shape_frechet) + GEV.cdf(self.x, self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.sf(self.x, self.loc, 0.0, self.shape_frechet) + GEV.sf(self.x, self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.qf(self.p, self.loc, 0.0, self.shape_frechet) + GEV.qf(self.p, self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.random(1000, self.loc, 0.0, self.shape_frechet) + GEV.random(1000, self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.mean(self.loc, 0.0, self.shape_frechet) + GEV.mean(self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.median(self.loc, 0.0, self.shape_frechet) + GEV.median(self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.variance(self.loc, 0.0, self.shape_frechet) + GEV.variance(self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.std(self.loc, 0.0, self.shape_frechet) + GEV.std(self.loc, 0.0, self.shape_frechet) with self.assertRaises(ValueError): - gev.stats(self.loc, 0.0, self.shape_frechet) + GEV.stats(self.loc, 0.0, self.shape_frechet) def test_fit(self): # Generate data using specific parameters loc, scale, shape = 0.5, 1.5, 0.2 - data = gev.random(1000, loc, scale, shape, random_state=42) + data = GEV.random(1000, loc, scale, shape, random_state=42) # Fit the GEV distribution to the data - fit_result = gev.fit(data) + fit_result = GEV.fit(data) # Check the fit result self.assertIsInstance(fit_result, FitResult) @@ -118,8 +118,8 @@ def test_fit(self): self.assertIsInstance(fit_result.nll, float) # Verify that the fitted parameters are close to the original ones - self.assertAlmostEqual(fit_result.params[0], loc, delta=0.1) - self.assertAlmostEqual(fit_result.params[1], scale, delta=0.1) + self.assertAlmostEqual(fit_result.params[0], loc, delta=0.2) + self.assertAlmostEqual(fit_result.params[1], scale, delta=0.2) self.assertAlmostEqual(fit_result.params[2], shape, delta=0.1) diff --git a/tests/distributions/test_gpd.py b/tests/distributions/test_gpd.py new file mode 100644 index 0000000..48ac11c --- /dev/null +++ b/tests/distributions/test_gpd.py @@ -0,0 +1,127 @@ +import unittest + +import numpy as np + +from bluemath_tk.distributions._base_distributions import FitResult +from bluemath_tk.distributions.gpd import GPD + + +class TestGPD(unittest.TestCase): + def setUp(self): + self.x = np.random.rand(1000) * 2 + self.p = np.random.rand(1000) + self.loc = 0.0 + self.scale = 1.0 + self.shape_frechet = 0.1 # GPD (Frechet) + self.shape_weibull = -0.1 # GPD (Weibull) + self.shape_gumbel = 0.0 # Gumbel case + + def test_pdf(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + custom_pdf = GPD.pdf(self.x, self.loc, self.scale, shape) + self.assertIsInstance(custom_pdf, np.ndarray) + self.assertEqual(custom_pdf.shape[0], 1000) + + def test_cdf(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + custom_cdf = GPD.cdf(self.x, self.loc, self.scale, shape) + self.assertIsInstance(custom_cdf, np.ndarray) + self.assertEqual(custom_cdf.shape[0], 1000) + + def test_sf(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + custom_sf = GPD.sf(self.x, self.loc, self.scale, shape) + self.assertIsInstance(custom_sf, np.ndarray) + self.assertEqual(custom_sf.shape[0], 1000) + + def test_qf(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + custom_qf = GPD.qf(self.p, self.loc, self.scale, shape) + self.assertIsInstance(custom_qf, np.ndarray) + self.assertEqual(custom_qf.shape[0], 1000) + + def test_nll(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + nll = GPD.nll(self.x, self.loc, self.scale, shape) + self.assertIsInstance(nll, float) + + def test_random(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + random_values = GPD.random(1000, self.loc, self.scale, shape) + self.assertIsInstance(random_values, np.ndarray) + self.assertEqual(random_values.shape[0], 1000) + + def test_mean(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + mean = GPD.mean(self.loc, self.scale, shape) + self.assertIsInstance(mean, float) + + def test_median(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + median = GPD.median(self.loc, self.scale, shape) + self.assertIsInstance(median, float) + + def test_variance(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + variance = GPD.variance(self.loc, self.scale, shape) + self.assertIsInstance(variance, float) + + def test_std(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + std = GPD.std(self.loc, self.scale, shape) + self.assertIsInstance(std, float) + + def test_stats(self): + for shape in [self.shape_frechet, self.shape_weibull, self.shape_gumbel]: + stats = GPD.stats(self.loc, self.scale, shape) + self.assertIsInstance(stats, dict) + self.assertIn("mean", stats) + self.assertIn("median", stats) + self.assertIn("variance", stats) + self.assertIn("std", stats) + + def test_invalid_scale(self): + with self.assertRaises(ValueError): + GPD.pdf(self.x, self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.cdf(self.x, self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.sf(self.x, self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.qf(self.p, self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.random(1000, self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.mean(self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.median(self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.variance(self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.std(self.loc, 0.0, self.shape_frechet) + with self.assertRaises(ValueError): + GPD.stats(self.loc, 0.0, self.shape_frechet) + + def test_fit(self): + # Generate data using specific parameters + loc, scale, shape = 0.5, 1.5, 0.2 + data = GPD.random(1000, loc, scale, shape, random_state=42) + + # Fit the GPD distribution to the data + # loc is fixed at 0.0 + fit_result = GPD.fit(data - loc, f0 = 0.0) + + # Check the fit result + self.assertIsInstance(fit_result, FitResult) + self.assertTrue(fit_result.success) + self.assertEqual(len(fit_result.params), 3) # loc, scale, shape + self.assertGreater(fit_result.params[1], 0) # Scale must be > 0 + self.assertIsInstance(fit_result.nll, float) + + # Verify that the fitted parameters are close to the original ones + self.assertAlmostEqual(fit_result.params[1], scale, delta=0.2) + self.assertAlmostEqual(fit_result.params[2], shape, delta=0.1) + + +if __name__ == "__main__": + unittest.main()