|
1 | | -"""Created on Wed Sep 9 23:31:11 2020. |
2 | | -
|
3 | | -@author: mofarrag |
4 | | -""" |
| 1 | +"""Extreme value statistics""" |
5 | 2 | import matplotlib |
6 | 3 |
|
7 | 4 | matplotlib.use("TkAgg") |
8 | 5 | import pandas as pd |
9 | | -from statista.distributions import GEV, ConfidenceInterval, Gumbel, PlottingPosition |
| 6 | + |
| 7 | +from statista.distributions import GEV, Gumbel, PlottingPosition, Distributions |
| 8 | +from statista.confidence_interval import ConfidenceInterval |
10 | 9 |
|
11 | 10 | time_series1 = pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist() |
12 | 11 | time_series2 = pd.read_csv("examples/data/time_series2.txt", header=None)[0].tolist() |
13 | | -#%% |
14 | | -Gdist = Gumbel(time_series1) |
| 12 | +# %% |
| 13 | +gumbel_dist = Distributions("Gumbel", time_series1) |
15 | 14 | # defult parameter estimation method is maximum liklihood method |
16 | | -Param_mle = Gdist.estimateParameter(method="mle") |
17 | | -Gdist.ks() |
18 | | -Gdist.chisquare() |
19 | | -print(Param_mle) |
20 | | -loc = Param_mle[0] |
21 | | -scale = Param_mle[1] |
| 15 | +param_mle = gumbel_dist.fit_model(method="mle") |
| 16 | +gumbel_dist.ks() |
| 17 | +gumbel_dist.chisquare() |
| 18 | +print(param_mle) |
22 | 19 | # calculate and plot the pdf |
23 | | -pdf = Gdist.pdf(loc, scale, plot_figure=True) |
24 | | -cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True) |
25 | | -#%% lmoments |
26 | | -Param_lmoments = Gdist.estimateParameter(method="lmoments") |
27 | | -Gdist.ks() |
28 | | -Gdist.chisquare() |
29 | | -print(Param_lmoments) |
30 | | -loc = Param_lmoments[0] |
31 | | -scale = Param_lmoments[1] |
| 20 | +pdf = gumbel_dist.pdf(param_mle, plot_figure=True) |
| 21 | +cdf, _, _ = gumbel_dist.cdf(param_mle, plot_figure=True) |
| 22 | +# %% lmoments |
| 23 | +param_lmoments = gumbel_dist.fit_model(method="lmoments") |
| 24 | +gumbel_dist.ks() |
| 25 | +gumbel_dist.chisquare() |
| 26 | +print(param_lmoments) |
32 | 27 | # calculate and plot the pdf |
33 | | -pdf = Gdist.pdf(loc, scale, plot_figure=True) |
34 | | -cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True) |
35 | | -#%% |
| 28 | +pdf = gumbel_dist.pdf(param_lmoments, plot_figure=True) |
| 29 | +cdf, _, _ = gumbel_dist.cdf(param_lmoments, plot_figure=True) |
| 30 | +# %% |
36 | 31 | # calculate the CDF(Non Exceedance probability) using weibul plotting position |
37 | 32 | time_series1.sort() |
38 | 33 | # calculate the F (Non Exceedence probability based on weibul) |
39 | | -cdf_Weibul = PlottingPosition.weibul(time_series1) |
| 34 | +cdf_weibul = PlottingPosition.weibul(time_series1) |
40 | 35 | # TheporeticalEstimate method calculates the theoretical values based on the Gumbel distribution |
41 | | -Qth = Gdist.theporeticalEstimate(loc, scale, cdf_Weibul) |
| 36 | +Qth = gumbel_dist.theoretical_estimate(param_lmoments, cdf_weibul) |
42 | 37 | # test = stats.chisquare(st.Standardize(Qth), st.Standardize(time_series1),ddof=5) |
43 | 38 | # calculate the confidence interval |
44 | | -upper, lower = Gdist.confidenceInterval(loc, scale, cdf_Weibul, alpha=0.1) |
| 39 | +upper, lower = gumbel_dist.confidence_interval(param_lmoments, cdf_weibul, alpha=0.1) |
45 | 40 | # ProbapilityPlot can estimate the Qth and the lower and upper confidence interval in the process of plotting |
46 | | -fig, ax = Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1) |
47 | | -#%% |
| 41 | +fig, ax = gumbel_dist.probability_plot(param_lmoments, cdf_weibul, alpha=0.1) |
| 42 | +# %% |
48 | 43 | """ |
49 | 44 | if you want to focus only on high values, you can use a threshold to make the code focus on what is higher |
50 | 45 | this threshold. |
51 | 46 | """ |
52 | 47 | threshold = 17 |
53 | | -Param_dist = Gdist.estimateParameter( |
54 | | - method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold |
| 48 | +param_dist = gumbel_dist.fit_model( |
| 49 | + method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold |
55 | 50 | ) |
56 | | -print(Param_dist) |
57 | | -loc = Param_dist[0] |
58 | | -scale = Param_dist[1] |
59 | | -Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1) |
60 | | -#%% |
| 51 | +print(param_dist) |
| 52 | +gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1) |
| 53 | +# %% |
61 | 54 | threshold = 18 |
62 | | -Param_dist = Gdist.estimateParameter( |
63 | | - method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold |
| 55 | +param_dist = gumbel_dist.fit_model( |
| 56 | + method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold |
64 | 57 | ) |
65 | | -print(Param_dist) |
66 | | -loc = Param_dist[0] |
67 | | -scale = Param_dist[1] |
68 | | -Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1) |
69 | | -#%% Generalized Extreme Value (GEV) |
70 | | -Gevdist = GEV(time_series2) |
| 58 | +print(param_dist) |
| 59 | +gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1) |
| 60 | +# %% Generalized Extreme Value (GEV) |
| 61 | +gev_dist = Distributions("GEV", time_series2) |
71 | 62 | # default parameter estimation method is maximum liklihood method |
72 | | -mle_param = Gevdist.estimateParameter(method="mle") |
73 | | -Gevdist.ks() |
74 | | -Gevdist.chisquare() |
| 63 | +gev_mle_param = gev_dist.fit_model(method="mle") |
| 64 | +gev_dist.ks() |
| 65 | +gev_dist.chisquare() |
75 | 66 |
|
76 | | -print(mle_param) |
77 | | -shape = mle_param[0] |
78 | | -loc = mle_param[1] |
79 | | -scale = mle_param[2] |
| 67 | +print(gev_mle_param) |
80 | 68 | # calculate and plot the pdf |
81 | | -pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True) |
82 | | -cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True) |
83 | | -#%% lmoment method |
84 | | -lmom_param = Gevdist.estimateParameter(method="lmoments") |
85 | | -print(lmom_param) |
86 | | -shape = lmom_param[0] |
87 | | -loc = lmom_param[1] |
88 | | -scale = lmom_param[2] |
| 69 | +pdf, fig, ax = gev_dist.pdf(gev_mle_param, plot_figure=True) |
| 70 | +cdf, _, _ = gev_dist.cdf(gev_mle_param, plot_figure=True) |
| 71 | +# %% lmoment method |
| 72 | +gev_lmom_param = gev_dist.fit_model(method="lmoments") |
| 73 | +print(gev_lmom_param) |
89 | 74 | # calculate and plot the pdf |
90 | | -pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True) |
91 | | -cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True) |
| 75 | +pdf, fig, ax = gev_dist.pdf(gev_lmom_param, plot_figure=True) |
| 76 | +cdf, _, _ = gev_dist.cdf(gev_lmom_param, plot_figure=True) |
92 | 77 | #%% |
93 | 78 | time_series1.sort() |
94 | 79 | # calculate the F (Non Exceedence probability based on weibul) |
95 | | -cdf_Weibul = PlottingPosition.weibul(time_series1) |
96 | | -T = PlottingPosition.weibul(time_series1, option=2) |
| 80 | +cdf_weibul = PlottingPosition.weibul(time_series1) |
| 81 | +T = PlottingPosition.weibul(time_series1, return_period=True) |
97 | 82 | # TheporeticalEstimate method calculates the theoretical values based on the Gumbel distribution |
98 | | -Qth = Gevdist.theporeticalEstimate(shape, loc, scale, cdf_Weibul) |
| 83 | +Qth = gev_dist.theoretical_estimate(gev_lmom_param, cdf_weibul) |
99 | 84 |
|
100 | 85 | func = GEV.ci_func |
101 | | -upper, lower = Gevdist.confidenceInterval( |
102 | | - shape, |
103 | | - loc, |
104 | | - scale, |
105 | | - F=cdf_Weibul, |
| 86 | +upper, lower = gev_dist.confidence_interval( |
| 87 | + gev_lmom_param, |
| 88 | + prob_non_exceed=cdf_weibul, |
106 | 89 | alpha=0.1, |
107 | 90 | statfunction=func, |
108 | 91 | n_samples=len(time_series1), |
| 92 | + method="lmoments", |
109 | 93 | ) |
110 | | -#%% |
| 94 | +# %% |
111 | 95 | """ |
112 | 96 | calculate the confidence interval using the boot strap method directly |
113 | 97 | """ |
114 | | -CI = ConfidenceInterval.BootStrap( |
| 98 | +CI = ConfidenceInterval.boot_strap( |
115 | 99 | time_series1, |
116 | 100 | statfunction=func, |
117 | | - gevfit=Param_dist, |
| 101 | + gevfit=gev_lmom_param, |
118 | 102 | n_samples=len(time_series1), |
119 | | - F=cdf_Weibul, |
| 103 | + F=cdf_weibul, |
| 104 | + method="lmoments", |
120 | 105 | ) |
121 | | -LB = CI["LB"] |
122 | | -UB = CI["UB"] |
123 | | -#%% |
124 | | -fig, ax = Gevdist.probapilityPlot( |
125 | | - shape, loc, scale, cdf_Weibul, func=func, n_samples=len(time_series1) |
| 106 | +LB = CI["lb"] |
| 107 | +UB = CI["ub"] |
| 108 | +# %% |
| 109 | +fig, ax = gev_dist.probability_plot( |
| 110 | + gev_lmom_param, cdf_weibul, func=func, n_samples=len(time_series1) |
126 | 111 | ) |
0 commit comments