Skip to content

Commit e1f3c07

Browse files
authored
Merge pull request #24 from Serapieum-of-alex/factory-design-pattern
Factory design pattern
2 parents 1f6d7d2 + 83358a8 commit e1f3c07

25 files changed

+2091
-1618
lines changed

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[flake8]
2-
ignore = E501, W503
2+
ignore = E203, E266, E501, W503, E741
33
max-line-length = 88
44
max-complexity = 18
55
select = B,C,E,F,W,T4

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,4 @@ build_artifacts
146146
mo_*
147147
conda/
148148
*.zip
149+
.run/

HISTORY.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,18 @@ History
3535
* modify the pdf, cdf, and probability plot plots
3636
* create separate plot and confidence_interval modules.
3737

38-
0.4.0 (2023-011-23)
38+
0.4.0 (2023-11-23)
3939
------------------
4040

4141
* add Pearson 3 distribution
4242
* Use setup.py instead of pyproject.toml.
4343
* Correct the Pearson correlation coefficient and add documentation.
4444
* replace the pdf and cdf by the methods from scipy package.
45+
46+
0.5.0 (2023-12-11)
47+
------------------
48+
49+
* Unify all the methods for the distributions.
50+
* Use factory design pattern to create the distributions.
51+
* add tests for the eva module.
52+
* use snake_case for the methods and variables.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ pip install git+https://github.com/MAfarrag/statista
6565
## pip
6666
to install the latest release you can easily use pip
6767
```
68-
pip install statista==0.4.0
68+
pip install statista==0.5.0
6969
```
7070

7171
Quick start
Lines changed: 64 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,126 +1,111 @@
1-
"""Created on Wed Sep 9 23:31:11 2020.
2-
3-
@author: mofarrag
4-
"""
1+
"""Extreme value statistics"""
52
import matplotlib
63

74
matplotlib.use("TkAgg")
85
import pandas as pd
9-
from statista.distributions import GEV, ConfidenceInterval, Gumbel, PlottingPosition
6+
7+
from statista.distributions import GEV, Gumbel, PlottingPosition, Distributions
8+
from statista.confidence_interval import ConfidenceInterval
109

1110
time_series1 = pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
1211
time_series2 = pd.read_csv("examples/data/time_series2.txt", header=None)[0].tolist()
13-
#%%
14-
Gdist = Gumbel(time_series1)
12+
# %%
13+
gumbel_dist = Distributions("Gumbel", time_series1)
1514
# the default parameter estimation method is the maximum likelihood method
16-
Param_mle = Gdist.estimateParameter(method="mle")
17-
Gdist.ks()
18-
Gdist.chisquare()
19-
print(Param_mle)
20-
loc = Param_mle[0]
21-
scale = Param_mle[1]
15+
param_mle = gumbel_dist.fit_model(method="mle")
16+
gumbel_dist.ks()
17+
gumbel_dist.chisquare()
18+
print(param_mle)
2219
# calculate and plot the pdf
23-
pdf = Gdist.pdf(loc, scale, plot_figure=True)
24-
cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True)
25-
#%% lmoments
26-
Param_lmoments = Gdist.estimateParameter(method="lmoments")
27-
Gdist.ks()
28-
Gdist.chisquare()
29-
print(Param_lmoments)
30-
loc = Param_lmoments[0]
31-
scale = Param_lmoments[1]
20+
pdf = gumbel_dist.pdf(param_mle, plot_figure=True)
21+
cdf, _, _ = gumbel_dist.cdf(param_mle, plot_figure=True)
22+
# %% lmoments
23+
param_lmoments = gumbel_dist.fit_model(method="lmoments")
24+
gumbel_dist.ks()
25+
gumbel_dist.chisquare()
26+
print(param_lmoments)
3227
# calculate and plot the pdf
33-
pdf = Gdist.pdf(loc, scale, plot_figure=True)
34-
cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True)
35-
#%%
28+
pdf = gumbel_dist.pdf(param_lmoments, plot_figure=True)
29+
cdf, _, _ = gumbel_dist.cdf(param_lmoments, plot_figure=True)
30+
# %%
3631
# calculate the CDF (non-exceedance probability) using the Weibull plotting position
3732
time_series1.sort()
3833
# calculate the F (non-exceedance probability based on Weibull)
39-
cdf_Weibul = PlottingPosition.weibul(time_series1)
34+
cdf_weibul = PlottingPosition.weibul(time_series1)
4035
# The theoretical_estimate method calculates the theoretical values based on the Gumbel distribution
41-
Qth = Gdist.theporeticalEstimate(loc, scale, cdf_Weibul)
36+
Qth = gumbel_dist.theoretical_estimate(param_lmoments, cdf_weibul)
4237
# test = stats.chisquare(st.Standardize(Qth), st.Standardize(time_series1),ddof=5)
4338
# calculate the confidence interval
44-
upper, lower = Gdist.confidenceInterval(loc, scale, cdf_Weibul, alpha=0.1)
39+
upper, lower = gumbel_dist.confidence_interval(param_lmoments, cdf_weibul, alpha=0.1)
4540
# probability_plot can estimate the Qth and the lower and upper confidence intervals in the process of plotting
46-
fig, ax = Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
47-
#%%
41+
fig, ax = gumbel_dist.probability_plot(param_lmoments, cdf_weibul, alpha=0.1)
42+
# %%
4843
"""
4944
if you want to focus only on high values, you can use a threshold to make the code focus on what is higher than
5045
this threshold.
5146
"""
5247
threshold = 17
53-
Param_dist = Gdist.estimateParameter(
54-
method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold
48+
param_dist = gumbel_dist.fit_model(
49+
method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold
5550
)
56-
print(Param_dist)
57-
loc = Param_dist[0]
58-
scale = Param_dist[1]
59-
Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
60-
#%%
51+
print(param_dist)
52+
gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1)
53+
# %%
6154
threshold = 18
62-
Param_dist = Gdist.estimateParameter(
63-
method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold
55+
param_dist = gumbel_dist.fit_model(
56+
method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold
6457
)
65-
print(Param_dist)
66-
loc = Param_dist[0]
67-
scale = Param_dist[1]
68-
Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
69-
#%% Generalized Extreme Value (GEV)
70-
Gevdist = GEV(time_series2)
58+
print(param_dist)
59+
gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1)
60+
# %% Generalized Extreme Value (GEV)
61+
gev_dist = Distributions("GEV", time_series2)
7162
# the default parameter estimation method is the maximum likelihood method
72-
mle_param = Gevdist.estimateParameter(method="mle")
73-
Gevdist.ks()
74-
Gevdist.chisquare()
63+
gev_mle_param = gev_dist.fit_model(method="mle")
64+
gev_dist.ks()
65+
gev_dist.chisquare()
7566

76-
print(mle_param)
77-
shape = mle_param[0]
78-
loc = mle_param[1]
79-
scale = mle_param[2]
67+
print(gev_mle_param)
8068
# calculate and plot the pdf
81-
pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True)
82-
cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True)
83-
#%% lmoment method
84-
lmom_param = Gevdist.estimateParameter(method="lmoments")
85-
print(lmom_param)
86-
shape = lmom_param[0]
87-
loc = lmom_param[1]
88-
scale = lmom_param[2]
69+
pdf, fig, ax = gev_dist.pdf(gev_mle_param, plot_figure=True)
70+
cdf, _, _ = gev_dist.cdf(gev_mle_param, plot_figure=True)
71+
# %% lmoment method
72+
gev_lmom_param = gev_dist.fit_model(method="lmoments")
73+
print(gev_lmom_param)
8974
# calculate and plot the pdf
90-
pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True)
91-
cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True)
75+
pdf, fig, ax = gev_dist.pdf(gev_lmom_param, plot_figure=True)
76+
cdf, _, _ = gev_dist.cdf(gev_lmom_param, plot_figure=True)
9277
#%%
9378
time_series1.sort()
9479
# calculate the F (non-exceedance probability based on Weibull)
95-
cdf_Weibul = PlottingPosition.weibul(time_series1)
96-
T = PlottingPosition.weibul(time_series1, option=2)
80+
cdf_weibul = PlottingPosition.weibul(time_series1)
81+
T = PlottingPosition.weibul(time_series1, return_period=True)
9782
# The theoretical_estimate method calculates the theoretical values based on the GEV distribution
98-
Qth = Gevdist.theporeticalEstimate(shape, loc, scale, cdf_Weibul)
83+
Qth = gev_dist.theoretical_estimate(gev_lmom_param, cdf_weibul)
9984

10085
func = GEV.ci_func
101-
upper, lower = Gevdist.confidenceInterval(
102-
shape,
103-
loc,
104-
scale,
105-
F=cdf_Weibul,
86+
upper, lower = gev_dist.confidence_interval(
87+
gev_lmom_param,
88+
prob_non_exceed=cdf_weibul,
10689
alpha=0.1,
10790
statfunction=func,
10891
n_samples=len(time_series1),
92+
method="lmoments",
10993
)
110-
#%%
94+
# %%
11195
"""
11296
calculate the confidence interval using the boot strap method directly
11397
"""
114-
CI = ConfidenceInterval.BootStrap(
98+
CI = ConfidenceInterval.boot_strap(
11599
time_series1,
116100
statfunction=func,
117-
gevfit=Param_dist,
101+
gevfit=gev_lmom_param,
118102
n_samples=len(time_series1),
119-
F=cdf_Weibul,
103+
F=cdf_weibul,
104+
method="lmoments",
120105
)
121-
LB = CI["LB"]
122-
UB = CI["UB"]
123-
#%%
124-
fig, ax = Gevdist.probapilityPlot(
125-
shape, loc, scale, cdf_Weibul, func=func, n_samples=len(time_series1)
106+
LB = CI["lb"]
107+
UB = CI["ub"]
108+
# %%
109+
fig, ax = gev_dist.probability_plot(
110+
gev_lmom_param, cdf_weibul, func=func, n_samples=len(time_series1)
126111
)

examples/SensitivityAnalysis.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
"""Created on Sun Jun 21 01:55:25 2020.
2-
3-
@author: mofarrag
4-
"""
51
# import os
62
Path = "F:/01Algorithms/Hydrology/HAPI/examples"
73
import matplotlib
@@ -19,15 +15,14 @@
1915

2016
Parameterpath = Path + "/data/Lumped/Coello_Lumped2021-03-08_muskingum.txt"
2117
Path = Path + "/data/Lumped/"
22-
#%%
23-
### meteorological data
18+
# %% meteorological data
2419
start = "2009-01-01"
2520
end = "2011-12-31"
2621
name = "Coello"
2722
Coello = Catchment(name, start, end)
2823
Coello.ReadLumpedInputs(Path + "meteo_data-MSWEP.csv")
2924

30-
### Basic_inputs
25+
# %% Basic_inputs
3126
# catchment area
3227
CatArea = 1530
3328
# temporal resolution
@@ -67,11 +62,11 @@
6762

6863
Qobs = Coello.QGauges[Coello.QGauges.columns[0]]
6964

70-
Metrics["RMSE"] = PC.RMSE(Qobs, Coello.Qsim["q"])
71-
Metrics["NSE"] = PC.NSE(Qobs, Coello.Qsim["q"])
72-
Metrics["NSEhf"] = PC.NSEHF(Qobs, Coello.Qsim["q"])
73-
Metrics["KGE"] = PC.KGE(Qobs, Coello.Qsim["q"])
74-
Metrics["WB"] = PC.WB(Qobs, Coello.Qsim["q"])
65+
Metrics["RMSE"] = PC.rmse(Qobs, Coello.Qsim["q"])
66+
Metrics["NSE"] = PC.nse(Qobs, Coello.Qsim["q"])
67+
Metrics["NSEhf"] = PC.nse_hf(Qobs, Coello.Qsim["q"])
68+
Metrics["KGE"] = PC.kge(Qobs, Coello.Qsim["q"])
69+
Metrics["WB"] = PC.wb(Qobs, Coello.Qsim["q"])
7570

7671
print("RMSE= " + str(round(Metrics["RMSE"], 2)))
7772
print("NSE= " + str(round(Metrics["NSE"], 2)))
@@ -120,7 +115,7 @@ def WrapperType1(Randpar, Route, RoutingFn, Qobs):
120115
Coello.Parameters = Randpar
121116

122117
Run.RunLumped(Coello, Route, RoutingFn)
123-
rmse = PC.RMSE(Qobs, Coello.Qsim["q"])
118+
rmse = PC.rmse(Qobs, Coello.Qsim["q"])
124119
return rmse
125120

126121

@@ -129,7 +124,7 @@ def WrapperType2(Randpar, Route, RoutingFn, Qobs):
129124
Coello.Parameters = Randpar
130125

131126
Run.RunLumped(Coello, Route, RoutingFn)
132-
rmse = PC.RMSE(Qobs, Coello.Qsim["q"])
127+
rmse = PC.rmse(Qobs, Coello.Qsim["q"])
133128
return rmse, Coello.Qsim["q"]
134129

135130

examples/data/pdf_obs.txt

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"ams" "pdf"
2+
7550.433 0.000154642269711567
3+
8651.166 0.000117765584990689
4+
9380.397 9.33406205964961e-05
5+
9071.717 0.000103377700612861
6+
8354.173 0.000128159692079377
7+
6161.421 0.000170527780685648
8+
3544.866 2.77636664143863e-05
9+
7120.425 0.000165513917257198
10+
9298.242 9.59581061243758e-05
11+
4710.178 0.000103421285516972
12+
5380.688 0.000145317667468702
13+
8499.548 0.000123066996435036
14+
5617.157 0.000156011624908839
15+
3742.097 3.7808086513221e-05
16+
10274.362 6.78703210421545e-05
17+
6186.083 0.000170853335852759
18+
4552.435 9.20798127465615e-05
19+
8006.946 0.000140143254028843
20+
6790.353 0.000170872728246691
21+
5455.554 0.000148978662405294
22+
9361.352 9.39436873016158e-05
23+
5709.204 0.00015946742554235
24+
16825.49888 5.00789723424988e-06
25+
5363.839 0.000144459876612251
26+
12514.28 2.82532814190863e-05
27+
7225.553 0.000163202752532002
28+
12890.64 2.42739862374255e-05
29+
8045.471 0.000138838408679289
30+
18320.555 2.79830832979157e-06
31+
7339.839 0.000160415133366504
32+
7500.92 0.00015606542100079
33+
11532.695 4.18156642982465e-05
34+
6639.799 0.000172186064368232
35+
6341.556 0.000172279119179583
36+
6533.038 0.000172622478650322
37+
5113.036 0.000130326486583241
38+
3854.754 4.4283345872923e-05
39+
5477.022 0.000149982281171861
40+
7310.343 0.000161160143281715
41+
4670.991 0.000100626037052284
42+
10346.79 6.60642242605733e-05
43+
9039.356 0.000104459733167728
44+
6480.619 0.000172676431635955
45+
8425.722 0.000125654190498816
46+
10647.809 5.8974972602665e-05
47+
7565.567 0.000154199994168006
48+
5456.57 0.000149026629329571
49+
8713.613 0.000115593076636878
50+
9305.365 9.57295320374143e-05
51+
5948.87 0.000166548208934179
52+
7638.778 0.000152015488523764
53+
8576.919 0.000120358190580951
54+
4959.301 0.000120553356314615
55+
12426.522 2.9268918540962e-05
56+
6870.319 0.000169868029908526
57+
7838.047 0.000145744442504979
58+
6414.488 0.000172587367846412
59+
13121.738 2.21092580728735e-05
60+
9801.248 8.06319421593433e-05
61+
8745.997 0.00011447011440418

0 commit comments

Comments
 (0)