Skip to content

Commit eb52d28

Browse files
authored
feat(stats): improve chisquare test with p-value and small bin merging (#119)
feat(stats): improve chisquare test with p-value and small bin merging - Added `merge_small_bins` utility with tests - Updated `chisquare` method to calculate p-value and merge small bins for more reliable results - Refactored method signature and simplified error handling - Integrated `chisquare` call into all `fit_model` methods - Added documentation for `utils` module and updated changelog and PR template ref: #110
1 parent 325dd88 commit eb52d28

File tree

10 files changed

+687
-394
lines changed

10 files changed

+687
-394
lines changed

.github/pull_request_template.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Check relevant points.
2929
# Checklist:
3030

3131
- [ ] updated version number in pyproject.toml
32-
- [ ] added changes to History.rst
32+
- [ ] added changes to docs/change-log.md
3333
- [ ] updated the latest version in README file
3434
- [ ] I have added tests that prove my fix is effective or that my feature works
3535
- [ ] New and existing unit tests pass locally with my changes

docs/change-log.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 0.6.3 (2025-08-08)
4+
##### Distributions
5+
* fix the `chisquare` method for all distributions.
6+
37
## 0.6.2 (2025-07-31)
48
##### Docs
59
* add complete documentation for all modules.

docs/reference/utils-module.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
##### utils module
2+
3+
::: statista.utils
4+
options:
5+
show_root_heading: true
6+
show_source: true
7+
heading_level: 3
8+
members_order: source

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ nav:
111111
- Plot: reference/plot-class.md
112112
- Sensitivity Analysis: reference/sensitivity-class.md
113113
- Tools: reference/tools-module.md
114+
- utils: reference/utils-module.md
114115
- Examples:
115116
- sensitivity-analysis:
116117
- Sensitivity Analysis: notebook/sensitivity-analysis/scs-cn.ipynb

poetry.lock

Lines changed: 391 additions & 369 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "statista"
3-
version = "0.6.2"
3+
version = "0.6.3"
44
description = "statistics package"
55
readme = "README.md"
66
requires-python = ">=3.11,<4"
@@ -76,7 +76,7 @@ Repository = "https://github.com/Serapieum-of-alex/statista"
7676

7777

7878
[tool.pytest.ini_options]
79-
# Anything you would have put in addopts/pytest.ini
79+
8080
addopts = [
8181
"--cov",
8282
"--cov-branch",
@@ -88,6 +88,9 @@ testpaths = ["tests"]
8888
markers = [
8989
"slow: mark test as slow.",
9090
"fast: mark test as fast.",
91+
"e2e: marks tests as e2e (deselect with '-m \"not e2e\"')",
92+
"unit: marks tests as unit (deselect with '-m \"not unit\"')",
93+
"integration: marks tests as integration (deselect with '-m \"not integration\"')",
9194
]
9295

9396
[tool.flake8]

src/statista/distributions.py

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
from statista.confidence_interval import ConfidenceInterval
1616
from statista.parameters import Lmoments
1717
from statista.plot import Plot
18-
from statista.tools import Tools as st
18+
from statista.utils import merge_small_bins
19+
1920

2021
ninf = 1e-5
2122

@@ -538,17 +539,25 @@ def ks(self) -> tuple:
538539
return test.statistic, test.pvalue
539540

540541
@abstractmethod
541-
def chisquare(self) -> Union[tuple, None]:
542+
def chisquare(self) -> Tuple[float, float]:
542543
"""Perform the Chi-square test for goodness of fit.
543544
544-
This method tests whether the data follows the fitted distribution using
545-
the Chi-square test. The test compares the observed frequencies with the
545+
- `chisquare test` refers to Pearson’s chi square goodness of fit test. It is designed for
546+
categorical/count data: you observe how many points fall into each bin and compare those counts with the
547+
frequencies expected under some hypothesis
548+
549+
This method tests whether the data follows the fitted distribution using the Chi-square test.
550+
The test compares the observed frequencies (number of values in each category/histogram bin) with the
546551
expected frequencies under the fitted distribution.
547552
548553
Returns:
549554
Tuple containing:
550555
- Chi-square statistic: The test statistic measuring the difference between
551556
observed and expected frequencies.
557+
The χ² statistic is simply a measure of how far your observed counts deviate from the counts you would
558+
expect if the fitted distribution were correct. For each bin 𝑖 we compute the squared difference
559+
between the observed count 𝑂𝑖 and the expected count 𝐸𝑖, scaled by 𝐸𝑖, and then sum over all bins:
560+
552561
- p-value: The probability of observing a Chi-square statistic as extreme as the one calculated,
553562
assuming the null hypothesis is true (data follows the distribution).
554563
If p-value < significance level (typically 0.05), reject the null hypothesis.
@@ -562,15 +571,17 @@ def chisquare(self) -> Union[tuple, None]:
562571
"The Value of parameters is unknown. Please use 'fit_model' to estimate the distribution parameters"
563572
)
564573

565-
qth = self.inverse_cdf(self.cdf_weibul, self.parameters)
566-
try:
567-
test = chisquare(st.standardize(qth), st.standardize(self.data))
568-
print("-----chisquare Test-----")
569-
print("Statistic = " + str(test.statistic))
570-
print("P value = " + str(test.pvalue))
571-
return test.statistic, test.pvalue
572-
except Exception as e:
573-
print(e)
574+
bin_edges = np.histogram_bin_edges(self.data, bins="sturges")
575+
obs_counts, _ = np.histogram(self.data, bins=bin_edges)
576+
577+
expected_prob = np.diff(self._cdf_eq(bin_edges, self.parameters))
578+
expected_counts = expected_prob * len(self.data)
579+
580+
# Pearson’s χ² test assumes each expected count is sufficiently large (at least about 5); otherwise the asymptotic χ² approximation is unreliable
581+
merged_obs, merged_exp = merge_small_bins(obs_counts, expected_counts)
582+
583+
test = chisquare(merged_obs, f_exp=merged_exp, ddof=len(self.parameters))
584+
return test.statistic, test.pvalue
574585

575586
def confidence_interval(
576587
self,
@@ -1345,8 +1356,6 @@ def fit_model(
13451356
Statistic = 0.019
13461357
Accept Hypothesis
13471358
P value = 0.9937026761524456
1348-
1349-
13501359
>>> print(parameters)
13511360
{'loc': np.float64(0.010101355750222706), 'scale': 1.0313042643102108}
13521361
@@ -1419,7 +1428,7 @@ def fit_model(
14191428

14201429
if test:
14211430
self.ks()
1422-
# self.chisquare()
1431+
self.chisquare()
14231432

14241433
return param
14251434

@@ -1523,8 +1532,7 @@ def _inv_cdf(
15231532
scale = parameters.get("scale")
15241533
if scale <= 0:
15251534
raise ValueError(SCALE_PARAMETER_ERROR)
1526-
# the main equation from scipy
1527-
# Qth = loc - scale * (np.log(-np.log(cdf)))
1535+
15281536
qth = gumbel_r.ppf(cdf, loc=loc, scale=scale)
15291537

15301538
return qth
@@ -2330,6 +2338,7 @@ def fit_model(
23302338

23312339
if test:
23322340
self.ks()
2341+
self.chisquare()
23332342

23342343
return param
23352344

@@ -3049,6 +3058,7 @@ def fit_model(
30493058

30503059
if test:
30513060
self.ks()
3061+
self.chisquare()
30523062

30533063
return param
30543064

@@ -3370,6 +3380,7 @@ def fit_model(
33703380

33713381
if test:
33723382
self.ks()
3383+
self.chisquare()
33733384

33743385
return param
33753386

src/statista/utils.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
from typing import List, Tuple

import numpy as np


def merge_small_bins(
    bin_count_observed: List[float], bin_count_fitted_data: List[float]
) -> Tuple[np.ndarray, np.ndarray]:
    """Merge small bins for goodness-of-fit tests (e.g., chi-square).

    This utility merges adjacent "small" bins (those whose expected count is < 5)
    starting from the right-most bin and moving left, accumulating small bins
    until a large (>= 5) bin is encountered, at which point the accumulation is
    merged into that bin. If the left edge is reached with a remaining
    accumulation that was never merged into a large bin, the accumulation is
    appended as its own bin.

    After merging, the expected counts are rescaled so that their sum equals the
    total observed count (required by Pearson's chi-square test), preserving the
    expected proportions within the merged structure.

    Args:
        bin_count_observed (List[float]):
            Observed counts per original bin. Must be the same length as
            ``bin_count_fitted_data``. Values should be non-negative.
        bin_count_fitted_data (List[float]):
            Expected (model-fitted) counts per original bin. Must be the same
            length as ``bin_count_observed``. Values should be non-negative.

    Returns:
        Tuple[np.ndarray, np.ndarray]:
            Two 1D numpy arrays ``(merged_observed, merged_expected)`` in
            low-to-high bin order after merging and rescaling. The two arrays
            are the same length, and ``merged_expected.sum() ==
            merged_observed.sum()``.

    Raises:
        ZeroDivisionError: If the total expected count across merged bins is 0,
            rescaling cannot be performed. This can happen if all expected
            counts are zero. (Raised explicitly: numpy float division by zero
            would otherwise only emit a warning and produce nan/inf.)
        ValueError: If the input sequences have different lengths.

    Notes:
        - The function assumes a one-to-one correspondence of observed and
          expected bins. If lengths differ, only a partial zip would occur; to
          avoid silent truncation a ``ValueError`` is raised.
        - Merging proceeds from right to left and the result is then reversed
          back to low-to-high order.
        - The "< 5" rule is a common heuristic for chi-square tests to ensure
          adequate expected counts per bin.
        - Accumulated observed counts are never dropped: even when the
          accumulated *expected* count is exactly 0 (all-zero expected tail),
          the accumulation is still flushed or merged into the neighbouring
          large bin.

    Examples:
        - Merge tail small bins with the nearest large bin on the left

          ```python
          >>> from statista.utils import merge_small_bins
          >>> merge_small_bins([10, 3, 2], [10, 3, 2])
          (array([15]), array([15.]))

          ```

        - No merging when all expected counts are >= 5

          ```python
          >>> merge_small_bins([10, 20, 30], [12, 18, 30])
          (array([10, 20, 30]), array([12., 18., 30.]))

          ```

        - Accumulated leftmost small bins remain as their own bin if no large bin is found to the left

          ```python
          >>> merge_small_bins([10, 10], [4, 6])
          (array([10, 10]), array([ 8., 12.]))

          ```

        - Expected counts are rescaled to match the observed total while preserving proportions

          ```python
          >>> merge_small_bins([5, 5, 10], [2, 3, 5])
          (array([10, 10]), array([10., 10.]))

          ```
    """
    if len(bin_count_observed) != len(bin_count_fitted_data):
        raise ValueError("bin_count_observed and bin_count_fitted_data must have the same length.")

    # Merge tail bins whose expected counts are < 5
    merged_obs = []
    merged_exp = []
    accum_obs = 0
    accum_exp = 0

    # Work from the rightmost bin backwards, accumulating small bins until a
    # large (expected >= 5) bin is encountered.
    for observed, expected in reversed(list(zip(bin_count_observed, bin_count_fitted_data))):
        if expected < 5:
            accum_obs += observed
            accum_exp += expected
        elif accum_obs > 0 or accum_exp > 0:
            # Combine the accumulated small bins with this large bin.
            # The check includes accum_obs so that observed counts in bins with
            # an expected count of exactly 0 are not silently discarded.
            merged_obs.append(accum_obs + observed)
            merged_exp.append(accum_exp + expected)
            accum_obs = accum_exp = 0
        else:
            # keep this bin separate
            merged_obs.append(observed)
            merged_exp.append(expected)

    # Append any remaining accumulated bins (again checking observed counts so
    # a trailing all-zero-expected accumulation is not dropped).
    if accum_obs > 0 or accum_exp > 0:
        merged_obs.append(accum_obs)
        merged_exp.append(accum_exp)

    # Reverse the order back to low→high
    merged_obs = np.array(merged_obs[::-1])
    merged_exp = np.array(merged_exp[::-1]).astype(float)

    # Rescale expected counts so they sum to the total number of observations.
    # This is required for Pearson's χ² test. numpy would divide by a zero
    # total with only a RuntimeWarning, so raise explicitly as documented.
    total_expected = merged_exp.sum()
    if total_expected == 0:
        raise ZeroDivisionError(
            "Total expected count is zero; cannot rescale expected frequencies."
        )
    merged_exp *= merged_obs.sum() / total_expected
    return merged_obs, merged_exp

tests/distribution/test_distributions.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,9 @@ def test_chisquare(
140140
):
141141
param = gum_dist_parameters[dist_estimation_parameters_ks]
142142
dist = Gumbel(time_series2, param)
143-
dstatic, _ = dist.chisquare()
144-
assert dstatic == pytest.approx(-0.2813945052127964)
143+
dstatic, p_value = dist.chisquare()
144+
assert dstatic == pytest.approx(0.5768408126308443)
145+
assert p_value == pytest.approx(0.7494464539783021)
145146

146147
def test_pdf(
147148
self,
@@ -288,8 +289,8 @@ def test_gev_chisquare(
288289
):
289290
param = gev_dist_parameters[dist_estimation_parameters_ks]
290291
dist = GEV(time_series1, param)
291-
dstatic, _ = dist.chisquare()
292-
assert dstatic == pytest.approx(-22.906818156545253)
292+
dstatic, p_value = dist.chisquare()
293+
assert dstatic == pytest.approx(1.745019092902356)
293294

294295
def test_gev_pdf(
295296
self,

0 commit comments

Comments
 (0)