@@ -55,6 +55,11 @@ df = pd.DataFrame({
5555 ' age' : [25 , 30 , 35 , 40 , 35 , 45 , 50 ]
5656})
5757
58+ # Note: As of v0.1.3, the outlier-removal ("remover") methods return the
59+ # cleaner instance itself so that calls can be chained. Access the cleaned
60+ # data via `cleaner.clean_df` and the removal details via
61+ # `cleaner.outlier_info`.
62+
5863# Initialize StatClean
5964cleaner = StatClean(df)
6065
@@ -73,21 +78,23 @@ print(f"Outliers removed: {info['income']['outliers_removed']}")
7378``` python
7479# Grubbs' test for outliers with statistical significance
7580result = cleaner.grubbs_test(' income' , alpha = 0.05 )
76- print (f " Test statistic: { result[' test_statistic ' ]:.3f } " )
81+ print (f " Test statistic: { result[' statistic ' ]:.3f } " )
7782print (f " P-value: { result[' p_value' ]:.6f } " )
78- print (f " Outlier detected: { result[' outlier_detected ' ]} " )
83+ print (f " Outlier detected: { result[' is_outlier ' ]} " )
7984
8085# Dixon's Q-test for small samples
8186result = cleaner.dixon_q_test(' age' , alpha = 0.05 )
82- print (f " Q statistic: { result[' q_statistic ' ]:.3f } " )
87+ print (f " Q statistic: { result[' statistic ' ]:.3f } " )
8388print (f " Critical value: { result[' critical_value' ]:.3f } " )
8489```
8590
8691### Multivariate Outlier Detection
8792
8893``` python
8994# Mahalanobis distance for multivariate outliers
90- outliers = cleaner.detect_outliers_mahalanobis([' income' , ' age' ], chi2_threshold = 0.95 )
95+ # chi2_threshold is either a quantile level in (0, 1] (e.g. 0.95 = 95th percentile of the chi-square distribution) or an absolute chi-square cutoff
96+ # use_shrinkage=True uses Ledoit–Wolf shrinkage covariance if scikit-learn is installed
97+ outliers = cleaner.detect_outliers_mahalanobis([' income' , ' age' ], chi2_threshold = 0.95 , use_shrinkage = True )
9198print (f " Multivariate outliers detected: { outliers.sum()} " )
9299
93100# Remove multivariate outliers
@@ -99,12 +106,12 @@ cleaned_df = cleaner.remove_outliers_mahalanobis(['income', 'age'])
99106``` python
100107# Automatic transformation recommendation
101108recommendation = cleaner.recommend_transformation(' income' )
102- print (f " Recommended transformation: { recommendation[' best_transformation ' ]} " )
103- print (f " Improvement in skewness: { recommendation[' skewness_improvement ' ]:.3f } " )
109+ print (f " Recommended transformation: { recommendation[' recommended_method ' ]} " )
110+ print (f " Improvement in skewness: { recommendation[' expected_improvement ' ]:.3f } " )
104111
105112# Apply Box-Cox transformation
106- transformed_df = cleaner.transform_boxcox(' income' )
107- print (f " Optimal lambda: { transformed_df [' lambda' ]:.3f } " )
113+ _, info = cleaner.transform_boxcox(' income' )
114+ print (f " Optimal lambda: { info [' lambda' ]:.3f } " )
108115
109116# Method chaining for complex workflows
110117result = (cleaner
@@ -263,10 +270,19 @@ for feature in features:
263270- ** seaborn** : ≥0.11.0
264271- ** scipy** : ≥1.6.0 (for statistical tests)
265272- ** tqdm** : ≥4.60.0 (for progress bars)
266- - ** scikit-learn** : ≥0.24.0 (optional, for examples )
273+ - ** scikit-learn** : ≥0.24.0 (optional, for shrinkage covariance in Mahalanobis )
267274
268275## Changelog
269276
277+ ### Version 0.1.3 (2025-08-08)
278+
279+ - Align docs/examples with actual API: remover methods return ` self ` ; use ` cleaner.clean_df ` and ` cleaner.outlier_info ` .
280+ - Grubbs/Dixon result keys clarified: ` statistic ` , ` is_outlier ` .
281+ - Mahalanobis `chi2_threshold` accepts either a quantile level in (0, 1] (e.g. 0.95 for the 95th percentile) or an absolute chi-square cutoff; added `use_shrinkage` option.
282+ - Transformations preserve NaNs; Box-Cox computed on non-NA values only.
283+ - Seaborn plotting calls updated for compatibility; analysis functions made NaN-safe.
284+ - Added GitHub Actions workflow to publish to PyPI on releases.
285+
270286### Version 0.1.0 (2025-08-06)
271287
272288** 🎉 Initial Release of StatClean**
@@ -319,4 +335,30 @@ MIT License
319335
320336---
321337
322- * StatClean: Where statistical rigor meets practical data science.*
338+ * StatClean: Where statistical rigor meets practical data science.*
339+
340+ ## Development: Run Tests in Headless Mode and Capture Logs
341+
342+ ``` bash
343+ # Ensure a headless matplotlib backend and run tests quietly
344+ export MPLBACKEND=Agg
345+ pytest -q
346+
347+ # Save a timestamped test log (example)
348+ LOG=cursor_logs/test_log.md
349+ mkdir -p cursor_logs
350+ printf '==== %s ====\n' "$(date)" >> "$LOG"
351+ MPLBACKEND=Agg pytest -q 2>&1 | tee -a " $LOG "
352+ ```
353+
354+ ## Continuous Delivery: Publish to PyPI (Trusted Publisher)
355+
356+ This repository includes a GitHub Actions workflow using PyPI Trusted Publisher (OIDC).
357+
358+ Setup (one-time on PyPI):
359+ - Add this GitHub repo as a Trusted Publisher in the PyPI project settings.
360+
361+ Release steps:
362+ 1. Bump version in `statclean/__init__.py` and `setup.py` (already `0.1.3`).
363+ 2. Push a tag matching the version, e.g., `git tag v0.1.3 && git push origin v0.1.3`.
364+ 3. The workflow will run tests, build, and publish to PyPI without storing credentials.
0 commit comments