From 9b4290d288135670b00b69ce2f50ad201fcca313 Mon Sep 17 00:00:00 2001 From: Soumyadip Sarkar Date: Thu, 19 Feb 2026 17:06:08 +0530 Subject: [PATCH 1/3] Add test coverage for IPW helpers --- CHANGELOG.md | 2 ++ balance/weighting_methods/ipw.py | 3 -- tests/test_ipw.py | 55 ++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6967dc814..15ccc7b85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ - **Expanded warning coverage for `Sample.from_frame()` ID inference** - Added assertions that validate all three expected warnings are emitted when inferring an `id` column and default weights, including ID guessing, ID string casting, and automatic weight creation. +- **Added focused unit coverage for IPW helpers** + - Added tests for `model_coefs()`, `link_transform()`, and `calc_dev()` to validate behavior for linear/non-linear models, extreme probabilities, and finite 10-fold deviance summaries. # 0.16.0 (2026-02-09) diff --git a/balance/weighting_methods/ipw.py b/balance/weighting_methods/ipw.py index 0a6a15733..a9c5377b6 100644 --- a/balance/weighting_methods/ipw.py +++ b/balance/weighting_methods/ipw.py @@ -30,7 +30,6 @@ logger: logging.Logger = logging.getLogger(__package__) -# TODO: Add tests for model_coefs() # TODO: Improve interpretability of model coefficients, as variables are no longer zero-centered. def model_coefs( model: ClassifierMixin, @@ -94,7 +93,6 @@ def model_coefs( } -# TODO: Add tests for link_transform() def link_transform(pred: np.ndarray) -> np.ndarray: """Transforms probabilities into log odds (link function). 
@@ -184,7 +182,6 @@ def _convert_to_dense_array( return X_matrix -# TODO: Add tests for calc_dev() def calc_dev( X_matrix: csr_matrix, y: np.ndarray, diff --git a/tests/test_ipw.py b/tests/test_ipw.py index 7c6a7004b..b5946a6b0 100644 --- a/tests/test_ipw.py +++ b/tests/test_ipw.py @@ -22,6 +22,7 @@ from balance.sample_class import Sample from balance.weighting_methods import ipw as balance_ipw from packaging.version import Version +from scipy.sparse import csr_matrix from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier from sklearn.linear_model import LogisticRegression from sklearn.metrics import log_loss @@ -37,6 +38,60 @@ class TestIPW( ): """Test suite for Inverse Propensity Weighting (IPW) functionality.""" + def test_model_coefs_with_and_without_linear_coefficients(self) -> None: + """model_coefs should expose coefficients for linear models and empty output otherwise.""" + + X = np.array([[0.0, 1.0], [1.0, 0.0], [2.0, 1.0], [3.0, 0.0]]) + y = np.array([0, 0, 1, 1]) + + linear_model = LogisticRegression(random_state=0, max_iter=200).fit(X, y) + coefs = balance_ipw.model_coefs( + linear_model, + feature_names=["feature_a", "feature_b"], + )["coefs"] + + self.assertIn("intercept", coefs.index) + self.assertIn("feature_a", coefs.index) + self.assertIn("feature_b", coefs.index) + self.assertEqual(len(coefs), 3) + self.assertTrue(np.isfinite(coefs.to_numpy()).all()) + + tree_model = DecisionTreeClassifier(random_state=0).fit(X, y) + tree_coefs = balance_ipw.model_coefs(tree_model)["coefs"] + self.assertTrue(tree_coefs.empty) + + def test_link_transform_handles_midpoint_and_extremes(self) -> None: + """link_transform should return finite log-odds for probabilities in [0, 1].""" + + transformed = balance_ipw.link_transform(np.array([0.5, 0.0, 1.0])) + self.assertAlmostEqual(transformed[0], 0.0, places=10) + self.assertTrue(np.isfinite(transformed[1])) + self.assertTrue(np.isfinite(transformed[2])) + self.assertLess(transformed[1], 0) + 
self.assertGreater(transformed[2], 0) + + def test_calc_dev_returns_finite_mean_and_sd(self) -> None: + """calc_dev should run 10-fold CV and return finite deviance summary.""" + + rng = np.random.RandomState(42) + X = rng.normal(size=(40, 2)) + y = np.array([0] * 20 + [1] * 20) + foldids = np.tile(np.arange(10), 4) + model_weights = np.ones(40) + + dev_mean, dev_sd = balance_ipw.calc_dev( + csr_matrix(X), + y, + LogisticRegression(random_state=0, max_iter=300), + model_weights, + foldids, + ) + + self.assertTrue(np.isfinite(dev_mean)) + self.assertTrue(np.isfinite(dev_sd)) + self.assertGreaterEqual(dev_mean, 0.0) + self.assertGreaterEqual(dev_sd, 0.0) + def test_ipw_weights_order(self) -> None: """Test that IPW assigns correct relative weight ordering. From 8f8440d876846f37ab87bcf8fb85f47db6aa9297 Mon Sep 17 00:00:00 2001 From: Soumyadip Sarkar Date: Thu, 19 Feb 2026 17:17:31 +0530 Subject: [PATCH 2/3] Implement suggestions --- tests/test_ipw.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tests/test_ipw.py b/tests/test_ipw.py index b5946a6b0..b8617cc0e 100644 --- a/tests/test_ipw.py +++ b/tests/test_ipw.py @@ -38,28 +38,6 @@ class TestIPW( ): """Test suite for Inverse Propensity Weighting (IPW) functionality.""" - def test_model_coefs_with_and_without_linear_coefficients(self) -> None: - """model_coefs should expose coefficients for linear models and empty output otherwise.""" - - X = np.array([[0.0, 1.0], [1.0, 0.0], [2.0, 1.0], [3.0, 0.0]]) - y = np.array([0, 0, 1, 1]) - - linear_model = LogisticRegression(random_state=0, max_iter=200).fit(X, y) - coefs = balance_ipw.model_coefs( - linear_model, - feature_names=["feature_a", "feature_b"], - )["coefs"] - - self.assertIn("intercept", coefs.index) - self.assertIn("feature_a", coefs.index) - self.assertIn("feature_b", coefs.index) - self.assertEqual(len(coefs), 3) - self.assertTrue(np.isfinite(coefs.to_numpy()).all()) - - tree_model = DecisionTreeClassifier(random_state=0).fit(X, y) - 
tree_coefs = balance_ipw.model_coefs(tree_model)["coefs"] - self.assertTrue(tree_coefs.empty) - def test_link_transform_handles_midpoint_and_extremes(self) -> None: """link_transform should return finite log-odds for probabilities in [0, 1].""" From a88c53e26832d31fa206cdb9cc0e424643e51f76 Mon Sep 17 00:00:00 2001 From: Soumyadip Sarkar Date: Thu, 19 Feb 2026 17:19:46 +0530 Subject: [PATCH 3/3] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15ccc7b85..e3b27c838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ - **Expanded warning coverage for `Sample.from_frame()` ID inference** - Added assertions that validate all three expected warnings are emitted when inferring an `id` column and default weights, including ID guessing, ID string casting, and automatic weight creation. - **Added focused unit coverage for IPW helpers** - Added tests for `model_coefs()`, `link_transform()`, and `calc_dev()` to validate behavior for linear/non-linear models, extreme probabilities, and finite 10-fold deviance summaries. + - Added tests for `link_transform()` and `calc_dev()` to validate behavior for extreme probabilities and finite 10-fold deviance summaries. # 0.16.0 (2026-02-09)