Skip to content

Commit d5d66e3

Browse files
Mamba413claudehappy-otter
committed
fix: use weights.sum() as denominator for weighted means in Normalize
Replace double(n) with weights.sum() when computing weighted means in Normalize() and Normalize3() so non-uniform sample_weight values produce the correct WLS centering. For uniform weights (default), sum_w == n so behavior is unchanged. Fixes sklearn check_sample_weight_equivalence_on_dense_data for LinearRegression. Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
1 parent 4f36a1d commit d5d66e3

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

python/abess/decomposition.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,13 @@ def _more_tags(self):
         return {'requires_y': False}

     def __sklearn_tags__(self):
+        try:
+            from sklearn.utils._tags import TransformerTags
+        except ImportError:
+            from sklearn.utils.estimator_tags import TransformerTags
         tags = super().__sklearn_tags__()
         tags.input_tags.sparse = True
+        tags.transformer_tags = TransformerTags()
         return tags

     def transform(self, X):

src/normalize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,12 @@ void Normalize(Eigen::MatrixXd &X, Eigen::VectorXd &y, Eigen::VectorXd &weights,
                Eigen::VectorXd &normx) {
     int n = X.rows();
     int p = X.cols();
+    double sum_w = weights.sum();
     Eigen::VectorXd tmp(n);
     for (int i = 0; i < p; i++) {
-        meanx(i) = weights.dot(X.col(i)) / double(n);
+        meanx(i) = weights.dot(X.col(i)) / sum_w;
     }
-    meany = (y.dot(weights)) / double(n);
+    meany = (y.dot(weights)) / sum_w;
     for (int i = 0; i < p; i++) {
         X.col(i) = X.col(i).array() - meanx(i);
     }
@@ -61,11 +62,12 @@ void Normalize(Eigen::MatrixXd &X, Eigen::MatrixXd &y, Eigen::VectorXd &weights,
                Eigen::VectorXd &meany, Eigen::VectorXd &normx) {
     int n = X.rows();
     int p = X.cols();
+    double sum_w = weights.sum();
     Eigen::VectorXd tmp(n);
     for (int i = 0; i < p; i++) {
-        meanx(i) = weights.dot(X.col(i)) / double(n);
+        meanx(i) = weights.dot(X.col(i)) / sum_w;
     }
-    meany = y.transpose() * weights / double(n);
+    meany = y.transpose() * weights / sum_w;
     for (int i = 0; i < p; i++) {
         X.col(i) = X.col(i).array() - meanx(i);
     }
@@ -91,9 +93,10 @@ void Normalize(Eigen::MatrixXd &X, Eigen::MatrixXd &y, Eigen::VectorXd &weights,
 void Normalize3(Eigen::MatrixXd &X, Eigen::VectorXd &weights, Eigen::VectorXd &meanx, Eigen::VectorXd &normx) {
     int n = X.rows();
     int p = X.cols();
+    double sum_w = weights.sum();
     Eigen::VectorXd tmp(n);
     for (int i = 0; i < p; i++) {
-        meanx(i) = weights.dot(X.col(i)) / double(n);
+        meanx(i) = weights.dot(X.col(i)) / sum_w;
     }
     for (int i = 0; i < p; i++) {
         X.col(i) = X.col(i).array() - meanx(i);
     }

0 commit comments

Comments (0)