Skip to content

Commit d5d66e3

Browse files
Mamba413claudehappy-otter
committed
fix: use weights.sum() as denominator for weighted means in Normalize
Replace double(n) with weights.sum() when computing weighted means in Normalize() and Normalize3() so non-uniform sample_weight values produce the correct WLS centering. For uniform weights (default), sum_w == n so behavior is unchanged. Fixes sklearn check_sample_weight_equivalence_on_dense_data for LinearRegression. Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
1 parent 4f36a1d commit d5d66e3

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

python/abess/decomposition.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,13 @@ def _more_tags(self):
         return {'requires_y': False}

     def __sklearn_tags__(self):
+        try:
+            from sklearn.utils._tags import TransformerTags
+        except ImportError:
+            from sklearn.utils.estimator_tags import TransformerTags
         tags = super().__sklearn_tags__()
         tags.input_tags.sparse = True
+        tags.transformer_tags = TransformerTags()
         return tags

     def transform(self, X):

src/normalize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,12 @@ void Normalize(Eigen::MatrixXd &X, Eigen::VectorXd &y, Eigen::VectorXd &weights,
                Eigen::VectorXd &normx) {
     int n = X.rows();
     int p = X.cols();
+    double sum_w = weights.sum();
     Eigen::VectorXd tmp(n);
     for (int i = 0; i < p; i++) {
-        meanx(i) = weights.dot(X.col(i)) / double(n);
+        meanx(i) = weights.dot(X.col(i)) / sum_w;
     }
-    meany = (y.dot(weights)) / double(n);
+    meany = (y.dot(weights)) / sum_w;
     for (int i = 0; i < p; i++) {
         X.col(i) = X.col(i).array() - meanx(i);
     }
@@ -61,11 +62,12 @@ void Normalize(Eigen::MatrixXd &X, Eigen::MatrixXd &y, Eigen::VectorXd &weights,
                Eigen::VectorXd &meany, Eigen::VectorXd &normx) {
     int n = X.rows();
     int p = X.cols();
+    double sum_w = weights.sum();
     Eigen::VectorXd tmp(n);
     for (int i = 0; i < p; i++) {
-        meanx(i) = weights.dot(X.col(i)) / double(n);
+        meanx(i) = weights.dot(X.col(i)) / sum_w;
     }
-    meany = y.transpose() * weights / double(n);
+    meany = y.transpose() * weights / sum_w;
     for (int i = 0; i < p; i++) {
         X.col(i) = X.col(i).array() - meanx(i);
     }
@@ -91,9 +93,10 @@ void Normalize(Eigen::MatrixXd &X, Eigen::MatrixXd &y, Eigen::VectorXd &weights,
 void Normalize3(Eigen::MatrixXd &X, Eigen::VectorXd &weights, Eigen::VectorXd &meanx, Eigen::VectorXd &normx) {
     int n = X.rows();
     int p = X.cols();
+    double sum_w = weights.sum();
     Eigen::VectorXd tmp(n);
     for (int i = 0; i < p; i++) {
-        meanx(i) = weights.dot(X.col(i)) / double(n);
+        meanx(i) = weights.dot(X.col(i)) / sum_w;
     }
     for (int i = 0; i < p; i++) {
         X.col(i) = X.col(i).array() - meanx(i);
     }

0 commit comments

Comments (0)