2929)
3030from sklearn .utils ._mask import _get_mask
3131from sklearn .utils ._param_validation import Interval , StrOptions
32- from sklearn .utils .fixes import parse_version , sp_version
3332from sklearn .utils .stats import _weighted_percentile
3433from sklearn .utils .validation import (
3534 FLOAT_DTYPES ,
@@ -460,23 +459,6 @@ def transform(self, X):
460459 # edge case: deal with empty matrix
461460 XP = sparse .csr_matrix ((n_samples , 0 ), dtype = X .dtype )
462461 else :
463- # `scipy.sparse.hstack` breaks in scipy<1.9.2
464- # when `n_output_features_ > max_int32`
465- all_int32 = all (mat .indices .dtype == np .int32 for mat in to_stack )
466- if (
467- sp_version < parse_version ("1.9.2" )
468- and self .n_output_features_ > max_int32
469- and all_int32
470- ):
471- raise ValueError ( # pragma: no cover
472- "In scipy versions `<1.9.2`, the function `scipy.sparse.hstack`"
473- " produces negative columns when:\n 1. The output shape contains"
474- " `n_cols` too large to be represented by a 32bit signed"
475- " integer.\n 2. All sub-matrices to be stacked have indices of"
476- " dtype `np.int32`.\n To avoid this error, either use a version"
477- " of scipy `>=1.9.2` or alter the `PolynomialFeatures`"
478- " transformer to produce fewer than 2^31 output features"
479- )
480462 XP = sparse .hstack (to_stack , dtype = X .dtype , format = "csr" )
481463 elif sparse .issparse (X ) and X .format == "csc" and self ._max_degree < 4 :
482464 return self .transform (X .tocsr ()).tocsc ()
@@ -1022,27 +1004,14 @@ def transform(self, X):
10221004 n_splines = self .bsplines_ [0 ].c .shape [1 ]
10231005 degree = self .degree
10241006
1025- # TODO: Remove this condition, once scipy 1.10 is the minimum version.
1026- # Only scipy >= 1.10 supports design_matrix(.., extrapolate=..).
1027- # The default (implicit in scipy < 1.10) is extrapolate=False.
1028- scipy_1_10 = sp_version >= parse_version ("1.10.0" )
1029- # Note: self.bsplines_[0].extrapolate is True for extrapolation in
1030- # ["periodic", "continue"]
1031- if scipy_1_10 :
1032- use_sparse = self .sparse_output
1033- kwargs_extrapolate = {"extrapolate" : self .bsplines_ [0 ].extrapolate }
1034- else :
1035- use_sparse = self .sparse_output and not self .bsplines_ [0 ].extrapolate
1036- kwargs_extrapolate = dict ()
1037-
10381007 # Note that scipy BSpline returns float64 arrays and converts input
10391008 # x=X[:, i] to c-contiguous float64.
10401009 n_out = self .n_features_out_ + n_features * (1 - self .include_bias )
10411010 if X .dtype in FLOAT_DTYPES :
10421011 dtype = X .dtype
10431012 else :
10441013 dtype = np .float64
1045- if use_sparse :
1014+ if self . sparse_output :
10461015 output_list = []
10471016 else :
10481017 XBS = np .zeros ((n_samples , n_out ), dtype = dtype , order = self .order )
@@ -1071,7 +1040,7 @@ def transform(self, X):
10711040 else : # self.extrapolation in ("continue", "error")
10721041 x = X [:, feature_idx ]
10731042
1074- if use_sparse :
1043+ if self . sparse_output :
10751044 # We replace the nan values in the input column by some
10761045 # arbitrary, in-range, numerical value since
10771046 # BSpline.design_matrix() would otherwise raise on any nan
@@ -1093,8 +1062,11 @@ def transform(self, X):
10931062 elif nan_row_indices .shape [0 ] > 0 :
10941063 x = x .copy () # avoid mutation of input data
10951064 x [nan_row_indices ] = np .nanmin (x )
1065+
1066+ # Note: self.bsplines_[0].extrapolate is True for extrapolation in
1067+ # ["periodic", "continue"]
10961068 XBS_sparse = BSpline .design_matrix (
1097- x , spl .t , spl .k , ** kwargs_extrapolate
1069+ x , spl .t , spl .k , self . bsplines_ [ 0 ]. extrapolate
10981070 )
10991071
11001072 if self .extrapolation == "periodic" :
@@ -1122,7 +1094,7 @@ def transform(self, X):
11221094 XBS [
11231095 nan_row_indices , output_feature_idx : output_feature_idx + 1
11241096 ] = 0
1125- if use_sparse :
1097+ if self . sparse_output :
11261098 XBS_sparse = XBS
11271099
11281100 else : # extrapolation in ("constant", "linear")
@@ -1135,7 +1107,7 @@ def transform(self, X):
11351107 X [:, feature_idx ] <= xmax
11361108 )
11371109
1138- if use_sparse :
1110+ if self . sparse_output :
11391111 outside_range_mask = ~ inside_range_mask
11401112 x = X [:, feature_idx ].copy ()
11411113 # Set to some arbitrary value within the range of values
@@ -1162,7 +1134,7 @@ def transform(self, X):
11621134 # 'continue' is already returned as is by scipy BSplines
11631135 if self .extrapolation == "error" :
11641136 has_nan_output_values = False
1165- if use_sparse :
1137+ if self . sparse_output :
11661138 # Early convert to CSR as the sparsity structure of this
11671139 # block should not change anymore. This is needed to be able
11681140 # to safely assume that `.data` is a 1D array.
@@ -1187,7 +1159,7 @@ def transform(self, X):
11871159
11881160 below_xmin_mask = X [:, feature_idx ] < xmin
11891161 if np .any (below_xmin_mask ):
1190- if use_sparse :
1162+ if self . sparse_output :
11911163 # Note: See comment about SparseEfficiencyWarning above.
11921164 XBS_sparse = XBS_sparse .tolil ()
11931165 XBS_sparse [below_xmin_mask , :degree ] = f_min [:degree ]
@@ -1202,7 +1174,7 @@ def transform(self, X):
12021174
12031175 above_xmax_mask = X [:, feature_idx ] > xmax
12041176 if np .any (above_xmax_mask ):
1205- if use_sparse :
1177+ if self . sparse_output :
12061178 # Note: See comment about SparseEfficiencyWarning above.
12071179 XBS_sparse = XBS_sparse .tolil ()
12081180 XBS_sparse [above_xmax_mask , - degree :] = f_max [- degree :]
@@ -1235,7 +1207,7 @@ def transform(self, X):
12351207 f_min [j ]
12361208 + (X [below_xmin_mask , feature_idx ] - xmin ) * fp_min [j ]
12371209 )
1238- if use_sparse :
1210+ if self . sparse_output :
12391211 # Note: See comment about SparseEfficiencyWarning above.
12401212 XBS_sparse = XBS_sparse .tolil ()
12411213 XBS_sparse [below_xmin_mask , j ] = linear_extr
@@ -1251,7 +1223,7 @@ def transform(self, X):
12511223 f_max [k ]
12521224 + (X [above_xmax_mask , feature_idx ] - xmax ) * fp_max [k ]
12531225 )
1254- if use_sparse :
1226+ if self . sparse_output :
12551227 # Note: See comment about SparseEfficiencyWarning above.
12561228 XBS_sparse = XBS_sparse .tolil ()
12571229 XBS_sparse [above_xmax_mask , k : k + 1 ] = linear_extr [
@@ -1262,38 +1234,12 @@ def transform(self, X):
12621234 linear_extr
12631235 )
12641236
1265- if use_sparse :
1237+ if self . sparse_output :
12661238 XBS_sparse = XBS_sparse .tocsr ()
12671239 output_list .append (XBS_sparse )
12681240
1269- if use_sparse :
1270- # TODO: Remove this conditional error when the minimum supported version of
1271- # SciPy is 1.9.2
1272- # `scipy.sparse.hstack` breaks in scipy<1.9.2
1273- # when `n_features_out_ > max_int32`
1274- max_int32 = np .iinfo (np .int32 ).max
1275- all_int32 = True
1276- for mat in output_list :
1277- all_int32 &= mat .indices .dtype == np .int32
1278- if (
1279- sp_version < parse_version ("1.9.2" )
1280- and self .n_features_out_ > max_int32
1281- and all_int32
1282- ):
1283- raise ValueError (
1284- "In scipy versions `<1.9.2`, the function `scipy.sparse.hstack`"
1285- " produces negative columns when:\n 1. The output shape contains"
1286- " `n_cols` too large to be represented by a 32bit signed"
1287- " integer.\n . All sub-matrices to be stacked have indices of"
1288- " dtype `np.int32`.\n To avoid this error, either use a version"
1289- " of scipy `>=1.9.2` or alter the `SplineTransformer`"
1290- " transformer to produce fewer than 2^31 output features"
1291- )
1241+ if self .sparse_output :
12921242 XBS = sparse .hstack (output_list , format = "csr" )
1293- elif self .sparse_output :
1294- # TODO: Remove conversion to csr, once scipy 1.10 is the minimum version:
1295- # Adjust format of XBS to sparse, for scipy versions < 1.10.0:
1296- XBS = sparse .csr_matrix (XBS )
12971243
12981244 if self .include_bias :
12991245 return XBS
0 commit comments