diff --git a/gen_imgs.py b/gen_imgs.py
index 4e9d3ca0..6fc8694d 100644
--- a/gen_imgs.py
+++ b/gen_imgs.py
@@ -323,7 +323,6 @@ def chicago_tensor():
 
 
 def expectiles():
-    """Generate expectiles visualization."""
     X, y = mcycle(return_X_y=True)
diff --git a/pygam/pygam.py b/pygam/pygam.py
index 6f86ae43..86603ff3 100644
--- a/pygam/pygam.py
+++ b/pygam/pygam.py
@@ -406,9 +406,9 @@ def _linear_predictor(self, X=None, modelmat=None, b=None, term=-1):
             contains the spline coefficients
             if None, will use current model coefficients
 
-        term : int, optional
-            feature for which to compute the linear prediction
-            if -1, will compute for all features
+        term : int or list of int, default: -1
+            term(s) to use in calculation of linear predictor
+            if -1, all terms are used
 
         Returns
        -------
@@ -485,12 +485,22 @@ def _modelmat(self, X, term=-1):
         modelmat : sparse matrix of len n_samples
             containing model matrix of the spline basis for selected features
         """
+        # take features, dtypes and edge_knots based on supplied term values
+        if term != -1:
+            terms = list(np.atleast_1d(term))
+        else:
+            terms = range(len(self.feature))
+
+        features = [self.feature[i] for i in terms]
+        edge_knots = [self.edge_knots_[i] for i in terms]
+        dtypes = [self.dtype[i] for i in terms]
+
         X = check_X(
             X,
             n_feats=self.statistics_["m_features"],
-            edge_knots=self.edge_knots_,
-            dtypes=self.dtype,
-            features=self.feature,
+            edge_knots=edge_knots,
+            dtypes=dtypes,
+            features=features,
             verbose=self.verbose,
         )
 
@@ -1432,6 +1442,9 @@ def _flatten_mesh(self, Xs, term):
         X = np.zeros((n, self.statistics_["m_features"]))
         for term_, x in zip(terms, Xs):
             X[:, term_.feature] = x.ravel()
+
+        if getattr(self.terms[term], "by", None) is not None:
+            X[:, self.terms[term].by] = 1.0
         return X
 
     def generate_X_grid(self, term, n=100, meshgrid=False):
@@ -1602,14 +1615,6 @@ def partial_dependence(
             shape = X[0].shape
             X = self._flatten_mesh(X, term=term)
-            X = check_X(
-                X,
-                n_feats=self.statistics_["m_features"],
-                edge_knots=self.edge_knots_,
-                dtypes=self.dtype,
-                features=self.feature,
-                verbose=self.verbose,
-            )
 
         modelmat = self._modelmat(X, term=term)
         pdep = self._linear_predictor(modelmat=modelmat, term=term)
diff --git a/pygam/terms.py b/pygam/terms.py
index c5b0482a..3992a1b1 100644
--- a/pygam/terms.py
+++ b/pygam/terms.py
@@ -938,8 +938,6 @@ def build_columns(self, X, verbose=False):
         -------
         scipy sparse array with n rows
         """
-        X[:, self.feature][:, np.newaxis]
-
         splines = b_spline_basis(
             X[:, self.feature],
             edge_knots=self.edge_knots_,
diff --git a/pygam/tests/test_partial_dependence.py b/pygam/tests/test_partial_dependence.py
index 0e08f3e7..f7d810eb 100644
--- a/pygam/tests/test_partial_dependence.py
+++ b/pygam/tests/test_partial_dependence.py
@@ -1,6 +1,7 @@
+import numpy as np
 import pytest
 
-from pygam import LinearGAM
+from pygam import LinearGAM, f, s
 
 
 class TestPartialDepencence:
@@ -153,3 +154,24 @@ def test_no_X_needed_for_partial_dependence(self, mcycle_gam):
             mcycle_gam.partial_dependence(term=0)
             == mcycle_gam.partial_dependence(term=0, X=XX)
         ).all()
+
+    def test_regression_value_error_in_factor_terms(self):
+        """
+        test https://github.com/dswah/pyGAM/issues/301
+        """
+        X = np.random.standard_normal((100, 3))
+
+        # shift the features away from 0 and 1
+        X[:, 2] = np.random.choice([2, 3], 100, replace=True)
+
+        Y = np.random.standard_normal(100)
+        gam = LinearGAM(s(0) + s(1) + f(2)).fit(X, Y)
+
+        # should be able to evaluate with fixed default values for the factor
+        gam.partial_dependence(0)
+
+        # now do the same, but with a by feature
+        gam = LinearGAM(s(0, by=2) + s(1) + f(2)).fit(X, Y)
+
+        # should be able to evaluate with fixed default values for the factor
+        gam.partial_dependence(0)
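
For orientation, a minimal end-user sketch of the behaviour the new regression test exercises (not part of the patch; the random generator, seed, and variable names are illustrative). It uses only the public LinearGAM, s, and f API already imported in the test:

    import numpy as np
    from pygam import LinearGAM, f, s

    # toy data: two continuous features and one factor whose levels are 2 and 3,
    # i.e. shifted away from 0 and 1
    rng = np.random.default_rng(0)
    X = rng.standard_normal((100, 3))
    X[:, 2] = rng.choice([2, 3], 100, replace=True)
    y = rng.standard_normal(100)

    # spline term with a factor `by` variable, plus a plain factor term
    gam = LinearGAM(s(0, by=2) + s(1) + f(2)).fit(X, y)

    # per issue #301 this call used to raise a ValueError when the factor's
    # levels were not 0/1; with this patch it returns the partial dependence
    pdep = gam.partial_dependence(term=0)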