Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion gen_imgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,6 @@ def chicago_tensor():


def expectiles():

"""Generate expectiles visualization."""
X, y = mcycle(return_X_y=True)

Expand Down
33 changes: 19 additions & 14 deletions pygam/pygam.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,9 @@ def _linear_predictor(self, X=None, modelmat=None, b=None, term=-1):
contains the spline coefficients
if None, will use current model coefficients

term : int, optional
feature for which to compute the linear prediction
if -1, will compute for all features
term : int or list of int, default: -1
term(s) to use in calculation of linear predictor
if -1, all terms are used

Returns
-------
Expand Down Expand Up @@ -485,12 +485,22 @@ def _modelmat(self, X, term=-1):
modelmat : sparse matrix of len n_samples
containing model matrix of the spline basis for selected features
"""
# take features, dtypes and edge_knots based on supplied term values
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK i like this.

the point is that we only check the features that we need

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, conveniently, check_X already allowed for that. Changing arguments was enough to fix the original issue.

if term != -1:
terms = list(np.atleast_1d(term))
else:
terms = range(len(self.feature))

features = [self.feature[i] for i in terms]
edge_knots = [self.edge_knots_[i] for i in terms]
dtypes = [self.dtype[i] for i in terms]

X = check_X(
X,
n_feats=self.statistics_["m_features"],
edge_knots=self.edge_knots_,
dtypes=self.dtype,
features=self.feature,
edge_knots=edge_knots,
dtypes=dtypes,
features=features,
verbose=self.verbose,
)

Expand Down Expand Up @@ -1432,6 +1442,9 @@ def _flatten_mesh(self, Xs, term):
X = np.zeros((n, self.statistics_["m_features"]))
for term_, x in zip(terms, Xs):
X[:, term_.feature] = x.ravel()

if getattr(self.terms[term], "by", None) is not None:
X[:, self.terms[term].by] = 1.0
return X

def generate_X_grid(self, term, n=100, meshgrid=False):
Expand Down Expand Up @@ -1602,14 +1615,6 @@ def partial_dependence(
shape = X[0].shape

X = self._flatten_mesh(X, term=term)
X = check_X(
X,
n_feats=self.statistics_["m_features"],
edge_knots=self.edge_knots_,
dtypes=self.dtype,
features=self.feature,
verbose=self.verbose,
)

modelmat = self._modelmat(X, term=term)
pdep = self._linear_predictor(modelmat=modelmat, term=term)
Expand Down
2 changes: 0 additions & 2 deletions pygam/terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,8 +938,6 @@ def build_columns(self, X, verbose=False):
-------
scipy sparse array with n rows
"""
X[:, self.feature][:, np.newaxis]

splines = b_spline_basis(
X[:, self.feature],
edge_knots=self.edge_knots_,
Expand Down
24 changes: 23 additions & 1 deletion pygam/tests/test_partial_dependence.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest

from pygam import LinearGAM
from pygam import LinearGAM, f, s


class TestPartialDepencence:
Expand Down Expand Up @@ -153,3 +154,24 @@ def test_no_X_needed_for_partial_dependence(self, mcycle_gam):
mcycle_gam.partial_dependence(term=0)
== mcycle_gam.partial_dependence(term=0, X=XX)
).all()

def test_regression_value_error_in_factor_terms(self):
    """
    test https://github.com/dswah/pyGAM/issues/301
    """
    num_samples = 100
    X = np.random.standard_normal((num_samples, 3))

    # place the factor feature on levels away from 0 and 1,
    # so a naive default fill value would fall outside its range
    X[:, 2] = np.random.choice([2, 3], num_samples, replace=True)

    y = np.random.standard_normal(num_samples)

    # spline + factor model: partial dependence for term 0 must not
    # raise even though the factor column is held at a default value
    model = LinearGAM(s(0) + s(1) + f(2)).fit(X, y)
    model.partial_dependence(0)

    # repeat the check with a `by` interaction on the factor feature
    model = LinearGAM(s(0, by=2) + s(1) + f(2)).fit(X, y)
    model.partial_dependence(0)