7 changes: 4 additions & 3 deletions econml/_ortho_learner.py
@@ -1148,12 +1148,13 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None, s
# If the final model supports it, the scoring parameter can be passed along, if provided
if scoring is not None:
# Cannot import at the top of the module, or we'd get circular imports
from .dml._rlearner import _ModelFinal
if isinstance(self._ortho_learner_model_final, _ModelFinal):
from .dml._rlearner import _ModelFinal as _DMLModelFinal
from .dr._drlearner import _ModelFinal as _DRModelFinal
if isinstance(self._ortho_learner_model_final, (_DMLModelFinal, _DRModelFinal)):
score_kwargs['scoring'] = scoring
else:
raise NotImplementedError("scoring parameter only implemented for "
"_rlearner._ModelFinal")
"_rlearner._ModelFinal or _drlearner._ModelFinal")
return self._ortho_learner_model_final.score(Y, T, nuisances=accumulated_nuisances,
**filter_none_kwargs(**score_kwargs))

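For context, the guard above forwards the new scoring argument only when the final model is one of the two supported _ModelFinal classes; anything else raises NotImplementedError. A standalone sketch of the dispatch pattern (stand-in class names for illustration, not econml imports):

# Hypothetical stand-in; the real check uses the _rlearner/_drlearner _ModelFinal classes.
class _SupportsScoring:
    def score(self, Y, T, scoring='mean_squared_error', **kwargs):
        return 0.0

def dispatch_score(model_final, Y, T, scoring=None):
    score_kwargs = {}
    if scoring is not None:
        if isinstance(model_final, _SupportsScoring):
            score_kwargs['scoring'] = scoring
        else:
            raise NotImplementedError("scoring only implemented for supported final models")
    return model_final.score(Y, T, **score_kwargs)

print(dispatch_score(_SupportsScoring(), None, None, scoring='mean_absolute_error'))  # 0.0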
114 changes: 108 additions & 6 deletions econml/dr/_drlearner.py
@@ -39,7 +39,11 @@

import numpy as np
from sklearn.base import clone

from sklearn.metrics import (
get_scorer,
get_scorer_names
)
from typing import Callable, Union

from .._ortho_learner import _OrthoLearner
from .._cate_estimator import (DebiasedLassoCateEstimatorDiscreteMixin, ForestModelFinalCateEstimatorDiscreteMixin,
@@ -185,7 +189,26 @@ def predict(self, X=None):
preds = np.array([mdl.predict(X).reshape((-1,) + self.d_y) for mdl in self.models_cate])
return np.moveaxis(preds, 0, -1) # move treatment dim to end

def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, groups=None):
def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, groups=None,
scoring='mean_squared_error'):
"""
Score final model fit of debiased outcomes from debiased treatments and nuisances.

The default scoring method "mean_squared_error" is the score used to fit debiased
outcomes from debiased treatments and nuisances, and reproduces the behavior of this
score function from before the scoring method option.

:param Y: Unused
:param T: Unused
:param X: Combined nuisances, treatments and instruments to call _model_final.predict
:param W: Unused
:param Z: Unused
:param nuisances: tuple of the outcome (Y) debiased and treatment (T) debiased
:param sample_weight: Optional weighting on the samples
:param groups: Unused
:param scoring: Optional alternative scoring metric from sklearn.get_scorer
:return: Float score
"""
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.transform(X)
Y_pred, _ = nuisances
@@ -195,18 +218,94 @@ def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, groups=N
cate_pred = self.model_cate.predict(X).reshape((-1, self.d_t))
if self.d_y:
cate_pred = cate_pred[:, np.newaxis, :]
return np.mean(np.average((Y_pred_diff - cate_pred)**2, weights=sample_weight, axis=0))
# if the CATE prediction is a scalar, broadcast it to the shape of Y_pred_diff
if cate_pred.shape[0] != Y_pred_diff.shape[0]:
cate_pred = np.broadcast_to(cate_pred, Y_pred_diff.shape)
# squeeze singleton dimensions
if cate_pred.ndim > 2:
cate_pred = cate_pred.squeeze()
if Y_pred_diff.ndim > 2:
Y_pred_diff = Y_pred_diff.squeeze()
# if the result is 1D, there are no extra output dimensions to average over
if Y_pred_diff.ndim == 1:
return _ModelFinal._wrap_scoring(Y_true=Y_pred_diff, Y_pred=cate_pred,
scoring=scoring, sample_weight=sample_weight)
else:
return np.mean(_ModelFinal.wrap_scoring(Y_true=Y_pred_diff, Y_pred=cate_pred,
scoring=scoring, sample_weight=sample_weight,
score_by_dim=True))

else:
scores = []
for t in np.arange(1, Y_pred.shape[-1]):
# since we only allow single-dimensional y, we can flatten the prediction
Y_pred_diff = (Y_pred[..., t] - Y_pred[..., 0]).flatten()
cate_pred = self.models_cate[t - 1].predict(X).flatten()
score = np.average((Y_pred_diff - cate_pred)**2, weights=sample_weight, axis=0)
# if the CATE prediction is a scalar, broadcast it to the shape of Y_pred_diff
if cate_pred.shape[0] != Y_pred_diff.shape[0]:
cate_pred = np.broadcast_to(cate_pred, Y_pred_diff.shape)
score = _ModelFinal.wrap_scoring(Y_true=Y_pred_diff, Y_pred=cate_pred,
scoring=scoring, sample_weight=sample_weight)
scores.append(score)
return np.mean(scores)

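To make the loop above concrete, here is a small self-contained sketch of the per-treatment scoring path with synthetic arrays (shapes only, not econml objects; default MSE scoring):

import numpy as np

n, n_levels = 100, 3  # treatment levels 0..2, with 0 as control
Y_pred = np.random.normal(size=(n, n_levels))  # pseudo-outcomes, one column per level
cate_preds = [np.random.normal(size=n) for _ in range(n_levels - 1)]  # one model per non-control level

scores = []
for t in range(1, n_levels):
    Y_pred_diff = Y_pred[..., t] - Y_pred[..., 0]  # effect of level t vs control
    scores.append(np.mean((Y_pred_diff - cate_preds[t - 1])**2))  # default MSE
print(np.mean(scores))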
@staticmethod
def _wrap_scoring(scoring: Union[str, Callable], Y_true, Y_pred, sample_weight=None):
"""
Pull the scoring function from sklearn.get_scorer and call it with Y_true, Y_pred.

Standard score names like "mean_squared_error" are present in sklearn scoring as
"neg_..." so score names are accepted either with or without the "neg_" prefix.
The function _score_func is called directly because the scorer objects from get_scorer()
do not accept a sample_weight parameter. The _score_func member has been available in
sklearn scorers since before sklearn 1.0. Note that custom callable score functions
are allowed but they are not validated before use; any errors they raise will propagate.

:param scoring: A string name of a scoring function from sklearn, or any callable that
will function as the score.
:param Y_true: True Y values
:param Y_pred: Predicted Y values
:param sample_weight: Optional weighting on the examples
:return: Float score
"""
if isinstance(scoring, str) and scoring in get_scorer_names():
score_fn = get_scorer(scoring)._score_func
elif isinstance(scoring, str) and 'neg_' + scoring in get_scorer_names():
score_fn = get_scorer('neg_' + scoring)._score_func
elif callable(scoring):
score_fn = scoring
else:
raise NotImplementedError(f"_wrap_scoring does not support '{scoring}'")

# Some scoring functions expect a flat array rather than an ndarray of shape (N, 1)
Y_true = Y_true.squeeze() if Y_true.ndim == 2 and Y_true.shape[1] == 1 else Y_true
Y_pred = Y_pred.squeeze() if Y_pred.ndim == 2 and Y_pred.shape[1] == 1 else Y_pred
if sample_weight is not None:
res = score_fn(Y_true, Y_pred, sample_weight=sample_weight)
else:
res = score_fn(Y_true, Y_pred)

return res

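As a quick check of the name resolution described in the docstring: the plain metric name is not a registered scorer, so the lookup falls through to the "neg_"-prefixed scorer, whose private _score_func returns the unnegated metric (a minimal sketch using only sklearn):

import numpy as np
from sklearn.metrics import get_scorer, get_scorer_names

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.3])

assert 'mean_squared_error' not in get_scorer_names()  # only the "neg_" form is registered
assert 'neg_mean_squared_error' in get_scorer_names()

score_fn = get_scorer('neg_mean_squared_error')._score_func
print(score_fn(y_true, y_pred))  # plain (non-negated) MSE, ~0.0367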

@staticmethod
def wrap_scoring(scoring, Y_true, Y_pred, sample_weight=None, score_by_dim=False):
"""
In case the caller wants a score for each dimension of a multiple treatment model.

Loop over the call to the single score wrapper.
"""
if not score_by_dim:
return _ModelFinal._wrap_scoring(scoring, Y_true, Y_pred, sample_weight)
else:
assert Y_true.shape == Y_pred.shape, "Shape mismatch in wrap_scoring"
n_out = Y_pred.shape[1]
res = [None] * n_out
for yidx in range(n_out):
res[yidx] = _ModelFinal._wrap_scoring(scoring, Y_true[:, yidx], Y_pred[:, yidx], sample_weight)
return res

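A small sketch of the two paths, assuming this PR's private module path (for illustration only; synthetic arrays):

import numpy as np
from econml.dr._drlearner import _ModelFinal  # private path, subject to change

Y_true = np.random.normal(size=(50, 2))
Y_pred = Y_true + np.random.normal(scale=0.1, size=(50, 2))

per_dim = _ModelFinal.wrap_scoring('mean_squared_error', Y_true, Y_pred, score_by_dim=True)  # list of 2 scores
overall = _ModelFinal.wrap_scoring('mean_squared_error', Y_true, Y_pred)  # single averaged score
print(per_dim, overall)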

class DRLearner(_OrthoLearner):
"""
@@ -581,7 +680,7 @@ def refit_final(self, *, inference='auto'):
return super().refit_final(inference=inference)
refit_final.__doc__ = _OrthoLearner.refit_final.__doc__

def score(self, Y, T, X=None, W=None, sample_weight=None):
def score(self, Y, T, X=None, W=None, sample_weight=None, scoring=None):
"""
Score the fitted CATE model on a new data set.

@@ -604,14 +703,17 @@ def score(self, Y, T, X=None, W=None, sample_weight=None):
Controls for each sample
sample_weight: (n,) vector, optional
Weights for each sample
scoring: str, optional
Name of an sklearn scoring function to use instead of the default. Supports
f1_score, log_loss, mean_absolute_error, mean_squared_error, r2_score,
and roc_auc_score.

Returns
-------
score: float
The score of the final CATE model on the new data; the MSE unless an alternative scoring is provided.
"""
# Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring
return super().score(Y, T, X=X, W=W, sample_weight=sample_weight)
return super().score(Y, T, X=X, W=W, sample_weight=sample_weight, scoring=scoring)

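End to end, the new parameter is used like this; a hedged sketch with synthetic data and default nuisance models, assuming this PR's branch of econml is installed:

import numpy as np
from econml.dr import DRLearner

n = 1000
X = np.random.normal(size=(n, 3))
T = np.random.binomial(1, 0.5, size=n)
Y = T * X[:, 0] + np.random.normal(scale=0.1, size=n)

est = DRLearner()
est.fit(Y, T, X=X)

mse = est.score(Y, T, X=X)  # default behavior, the MSE as before
mae = est.score(Y, T, X=X, scoring='mean_absolute_error')  # new: sklearn metric by name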
@property
def multitask_model_cate(self):