Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions causalml/inference/meta/tmle.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
check_p_conditions,
convert_pd_to_np,
)
from causalml.propensity import calibrate


logger = logging.getLogger("causalml")
Expand Down Expand Up @@ -105,7 +104,6 @@ def __init__(
ate_alpha=0.05,
control_name=0,
cv=None,
calibrate_propensity=True,
):
"""Initialize a TMLE learner.

Expand All @@ -119,7 +117,6 @@ def __init__(
self.ate_alpha = ate_alpha
self.control_name = control_name
self.cv = cv
self.calibrate_propensity = calibrate_propensity

def __repr__(self):
return "{}(model={}, cv={})".format(
Expand Down Expand Up @@ -165,10 +162,6 @@ def estimate_ate(self, X, treatment, y, p, segment=None, return_ci=False):
w_group = (treatment == group).astype(int)
p_group = p[group]

if self.calibrate_propensity:
logger.info("Calibrating propensity scores.")
p_group = calibrate(p_group, w_group)

yhat_c = np.zeros_like(y, dtype=float)
yhat_t = np.zeros_like(y, dtype=float)
if self.cv:
Expand Down
14 changes: 2 additions & 12 deletions causalml/metrics/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ def get_tmlegain(
p_col="p",
n_segment=5,
cv=None,
calibrate_propensity=True,
ci=False,
):
"""Get TMLE based average uplifts of model estimates of segments.
Expand All @@ -356,7 +355,6 @@ def get_tmlegain(
p_col (str, optional): the column name for propensity score
n_segment (int, optional): number of segment that TMLE will estimated for each
cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
calibrate_propensity (bool, optional): whether calibrate propensity score or not
ci (bool, optional): whether return confidence intervals for ATE or not
Returns:
(pandas.DataFrame): cumulative gains of model estimates based on TMLE
Expand All @@ -374,7 +372,7 @@ def get_tmlegain(
inference_col = [x for x in inference_col if x in df.columns]

# Initialize TMLE
tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)
tmle = TMLELearner(learner, cv=cv)
ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(
X=df[inference_col], p=df[p_col], treatment=df[treatment_col], y=df[outcome_col]
)
Expand Down Expand Up @@ -454,7 +452,6 @@ def get_tmleqini(
p_col="p",
n_segment=5,
cv=None,
calibrate_propensity=True,
ci=False,
normalize=False,
):
Expand All @@ -469,7 +466,6 @@ def get_tmleqini(
p_col (str, optional): the column name for propensity score
n_segment (int, optional): number of segment that TMLE will estimated for each
cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
calibrate_propensity (bool, optional): whether calibrate propensity score or not
ci (bool, optional): whether return confidence intervals for ATE or not
Returns:
(pandas.DataFrame): cumulative gains of model estimates based on TMLE
Expand All @@ -487,7 +483,7 @@ def get_tmleqini(
inference_col = [x for x in inference_col if x in df.columns]

# Initialize TMLE
tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)
tmle = TMLELearner(learner, cv=cv)
ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(
X=df[inference_col], p=df[p_col], treatment=df[treatment_col], y=df[outcome_col]
)
Expand Down Expand Up @@ -696,7 +692,6 @@ def plot_tmlegain(
p_col="tau",
n_segment=5,
cv=None,
calibrate_propensity=True,
ci=False,
figsize=(8, 8),
):
Expand All @@ -711,7 +706,6 @@ def plot_tmlegain(
p_col (str, optional): the column name for propensity score
n_segment (int, optional): number of segment that TMLE will estimated for each
cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
calibrate_propensity (bool, optional): whether calibrate propensity score or not
ci (bool, optional): whether return confidence intervals for ATE or not
"""

Expand All @@ -728,7 +722,6 @@ def plot_tmlegain(
p_col=p_col,
n_segment=n_segment,
cv=cv,
calibrate_propensity=calibrate_propensity,
)


Expand All @@ -741,7 +734,6 @@ def plot_tmleqini(
p_col="tau",
n_segment=5,
cv=None,
calibrate_propensity=True,
ci=False,
figsize=(8, 8),
):
Expand All @@ -756,7 +748,6 @@ def plot_tmleqini(
p_col (str, optional): the column name for propensity score
n_segment (int, optional): number of segment that TMLE will estimated for each
cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
calibrate_propensity (bool, optional): whether calibrate propensity score or not
ci (bool, optional): whether return confidence intervals for ATE or not
"""

Expand All @@ -773,7 +764,6 @@ def plot_tmleqini(
p_col=p_col,
n_segment=n_segment,
cv=cv,
calibrate_propensity=calibrate_propensity,
)


Expand Down
104 changes: 46 additions & 58 deletions causalml/propensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@


class PropensityModel(metaclass=ABCMeta):
def __init__(self, clip_bounds=(1e-3, 1 - 1e-3), **model_kwargs):
def __init__(self, clip_bounds=(1e-3, 1 - 1e-3), calibrate=True, **model_kwargs):
"""
Args:
clip_bounds (tuple): lower and upper bounds for clipping propensity scores. Bounds should be implemented
such that: 0 < lower < upper < 1, to avoid division by zero in BaseRLearner.fit_predict() step.
calibrate (bool): whether to calibrate the propensity score
model_kwargs: Keyword arguments to be passed to the underlying classification model.
"""
self.clip_bounds = clip_bounds
self.calibrate = calibrate
self.model_kwargs = model_kwargs
self.model = self._model
self.calibrator = None

@property
@abstractmethod
Expand All @@ -40,6 +43,15 @@ def fit(self, X, y):
y (numpy.ndarray): a binary target vector
"""
self.model.fit(X, y)
if self.calibrate:
# Fit a calibrator to the propensity scores with IsotonicRegression.
# Ref: https://scikit-learn.org/stable/modules/isotonic.html
self.calibrator = IsotonicRegression(
out_of_bounds="clip",
y_min=self.clip_bounds[0],
y_max=self.clip_bounds[1],
)
self.calibrator.fit(self.model.predict_proba(X)[:, 1], y)

def predict(self, X):
"""
Expand All @@ -51,7 +63,11 @@ def predict(self, X):
Returns:
(numpy.ndarray): Propensity scores between 0 and 1.
"""
return np.clip(self.model.predict_proba(X)[:, 1], *self.clip_bounds)
p = self.model.predict_proba(X)[:, 1]
if self.calibrate:
p = self.calibrator.transform(p)

return np.clip(p, *self.clip_bounds)

def fit_predict(self, X, y):
"""
Expand All @@ -66,7 +82,6 @@ def fit_predict(self, X, y):
"""
self.fit(X, y)
propensity_scores = self.predict(X)
logger.info("AUC score: {:.6f}".format(auc(y, propensity_scores)))
Comment thread
jeongyoonlee marked this conversation as resolved.
return propensity_scores


Expand Down Expand Up @@ -112,12 +127,15 @@ class GradientBoostedPropensityModel(PropensityModel):
https://xgboost.readthedocs.io/en/latest/python/python_api.html
"""

def __init__(self, early_stop=False, clip_bounds=(1e-3, 1 - 1e-3), **model_kwargs):
def __init__(
self,
early_stop=False,
clip_bounds=(1e-3, 1 - 1e-3),
calibrate=True,
**model_kwargs,
):
self.early_stop = early_stop

super(GradientBoostedPropensityModel, self).__init__(
clip_bounds, **model_kwargs
)
super().__init__(clip_bounds, calibrate, **model_kwargs)

@property
def _model(self):
Expand Down Expand Up @@ -156,50 +174,25 @@ def fit(self, X, y, stop_val_size=0.2):
y_train,
eval_set=[(X_val, y_val)],
)
if self.calibrate:
Comment thread
jeongyoonlee marked this conversation as resolved.
self.calibrator = IsotonicRegression(
out_of_bounds="clip",
y_min=self.clip_bounds[0],
y_max=self.clip_bounds[1],
)
self.calibrator.fit(self.model.predict_proba(X)[:, 1], y)
else:
super(GradientBoostedPropensityModel, self).fit(X, y)

def predict(self, X):
    """
    Return propensity scores for ``X``, limited to ``self.clip_bounds``.

    Args:
        X (numpy.ndarray): a feature matrix

    Returns:
        (numpy.ndarray): Propensity scores between 0 and 1.
    """
    if not self.early_stop:
        # No early stopping: defer to the parent class prediction.
        return super(GradientBoostedPropensityModel, self).predict(X)

    # Early-stopping path: take column 1 of predict_proba and clip it
    # with the configured bounds.
    raw_scores = self.model.predict_proba(X)[:, 1]
    return np.clip(raw_scores, *self.clip_bounds)


def calibrate(ps, treatment):
    """Calibrate propensity scores by fitting an IsotonicRegression on them.

    Ref: https://scikit-learn.org/stable/modules/isotonic.html

    Args:
        ps (numpy.array): a propensity score vector
        treatment (numpy.array): a binary treatment vector (0: control, 1: treated)

    Returns:
        (numpy.array): a calibrated propensity score vector
    """
    # Bound the fitted values by twice the float machine epsilon on each
    # side, so the output range is [2*eps, 1 - 2*eps].
    margin = 2.0 * np.finfo(float).eps
    isotonic = IsotonicRegression(
        out_of_bounds="clip",
        y_min=margin,
        y_max=1.0 - margin,
    )

    # Fit on (ps, treatment) and return the transformed scores in one step.
    return isotonic.fit_transform(ps, treatment)
super().fit(X, y)


def compute_propensity_score(
X, treatment, p_model=None, X_pred=None, treatment_pred=None, calibrate_p=True
X,
treatment,
p_model=None,
X_pred=None,
treatment_pred=None,
calibrate_p=True,
clip_bounds=(1e-3, 1 - 1e-3),
):
"""Generate propensity score if user didn't provide and optionally calibrate.

Expand All @@ -210,16 +203,18 @@ def compute_propensity_score(
X_pred (np.matrix, optional): features for prediction
treatment_pred (np.array or pd.Series, optional): a treatment vector for prediction
calibrate_p (bool, optional): whether to calibrate the propensity score
clip_bounds (tuple, optional): lower and upper bounds for clipping propensity scores. Bounds should be implemented
such that: 0 < lower < upper < 1, to avoid division by zero in BaseRLearner.fit_predict() step.

Returns:
(tuple)
- p (numpy.ndarray): propensity score
- p_model (PropensityModel): either the original p_model, a trained ElasticNetPropensityModel, or None if calibrate_p=True
- p_model (PropensityModel): either the original p_model or a trained ElasticNetPropensityModel
"""
if treatment_pred is None:
treatment_pred = treatment.copy()
if p_model is None:
p_model = ElasticNetPropensityModel()
p_model = ElasticNetPropensityModel(calibrate=calibrate_p)

p_model.fit(X, treatment)

Expand All @@ -231,14 +226,7 @@ def compute_propensity_score(
logger.info("predict_proba not available, using predict instead")
p = p_model.predict(X_pred)

if calibrate_p:
logger.info("Calibrating propensity scores. Returning p_model=None.")
p = calibrate(p, treatment_pred)
p_model = None

# force the p values within the range
eps = np.finfo(float).eps
p = np.where(p < 0 + eps, 0 + eps * 1.001, p)
p = np.where(p > 1 - eps, 1 - eps * 1.001, p)
p = np.clip(p, clip_bounds[0], clip_bounds[1])

return p, p_model
Loading
Loading