-
Notifications
You must be signed in to change notification settings - Fork 992
Open
Labels
bug: Something isn't working
Description
Describe the bug
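DoWhy does not appear to allow generalized_linear_model_estimator.GeneralizedLinearModelEstimator to be used as a stage model in mediation analysis: estimation fails with `KeyError: None` while looking up the backdoor variables (i.e., the backdoor paths are reported as "missing"), even though they exist in the graph.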
Steps to reproduce the behavior
data = dowhy.datasets.linear_dataset(10, num_common_causes=1, num_samples=10000,
num_instruments=0, num_effect_modifiers=0,
num_treatments=1,
num_frontdoor_variables=1,
treatment_is_binary=False,
outcome_is_binary=True)
df = data['df']
print(df.head())
# Natural indirect effect (nie)
identified_estimand_nie = model.identify_effect(estimand_type="nonparametric-nie",
proceed_when_unidentifiable=True)
print(identified_estimand_nie)
import dowhy.causal_estimators.linear_regression_estimator
causal_estimate_nie = model.estimate_effect(identified_estimand_nie,
method_name="mediation.two_stage_regression",
confidence_intervals=False,
test_significance=False,
method_params = {
'first_stage_model': dowhy.causal_estimators.linear_regression_estimator.LinearRegressionEstimator,
'second_stage_model': dowhy.causal_estimators.generalized_linear_model_estimator.GeneralizedLinearModelEstimator(identified_estimand = identified_estimand_nie, glm_family=Binomial()),
}
)
print(causal_estimate_nie)
Error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[29], line 36
33 print(identified_estimand_nie)
35 import dowhy.causal_estimators.linear_regression_estimator
---> 36 causal_estimate_nie = model.estimate_effect(identified_estimand_nie,
37 method_name="mediation.two_stage_regression",
38 confidence_intervals=False,
39 test_significance=False,
40 method_params = {
41 'first_stage_model': dowhy.causal_estimators.linear_regression_estimator.LinearRegressionEstimator,
42 'second_stage_model': dowhy.causal_estimators.generalized_linear_model_estimator.GeneralizedLinearModelEstimator(identified_estimand = identified_estimand_nie, glm_family=Binomial()),
43 }
44 )
45 print(causal_estimate_nie)
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_model.py:359, in CausalModel.estimate_effect(self, identified_estimand, method_name, control_value, treatment_value, test_significance, evaluate_effect_strength, confidence_intervals, target_units, effect_modifiers, fit_estimator, method_params)
348 causal_estimator = causal_estimator_class(
349 identified_estimand,
350 test_significance=test_significance,
(...)
354 **extra_args,
355 )
357 self._estimator_cache[method_name] = causal_estimator
--> 359 return estimate_effect(
360 self._data,
361 self._treatment,
362 self._outcome,
363 identifier_name,
364 causal_estimator,
365 control_value,
366 treatment_value,
367 target_units,
368 effect_modifiers,
369 fit_estimator,
370 method_params,
371 )
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimator.py:752, in estimate_effect(data, treatment, outcome, identifier_name, estimator, control_value, treatment_value, target_units, effect_modifiers, fit_estimator, method_params)
747 return CausalEstimate(
748 None, None, None, None, None, None, control_value=control_value, treatment_value=treatment_value
749 )
751 if fit_estimator:
--> 752 estimator.fit(
753 data=data,
754 effect_modifier_names=effect_modifiers,
755 **method_params["fit_params"] if "fit_params" in method_params else {},
756 )
758 estimate = estimator.estimate_effect(
759 data,
760 treatment_value=treatment_value,
(...)
763 confidence_intervals=estimator._confidence_intervals,
764 )
766 if estimator._significance_test:
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimators/two_stage_regression_estimator.py:234, in TwoStageRegressionEstimator.fit(self, data, effect_modifier_names, **_)
231 elif self._target_estimand.identifier_method == "mediation":
232 self._second_stage_model._target_estimand.treatment_variable = parse_state(self._mediators_names)
--> 234 self._second_stage_model.fit(
235 data,
236 effect_modifier_names=effect_modifier_names,
237 )
239 if self._target_estimand.estimand_type == EstimandType.NONPARAMETRIC_NDE:
240 self._second_stage_model_nde._target_estimand.identifier_method = "backdoor"
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimators/generalized_linear_model_estimator.py:105, in GeneralizedLinearModelEstimator.fit(self, data, effect_modifier_names)
91 def fit(
92 self,
93 data: pd.DataFrame,
94 effect_modifier_names: Optional[List[str]] = None,
95 ):
96 """
97 Fits the estimator with data for effect estimation
98 :param data: data frame containing the data
(...)
103 methods support this currently.
104 """
--> 105 return super().fit(
106 data,
107 effect_modifier_names=effect_modifier_names,
108 )
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimators/regression_estimator.py:90, in RegressionEstimator.fit(self, data, effect_modifier_names)
87 self.reset_encoders() # Forget any existing encoders
88 self._set_effect_modifiers(data, effect_modifier_names)
---> 90 self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
91 self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
92 if len(self._observed_common_causes_names) > 0:
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_identifier/identified_estimand.py:98, in IdentifiedEstimand.get_adjustment_set(self, key)
96 if self.identifier_method == "general_adjustment":
97 return self.get_general_adjustment_variables(key)
---> 98 return self.get_backdoor_variables(key)
File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_identifier/identified_estimand.py:62, in IdentifiedEstimand.get_backdoor_variables(self, key)
60 return self.backdoor_variables[self.identifier_method]
61 elif self.backdoor_variables is not None and len(self.backdoor_variables) > 0:
---> 62 return self.backdoor_variables[self.default_backdoor_id]
63 else:
64 return []
KeyError: None
Expected behavior
Logistic regression should be allowed with a binary outcome.
Version information:
- 0.13
Additional context
Add any other context about the problem here.
Metadata
Metadata
Assignees
Labels
bug: Something isn't working