Issue with Logistic Regression in Mediation Analysis

**Describe the bug**
DoWhy does not appear to allow the use of `generalized_linear_model_estimator.GeneralizedLinearModelEstimator` with mediation analysis: estimation fails as if backdoor paths were missing (a `KeyError` is raised while looking up the backdoor variables), even though such paths exist in the graph.


**Steps to reproduce the behavior**

```python
data = dowhy.datasets.linear_dataset(10, num_common_causes=1, num_samples=10000,
                                     num_instruments=0, num_effect_modifiers=0,
                                     num_treatments=1,
                                     num_frontdoor_variables=1,
                                     treatment_is_binary=False,
                                    outcome_is_binary=True)
df = data['df']
print(df.head())

# Natural indirect effect (nie)
identified_estimand_nie = model.identify_effect(estimand_type="nonparametric-nie",
                                            proceed_when_unidentifiable=True)
print(identified_estimand_nie)

import dowhy.causal_estimators.linear_regression_estimator
causal_estimate_nie = model.estimate_effect(identified_estimand_nie,
                                        method_name="mediation.two_stage_regression",
                                       confidence_intervals=False,
                                       test_significance=False,
                                        method_params = {
                                            'first_stage_model': dowhy.causal_estimators.linear_regression_estimator.LinearRegressionEstimator,
                                            'second_stage_model': dowhy.causal_estimators.generalized_linear_model_estimator.GeneralizedLinearModelEstimator(identified_estimand = identified_estimand_nie, glm_family=Binomial()),
                                        }
                                       )
print(causal_estimate_nie)

```
**Error:**
```
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[29], line 36
     33 print(identified_estimand_nie)
     35 import dowhy.causal_estimators.linear_regression_estimator
---> 36 causal_estimate_nie = model.estimate_effect(identified_estimand_nie,
     37                                         method_name="mediation.two_stage_regression",
     38                                        confidence_intervals=False,
     39                                        test_significance=False,
     40                                         method_params = {
     41                                             'first_stage_model': dowhy.causal_estimators.linear_regression_estimator.LinearRegressionEstimator,
     42                                             'second_stage_model': dowhy.causal_estimators.generalized_linear_model_estimator.GeneralizedLinearModelEstimator(identified_estimand = identified_estimand_nie, glm_family=Binomial()),
     43                                         }
     44                                        )
     45 print(causal_estimate_nie)

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_model.py:359, in CausalModel.estimate_effect(self, identified_estimand, method_name, control_value, treatment_value, test_significance, evaluate_effect_strength, confidence_intervals, target_units, effect_modifiers, fit_estimator, method_params)
    348         causal_estimator = causal_estimator_class(
    349             identified_estimand,
    350             test_significance=test_significance,
   (...)
    354             **extra_args,
    355         )
    357         self._estimator_cache[method_name] = causal_estimator
--> 359 return estimate_effect(
    360     self._data,
    361     self._treatment,
    362     self._outcome,
    363     identifier_name,
    364     causal_estimator,
    365     control_value,
    366     treatment_value,
    367     target_units,
    368     effect_modifiers,
    369     fit_estimator,
    370     method_params,
    371 )

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimator.py:752, in estimate_effect(data, treatment, outcome, identifier_name, estimator, control_value, treatment_value, target_units, effect_modifiers, fit_estimator, method_params)
    747     return CausalEstimate(
    748         None, None, None, None, None, None, control_value=control_value, treatment_value=treatment_value
    749     )
    751 if fit_estimator:
--> 752     estimator.fit(
    753         data=data,
    754         effect_modifier_names=effect_modifiers,
    755         **method_params["fit_params"] if "fit_params" in method_params else {},
    756     )
    758 estimate = estimator.estimate_effect(
    759     data,
    760     treatment_value=treatment_value,
   (...)
    763     confidence_intervals=estimator._confidence_intervals,
    764 )
    766 if estimator._significance_test:

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimators/two_stage_regression_estimator.py:234, in TwoStageRegressionEstimator.fit(self, data, effect_modifier_names, **_)
    231 elif self._target_estimand.identifier_method == "mediation":
    232     self._second_stage_model._target_estimand.treatment_variable = parse_state(self._mediators_names)
--> 234 self._second_stage_model.fit(
    235     data,
    236     effect_modifier_names=effect_modifier_names,
    237 )
    239 if self._target_estimand.estimand_type == EstimandType.NONPARAMETRIC_NDE:
    240     self._second_stage_model_nde._target_estimand.identifier_method = "backdoor"

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimators/generalized_linear_model_estimator.py:105, in GeneralizedLinearModelEstimator.fit(self, data, effect_modifier_names)
     91 def fit(
     92     self,
     93     data: pd.DataFrame,
     94     effect_modifier_names: Optional[List[str]] = None,
     95 ):
     96     """
     97     Fits the estimator with data for effect estimation
     98     :param data: data frame containing the data
   (...)
    103                 methods support this currently.
    104     """
--> 105     return super().fit(
    106         data,
    107         effect_modifier_names=effect_modifier_names,
    108     )

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_estimators/regression_estimator.py:90, in RegressionEstimator.fit(self, data, effect_modifier_names)
     87 self.reset_encoders()  # Forget any existing encoders
     88 self._set_effect_modifiers(data, effect_modifier_names)
---> 90 self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
     91 self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
     92 if len(self._observed_common_causes_names) > 0:

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_identifier/identified_estimand.py:98, in IdentifiedEstimand.get_adjustment_set(self, key)
     96 if self.identifier_method == "general_adjustment":
     97     return self.get_general_adjustment_variables(key)
---> 98 return self.get_backdoor_variables(key)

File ~/.conda/envs/lowpyAD/lib/python3.10/site-packages/dowhy/causal_identifier/identified_estimand.py:62, in IdentifiedEstimand.get_backdoor_variables(self, key)
     60     return self.backdoor_variables[self.identifier_method]
     61 elif self.backdoor_variables is not None and len(self.backdoor_variables) > 0:
---> 62     return self.backdoor_variables[self.default_backdoor_id]
     63 else:
     64     return []

KeyError: None
```

**Expected behavior**
Logistic regression should be allowed as the second-stage model in mediation analysis when the outcome is binary.

**Version information:**
 - DoWhy version: 0.13

**Additional context**
Add any other context about the problem here.


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Issue with Logistic Regression in Mediation Analysis #1335

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue with Logistic Regression in Mediation Analysis #1335

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions