EconML MetaLearner Estimators and Categorical Variables - Error: None of Index are in the Columns #1318
Unanswered
kirant1729
asked this question in
Q&A
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
I have been trying to use EconML MetaLearner estimators with DoWhy, but I get the following error message when estimating the effect. This only happens when I have categorical variables as confounders. With continuous variables, it all works as expected.
"None of [Index(['Factor2', 'Factor1'], dtype='object')] are in the [columns]"
This is based on simulated data
Data
np.random.seed(42)
num_rows = 1000
data = {
'Treatment': np.random.choice([0, 1], size=num_rows),
'Factor1': np.random.choice(['A', 'B', 'C'], size=num_rows),
'Factor2': np.random.choice(['D', 'E', 'F', 'G'], size=num_rows),
#'Outcomes': np.random.choice([1, 2, 3, 4, 5], size=num_rows)
'Outcomes': np.random.choice([0,1], size=num_rows)
}
DAG
Estimate
# Get estimate (S-Learner)
estimate = model.estimate_effect(
    identified_estimand=estimand,
    method_name='backdoor.econml.metalearners.SLearner',
    target_units='ate',
    method_params={
        'init_params': {
            'overall_model': LGBMRegressor(n_estimators=500, max_depth=10)
        },
        'fit_params': {}
    })
Full Error Message
`---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[10], line 2
1 # Get estimate (S-Learner)
----> 2 estimate = model.estimate_effect(
3 identified_estimand=estimand,
4 method_name='backdoor.econml.metalearners.SLearner',
5 target_units='ate',
6 method_params={
7 'init_params': {
8 'overall_model': LGBMRegressor(n_estimators=500, max_depth=10)
9 },
10 'fit_params': {}
11 })
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\dowhy\causal_model.py:361, in CausalModel.estimate_effect(self, identified_estimand, method_name, control_value, treatment_value, test_significance, evaluate_effect_strength, confidence_intervals, target_units, effect_modifiers, fit_estimator, method_params)
350 causal_estimator = causal_estimator_class(
351 identified_estimand,
352 test_significance=test_significance,
(...)
356 **extra_args,
357 )
359 self._estimator_cache[method_name] = causal_estimator
--> 361 return estimate_effect(
362 self._data,
363 self._treatment,
364 self._outcome,
365 identifier_name,
366 causal_estimator,
367 control_value,
368 treatment_value,
369 target_units,
370 effect_modifiers,
371 fit_estimator,
372 method_params,
373 )
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\dowhy\causal_estimator.py:758, in estimate_effect(data, treatment, outcome, identifier_name, estimator, control_value, treatment_value, target_units, effect_modifiers, fit_estimator, method_params)
751 if fit_estimator:
752 estimator.fit(
753 data=data,
754 effect_modifier_names=effect_modifiers,
755 **method_params["fit_params"] if "fit_params" in method_params else {},
756 )
--> 758 estimate = estimator.estimate_effect(
759 data,
760 treatment_value=treatment_value,
761 control_value=control_value,
762 target_units=target_units,
763 confidence_intervals=estimator._confidence_intervals,
764 )
766 if estimator._significance_test:
767 estimator.test_significance(data, estimate.value, method=estimator._significance_test)
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\dowhy\causal_estimators\econml.py:244, in Econml.estimate_effect(self, data, treatment_value, control_value, target_units, **_)
241 # Changing shape to a list for a singleton value
242 # Note that self._control_value is assumed to be a singleton value
243 self._treatment_value = parse_state(self._treatment_value)
--> 244 est = self.effect(X_test)
245 ate = np.mean(est, axis=0) # one value per treatment value
247 if len(ate) == 1:
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\dowhy\causal_estimators\econml.py:327, in Econml.effect(self, df, *args, **kwargs)
324 def effect_fun(filtered_df, T0, T1, *args, **kwargs):
325 return self.estimator.effect(filtered_df, T0=T0, T1=T1, *args, **kwargs)
--> 327 Xdf = df[self._effect_modifier_names] if df is not None else df
328 return self.apply_multitreatment(Xdf, effect_fun, *args, **kwargs)
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\frame.py:4108, in DataFrame.__getitem__(self, key)
4106 if is_iterator(key):
4107 key = list(key)
-> 4108 indexer = self.columns._get_indexer_strict(key, "columns")[1]
4110 # take() does not accept boolean indexers
4111 if getattr(indexer, "dtype", None) == bool:
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py:6200, in Index._get_indexer_strict(self, key, axis_name)
6197 else:
6198 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 6200 self._raise_if_missing(keyarr, indexer, axis_name)
6202 keyarr = self.take(indexer)
6203 if isinstance(key, Index):
6204 # GH 42790 - Preserve name from an Index
File \AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py:6249, in Index._raise_if_missing(self, key, indexer, axis_name)
6247 if nmissing:
6248 if nmissing == len(indexer):
-> 6249 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6251 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6252 raise KeyError(f"{not_found} not in index")
KeyError: "None of [Index(['Factor2', 'Factor1'], dtype='object')] are in the [columns]"`
Beta Was this translation helpful? Give feedback.
All reactions