py-why
diff --git a/‎docs/source/conf.py
Lines changed: 4 additions & 0 deletions b/‎docs/source/conf.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/source/dowhy.causal_refuters.rst
Lines changed: 8 additions & 0 deletions b/‎docs/source/dowhy.causal_refuters.rst
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/source/dowhy.utils.rst
Lines changed: 8 additions & 0 deletions b/‎docs/source/dowhy.utils.rst
Lines changed: 8 additions & 0 deletions
diff --git a/‎dowhy/causal_estimator.py
Lines changed: 43 additions & 33 deletions b/‎dowhy/causal_estimator.py
Lines changed: 43 additions & 33 deletions
diff --git a/‎dowhy/causal_estimators/causalml.py
Lines changed: 21 additions & 5 deletions b/‎dowhy/causal_estimators/causalml.py
Lines changed: 21 additions & 5 deletions
diff --git a/‎dowhy/causal_estimators/distance_matching_estimator.py
Lines changed: 30 additions & 13 deletions b/‎dowhy/causal_estimators/distance_matching_estimator.py
Lines changed: 30 additions & 13 deletions
diff --git a/‎dowhy/causal_estimators/econml.py
Lines changed: 21 additions & 3 deletions b/‎dowhy/causal_estimators/econml.py
Lines changed: 21 additions & 3 deletions
diff --git a/‎dowhy/causal_estimators/generalized_linear_model_estimator.py
Lines changed: 21 additions & 9 deletions b/‎dowhy/causal_estimators/generalized_linear_model_estimator.py
Lines changed: 21 additions & 9 deletions
@@ -197,3 +197,7 @@
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
+
+# Include the __init__ docstring alongside the class docstring
+autoclass_content = "both"
+
@@ -36,6 +36,14 @@ dowhy.causal\_refuters.dummy\_outcome\_refuter module
    :undoc-members:
    :show-inheritance:
 
+dowhy.causal\_refuters.graph\_refuter module
+--------------------------------------------
+
+.. automodule:: dowhy.causal_refuters.graph_refuter
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 dowhy.causal\_refuters.placebo\_treatment\_refuter module
 ---------------------------------------------------------
 
 
@@ -12,6 +12,14 @@ dowhy.utils.api module
    :undoc-members:
    :show-inheritance:
 
+dowhy.utils.cit module
+----------------------
+
+.. automodule:: dowhy.utils.cit
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 dowhy.utils.cli\_helpers module
 -------------------------------
 
 
@@ -38,12 +38,21 @@ class CausalEstimator:
 
     DEFAULT_INTERPRET_METHOD = ["textual_effect_interpreter"]
 
+    # std args to be removed from locals() before being passed to args_dict
+    _STD_INIT_ARGS = ('self', '__class__', 'args', 'kwargs')
+
     def __init__(self, data, identified_estimand, treatment, outcome,
                  control_value=0, treatment_value=1,
                  test_significance=False, evaluate_effect_strength=False,
                  confidence_intervals=False,
                  target_units=None, effect_modifiers=None,
-                 params=None):
+                 num_null_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST,
+                 num_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_CI,
+                 sample_size_fraction=DEFAULT_SAMPLE_SIZE_FRACTION,
+                 confidence_level=DEFAULT_CONFIDENCE_LEVEL,
+                 need_conditional_estimates='auto',
+                 num_quantiles_to_discretize_cont_cols=NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS,
+                 **kwargs):
         """Initializes an estimator with data and names of relevant variables.
 
         This method is called from the constructors of its child classes.
@@ -55,19 +64,29 @@ def __init__(self, data, identified_estimand, treatment, outcome,
         :param outcome: name of the outcome variable
         :param control_value: Value of the treatment in the control group, for effect estimation.  If treatment is multi-variate, this can be a list.
         :param treatment_value: Value of the treatment in the treated group, for effect estimation. If treatment is multi-variate, this can be a list.
-        :param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations that can be specified through the params dictionary. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
+        :param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
         :param evaluate_effect_strength: (Experimental) whether to evaluate the strength of effect
         :param confidence_intervals: Binary flag or a string indicating whether the confidence intervals should be computed and which method should be used. All methods support estimation of confidence intervals using the bootstrap method by using the parameter confidence_intervals="bootstrap". The bootstrap method takes in two arguments (num_simulations and sample_size_fraction) that can be optionally specified in the params dictionary. Estimators may also override this to implement their own confidence interval method. If this parameter is False, no confidence intervals are computed. If True, confidence intervals are computed by the estimator's specific method if available, otherwise through bootstrap.
         :param target_units: The units for which the treatment effect should be estimated. This can be a string for common specifications of target units (namely, "ate", "att" and "atc"). It can also be a lambda function that can be used as an index for the data (pandas DataFrame). Alternatively, it can be a new DataFrame that contains values of the effect_modifiers and effect will be estimated only for this new data.
-        :param effect_modifiers: Variables on which to compute separate effects, or return a heterogeneous effect function. Not all methods support this currently.
-        :param params: (optional) Additional method parameters
-            num_null_simulations: The number of simulations for testing the statistical significance of the estimator
-            num_simulations: The number of simulations for finding the confidence interval (and/or standard error) for a estimate
-            sample_size_fraction: The size of the sample for the bootstrap estimator
-            confidence_level: The confidence level of the confidence interval estimate
-            num_quantiles_to_discretize_cont_cols: The number of quantiles into which a numeric effect modifier is split, to enable estimation of conditional treatment effect over it.
+        :param effect_modifiers: Variables on which to compute separate
+            effects, or return a heterogeneous effect function. Not all
+            methods support this currently.
+        :param num_null_simulations: The number of simulations for testing the
+            statistical significance of the estimator
+        :param num_simulations: The number of simulations for finding the
+            confidence interval (and/or standard error) for an estimate
+        :param sample_size_fraction: The size of the sample for the bootstrap
+            estimator
+        :param confidence_level: The confidence level of the confidence
+            interval estimate
+        :param need_conditional_estimates: Whether conditional estimates
+            should be computed. The default, 'auto', resolves to True if
+            any effect modifiers are set and to False otherwise.
+        :param num_quantiles_to_discretize_cont_cols: The number of quantiles
+            into which a numeric effect modifier is split, to enable
+            estimation of conditional treatment effect over it.
+        :param kwargs: (optional) Additional estimator-specific parameters
         :returns: an instance of the estimator class.
-
         """
         self._data = data
         self._target_estimand = identified_estimand
@@ -84,14 +103,9 @@ def __init__(self, data, identified_estimand, treatment, outcome,
         self._bootstrap_estimates = None  # for confidence intervals and std error
         self._bootstrap_null_estimates = None  # for significance test
         self._effect_modifiers = None
-        self.method_params = params
-
+        self.method_params = kwargs
         # Setting the default interpret method
         self.interpret_method = CausalEstimator.DEFAULT_INTERPRET_METHOD
-        # Unpacking the keyword arguments
-        if params is not None:
-            for key, value in params.items():
-                setattr(self, key, value)
 
         self.logger = logging.getLogger(__name__)
 
@@ -114,20 +128,17 @@ def __init__(self, data, identified_estimand, treatment, outcome,
             else:
                 self._effect_modifier_names = None
 
-        # Checking if some parameters were set, otherwise setting to default values
-        if not hasattr(self, 'num_null_simulations'):
-            self.num_null_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST
-        if not hasattr(self, 'num_simulations'):
-            self.num_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_CI
-        if not hasattr(self, 'sample_size_fraction'):
-            self.sample_size_fraction = CausalEstimator.DEFAULT_SAMPLE_SIZE_FRACTION
-        if not hasattr(self, 'confidence_level'):
-            self.confidence_level = CausalEstimator.DEFAULT_CONFIDENCE_LEVEL
-        if not hasattr(self, 'num_quantiles_to_discretize_cont_cols'):
-            self.num_quantiles_to_discretize_cont_cols = CausalEstimator.NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS
+        # Store the settings passed in; defaults come from the signature
+        self.num_null_simulations = num_null_simulations
+        self.num_simulations = num_simulations
+        self.sample_size_fraction = sample_size_fraction
+        self.confidence_level = confidence_level
+        self.num_quantiles_to_discretize_cont_cols = \
+            num_quantiles_to_discretize_cont_cols
         # Estimate conditional estimates by default
-        if not hasattr(self, 'need_conditional_estimates'):
-            self.need_conditional_estimates = bool(self._effect_modifier_names)
+        self.need_conditional_estimates = need_conditional_estimates \
+            if need_conditional_estimates != 'auto' \
+            else bool(self._effect_modifier_names)
 
     @staticmethod
     def get_estimator_object(new_data, identified_estimand, estimate):
@@ -158,7 +169,7 @@ def get_estimator_object(new_data, identified_estimand, estimate):
             confidence_intervals=estimate.params["confidence_intervals"],
             target_units=estimate.params["target_units"],
             effect_modifiers=estimate.params["effect_modifiers"],
-            params=estimate.params["method_params"]
+            **estimate.params["method_params"]
         )
 
         return new_estimator
@@ -297,7 +308,6 @@ def _generate_bootstrap_estimates(self, num_bootstrap_simulations,
         # Perform the set number of simulations
         for index in range(num_bootstrap_simulations):
             new_data = resample(self._data, n_samples=sample_size)
-
             new_estimator = type(self)(
                 new_data,
                 self._target_estimand,
@@ -310,7 +320,7 @@ def _generate_bootstrap_estimates(self, num_bootstrap_simulations,
                 confidence_intervals=False,
                 target_units=self._target_units,
                 effect_modifiers=self._effect_modifier_names,
-                params=self.method_params
+                **self.method_params
             )
             new_effect = new_estimator.estimate_effect()
             simulation_results[index] = new_effect.value
@@ -504,7 +514,7 @@ def _test_significance_with_bootstrap(self, estimate_value, num_null_simulations
                     confidence_intervals=False,
                     target_units=self._target_units,
                     effect_modifiers=self._effect_modifier_names,
-                    params=self.method_params
+                    **self.method_params
                 )
                 new_effect = new_estimator.estimate_effect()
                 null_estimates[i] = new_effect.value
 
@@ -7,11 +7,27 @@
 import causalml
 
 class Causalml(CausalEstimator):
-
-    def __init__(self, *args, **kwargs):
-
-        super().__init__(*args, **kwargs)
-
+    """ Wrapper class for estimators from the causalml library.
+
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below. For specific
+    parameters of each estimator, refer to the CausalML docs.
+
+    """
+    def __init__(self, *args, causalml_methodname, **kwargs):
+        """
+        :param causalml_methodname: Fully qualified name of causalml estimator
+            class.
+        """
+        # Required to ensure that self.method_params contains all the information
+        # to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
+        self._causalml_methodname = causalml_methodname
         # Add the identification method used in the estimator
         self.identifier_method = self._target_estimand.identifier_method
         self.logger.debug("The identifier method used {}".format(self.identifier_method))
 
@@ -5,12 +5,35 @@
 from dowhy.causal_estimator import CausalEstimate, CausalEstimator
 
 class DistanceMatchingEstimator(CausalEstimator):
-    """ Simple matching estimator for binary treatments based on a distance metric.
-    """
+    """Simple matching estimator for binary treatments based on a distance
+    metric.
+
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
 
+    """
+    # allowed types of distance metric
     Valid_Dist_Metric_Params = ['p', 'V', 'VI', 'w']
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+
+    def __init__(self, *args, num_matches_per_unit=1,
+                 distance_metric="minkowski", exact_match_cols=None, **kwargs):
+        """
+        :param num_matches_per_unit: The number of matches per data point.
+            Default=1.
+        :param distance_metric: Distance metric to use. Default="minkowski"
+            that corresponds to Euclidean distance metric with p=2.
+        :param exact_match_cols: List of column names whose values should be
+            exactly matched. Typically used for columns with discrete values.
+
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
         # Check if the treatment is one-dimensional
         if len(self._treatment_name) > 1:
             error_msg = str(self.__class__) + "cannot handle more than one treatment variable"
@@ -21,15 +44,9 @@ def __init__(self, *args, **kwargs):
             self.logger.error(error_msg)
             raise Exception(error_msg)
 
-        # Setting the number of matches per data point
-        if getattr(self, 'num_matches_per_unit', None) is None:
-            self.num_matches_per_unit = 1
-        # Default distance metric if not provided by the user
-        if getattr(self, 'distance_metric', None) is None:
-            self.distance_metric = 'minkowski' # corresponds to euclidean metric with p=2
-
-        if getattr(self, 'exact_match_cols', None) is None:
-            self.exact_match_cols = None
+        self.num_matches_per_unit = num_matches_per_unit
+        self.distance_metric = distance_metric
+        self.exact_match_cols = exact_match_cols
 
         self.logger.debug("Back-door variables used:" +
                         ",".join(self._target_estimand.get_backdoor_variables()))
 
@@ -11,9 +11,27 @@
 
 
 class Econml(CausalEstimator):
+    """Wrapper class for estimators from the EconML library.
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below. For init and fit
+    parameters of each estimator, refer to the EconML docs.
+
+    """
+    def __init__(self, *args, econml_methodname,  **kwargs):
+        """
+        :param econml_methodname: Fully qualified name of econml estimator
+            class. For example, 'econml.dml.DML'
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
+        self._econml_methodname = econml_methodname
         self.logger.info("INFO: Using EconML Estimator")
         self.identifier_method = self._target_estimand.identifier_method
         self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
@@ -154,7 +172,7 @@ def construct_symbolic_estimator(self, estimand):
             expr += "+".join(var_list)
             expr += " | " + ",".join(self._effect_modifier_names)
         return expr
-    
+
     def shap_values(self, df: pd.DataFrame, *args, **kwargs):
         return self.estimator.shap_values(
             df[self._effect_modifier_names].values, *args, **kwargs
 
@@ -1,10 +1,9 @@
-import numpy as np
-import pandas as pd
 import statsmodels.api as sm
 import itertools
 
 from dowhy.causal_estimators.regression_estimator import RegressionEstimator
 
+
 class GeneralizedLinearModelEstimator(RegressionEstimator):
     """Compute effect of treatment using a generalized linear model such as logistic regression.
 
@@ -13,16 +12,29 @@ class GeneralizedLinearModelEstimator(RegressionEstimator):
 
     """
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, glm_family=None, predict_score=True, **kwargs):
+        """For a list of args and kwargs, see documentation for
+        :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+        :param glm_family: statsmodels family for the generalized linear model.
+            For example, use statsmodels.api.families.Binomial() for logistic
+            regression or statsmodels.api.families.Poisson() for count data.
+        :param predict_score: For models that have a binary output, whether
+            to output the model's score or the binary output based on the score.
+
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters needed to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
         self.logger.info("INFO: Using Generalized Linear Model Estimator")
-        if self.method_params is not None and 'glm_family' in self.method_params:
-                self.family = self.method_params['glm_family']
+        if glm_family is not None:
+            self.family = glm_family
         else:
             raise ValueError("Need to specify the family for the generalized linear model. Provide a 'glm_family' parameter in method_params, such as statsmodels.api.families.Binomial() for logistic regression.")
-        self.predict_score = True
-        if self.method_params is not None and 'predict_score' in self.method_params:
-                self.predict_score = self.method_params['predict_score']
+        self.predict_score = predict_score
         # Checking if Y is binary
         outcome_values = self._data[self._outcome_name].astype(int).unique()
         self.outcome_is_binary = all([v in [0,1] for v in outcome_values])