Better docs: updated the method signature and docstrings for estimator classes (#389)

* updated the method signature and docstrings for estimator classes
* bug fixes
* updated the args passing
* fixed the bug in refutation test calls for num_ci_simulations
* used std init args
* updated num simulations to pass weighting test
2022-03-14 15:12:27 +05:30 · 2022-03-14 15:12:27 +05:30 · 77906cd4ed
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -197,3 +197,7 @@ epub_exclude_files = ['search.html']
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
 # init docstrings should also be included in class
 autoclass_content = "both"
--- a/docs/source/dowhy.causal_refuters.rst
+++ b/docs/source/dowhy.causal_refuters.rst
@ -36,6 +36,14 @@ dowhy.causal\_refuters.dummy\_outcome\_refuter module
   :undoc-members:
   :show-inheritance:
 dowhy.causal\_refuters.graph\_refuter module
 --------------------------------------------
 .. automodule:: dowhy.causal_refuters.graph_refuter
   :members:
   :undoc-members:
   :show-inheritance:
 dowhy.causal\_refuters.placebo\_treatment\_refuter module
 ---------------------------------------------------------
--- a/docs/source/dowhy.utils.rst
+++ b/docs/source/dowhy.utils.rst
@ -12,6 +12,14 @@ dowhy.utils.api module
   :undoc-members:
   :show-inheritance:
 dowhy.utils.cit module
 ----------------------
 .. automodule:: dowhy.utils.cit
   :members:
   :undoc-members:
   :show-inheritance:
 dowhy.utils.cli\_helpers module
 -------------------------------
--- a/dowhy/causal_estimator.py
+++ b/dowhy/causal_estimator.py
@ -38,12 +38,21 @@ class CausalEstimator:
    DEFAULT_INTERPRET_METHOD = ["textual_effect_interpreter"]
    # std args to be removed from locals() before being passed to args_dict
    _STD_INIT_ARGS = ('self', '__class__', 'args', 'kwargs')
    def __init__(self, data, identified_estimand, treatment, outcome,
                 control_value=0, treatment_value=1,
                 test_significance=False, evaluate_effect_strength=False,
                 confidence_intervals=False,
                 target_units=None, effect_modifiers=None,
-                 params=None):
+                 num_null_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST,
                 num_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_CI,
                 sample_size_fraction=DEFAULT_SAMPLE_SIZE_FRACTION,
                 confidence_level=DEFAULT_CONFIDENCE_LEVEL,
                 need_conditional_estimates='auto',
                 num_quantiles_to_discretize_cont_cols=NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS,
                 **kwargs):
        """Initializes an estimator with data and names of relevant variables.
        This method is called from the constructors of its child classes.
@ -55,19 +64,29 @@ class CausalEstimator:
        :param outcome: name of the outcome variable
        :param control_value: Value of the treatment in the control group, for effect estimation.  If treatment is multi-variate, this can be a list.
        :param treatment_value: Value of the treatment in the treated group, for effect estimation. If treatment is multi-variate, this can be a list.
-        :param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations that can be specified through the params dictionary. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
+        :param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
        :param evaluate_effect_strength: (Experimental) whether to evaluate the strength of effect
        :param confidence_intervals: Binary flag or a string indicating whether the confidence intervals should be computed and which method should be used. All methods support estimation of confidence intervals using the bootstrap method by using the parameter confidence_intervals="bootstrap". The bootstrap method takes in two arguments (num_simulations and sample_size_fraction) that can be optionally specified as keyword arguments. Estimators may also override this to implement their own confidence interval method. If this parameter is False, no confidence intervals are computed. If True, confidence intervals are computed by the estimator's specific method if available, otherwise through bootstrap.
        :param target_units: The units for which the treatment effect should be estimated. This can be a string for common specifications of target units (namely, "ate", "att" and "atc"). It can also be a lambda function that can be used as an index for the data (pandas DataFrame). Alternatively, it can be a new DataFrame that contains values of the effect_modifiers and effect will be estimated only for this new data.
-        :param effect_modifiers: Variables on which to compute separate effects, or return a heterogeneous effect function. Not all methods support this currently.
+        :param effect_modifiers: Variables on which to compute separate
-        :param params: (optional) Additional method parameters
+            effects, or return a heterogeneous effect function. Not all
-            num_null_simulations: The number of simulations for testing the statistical significance of the estimator
+            methods support this currently.
-            num_simulations: The number of simulations for finding the confidence interval (and/or standard error) for a estimate
+        :param num_null_simulations: The number of simulations for testing the
-            sample_size_fraction: The size of the sample for the bootstrap estimator
+            statistical significance of the estimator
-            confidence_level: The confidence level of the confidence interval estimate
+        :param num_simulations: The number of simulations for finding the
-            num_quantiles_to_discretize_cont_cols: The number of quantiles into which a numeric effect modifier is split, to enable estimation of conditional treatment effect over it.
+            confidence interval (and/or standard error) for an estimate
        :param sample_size_fraction: The size of the sample for the bootstrap
            estimator
        :param confidence_level: The confidence level of the confidence
            interval estimate
        :param need_conditional_estimates: Boolean flag indicating whether
            conditional estimates should be computed. Defaults to True if
            there are effect modifiers in the graph
        :param num_quantiles_to_discretize_cont_cols: The number of quantiles
            into which a numeric effect modifier is split, to enable
            estimation of conditional treatment effect over it.
        :param kwargs: (optional) Additional estimator-specific parameters
        :returns: an instance of the estimator class.
        """
        self._data = data
        self._target_estimand = identified_estimand
@ -84,14 +103,9 @@ class CausalEstimator:
        self._bootstrap_estimates = None  # for confidence intervals and std error
        self._bootstrap_null_estimates = None  # for significance test
        self._effect_modifiers = None
-        self.method_params = params
+        self.method_params = kwargs
        # Setting the default interpret method
        self.interpret_method = CausalEstimator.DEFAULT_INTERPRET_METHOD
        # Unpacking the keyword arguments
        if params is not None:
            for key, value in params.items():
                setattr(self, key, value)
        self.logger = logging.getLogger(__name__)
@ -114,20 +128,17 @@ class CausalEstimator:
            else:
                self._effect_modifier_names = None
-        # Checking if some parameters were set, otherwise setting to default values
+        # Check if some parameters were set, otherwise set to default values
-        if not hasattr(self, 'num_null_simulations'):
+        self.num_null_simulations = num_null_simulations
-            self.num_null_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST
+        self.num_simulations = num_simulations
-        if not hasattr(self, 'num_simulations'):
+        self.sample_size_fraction = sample_size_fraction
-            self.num_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_CI
+        self.confidence_level = confidence_level
-        if not hasattr(self, 'sample_size_fraction'):
+        self.num_quantiles_to_discretize_cont_cols = \
-            self.sample_size_fraction = CausalEstimator.DEFAULT_SAMPLE_SIZE_FRACTION
+            num_quantiles_to_discretize_cont_cols
        if not hasattr(self, 'confidence_level'):
            self.confidence_level = CausalEstimator.DEFAULT_CONFIDENCE_LEVEL
        if not hasattr(self, 'num_quantiles_to_discretize_cont_cols'):
            self.num_quantiles_to_discretize_cont_cols = CausalEstimator.NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS
        # Estimate conditional estimates by default
-        if not hasattr(self, 'need_conditional_estimates'):
+        self.need_conditional_estimates = need_conditional_estimates \
-            self.need_conditional_estimates = bool(self._effect_modifier_names)
+            if need_conditional_estimates != 'auto' \
            else bool(self._effect_modifier_names)
    @staticmethod
    def get_estimator_object(new_data, identified_estimand, estimate):
@ -158,7 +169,7 @@ class CausalEstimator:
            confidence_intervals=estimate.params["confidence_intervals"],
            target_units=estimate.params["target_units"],
            effect_modifiers=estimate.params["effect_modifiers"],
-            params=estimate.params["method_params"]
+            **estimate.params["method_params"]
        )
        return new_estimator
@ -297,7 +308,6 @@ class CausalEstimator:
        # Perform the set number of simulations
        for index in range(num_bootstrap_simulations):
            new_data = resample(self._data, n_samples=sample_size)
            new_estimator = type(self)(
                new_data,
                self._target_estimand,
@ -310,7 +320,7 @@ class CausalEstimator:
                confidence_intervals=False,
                target_units=self._target_units,
                effect_modifiers=self._effect_modifier_names,
-                params=self.method_params
+                **self.method_params
            )
            new_effect = new_estimator.estimate_effect()
            simulation_results[index] = new_effect.value
@ -504,7 +514,7 @@ class CausalEstimator:
                    confidence_intervals=False,
                    target_units=self._target_units,
                    effect_modifiers=self._effect_modifier_names,
-                    params=self.method_params
+                    **self.method_params
                )
                new_effect = new_estimator.estimate_effect()
                null_estimates[i] = new_effect.value
--- a/dowhy/causal_estimators/causalml.py
+++ b/dowhy/causal_estimators/causalml.py
@ -7,11 +7,27 @@ from importlib import import_module
 import causalml
 class Causalml(CausalEstimator):
    """ Wrapper class for estimators from the causalml library.
-    def __init__(self, *args, **kwargs):
+    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
-        super().__init__(*args, **kwargs)
+    Supports additional parameters as listed below. For specific
    parameters of each estimator, refer to the CausalML docs.
    """
    def __init__(self, *args, causalml_methodname, **kwargs):
        """
        :param causalml_methodname: Fully qualified name of causalml estimator
            class.
        """
        # Required to ensure that self.method_params contains all the information
        # to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self._causalml_methodname = causalml_methodname
        # Add the identification method used in the estimator
        self.identifier_method = self._target_estimand.identifier_method
        self.logger.debug("The identifier method used {}".format(self.identifier_method))
--- a/dowhy/causal_estimators/distance_matching_estimator.py
+++ b/dowhy/causal_estimators/distance_matching_estimator.py
@ -5,12 +5,35 @@ import numpy as np
 from dowhy.causal_estimator import CausalEstimate, CausalEstimator
 class DistanceMatchingEstimator(CausalEstimator):
-    """ Simple matching estimator for binary treatments based on a distance metric.
+    """Simple matching estimator for binary treatments based on a distance
-    """
+    metric.
    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
    Supports additional parameters as listed below.
    """
    # allowed types of distance metric
    Valid_Dist_Metric_Params = ['p', 'V', 'VI', 'w']
-    def __init__(self, *args, **kwargs):
+
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, num_matches_per_unit=1,
                 distance_metric="minkowski", exact_match_cols=None, **kwargs):
        """
        :param num_matches_per_unit: The number of matches per data point.
            Default=1.
        :param distance_metric: Distance metric to use. Default="minkowski"
            that corresponds to Euclidean distance metric with p=2.
        :param exact_match_cols: List of column names whose values should be
        exactly matched. Typically used for columns with discrete values.
        """
        # Required to ensure that self.method_params contains all the
        # parameters to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
            error_msg = str(self.__class__) + "cannot handle more than one treatment variable"
@ -21,15 +44,9 @@ class DistanceMatchingEstimator(CausalEstimator):
            self.logger.error(error_msg)
            raise Exception(error_msg)
-        # Setting the number of matches per data point
+        self.num_matches_per_unit = num_matches_per_unit
-        if getattr(self, 'num_matches_per_unit', None) is None:
+        self.distance_metric = distance_metric
-            self.num_matches_per_unit = 1
+        self.exact_match_cols = exact_match_cols
        # Default distance metric if not provided by the user
        if getattr(self, 'distance_metric', None) is None:
            self.distance_metric = 'minkowski' # corresponds to euclidean metric with p=2
        if getattr(self, 'exact_match_cols', None) is None:
            self.exact_match_cols = None
        self.logger.debug("Back-door variables used:" +
                        ",".join(self._target_estimand.get_backdoor_variables()))
--- a/dowhy/causal_estimators/econml.py
+++ b/dowhy/causal_estimators/econml.py
@ -11,9 +11,27 @@ import econml
 class Econml(CausalEstimator):
    """Wrapper class for estimators from the EconML library.
-    def __init__(self, *args, **kwargs):
+    For a list of standard args and kwargs, see documentation for
-        super().__init__(*args, **kwargs)
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
    Supports additional parameters as listed below. For init and fit
    parameters of each estimator, refer to the EconML docs.
    """
    def __init__(self, *args, econml_methodname,  **kwargs):
        """
        :param econml_methodname: Fully qualified name of econml estimator
            class. For example, 'econml.dml.DML'
        """
        # Required to ensure that self.method_params contains all the
        # parameters to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self._econml_methodname = econml_methodname
        self.logger.info("INFO: Using EconML Estimator")
        self.identifier_method = self._target_estimand.identifier_method
        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
--- a/dowhy/causal_estimators/generalized_linear_model_estimator.py
+++ b/dowhy/causal_estimators/generalized_linear_model_estimator.py
@ -1,10 +1,9 @@
 import numpy as np
 import pandas as pd
 import statsmodels.api as sm
 import itertools
 from dowhy.causal_estimators.regression_estimator import RegressionEstimator
 class GeneralizedLinearModelEstimator(RegressionEstimator):
    """Compute effect of treatment using a generalized linear model such as logistic regression.
@ -13,16 +12,29 @@ class GeneralizedLinearModelEstimator(RegressionEstimator):
    """
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, glm_family=None, predict_score=True, **kwargs):
-        super().__init__(*args, **kwargs)
+        """For a list of args and kwargs, see documentation for
        :class:`~dowhy.causal_estimator.CausalEstimator`.
        :param glm_family: statsmodels family for the generalized linear model.
            For example, use statsmodels.api.families.Binomial() for logistic
            regression or statsmodels.api.families.Poisson() for count data.
        :param predict_score: For models that have a binary output, whether
            to output the model's score or the binary output based on the score.
        """
        # Required to ensure that self.method_params contains all the
        # parameters needed to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Generalized Linear Model Estimator")
-        if self.method_params is not None and 'glm_family' in self.method_params:
+        if glm_family is not None:
-                self.family = self.method_params['glm_family']
+            self.family = glm_family
        else:
            raise ValueError("Need to specify the family for the generalized linear model. Provide a 'glm_family' parameter in method_params, such as statsmodels.api.families.Binomial() for logistic regression.")
-        self.predict_score = True
+        self.predict_score = predict_score
        if self.method_params is not None and 'predict_score' in self.method_params:
                self.predict_score = self.method_params['predict_score']
        # Checking if Y is binary
        outcome_values = self._data[self._outcome_name].astype(int).unique()
        self.outcome_is_binary = all([v in [0,1] for v in outcome_values])
--- a/dowhy/causal_estimators/instrumental_variable_estimator.py
+++ b/dowhy/causal_estimators/instrumental_variable_estimator.py
@ -14,20 +14,29 @@ class InstrumentalVariableEstimator(CausalEstimator):
    This is also a superclass that can be inherited by other specific methods.
    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
-    Supports additional parameters that can be specified in the estimate_effect() method.
+    Supports additional parameters as listed below.
    - 'iv_instrument_name': Name of the specific instrumental variable to be used. Needs to be one of the IVs identified in the identification step. Default is to use all the IV variables from the identification step.
    """
-
+    def __init__(self, *args, iv_instrument_name=None,  **kwargs):
-    def __init__(self, *args, **kwargs):
+        """
-        super().__init__(*args, **kwargs)
+        :param iv_instrument_name: Name of the specific instrumental variable
            to be used. Needs to be one of the IVs identified in the
            identification step. Default is to use all the IV variables
            from the identification step.
        """
        # Required to ensure that self.method_params contains all the information
        # to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        # choosing the instrumental variable to use
-        if getattr(self, 'iv_instrument_name', None) is None:
+        self.estimating_instrument_names = self._target_estimand.instrumental_variables
-            self.estimating_instrument_names = self._target_estimand.instrumental_variables
+        if iv_instrument_name is not None:
-        else:
+            self.estimating_instrument_names = parse_state(iv_instrument_name)
            self.estimating_instrument_names = parse_state(self.iv_instrument_name)
        self.logger.debug("Instrumental Variables used:" +
                          ",".join(self.estimating_instrument_names))
        if not self.estimating_instrument_names:
--- a/dowhy/causal_estimators/linear_regression_estimator.py
+++ b/dowhy/causal_estimators/linear_regression_estimator.py
@ -1,11 +1,9 @@
 import numpy as np
 import pandas as pd
 import statsmodels.api as sm
 from sklearn import linear_model
 import itertools
 from dowhy.causal_estimators.regression_estimator import RegressionEstimator
 class LinearRegressionEstimator(RegressionEstimator):
    """Compute effect of treatment using linear regression.
@ -16,7 +14,17 @@ class LinearRegressionEstimator(RegressionEstimator):
    """
    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+        """For a list of args and kwargs, see documentation for
        :class:`~dowhy.causal_estimator.CausalEstimator`.
        """
        # Required to ensure that self.method_params contains all the
        # parameters to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        print(args_dict)
        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Linear Regression Estimator")
        self._linear_model = self.model
--- a/dowhy/causal_estimators/propensity_score_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_estimator.py
@ -3,24 +3,42 @@ import pandas as pd
 from dowhy.causal_estimator import CausalEstimator
 class PropensityScoreEstimator(CausalEstimator):
    """
-    Base class for estimators that estimate effects based on propensity of treatment assignment.
+    Base class for estimators that estimate effects based on propensity of
-    Supports additional parameters that can be specified in the estimate_effect() method.
+    treatment assignment.
-    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
+
-    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
+    For a list of standard args and kwargs, see documentation for
-    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
    Supports additional parameters as listed below.
    """
-    def __init__(self, *args, propensity_score_model=None, recalculate_propensity_score=True, propensity_score_column="propensity_score", **kwargs):
+    def __init__(self, *args, propensity_score_model=None,
-        super().__init__(*args, **kwargs)
+                 recalculate_propensity_score=True,
                 propensity_score_column="propensity_score", **kwargs):
        """
        :param propensity_score_model: Model used to compute propensity score.
            Can be any classification model that supports fit() and
            predict_proba() methods. If None, LogisticRegression is used.
        :param recalculate_propensity_score: Whether the propensity score
            should be estimated. To use pre-computed propensity scores,
            set this value to False. Default=True.
        :param propensity_score_column: Column name that stores the
            propensity score. Default='propensity_score'
        """
        # Required to ensure that self.method_params contains all the
        # parameters to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        # Enable the user to pass params for a custom propensity model
-        if not hasattr(self, "propensity_score_model"):
+        self.propensity_score_model = propensity_score_model
-            self.propensity_score_model = propensity_score_model
+        self.recalculate_propensity_score = recalculate_propensity_score
-        if not hasattr(self, "recalculate_propensity_score"):
+        self.propensity_score_column = propensity_score_column
            self.recalculate_propensity_score = recalculate_propensity_score
        if not hasattr(self, "propensity_score_column"):
            self.propensity_score_column = propensity_score_column
        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
--- a/dowhy/causal_estimators/propensity_score_matching_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_matching_estimator.py
@ -6,15 +6,15 @@ from dowhy.causal_estimator import CausalEstimate
 from dowhy.causal_estimators.propensity_score_estimator import PropensityScoreEstimator
 class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
-    """ Estimate effect of treatment by finding matching treated and control units based on propensity score.
+    """ Estimate effect of treatment by finding matching treated and control
    units based on propensity score.
    Straightforward application of the back-door criterion.
-    Supports additional parameters that can be specified in the estimate_effect() method.
+    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
-    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
+    Supports additional parameters as listed below.
    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
    """
    def __init__(
@ -24,6 +24,17 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
        recalculate_propensity_score=True,
        propensity_score_column="propensity_score",
        **kwargs):
        """
        :param propensity_score_model: Model used to compute propensity score.
            Can be any classification model that supports fit() and
            predict_proba() methods. If None, LogisticRegression is used.
        :param recalculate_propensity_score: Whether the propensity score
            should be estimated. To use pre-computed propensity scores,
            set this value to False. Default=True.
        :param propensity_score_column: Column name that stores the
            propensity score. Default='propensity_score'
        """
        super().__init__(
            *args,
            propensity_score_model=propensity_score_model,
--- a/dowhy/causal_estimators/propensity_score_stratification_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_stratification_estimator.py
@ -11,13 +11,10 @@ class PropensityScoreStratificationEstimator(PropensityScoreEstimator):
    Straightforward application of the back-door criterion.
-    Supports additional parameters that can be specified in the estimate_effect() method.
+    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
-    - 'num_strata': Number of bins by which data will be stratified. Default=50
+    Supports additional parameters as listed below.
    - 'clipping_threshold': Minimum number of treated or control units per strata. Default=10
    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
    """
@ -30,20 +27,42 @@ class PropensityScoreStratificationEstimator(PropensityScoreEstimator):
        recalculate_propensity_score=True,
        propensity_score_column="propensity_score",
        **kwargs):
        """
        :param num_strata: Number of bins by which data will be stratified.
            Default is automatically determined.
        :param clipping_threshold: Minimum number of treated or control units
            per strata. Default=10
        :param propensity_score_model: The model used to compute propensity
            score. Can be any classification model that supports fit() and
            predict_proba() methods. If None, use
            LogisticRegression model as the default.
        :param recalculate_propensity_score: If true, force the estimator to
            estimate the propensity score. To use pre-computed propensity
            scores, set this value to False. Default=True
        :param propensity_score_column: Column name that stores the propensity
        score. Default='propensity_score'
        """
        # Required to ensure that self.method_params contains all the information
        # to create an object of this class
        args_dict = kwargs
        args_dict.update({
            'num_strata': num_strata,
            'clipping_threshold': clipping_threshold
            })
        super().__init__(
            *args,
            propensity_score_model=propensity_score_model,
            recalculate_propensity_score=recalculate_propensity_score,
            propensity_score_column=propensity_score_column,
-            **kwargs)
+            **args_dict)
        self.logger.info("Using Propensity Score Stratification Estimator")
        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
        self.logger.info(self.symbolic_estimator)
-        if not hasattr(self, 'num_strata'):
+        # setting method-specific parameters
-            self.num_strata = num_strata
+        self.num_strata = num_strata
-        if not hasattr(self, 'clipping_threshold'):
+        self.clipping_threshold = clipping_threshold
            self.clipping_threshold = clipping_threshold
    def _estimate_effect(self):
--- a/dowhy/causal_estimators/propensity_score_weighting_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_weighting_estimator.py
@ -12,17 +12,12 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
    Straightforward application of the back-door criterion.
-    Supports additional parameters that can be specified in the estimate_effect() method.
+    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
-    - 'min_ps_score': Lower bound used to clip the propensity score. Default=0.05
+    Supports additional parameters as listed below.
    - 'max_ps_score': Upper bound used to clip the propensity score. Default=0.95
    - 'weighting_scheme': This is the name of weighting method to use. Can be inverse propensity score ("ips_weight", default), stabilized IPS score ("ips_stabilized_weight"), or normalized IPS score ("ips_normalized_weight")
    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
    """
    def __init__(
        self,
        *args,
@ -33,24 +28,48 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
        recalculate_propensity_score=True,
        propensity_score_column="propensity_score",
        **kwargs):
        """
        :param min_ps_score: Lower bound used to clip the propensity score.
            Default=0.05
        :param max_ps_score: Upper bound used to clip the propensity score.
            Default=0.95
        :param weighting_scheme: Weighting method to use. Can be inverse
            propensity score ("ips_weight", default), stabilized IPS score
            ("ips_stabilized_weight"), or normalized IPS score
            ("ips_normalized_weight").
        :param propensity_score_model: The model used to compute propensity
            score. Can be any classification model that supports fit() and
            predict_proba() methods. If None, use LogisticRegression model as
            the default. Default=None
        :param recalculate_propensity_score: If true, force the estimator to
            estimate the propensity score. To use pre-computed propensity
            scores, set this value to false. Default=True
        :param propensity_score_column: Column name that stores the
            propensity score. Default='propensity_score'
        """
        # Required to ensure that self.method_params contains all the information
        # to create an object of this class
        args_dict = kwargs
        args_dict.update({
            'min_ps_score': min_ps_score,
            'max_ps_score': max_ps_score,
            'weighting_scheme': weighting_scheme
            })
        super().__init__(
            *args,
            propensity_score_model=propensity_score_model,
            recalculate_propensity_score=recalculate_propensity_score,
            propensity_score_column=propensity_score_column,
-            **kwargs)
+            **args_dict)
        self.logger.info("INFO: Using Propensity Score Weighting Estimator")
        self.symbolic_estimator = self.construct_symbolic_estimator(
            self._target_estimand)
        self.logger.info(self.symbolic_estimator)
-        if not hasattr(self, "weighting_scheme"):
+        # Setting method specific parameters
-            # 'ips_weight', 'ips_normalized_weight', 'ips_stabilized_weight'
+        self.weighting_scheme = weighting_scheme
-            self.weighting_scheme = weighting_scheme
+        self.min_ps_score = min_ps_score
-        if not hasattr(self, "min_ps_score"):
+        self.max_ps_score = max_ps_score
            self.min_ps_score = min_ps_score
        if not hasattr(self, "max_ps_score"):
            self.max_ps_score = max_ps_score
    def _estimate_effect(self):
        if self.recalculate_propensity_score is True:
--- a/dowhy/causal_estimators/regression_discontinuity_estimator.py
+++ b/dowhy/causal_estimators/regression_discontinuity_estimator.py
@ -11,17 +11,34 @@ class RegressionDiscontinuityEstimator(CausalEstimator):
    Estimates effect by transforming the problem to an instrumental variables
    problem.
-    Supports additional parameters that can be specified in the estimate_effect() method.
+    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
-    * 'rd_variable_name': name of the variable on which the discontinuity occurs. This is the instrument.
+    Supports additional parameters as listed below.
    * 'rd_threshold_value': Threshold at which the discontinuity occurs.
    * 'rd_bandwidth': Distance from the threshold within which confounders can be considered the same between treatment and control. Considered band is (threshold +- bandwidth)
    """
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, rd_variable_name=None,
-        super().__init__(*args, **kwargs)
+            rd_threshold_value=None, rd_bandwidth=None, **kwargs):
        """
        :param rd_variable_name: Name of the variable on which the
            discontinuity occurs. This is the instrument.
        :param rd_threshold_value: Threshold at which the discontinuity occurs.
        :param rd_bandwidth: Distance from the threshold within which
            confounders can be considered the same between treatment and
            control. Considered band is (threshold +- bandwidth)
        """
        # Required to ensure that self.method_params contains all the information
        # to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self.logger.info("Using Regression Discontinuity Estimator")
        self.rd_variable_name = rd_variable_name
        self.rd_threshold_value = rd_threshold_value
        self.rd_bandwidth = rd_bandwidth
        self.rd_variable = self._data[self.rd_variable_name]
        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
@ -46,7 +63,7 @@ class RegressionDiscontinuityEstimator(CausalEstimator):
            ['local_treatment'],
            ['local_outcome'],
            test_significance=self._significance_test,
-            params={'iv_instrument_name': 'local_rd_variable'}
+            iv_instrument_name='local_rd_variable'
        )
        est = iv_estimator.estimate_effect()
        return est
--- a/dowhy/causal_estimators/regression_estimator.py
+++ b/dowhy/causal_estimators/regression_estimator.py
@ -8,11 +8,20 @@ from dowhy.causal_estimator import CausalEstimator
 class RegressionEstimator(CausalEstimator):
    """Compute effect of treatment using some regression function.
-    Fits a regression model for estimating the outcome using treatment(s) and confounders.
+    Fits a regression model for estimating the outcome using treatment(s) and
    confounders.
    Base class for all regression models, inherited by
    LinearRegressionEstimator and GeneralizedLinearModelEstimator.
    """
    def __init__(self, *args, **kwargs):
        """For a list of standard args and kwargs, see documentation for
        :class:`~dowhy.causal_estimator.CausalEstimator`.
        """
        super().__init__(*args, **kwargs)
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.get_backdoor_variables()))
--- a/dowhy/causal_estimators/two_stage_regression_estimator.py
+++ b/dowhy/causal_estimators/two_stage_regression_estimator.py
@ -8,16 +8,39 @@ from dowhy.causal_identifier import CausalIdentifier
 from dowhy.causal_estimators.linear_regression_estimator import LinearRegressionEstimator
 from dowhy.utils.api import parse_state
 class TwoStageRegressionEstimator(CausalEstimator):
-    """Compute treatment effect whenever the effect is fully mediated by another variable (front-door) or when there is an instrument available.
+    """Compute treatment effect whenever the effect is fully mediated by
    another variable (front-door) or when there is an instrument available.
    Currently only supports a linear model for the effects.
    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.
    Supports additional parameters as listed below.
    """
    # First stage statistical model
    DEFAULT_FIRST_STAGE_MODEL = LinearRegressionEstimator
    # Second stage statistical model
    DEFAULT_SECOND_STAGE_MODEL = LinearRegressionEstimator
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, first_stage_model=None,
-        super().__init__(*args, **kwargs)
+                 second_stage_model=None, **kwargs):
        """
        :param first_stage_model: First stage estimator to be used. Default is
            linear regression.
        :param second_stage_model: Second stage estimator to be used. Default
            is linear regression.
        """
        # Required to ensure that self.method_params contains all the
        # parameters needed to create an object of this class
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Two Stage Regression Estimator")
        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
@ -58,29 +81,18 @@ class TwoStageRegressionEstimator(CausalEstimator):
                error_msg = "No instrumental variable present. Two stage regression is not applicable"
                self.logger.error(error_msg)
-        if 'first_stage_model' in self.method_params:
+        if first_stage_model is not None:
-            self.first_stage_model = self.method_params['first_stage_model']
+            self.first_stage_model = first_stage_model
        else:
            self.first_stage_model = self.__class__.DEFAULT_FIRST_STAGE_MODEL
            self.logger.warning("First stage model not provided. Defaulting to sklearn.linear_model.LinearRegression.")
-        if 'second_stage_model' in self.method_params:
+        if second_stage_model is not None:
-            self.second_stage_model = self.method_params['second_stage_model']
+            self.second_stage_model = second_stage_model
        else:
            self.second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL
            self.logger.warning("Second stage model not provided. Defaulting to backdoor.linear_regression.")
    def _estimate_effect(self):
        #first_stage_features = self.build_first_stage_features()
        #fs_model = self.first_stage_model()
        #if self._target_estimand.identifier_method=="frontdoor":
        #    first_stage_outcome = self._frontdoor_variables
        #elif self._target_estimand.identifier_method=="mediation":
        #    first_stage_outcome = self._mediators
        #fs_model.fit(first_stage_features, self._frontdoor_variables)
        #self.logger.debug("Coefficients of the fitted model: " +
        #                  ",".join(map(str, fs_model.coef_)))
        #residuals = self._frontdoor_variables - fs_model.predict(first_stage_features)
        #self._data["residual"] = residuals
        estimate_value = None
        # First stage
        modified_target_estimand = copy.deepcopy(self._target_estimand)
@ -102,7 +114,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
                 confidence_intervals = self._confidence_intervals,
                 target_units=self._target_units,
                 effect_modifiers=self._effect_modifier_names,
-                 params=self.method_params)._estimate_effect()
+                 **self.method_params)._estimate_effect()
        # Second Stage
        modified_target_estimand = copy.deepcopy(self._target_estimand)
@ -124,7 +136,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
                 confidence_intervals = self._confidence_intervals,
                 target_units=self._target_units,
                 effect_modifiers=self._effect_modifier_names,
-                 params=self.method_params)._estimate_effect()
+                 **self.method_params)._estimate_effect()
        # Combining the two estimates
        natural_indirect_effect = first_stage_estimate.value * second_stage_estimate.value
        # This same estimate is valid for frontdoor as well as mediation (NIE)
@ -149,7 +161,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
                     confidence_intervals = self._confidence_intervals,
                     target_units=self._target_units,
                     effect_modifiers=self._effect_modifier_names,
-                     params=self.method_params)._estimate_effect()
+                     **self.method_params)._estimate_effect()
            natural_direct_effect = total_effect_estimate.value - natural_indirect_effect
            estimate_value = natural_direct_effect
            self.symbolic_estimator = self.construct_symbolic_estimator(
--- a/dowhy/causal_identifier.py
+++ b/dowhy/causal_identifier.py
@ -10,14 +10,10 @@ from dowhy.utils.api import parse_state
 class CausalIdentifier:
    """Class that implements different identification methods.
    Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.
    Other specific ways of identification, such as the ID* algorithm, minimal adjustment criteria, etc. will be added in the future.
    If you'd like to contribute, please raise an issue or a pull request on Github.
    """
    NONPARAMETRIC_ATE="nonparametric-ate"
    NONPARAMETRIC_NDE="nonparametric-nde"
--- a/dowhy/causal_model.py
+++ b/dowhy/causal_model.py
@ -268,8 +268,9 @@ class CausalModel:
                    if method_params is None:
                        method_params = {}
                    # Define the third-party estimation method to be used
-                    method_params["_" + third_party_estimator_package + "_methodname"] = estimator_name
+                    method_params[third_party_estimator_package + "_methodname"] = estimator_name
            else: # For older dowhy methods
                print(estimator_name)
                # Process the dowhy estimators
                causal_estimator_class = causal_estimators.get_class_object(estimator_name + "_estimator")
        if identified_estimand.no_directed_path:
@ -291,6 +292,8 @@ class CausalModel:
                    extra_args = method_params.get("init_params", {})
                else:
                    extra_args = {}
                if method_params is None:
                    method_params = {}
                self.causal_estimator = causal_estimator_class(
                    self._data,
                    identified_estimand,
@ -302,7 +305,7 @@ class CausalModel:
                    confidence_intervals = confidence_intervals,
                    target_units = target_units,
                    effect_modifiers = effect_modifiers,
-                    params=method_params,
+                    **method_params,
                    **extra_args)
            else:
                # Estimator had been computed in a previous call
@ -334,9 +337,11 @@ class CausalModel:
        :param identified_estimand: a probability expression
            that represents the effect to be estimated. Output of
            CausalModel.identify_effect method
-        :param method_name: any of the estimation method to be used. See docs for estimate_effect method for a list of supported estimation methods.
+        :param method_name: any of the estimation method to be used. See docs
            for estimate_effect method for a list of supported estimation methods.
        :param fit_estimator: Boolean flag on whether to fit the estimator.
-            Setting it to False is useful to compute the do-operation on new data using a previously fitted estimator.
+            Setting it to False is useful to compute the do-operation on new
            data using a previously fitted estimator.
        :param method_params: Dictionary containing any method-specific parameters. These are passed directly to the estimating method.
        :returns: an instance of the CausalEstimate class, containing the causal effect estimate
@ -369,7 +374,7 @@ class CausalModel:
                    identified_estimand,
                    self._treatment, self._outcome,
                    test_significance=False,
-                    params=method_params
+                    **method_params
                )
            else:
                # Estimator had been computed in a previous call
@ -469,12 +474,11 @@ class CausalModel:
        :param k: number of covariates in set Z 
        :param independence_test: dictionary containing methods to test conditional independence in data
        :param independence_constraints: list of implications to be tested, supplied by the user in the format
-        [(x,y,(z1,z2)),
+            [(x,y,(z1,z2)),
-        (x,y, (z3,))
+            (x,y, (z3,))
-        ]
+            ]
        :returns: an instance of GraphRefuter class
        """
        if independence_test is not None:
            test_for_continuous = independence_test['test_for_continuous']
            test_for_discrete = independence_test['test_for_discrete']
--- a/tests/causal_estimators/base.py
+++ b/tests/causal_estimators/base.py
@ -65,8 +65,9 @@ class TestEstimator(object):
            confidence_intervals = confidence_intervals,
            target_units = "ate",
            effect_modifiers = data["effect_modifier_names"],
-            params=method_params
+            **method_params
        )
        true_ate = data["ate"]
        ate_estimate = estimator_ate.estimate_effect()
        str(ate_estimate) # checking if str output is correctly created
--- a/tests/causal_estimators/test_generalized_linear_model_estimator.py
+++ b/tests/causal_estimators/test_generalized_linear_model_estimator.py
@ -28,7 +28,7 @@ class TestGeneralizedLinearModelEstimator(object):
                confidence_intervals=[True,],
                test_significance=[True,],
                method_params={
-                    'num_ci_simulations': 10,
+                    'num_simulations': 10,
                    'num_null_simulations': 10,
                    'glm_family': sm.families.Binomial(),
                    'predict_score': True
--- a/tests/causal_estimators/test_instrumental_variable_estimator.py
+++ b/tests/causal_estimators/test_instrumental_variable_estimator.py
@ -32,7 +32,7 @@ class TestInstrumentalVariableEstimator(object):
        configs = [dict(zip(keys, v)) for v in itertools.product(*values)]
        for cfg in configs:
            print("\nConfig:", cfg)
-            cfg['method_params']= None
+            cfg['method_params']= {}
            if cfg["num_instruments"] >= cfg["num_treatments"]:
                estimator_tester.average_treatment_effect_test(**cfg)
            else:
--- a/tests/causal_estimators/test_linear_regression_estimator.py
+++ b/tests/causal_estimators/test_linear_regression_estimator.py
@ -37,7 +37,7 @@ class TestLinearRegressionEstimator(object):
                confidence_intervals=[True,],
                test_significance=[True,],
                method_params={
-                    'num_ci_simulations': 10,
+                    'num_simulations': 10,
                    'num_null_simulations': 10
                    }
                )
--- a/tests/causal_estimators/test_propensity_score_matching_estimator.py
+++ b/tests/causal_estimators/test_propensity_score_matching_estimator.py
@ -25,7 +25,7 @@ class TestPropensityScoreMatchingEstimator(object):
                confidence_intervals=[False,],
                test_significance=[False,],
                method_params={
-                    'num_ci_simulations': 10,
+                    'num_simulations': 10,
                    'num_null_simulations': 10
                    }
                )
--- a/tests/causal_estimators/test_propensity_score_stratification_estimator.py
+++ b/tests/causal_estimators/test_propensity_score_stratification_estimator.py
@ -25,7 +25,7 @@ class TestPropensityScoreStratificationEstimator(object):
                confidence_intervals=[True,],
                test_significance=[True,],
                method_params={
-                    'num_ci_simulations': 10,
+                    'num_simulations': 10,
                    'num_null_simulations': 10
                    }
                )
--- a/tests/causal_estimators/test_propensity_score_weighting_estimator.py
+++ b/tests/causal_estimators/test_propensity_score_weighting_estimator.py
@ -26,10 +26,10 @@ class TestPropensityScoreWeightingEstimator(object):
                num_treatments=num_treatments,
                treatment_is_binary=treatment_is_binary,
                outcome_is_binary=outcome_is_binary,
-                confidence_intervals=[True,],
+                confidence_intervals=[True, ],
-                test_significance=[True,],
+                test_significance=[True, ],
                method_params={
-                    'num_ci_simulations': 10,
+                    'num_simulations': 1,
-                    'num_null_simulations': 10
+                    'num_null_simulations': 1
                    }
                )
--- a/tests/causal_estimators/test_two_stage_regression_estimator.py
+++ b/tests/causal_estimators/test_two_stage_regression_estimator.py
@ -28,7 +28,7 @@ class TestTwoStageRegressionEstimator(object):
                confidence_intervals=[True,],
                test_significance=[False,],
                method_params={
-                    'num_ci_simulations': 10,
+                    'num_simulations': 10,
                    'num_null_simulations': 10
                    }
                )