Better docs: updated the method signature and docstrings for estimator classes (#389)
* updated the method signature and docstrings for estimator classes * bug fixes * updated the args passing * fixed the bug in refutation test calls for num_ci_simulations * used std init args * updated num simulations to pass weighting test
This commit is contained in:
Родитель
5ba0a4ab94
Коммит
77906cd4ed
|
@ -197,3 +197,7 @@ epub_exclude_files = ['search.html']
|
||||||
|
|
||||||
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
||||||
todo_include_todos = True
|
todo_include_todos = True
|
||||||
|
|
||||||
|
# init docstrings should also be included in class
|
||||||
|
autoclass_content = "both"
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,14 @@ dowhy.causal\_refuters.dummy\_outcome\_refuter module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
dowhy.causal\_refuters.graph\_refuter module
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: dowhy.causal_refuters.graph_refuter
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
dowhy.causal\_refuters.placebo\_treatment\_refuter module
|
dowhy.causal\_refuters.placebo\_treatment\_refuter module
|
||||||
---------------------------------------------------------
|
---------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,14 @@ dowhy.utils.api module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
dowhy.utils.cit module
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
.. automodule:: dowhy.utils.cit
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
dowhy.utils.cli\_helpers module
|
dowhy.utils.cli\_helpers module
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
|
|
|
@ -38,12 +38,21 @@ class CausalEstimator:
|
||||||
|
|
||||||
DEFAULT_INTERPRET_METHOD = ["textual_effect_interpreter"]
|
DEFAULT_INTERPRET_METHOD = ["textual_effect_interpreter"]
|
||||||
|
|
||||||
|
# std args to be removed from locals() before being passed to args_dict
|
||||||
|
_STD_INIT_ARGS = ('self', '__class__', 'args', 'kwargs')
|
||||||
|
|
||||||
def __init__(self, data, identified_estimand, treatment, outcome,
|
def __init__(self, data, identified_estimand, treatment, outcome,
|
||||||
control_value=0, treatment_value=1,
|
control_value=0, treatment_value=1,
|
||||||
test_significance=False, evaluate_effect_strength=False,
|
test_significance=False, evaluate_effect_strength=False,
|
||||||
confidence_intervals=False,
|
confidence_intervals=False,
|
||||||
target_units=None, effect_modifiers=None,
|
target_units=None, effect_modifiers=None,
|
||||||
params=None):
|
num_null_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST,
|
||||||
|
num_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_CI,
|
||||||
|
sample_size_fraction=DEFAULT_SAMPLE_SIZE_FRACTION,
|
||||||
|
confidence_level=DEFAULT_CONFIDENCE_LEVEL,
|
||||||
|
need_conditional_estimates='auto',
|
||||||
|
num_quantiles_to_discretize_cont_cols=NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS,
|
||||||
|
**kwargs):
|
||||||
"""Initializes an estimator with data and names of relevant variables.
|
"""Initializes an estimator with data and names of relevant variables.
|
||||||
|
|
||||||
This method is called from the constructors of its child classes.
|
This method is called from the constructors of its child classes.
|
||||||
|
@ -55,19 +64,29 @@ class CausalEstimator:
|
||||||
:param outcome: name of the outcome variable
|
:param outcome: name of the outcome variable
|
||||||
:param control_value: Value of the treatment in the control group, for effect estimation. If treatment is multi-variate, this can be a list.
|
:param control_value: Value of the treatment in the control group, for effect estimation. If treatment is multi-variate, this can be a list.
|
||||||
:param treatment_value: Value of the treatment in the treated group, for effect estimation. If treatment is multi-variate, this can be a list.
|
:param treatment_value: Value of the treatment in the treated group, for effect estimation. If treatment is multi-variate, this can be a list.
|
||||||
:param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations that can be specified through the params dictionary. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
|
:param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
|
||||||
:param evaluate_effect_strength: (Experimental) whether to evaluate the strength of effect
|
:param evaluate_effect_strength: (Experimental) whether to evaluate the strength of effect
|
||||||
:param confidence_intervals: Binary flag or a string indicating whether the confidence intervals should be computed and which method should be used. All methods support estimation of confidence intervals using the bootstrap method by using the parameter confidence_intervals="bootstrap". The bootstrap method takes in two arguments (num_simulations and sample_size_fraction) that can be optionally specified in the params dictionary. Estimators may also override this to implement their own confidence interval method. If this parameter is False, no confidence intervals are computed. If True, confidence intervals are computed by the estimator's specific method if available, otherwise through bootstrap.
|
:param confidence_intervals: Binary flag or a string indicating whether the confidence intervals should be computed and which method should be used. All methods support estimation of confidence intervals using the bootstrap method by using the parameter confidence_intervals="bootstrap". The bootstrap method takes in two optional arguments (num_simulations and sample_size_fraction). Estimators may also override this to implement their own confidence interval method. If this parameter is False, no confidence intervals are computed. If True, confidence intervals are computed by the estimator's specific method if available, otherwise through bootstrap.
|
||||||
:param target_units: The units for which the treatment effect should be estimated. This can be a string for common specifications of target units (namely, "ate", "att" and "atc"). It can also be a lambda function that can be used as an index for the data (pandas DataFrame). Alternatively, it can be a new DataFrame that contains values of the effect_modifiers and effect will be estimated only for this new data.
|
:param target_units: The units for which the treatment effect should be estimated. This can be a string for common specifications of target units (namely, "ate", "att" and "atc"). It can also be a lambda function that can be used as an index for the data (pandas DataFrame). Alternatively, it can be a new DataFrame that contains values of the effect_modifiers and effect will be estimated only for this new data.
|
||||||
:param effect_modifiers: Variables on which to compute separate effects, or return a heterogeneous effect function. Not all methods support this currently.
|
:param effect_modifiers: Variables on which to compute separate
|
||||||
:param params: (optional) Additional method parameters
|
effects, or return a heterogeneous effect function. Not all
|
||||||
num_null_simulations: The number of simulations for testing the statistical significance of the estimator
|
methods support this currently.
|
||||||
num_simulations: The number of simulations for finding the confidence interval (and/or standard error) for a estimate
|
:param num_null_simulations: The number of simulations for testing the
|
||||||
sample_size_fraction: The size of the sample for the bootstrap estimator
|
statistical significance of the estimator
|
||||||
confidence_level: The confidence level of the confidence interval estimate
|
:param num_simulations: The number of simulations for finding the
|
||||||
num_quantiles_to_discretize_cont_cols: The number of quantiles into which a numeric effect modifier is split, to enable estimation of conditional treatment effect over it.
|
confidence interval (and/or standard error) for an estimate
|
||||||
|
:param sample_size_fraction: The size of the sample for the bootstrap
|
||||||
|
estimator
|
||||||
|
:param confidence_level: The confidence level of the confidence
|
||||||
|
interval estimate
|
||||||
|
:param need_conditional_estimates: Boolean flag indicating whether
|
||||||
|
conditional estimates should be computed. Defaults to 'auto', which is True if
|
||||||
|
there are effect modifiers in the graph
|
||||||
|
:param num_quantiles_to_discretize_cont_cols: The number of quantiles
|
||||||
|
into which a numeric effect modifier is split, to enable
|
||||||
|
estimation of conditional treatment effect over it.
|
||||||
|
:param kwargs: (optional) Additional estimator-specific parameters
|
||||||
:returns: an instance of the estimator class.
|
:returns: an instance of the estimator class.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self._data = data
|
self._data = data
|
||||||
self._target_estimand = identified_estimand
|
self._target_estimand = identified_estimand
|
||||||
|
@ -84,14 +103,9 @@ class CausalEstimator:
|
||||||
self._bootstrap_estimates = None # for confidence intervals and std error
|
self._bootstrap_estimates = None # for confidence intervals and std error
|
||||||
self._bootstrap_null_estimates = None # for significance test
|
self._bootstrap_null_estimates = None # for significance test
|
||||||
self._effect_modifiers = None
|
self._effect_modifiers = None
|
||||||
self.method_params = params
|
self.method_params = kwargs
|
||||||
|
|
||||||
# Setting the default interpret method
|
# Setting the default interpret method
|
||||||
self.interpret_method = CausalEstimator.DEFAULT_INTERPRET_METHOD
|
self.interpret_method = CausalEstimator.DEFAULT_INTERPRET_METHOD
|
||||||
# Unpacking the keyword arguments
|
|
||||||
if params is not None:
|
|
||||||
for key, value in params.items():
|
|
||||||
setattr(self, key, value)
|
|
||||||
|
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -114,20 +128,17 @@ class CausalEstimator:
|
||||||
else:
|
else:
|
||||||
self._effect_modifier_names = None
|
self._effect_modifier_names = None
|
||||||
|
|
||||||
# Checking if some parameters were set, otherwise setting to default values
|
# Store the simulation, sampling, and discretization settings passed to the constructor
|
||||||
if not hasattr(self, 'num_null_simulations'):
|
self.num_null_simulations = num_null_simulations
|
||||||
self.num_null_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST
|
self.num_simulations = num_simulations
|
||||||
if not hasattr(self, 'num_simulations'):
|
self.sample_size_fraction = sample_size_fraction
|
||||||
self.num_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_CI
|
self.confidence_level = confidence_level
|
||||||
if not hasattr(self, 'sample_size_fraction'):
|
self.num_quantiles_to_discretize_cont_cols = \
|
||||||
self.sample_size_fraction = CausalEstimator.DEFAULT_SAMPLE_SIZE_FRACTION
|
num_quantiles_to_discretize_cont_cols
|
||||||
if not hasattr(self, 'confidence_level'):
|
|
||||||
self.confidence_level = CausalEstimator.DEFAULT_CONFIDENCE_LEVEL
|
|
||||||
if not hasattr(self, 'num_quantiles_to_discretize_cont_cols'):
|
|
||||||
self.num_quantiles_to_discretize_cont_cols = CausalEstimator.NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS
|
|
||||||
# Estimate conditional estimates by default
|
# Estimate conditional estimates by default
|
||||||
if not hasattr(self, 'need_conditional_estimates'):
|
self.need_conditional_estimates = need_conditional_estimates \
|
||||||
self.need_conditional_estimates = bool(self._effect_modifier_names)
|
if need_conditional_estimates != 'auto' \
|
||||||
|
else bool(self._effect_modifier_names)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_estimator_object(new_data, identified_estimand, estimate):
|
def get_estimator_object(new_data, identified_estimand, estimate):
|
||||||
|
@ -158,7 +169,7 @@ class CausalEstimator:
|
||||||
confidence_intervals=estimate.params["confidence_intervals"],
|
confidence_intervals=estimate.params["confidence_intervals"],
|
||||||
target_units=estimate.params["target_units"],
|
target_units=estimate.params["target_units"],
|
||||||
effect_modifiers=estimate.params["effect_modifiers"],
|
effect_modifiers=estimate.params["effect_modifiers"],
|
||||||
params=estimate.params["method_params"]
|
**estimate.params["method_params"]
|
||||||
)
|
)
|
||||||
|
|
||||||
return new_estimator
|
return new_estimator
|
||||||
|
@ -297,7 +308,6 @@ class CausalEstimator:
|
||||||
# Perform the set number of simulations
|
# Perform the set number of simulations
|
||||||
for index in range(num_bootstrap_simulations):
|
for index in range(num_bootstrap_simulations):
|
||||||
new_data = resample(self._data, n_samples=sample_size)
|
new_data = resample(self._data, n_samples=sample_size)
|
||||||
|
|
||||||
new_estimator = type(self)(
|
new_estimator = type(self)(
|
||||||
new_data,
|
new_data,
|
||||||
self._target_estimand,
|
self._target_estimand,
|
||||||
|
@ -310,7 +320,7 @@ class CausalEstimator:
|
||||||
confidence_intervals=False,
|
confidence_intervals=False,
|
||||||
target_units=self._target_units,
|
target_units=self._target_units,
|
||||||
effect_modifiers=self._effect_modifier_names,
|
effect_modifiers=self._effect_modifier_names,
|
||||||
params=self.method_params
|
**self.method_params
|
||||||
)
|
)
|
||||||
new_effect = new_estimator.estimate_effect()
|
new_effect = new_estimator.estimate_effect()
|
||||||
simulation_results[index] = new_effect.value
|
simulation_results[index] = new_effect.value
|
||||||
|
@ -504,7 +514,7 @@ class CausalEstimator:
|
||||||
confidence_intervals=False,
|
confidence_intervals=False,
|
||||||
target_units=self._target_units,
|
target_units=self._target_units,
|
||||||
effect_modifiers=self._effect_modifier_names,
|
effect_modifiers=self._effect_modifier_names,
|
||||||
params=self.method_params
|
**self.method_params
|
||||||
)
|
)
|
||||||
new_effect = new_estimator.estimate_effect()
|
new_effect = new_estimator.estimate_effect()
|
||||||
null_estimates[i] = new_effect.value
|
null_estimates[i] = new_effect.value
|
||||||
|
|
|
@ -7,11 +7,27 @@ from importlib import import_module
|
||||||
import causalml
|
import causalml
|
||||||
|
|
||||||
class Causalml(CausalEstimator):
|
class Causalml(CausalEstimator):
|
||||||
|
""" Wrapper class for estimators from the causalml library.
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
super().__init__(*args, **kwargs)
|
Supports additional parameters as listed below. For specific
|
||||||
|
parameters of each estimator, refer to the CausalML docs.
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, *args, causalml_methodname, **kwargs):
|
||||||
|
"""
|
||||||
|
:param causalml_methodname: Fully qualified name of causalml estimator
|
||||||
|
class.
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the information
|
||||||
|
# to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
|
self._causalml_methodname = causalml_methodname
|
||||||
# Add the identification method used in the estimator
|
# Add the identification method used in the estimator
|
||||||
self.identifier_method = self._target_estimand.identifier_method
|
self.identifier_method = self._target_estimand.identifier_method
|
||||||
self.logger.debug("The identifier method used {}".format(self.identifier_method))
|
self.logger.debug("The identifier method used {}".format(self.identifier_method))
|
||||||
|
|
|
@ -5,12 +5,35 @@ import numpy as np
|
||||||
from dowhy.causal_estimator import CausalEstimate, CausalEstimator
|
from dowhy.causal_estimator import CausalEstimate, CausalEstimator
|
||||||
|
|
||||||
class DistanceMatchingEstimator(CausalEstimator):
|
class DistanceMatchingEstimator(CausalEstimator):
|
||||||
""" Simple matching estimator for binary treatments based on a distance metric.
|
"""Simple matching estimator for binary treatments based on a distance
|
||||||
"""
|
metric.
|
||||||
|
|
||||||
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
Supports additional parameters as listed below.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# allowed types of distance metric
|
||||||
Valid_Dist_Metric_Params = ['p', 'V', 'VI', 'w']
|
Valid_Dist_Metric_Params = ['p', 'V', 'VI', 'w']
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
def __init__(self, *args, num_matches_per_unit=1,
|
||||||
|
distance_metric="minkowski", exact_match_cols=None, **kwargs):
|
||||||
|
"""
|
||||||
|
:param num_matches_per_unit: The number of matches per data point.
|
||||||
|
Default=1.
|
||||||
|
:param distance_metric: Distance metric to use. Default="minkowski"
|
||||||
|
that corresponds to Euclidean distance metric with p=2.
|
||||||
|
:param exact_match_cols: List of column names whose values should be
|
||||||
|
exactly matched. Typically used for columns with discrete values.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the
|
||||||
|
# parameters to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
# Check if the treatment is one-dimensional
|
# Check if the treatment is one-dimensional
|
||||||
if len(self._treatment_name) > 1:
|
if len(self._treatment_name) > 1:
|
||||||
error_msg = str(self.__class__) + "cannot handle more than one treatment variable"
|
error_msg = str(self.__class__) + "cannot handle more than one treatment variable"
|
||||||
|
@ -21,15 +44,9 @@ class DistanceMatchingEstimator(CausalEstimator):
|
||||||
self.logger.error(error_msg)
|
self.logger.error(error_msg)
|
||||||
raise Exception(error_msg)
|
raise Exception(error_msg)
|
||||||
|
|
||||||
# Setting the number of matches per data point
|
self.num_matches_per_unit = num_matches_per_unit
|
||||||
if getattr(self, 'num_matches_per_unit', None) is None:
|
self.distance_metric = distance_metric
|
||||||
self.num_matches_per_unit = 1
|
self.exact_match_cols = exact_match_cols
|
||||||
# Default distance metric if not provided by the user
|
|
||||||
if getattr(self, 'distance_metric', None) is None:
|
|
||||||
self.distance_metric = 'minkowski' # corresponds to euclidean metric with p=2
|
|
||||||
|
|
||||||
if getattr(self, 'exact_match_cols', None) is None:
|
|
||||||
self.exact_match_cols = None
|
|
||||||
|
|
||||||
self.logger.debug("Back-door variables used:" +
|
self.logger.debug("Back-door variables used:" +
|
||||||
",".join(self._target_estimand.get_backdoor_variables()))
|
",".join(self._target_estimand.get_backdoor_variables()))
|
||||||
|
|
|
@ -11,9 +11,27 @@ import econml
|
||||||
|
|
||||||
|
|
||||||
class Econml(CausalEstimator):
|
class Econml(CausalEstimator):
|
||||||
|
"""Wrapper class for estimators from the EconML library.
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
For a list of standard args and kwargs, see documentation for
|
||||||
super().__init__(*args, **kwargs)
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
Supports additional parameters as listed below. For init and fit
|
||||||
|
parameters of each estimator, refer to the EconML docs.
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, *args, econml_methodname, **kwargs):
|
||||||
|
"""
|
||||||
|
:param econml_methodname: Fully qualified name of econml estimator
|
||||||
|
class. For example, 'econml.dml.DML'
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the
|
||||||
|
# parameters to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
|
self._econml_methodname = econml_methodname
|
||||||
self.logger.info("INFO: Using EconML Estimator")
|
self.logger.info("INFO: Using EconML Estimator")
|
||||||
self.identifier_method = self._target_estimand.identifier_method
|
self.identifier_method = self._target_estimand.identifier_method
|
||||||
self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
|
self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import statsmodels.api as sm
|
import statsmodels.api as sm
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from dowhy.causal_estimators.regression_estimator import RegressionEstimator
|
from dowhy.causal_estimators.regression_estimator import RegressionEstimator
|
||||||
|
|
||||||
|
|
||||||
class GeneralizedLinearModelEstimator(RegressionEstimator):
|
class GeneralizedLinearModelEstimator(RegressionEstimator):
|
||||||
"""Compute effect of treatment using a generalized linear model such as logistic regression.
|
"""Compute effect of treatment using a generalized linear model such as logistic regression.
|
||||||
|
|
||||||
|
@ -13,16 +12,29 @@ class GeneralizedLinearModelEstimator(RegressionEstimator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, glm_family=None, predict_score=True, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
"""For a list of args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
:param glm_family: statsmodels family for the generalized linear model.
|
||||||
|
For example, use statsmodels.api.families.Binomial() for logistic
|
||||||
|
regression or statsmodels.api.families.Poisson() for count data.
|
||||||
|
:param predict_score: For models that have a binary output, whether
|
||||||
|
to output the model's score or the binary output based on the score.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the
|
||||||
|
# parameters needed to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
self.logger.info("INFO: Using Generalized Linear Model Estimator")
|
self.logger.info("INFO: Using Generalized Linear Model Estimator")
|
||||||
if self.method_params is not None and 'glm_family' in self.method_params:
|
if glm_family is not None:
|
||||||
self.family = self.method_params['glm_family']
|
self.family = glm_family
|
||||||
else:
|
else:
|
||||||
raise ValueError("Need to specify the family for the generalized linear model. Provide a 'glm_family' parameter in method_params, such as statsmodels.api.families.Binomial() for logistic regression.")
|
raise ValueError("Need to specify the family for the generalized linear model. Provide a 'glm_family' parameter in method_params, such as statsmodels.api.families.Binomial() for logistic regression.")
|
||||||
self.predict_score = True
|
self.predict_score = predict_score
|
||||||
if self.method_params is not None and 'predict_score' in self.method_params:
|
|
||||||
self.predict_score = self.method_params['predict_score']
|
|
||||||
# Checking if Y is binary
|
# Checking if Y is binary
|
||||||
outcome_values = self._data[self._outcome_name].astype(int).unique()
|
outcome_values = self._data[self._outcome_name].astype(int).unique()
|
||||||
self.outcome_is_binary = all([v in [0,1] for v in outcome_values])
|
self.outcome_is_binary = all([v in [0,1] for v in outcome_values])
|
||||||
|
|
|
@ -14,20 +14,29 @@ class InstrumentalVariableEstimator(CausalEstimator):
|
||||||
|
|
||||||
This is also a superclass that can be inherited by other specific methods.
|
This is also a superclass that can be inherited by other specific methods.
|
||||||
|
|
||||||
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
Supports additional parameters that can be specified in the estimate_effect() method.
|
Supports additional parameters as listed below.
|
||||||
|
|
||||||
- 'iv_instrument_name': Name of the specific instrumental variable to be used. Needs to be one of the IVs identified in the identification step. Default is to use all the IV variables from the identification step.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, *args, iv_instrument_name=None, **kwargs):
|
||||||
def __init__(self, *args, **kwargs):
|
"""
|
||||||
super().__init__(*args, **kwargs)
|
:param iv_instrument_name: Name of the specific instrumental variable
|
||||||
|
to be used. Needs to be one of the IVs identified in the
|
||||||
|
identification step. Default is to use all the IV variables
|
||||||
|
from the identification step.
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the information
|
||||||
|
# to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
# choosing the instrumental variable to use
|
# choosing the instrumental variable to use
|
||||||
if getattr(self, 'iv_instrument_name', None) is None:
|
self.estimating_instrument_names = self._target_estimand.instrumental_variables
|
||||||
self.estimating_instrument_names = self._target_estimand.instrumental_variables
|
if iv_instrument_name is not None:
|
||||||
else:
|
self.estimating_instrument_names = parse_state(iv_instrument_name)
|
||||||
self.estimating_instrument_names = parse_state(self.iv_instrument_name)
|
|
||||||
self.logger.debug("Instrumental Variables used:" +
|
self.logger.debug("Instrumental Variables used:" +
|
||||||
",".join(self.estimating_instrument_names))
|
",".join(self.estimating_instrument_names))
|
||||||
if not self.estimating_instrument_names:
|
if not self.estimating_instrument_names:
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import statsmodels.api as sm
|
import statsmodels.api as sm
|
||||||
from sklearn import linear_model
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from dowhy.causal_estimators.regression_estimator import RegressionEstimator
|
from dowhy.causal_estimators.regression_estimator import RegressionEstimator
|
||||||
|
|
||||||
|
|
||||||
class LinearRegressionEstimator(RegressionEstimator):
|
class LinearRegressionEstimator(RegressionEstimator):
|
||||||
"""Compute effect of treatment using linear regression.
|
"""Compute effect of treatment using linear regression.
|
||||||
|
|
||||||
|
@ -16,7 +14,17 @@ class LinearRegressionEstimator(RegressionEstimator):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
"""For a list of args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the
|
||||||
|
# parameters to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
print(args_dict)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
self.logger.info("INFO: Using Linear Regression Estimator")
|
self.logger.info("INFO: Using Linear Regression Estimator")
|
||||||
self._linear_model = self.model
|
self._linear_model = self.model
|
||||||
|
|
||||||
|
|
|
@ -3,24 +3,42 @@ import pandas as pd
|
||||||
|
|
||||||
from dowhy.causal_estimator import CausalEstimator
|
from dowhy.causal_estimator import CausalEstimator
|
||||||
|
|
||||||
|
|
||||||
class PropensityScoreEstimator(CausalEstimator):
|
class PropensityScoreEstimator(CausalEstimator):
|
||||||
"""
|
"""
|
||||||
Base class for estimators that estimate effects based on propensity of treatment assignment.
|
Base class for estimators that estimate effects based on propensity of
|
||||||
Supports additional parameters that can be specified in the estimate_effect() method.
|
treatment assignment.
|
||||||
- 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
|
|
||||||
- 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
|
For a list of standard args and kwargs, see documentation for
|
||||||
- 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
Supports additional parameters as listed below.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, *args, propensity_score_model=None, recalculate_propensity_score=True, propensity_score_column="propensity_score", **kwargs):
|
def __init__(self, *args, propensity_score_model=None,
|
||||||
super().__init__(*args, **kwargs)
|
recalculate_propensity_score=True,
|
||||||
|
propensity_score_column="propensity_score", **kwargs):
|
||||||
|
"""
|
||||||
|
:param propensity_score_model: Model used to compute propensity score.
|
||||||
|
Can be any classification model that supports fit() and
|
||||||
|
predict_proba() methods. If None, LogisticRegression is used.
|
||||||
|
:param recalculate_propensity_score: Whether the propensity score
|
||||||
|
should be estimated. To use pre-computed propensity scores,
|
||||||
|
set this value to False. Default=True.
|
||||||
|
:param propensity_score_column: Column name that stores the
|
||||||
|
propensity score. Default='propensity_score'
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the
|
||||||
|
# parameters to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
|
|
||||||
# Enable the user to pass params for a custom propensity model
|
# Enable the user to pass params for a custom propensity model
|
||||||
if not hasattr(self, "propensity_score_model"):
|
self.propensity_score_model = propensity_score_model
|
||||||
self.propensity_score_model = propensity_score_model
|
self.recalculate_propensity_score = recalculate_propensity_score
|
||||||
if not hasattr(self, "recalculate_propensity_score"):
|
self.propensity_score_column = propensity_score_column
|
||||||
self.recalculate_propensity_score = recalculate_propensity_score
|
|
||||||
if not hasattr(self, "propensity_score_column"):
|
|
||||||
self.propensity_score_column = propensity_score_column
|
|
||||||
|
|
||||||
# Check if the treatment is one-dimensional
|
# Check if the treatment is one-dimensional
|
||||||
if len(self._treatment_name) > 1:
|
if len(self._treatment_name) > 1:
|
||||||
|
|
|
@ -6,15 +6,15 @@ from dowhy.causal_estimator import CausalEstimate
|
||||||
from dowhy.causal_estimators.propensity_score_estimator import PropensityScoreEstimator
|
from dowhy.causal_estimators.propensity_score_estimator import PropensityScoreEstimator
|
||||||
|
|
||||||
class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
|
class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
|
||||||
""" Estimate effect of treatment by finding matching treated and control units based on propensity score.
|
""" Estimate effect of treatment by finding matching treated and control
|
||||||
|
units based on propensity score.
|
||||||
|
|
||||||
Straightforward application of the back-door criterion.
|
Straightforward application of the back-door criterion.
|
||||||
|
|
||||||
Supports additional parameters that can be specified in the estimate_effect() method.
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
- 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
|
Supports additional parameters as listed below.
|
||||||
- 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
|
|
||||||
- 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -24,6 +24,17 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
|
||||||
recalculate_propensity_score=True,
|
recalculate_propensity_score=True,
|
||||||
propensity_score_column="propensity_score",
|
propensity_score_column="propensity_score",
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
"""
|
||||||
|
:param propensity_score_model: Model used to compute propensity score.
|
||||||
|
Can be any classification model that supports fit() and
|
||||||
|
predict_proba() methods. If None, LogisticRegression is used.
|
||||||
|
:param recalculate_propensity_score: Whether the propensity score
|
||||||
|
should be estimated. To use pre-computed propensity scores,
|
||||||
|
set this value to False. Default=True.
|
||||||
|
:param propensity_score_column: Column name that stores the
|
||||||
|
propensity score. Default='propensity_score'
|
||||||
|
|
||||||
|
"""
|
||||||
super().__init__(
|
super().__init__(
|
||||||
*args,
|
*args,
|
||||||
propensity_score_model=propensity_score_model,
|
propensity_score_model=propensity_score_model,
|
||||||
|
|
|
@ -11,13 +11,10 @@ class PropensityScoreStratificationEstimator(PropensityScoreEstimator):
|
||||||
|
|
||||||
Straightforward application of the back-door criterion.
|
Straightforward application of the back-door criterion.
|
||||||
|
|
||||||
Supports additional parameters that can be specified in the estimate_effect() method.
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
- 'num_strata': Number of bins by which data will be stratified. Default=50
|
Supports additional parameters as listed below.
|
||||||
- 'clipping_threshold': Mininum number of treated or control units per strata. Default=10
|
|
||||||
- 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
|
|
||||||
- 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
|
|
||||||
- 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -30,20 +27,42 @@ class PropensityScoreStratificationEstimator(PropensityScoreEstimator):
|
||||||
recalculate_propensity_score=True,
|
recalculate_propensity_score=True,
|
||||||
propensity_score_column="propensity_score",
|
propensity_score_column="propensity_score",
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
"""
|
||||||
|
:param num_strata: Number of bins by which data will be stratified.
|
||||||
|
Default is automatically determined.
|
||||||
|
:param clipping_threshold: Mininum number of treated or control units
|
||||||
|
per strata. Default=10
|
||||||
|
:param propensity_score_model: The model used to compute propensity
|
||||||
|
score. Can be any classification model that supports fit() and
|
||||||
|
predict_proba() methods. If None, use
|
||||||
|
LogisticRegression model as the default.
|
||||||
|
:param recalculate_propensity_score: If true, force the estimator to
|
||||||
|
estimate the propensity score. To use pre-computed propensity
|
||||||
|
scores, set this value to False. Default=True
|
||||||
|
:param propensity_score_column: Column name that stores the propensity
|
||||||
|
score. Default='propensity_score'
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the information
|
||||||
|
# to create an object of this class
|
||||||
|
args_dict = kwargs
|
||||||
|
args_dict.update({
|
||||||
|
'num_strata': num_strata,
|
||||||
|
'clipping_threshold': clipping_threshold
|
||||||
|
})
|
||||||
super().__init__(
|
super().__init__(
|
||||||
*args,
|
*args,
|
||||||
propensity_score_model=propensity_score_model,
|
propensity_score_model=propensity_score_model,
|
||||||
recalculate_propensity_score=recalculate_propensity_score,
|
recalculate_propensity_score=recalculate_propensity_score,
|
||||||
propensity_score_column=propensity_score_column,
|
propensity_score_column=propensity_score_column,
|
||||||
**kwargs)
|
**args_dict)
|
||||||
|
|
||||||
self.logger.info("Using Propensity Score Stratification Estimator")
|
self.logger.info("Using Propensity Score Stratification Estimator")
|
||||||
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
|
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
|
||||||
self.logger.info(self.symbolic_estimator)
|
self.logger.info(self.symbolic_estimator)
|
||||||
if not hasattr(self, 'num_strata'):
|
# setting method-specific parameters
|
||||||
self.num_strata = num_strata
|
self.num_strata = num_strata
|
||||||
if not hasattr(self, 'clipping_threshold'):
|
self.clipping_threshold = clipping_threshold
|
||||||
self.clipping_threshold = clipping_threshold
|
|
||||||
|
|
||||||
|
|
||||||
def _estimate_effect(self):
|
def _estimate_effect(self):
|
||||||
|
|
|
@ -12,17 +12,12 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
|
||||||
|
|
||||||
Straightforward application of the back-door criterion.
|
Straightforward application of the back-door criterion.
|
||||||
|
|
||||||
Supports additional parameters that can be specified in the estimate_effect() method.
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
- 'min_ps_score': Lower bound used to clip the propensity score. Default=0.05
|
Supports additional parameters as listed below.
|
||||||
- 'max_ps_score': Upper bound used to clip the propensity score. Default=0.95
|
|
||||||
- 'weighting_scheme': This is the name of weighting method to use. Can be inverse propensity score ("ips_weight", default), stabilized IPS score ("ips_stabilized_weight"), or normalized IPS score ("ips_normalized_weight")
|
|
||||||
- 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
|
|
||||||
- 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
|
|
||||||
- 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*args,
|
*args,
|
||||||
|
@ -33,24 +28,48 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
|
||||||
recalculate_propensity_score=True,
|
recalculate_propensity_score=True,
|
||||||
propensity_score_column="propensity_score",
|
propensity_score_column="propensity_score",
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
"""
|
||||||
|
:param min_ps_score: Lower bound used to clip the propensity score.
|
||||||
|
Default=0.05
|
||||||
|
:param max_ps_score: Upper bound used to clip the propensity score.
|
||||||
|
Default=0.95
|
||||||
|
:param weighting_scheme: Weighting method to use. Can be inverse
|
||||||
|
propensity score ("ips_weight", default), stabilized IPS score
|
||||||
|
("ips_stabilized_weight"), or normalized IPS score
|
||||||
|
("ips_normalized_weight").
|
||||||
|
:param propensity_score_model: The model used to compute propensity
|
||||||
|
score. Can be any classification model that supports fit() and
|
||||||
|
predict_proba() methods. If None, use LogisticRegression model as
|
||||||
|
the default. Default=None
|
||||||
|
:param recalculate_propensity_score: If true, force the estimator to
|
||||||
|
estimate the propensity score. To use pre-computed propensity
|
||||||
|
scores, set this value to false. Default=True
|
||||||
|
:param propensity_score_column: Column name that stores the
|
||||||
|
propensity score. Default='propensity_score'
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the information
|
||||||
|
# to create an object of this class
|
||||||
|
args_dict = kwargs
|
||||||
|
args_dict.update({
|
||||||
|
'min_ps_score': min_ps_score,
|
||||||
|
'max_ps_score': max_ps_score,
|
||||||
|
'weighting_scheme': weighting_scheme
|
||||||
|
})
|
||||||
super().__init__(
|
super().__init__(
|
||||||
*args,
|
*args,
|
||||||
propensity_score_model=propensity_score_model,
|
propensity_score_model=propensity_score_model,
|
||||||
recalculate_propensity_score=recalculate_propensity_score,
|
recalculate_propensity_score=recalculate_propensity_score,
|
||||||
propensity_score_column=propensity_score_column,
|
propensity_score_column=propensity_score_column,
|
||||||
**kwargs)
|
**args_dict)
|
||||||
|
|
||||||
self.logger.info("INFO: Using Propensity Score Weighting Estimator")
|
self.logger.info("INFO: Using Propensity Score Weighting Estimator")
|
||||||
self.symbolic_estimator = self.construct_symbolic_estimator(
|
self.symbolic_estimator = self.construct_symbolic_estimator(
|
||||||
self._target_estimand)
|
self._target_estimand)
|
||||||
self.logger.info(self.symbolic_estimator)
|
self.logger.info(self.symbolic_estimator)
|
||||||
if not hasattr(self, "weighting_scheme"):
|
# Setting method specific parameters
|
||||||
# 'ips_weight', 'ips_normalized_weight', 'ips_stabilized_weight'
|
self.weighting_scheme = weighting_scheme
|
||||||
self.weighting_scheme = weighting_scheme
|
self.min_ps_score = min_ps_score
|
||||||
if not hasattr(self, "min_ps_score"):
|
self.max_ps_score = max_ps_score
|
||||||
self.min_ps_score = min_ps_score
|
|
||||||
if not hasattr(self, "max_ps_score"):
|
|
||||||
self.max_ps_score = max_ps_score
|
|
||||||
|
|
||||||
def _estimate_effect(self):
|
def _estimate_effect(self):
|
||||||
if self.recalculate_propensity_score is True:
|
if self.recalculate_propensity_score is True:
|
||||||
|
|
|
@ -11,17 +11,34 @@ class RegressionDiscontinuityEstimator(CausalEstimator):
|
||||||
Estimates effect by transforming the problem to an instrumental variables
|
Estimates effect by transforming the problem to an instrumental variables
|
||||||
problem.
|
problem.
|
||||||
|
|
||||||
Supports additional parameters that can be specified in the estimate_effect() method.
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
* 'rd_variable_name': name of the variable on which the discontinuity occurs. This is the instrument.
|
Supports additional parameters as listed below.
|
||||||
* 'rd_threshold_value': Threshold at which the discontinuity occurs.
|
|
||||||
* 'rd_bandwidth': Distance from the threshold within which confounders can be considered the same between treatment and control. Considered band is (threshold +- bandwidth)
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, rd_variable_name=None,
|
||||||
super().__init__(*args, **kwargs)
|
rd_threshold_value=None, rd_bandwidth=None, **kwargs):
|
||||||
|
"""
|
||||||
|
:param rd_variable_name: Name of the variable on which the
|
||||||
|
discontinuity occurs. This is the instrument.
|
||||||
|
:param rd_threshold_value: Threshold at which the discontinuity occurs.
|
||||||
|
:param rd_bandwidth: Distance from the threshold within which
|
||||||
|
confounders can be considered the same between treatment and
|
||||||
|
control. Considered band is (threshold +- bandwidth)
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the information
|
||||||
|
# to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
self.logger.info("Using Regression Discontinuity Estimator")
|
self.logger.info("Using Regression Discontinuity Estimator")
|
||||||
|
self.rd_variable_name = rd_variable_name
|
||||||
|
self.rd_threshold_value = rd_threshold_value
|
||||||
|
self.rd_bandwidth = rd_bandwidth
|
||||||
self.rd_variable = self._data[self.rd_variable_name]
|
self.rd_variable = self._data[self.rd_variable_name]
|
||||||
|
|
||||||
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
|
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
|
||||||
|
@ -46,7 +63,7 @@ class RegressionDiscontinuityEstimator(CausalEstimator):
|
||||||
['local_treatment'],
|
['local_treatment'],
|
||||||
['local_outcome'],
|
['local_outcome'],
|
||||||
test_significance=self._significance_test,
|
test_significance=self._significance_test,
|
||||||
params={'iv_instrument_name': 'local_rd_variable'}
|
iv_instrument_name='local_rd_variable'
|
||||||
)
|
)
|
||||||
est = iv_estimator.estimate_effect()
|
est = iv_estimator.estimate_effect()
|
||||||
return est
|
return est
|
||||||
|
|
|
@ -8,11 +8,20 @@ from dowhy.causal_estimator import CausalEstimator
|
||||||
class RegressionEstimator(CausalEstimator):
|
class RegressionEstimator(CausalEstimator):
|
||||||
"""Compute effect of treatment using some regression function.
|
"""Compute effect of treatment using some regression function.
|
||||||
|
|
||||||
Fits a regression model for estimating the outcome using treatment(s) and confounders.
|
Fits a regression model for estimating the outcome using treatment(s) and
|
||||||
|
confounders.
|
||||||
|
|
||||||
|
Base class for all regression models, inherited by
|
||||||
|
LinearRegressionEstimator and GeneralizedLinearModelEstimator.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
"""For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.logger.debug("Back-door variables used:" +
|
self.logger.debug("Back-door variables used:" +
|
||||||
",".join(self._target_estimand.get_backdoor_variables()))
|
",".join(self._target_estimand.get_backdoor_variables()))
|
||||||
|
|
|
@ -8,16 +8,39 @@ from dowhy.causal_identifier import CausalIdentifier
|
||||||
from dowhy.causal_estimators.linear_regression_estimator import LinearRegressionEstimator
|
from dowhy.causal_estimators.linear_regression_estimator import LinearRegressionEstimator
|
||||||
from dowhy.utils.api import parse_state
|
from dowhy.utils.api import parse_state
|
||||||
|
|
||||||
|
|
||||||
class TwoStageRegressionEstimator(CausalEstimator):
|
class TwoStageRegressionEstimator(CausalEstimator):
|
||||||
"""Compute treatment effect whenever the effect is fully mediated by another variable (front-door) or when there is an instrument available.
|
"""Compute treatment effect whenever the effect is fully mediated by
|
||||||
|
another variable (front-door) or when there is an instrument available.
|
||||||
|
|
||||||
Currently only supports a linear model for the effects.
|
Currently only supports a linear model for the effects.
|
||||||
|
|
||||||
|
For a list of standard args and kwargs, see documentation for
|
||||||
|
:class:`~dowhy.causal_estimator.CausalEstimator`.
|
||||||
|
|
||||||
|
Supports additional parameters as listed below.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# First stage statistical model
|
||||||
DEFAULT_FIRST_STAGE_MODEL = LinearRegressionEstimator
|
DEFAULT_FIRST_STAGE_MODEL = LinearRegressionEstimator
|
||||||
|
# Second stage statistical model
|
||||||
DEFAULT_SECOND_STAGE_MODEL = LinearRegressionEstimator
|
DEFAULT_SECOND_STAGE_MODEL = LinearRegressionEstimator
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, first_stage_model=None,
|
||||||
super().__init__(*args, **kwargs)
|
second_stage_model=None, **kwargs):
|
||||||
|
"""
|
||||||
|
:param first_stage_model: First stage estimator to be used. Default is
|
||||||
|
linear regression.
|
||||||
|
:param second_stage_model: Second stage estimator to be used. Default
|
||||||
|
is linear regression.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Required to ensure that self.method_params contains all the
|
||||||
|
# parameters needed to create an object of this class
|
||||||
|
args_dict = {k: v for k, v in locals().items()
|
||||||
|
if k not in type(self)._STD_INIT_ARGS}
|
||||||
|
args_dict.update(kwargs)
|
||||||
|
super().__init__(*args, **args_dict)
|
||||||
self.logger.info("INFO: Using Two Stage Regression Estimator")
|
self.logger.info("INFO: Using Two Stage Regression Estimator")
|
||||||
# Check if the treatment is one-dimensional
|
# Check if the treatment is one-dimensional
|
||||||
if len(self._treatment_name) > 1:
|
if len(self._treatment_name) > 1:
|
||||||
|
@ -58,29 +81,18 @@ class TwoStageRegressionEstimator(CausalEstimator):
|
||||||
error_msg = "No instrumental variable present. Two stage regression is not applicable"
|
error_msg = "No instrumental variable present. Two stage regression is not applicable"
|
||||||
self.logger.error(error_msg)
|
self.logger.error(error_msg)
|
||||||
|
|
||||||
if 'first_stage_model' in self.method_params:
|
if first_stage_model is not None:
|
||||||
self.first_stage_model = self.method_params['first_stage_model']
|
self.first_stage_model = first_stage_model
|
||||||
else:
|
else:
|
||||||
self.first_stage_model = self.__class__.DEFAULT_FIRST_STAGE_MODEL
|
self.first_stage_model = self.__class__.DEFAULT_FIRST_STAGE_MODEL
|
||||||
self.logger.warning("First stage model not provided. Defaulting to sklearn.linear_model.LinearRegression.")
|
self.logger.warning("First stage model not provided. Defaulting to sklearn.linear_model.LinearRegression.")
|
||||||
if 'second_stage_model' in self.method_params:
|
if second_stage_model is not None:
|
||||||
self.second_stage_model = self.method_params['second_stage_model']
|
self.second_stage_model = second_stage_model
|
||||||
else:
|
else:
|
||||||
self.second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL
|
self.second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL
|
||||||
self.logger.warning("Second stage model not provided. Defaulting to backdoor.linear_regression.")
|
self.logger.warning("Second stage model not provided. Defaulting to backdoor.linear_regression.")
|
||||||
|
|
||||||
def _estimate_effect(self):
|
def _estimate_effect(self):
|
||||||
#first_stage_features = self.build_first_stage_features()
|
|
||||||
#fs_model = self.first_stage_model()
|
|
||||||
#if self._target_estimand.identifier_method=="frontdoor":
|
|
||||||
# first_stage_outcome = self._frontdoor_variables
|
|
||||||
#elif self._target_estimand.identifier_method=="mediation":
|
|
||||||
# first_stage_outcome = self._mediators
|
|
||||||
#fs_model.fit(first_stage_features, self._frontdoor_variables)
|
|
||||||
#self.logger.debug("Coefficients of the fitted model: " +
|
|
||||||
# ",".join(map(str, fs_model.coef_)))
|
|
||||||
#residuals = self._frontdoor_variables - fs_model.predict(first_stage_features)
|
|
||||||
#self._data["residual"] = residuals
|
|
||||||
estimate_value = None
|
estimate_value = None
|
||||||
# First stage
|
# First stage
|
||||||
modified_target_estimand = copy.deepcopy(self._target_estimand)
|
modified_target_estimand = copy.deepcopy(self._target_estimand)
|
||||||
|
@ -102,7 +114,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
|
||||||
confidence_intervals = self._confidence_intervals,
|
confidence_intervals = self._confidence_intervals,
|
||||||
target_units=self._target_units,
|
target_units=self._target_units,
|
||||||
effect_modifiers=self._effect_modifier_names,
|
effect_modifiers=self._effect_modifier_names,
|
||||||
params=self.method_params)._estimate_effect()
|
**self.method_params)._estimate_effect()
|
||||||
|
|
||||||
# Second Stage
|
# Second Stage
|
||||||
modified_target_estimand = copy.deepcopy(self._target_estimand)
|
modified_target_estimand = copy.deepcopy(self._target_estimand)
|
||||||
|
@ -124,7 +136,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
|
||||||
confidence_intervals = self._confidence_intervals,
|
confidence_intervals = self._confidence_intervals,
|
||||||
target_units=self._target_units,
|
target_units=self._target_units,
|
||||||
effect_modifiers=self._effect_modifier_names,
|
effect_modifiers=self._effect_modifier_names,
|
||||||
params=self.method_params)._estimate_effect()
|
**self.method_params)._estimate_effect()
|
||||||
# Combining the two estimates
|
# Combining the two estimates
|
||||||
natural_indirect_effect = first_stage_estimate.value * second_stage_estimate.value
|
natural_indirect_effect = first_stage_estimate.value * second_stage_estimate.value
|
||||||
# This same estimate is valid for frontdoor as well as mediation (NIE)
|
# This same estimate is valid for frontdoor as well as mediation (NIE)
|
||||||
|
@ -149,7 +161,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
|
||||||
confidence_intervals = self._confidence_intervals,
|
confidence_intervals = self._confidence_intervals,
|
||||||
target_units=self._target_units,
|
target_units=self._target_units,
|
||||||
effect_modifiers=self._effect_modifier_names,
|
effect_modifiers=self._effect_modifier_names,
|
||||||
params=self.method_params)._estimate_effect()
|
**self.method_params)._estimate_effect()
|
||||||
natural_direct_effect = total_effect_estimate.value - natural_indirect_effect
|
natural_direct_effect = total_effect_estimate.value - natural_indirect_effect
|
||||||
estimate_value = natural_direct_effect
|
estimate_value = natural_direct_effect
|
||||||
self.symbolic_estimator = self.construct_symbolic_estimator(
|
self.symbolic_estimator = self.construct_symbolic_estimator(
|
||||||
|
|
|
@ -10,14 +10,10 @@ from dowhy.utils.api import parse_state
|
||||||
|
|
||||||
|
|
||||||
class CausalIdentifier:
|
class CausalIdentifier:
|
||||||
|
|
||||||
"""Class that implements different identification methods.
|
"""Class that implements different identification methods.
|
||||||
|
|
||||||
Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.
|
Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.
|
||||||
|
|
||||||
Other specific ways of identification, such as the ID* algorithm, minimal adjustment criteria, etc. will be added in the future.
|
|
||||||
If you'd like to contribute, please raise an issue or a pull request on Github.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
NONPARAMETRIC_ATE="nonparametric-ate"
|
NONPARAMETRIC_ATE="nonparametric-ate"
|
||||||
NONPARAMETRIC_NDE="nonparametric-nde"
|
NONPARAMETRIC_NDE="nonparametric-nde"
|
||||||
|
|
|
@ -268,8 +268,9 @@ class CausalModel:
|
||||||
if method_params is None:
|
if method_params is None:
|
||||||
method_params = {}
|
method_params = {}
|
||||||
# Define the third-party estimation method to be used
|
# Define the third-party estimation method to be used
|
||||||
method_params["_" + third_party_estimator_package + "_methodname"] = estimator_name
|
method_params[third_party_estimator_package + "_methodname"] = estimator_name
|
||||||
else: # For older dowhy methods
|
else: # For older dowhy methods
|
||||||
|
print(estimator_name)
|
||||||
# Process the dowhy estimators
|
# Process the dowhy estimators
|
||||||
causal_estimator_class = causal_estimators.get_class_object(estimator_name + "_estimator")
|
causal_estimator_class = causal_estimators.get_class_object(estimator_name + "_estimator")
|
||||||
if identified_estimand.no_directed_path:
|
if identified_estimand.no_directed_path:
|
||||||
|
@ -291,6 +292,8 @@ class CausalModel:
|
||||||
extra_args = method_params.get("init_params", {})
|
extra_args = method_params.get("init_params", {})
|
||||||
else:
|
else:
|
||||||
extra_args = {}
|
extra_args = {}
|
||||||
|
if method_params is None:
|
||||||
|
method_params = {}
|
||||||
self.causal_estimator = causal_estimator_class(
|
self.causal_estimator = causal_estimator_class(
|
||||||
self._data,
|
self._data,
|
||||||
identified_estimand,
|
identified_estimand,
|
||||||
|
@ -302,7 +305,7 @@ class CausalModel:
|
||||||
confidence_intervals = confidence_intervals,
|
confidence_intervals = confidence_intervals,
|
||||||
target_units = target_units,
|
target_units = target_units,
|
||||||
effect_modifiers = effect_modifiers,
|
effect_modifiers = effect_modifiers,
|
||||||
params=method_params,
|
**method_params,
|
||||||
**extra_args)
|
**extra_args)
|
||||||
else:
|
else:
|
||||||
# Estimator had been computed in a previous call
|
# Estimator had been computed in a previous call
|
||||||
|
@ -334,9 +337,11 @@ class CausalModel:
|
||||||
:param identified_estimand: a probability expression
|
:param identified_estimand: a probability expression
|
||||||
that represents the effect to be estimated. Output of
|
that represents the effect to be estimated. Output of
|
||||||
CausalModel.identify_effect method
|
CausalModel.identify_effect method
|
||||||
:param method_name: any of the estimation method to be used. See docs for estimate_effect method for a list of supported estimation methods.
|
:param method_name: any of the estimation method to be used. See docs
|
||||||
|
for estimate_effect method for a list of supported estimation methods.
|
||||||
:param fit_estimator: Boolean flag on whether to fit the estimator.
|
:param fit_estimator: Boolean flag on whether to fit the estimator.
|
||||||
Setting it to False is useful to compute the do-operation on new data using a previously fitted estimator.
|
Setting it to False is useful to compute the do-operation on new
|
||||||
|
data using a previously fitted estimator.
|
||||||
:param method_params: Dictionary containing any method-specific parameters. These are passed directly to the estimating method.
|
:param method_params: Dictionary containing any method-specific parameters. These are passed directly to the estimating method.
|
||||||
|
|
||||||
:returns: an instance of the CausalEstimate class, containing the causal effect estimate
|
:returns: an instance of the CausalEstimate class, containing the causal effect estimate
|
||||||
|
@ -369,7 +374,7 @@ class CausalModel:
|
||||||
identified_estimand,
|
identified_estimand,
|
||||||
self._treatment, self._outcome,
|
self._treatment, self._outcome,
|
||||||
test_significance=False,
|
test_significance=False,
|
||||||
params=method_params
|
**method_params
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Estimator had been computed in a previous call
|
# Estimator had been computed in a previous call
|
||||||
|
@ -469,12 +474,11 @@ class CausalModel:
|
||||||
:param k: number of covariates in set Z
|
:param k: number of covariates in set Z
|
||||||
:param independence_test: dictionary containing methods to test conditional independece in data
|
:param independence_test: dictionary containing methods to test conditional independece in data
|
||||||
:param independence_constraints: list of implications to be test input by the user in the format
|
:param independence_constraints: list of implications to be test input by the user in the format
|
||||||
[(x,y,(z1,z2)),
|
[(x,y,(z1,z2)),
|
||||||
(x,y, (z3,))
|
(x,y, (z3,))
|
||||||
]
|
]
|
||||||
: returns: an instance of GraphRefuter class
|
: returns: an instance of GraphRefuter class
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if independence_test is not None:
|
if independence_test is not None:
|
||||||
test_for_continuous = independence_test['test_for_continuous']
|
test_for_continuous = independence_test['test_for_continuous']
|
||||||
test_for_discrete = independence_test['test_for_discrete']
|
test_for_discrete = independence_test['test_for_discrete']
|
||||||
|
|
|
@ -65,8 +65,9 @@ class TestEstimator(object):
|
||||||
confidence_intervals = confidence_intervals,
|
confidence_intervals = confidence_intervals,
|
||||||
target_units = "ate",
|
target_units = "ate",
|
||||||
effect_modifiers = data["effect_modifier_names"],
|
effect_modifiers = data["effect_modifier_names"],
|
||||||
params=method_params
|
**method_params
|
||||||
)
|
)
|
||||||
|
|
||||||
true_ate = data["ate"]
|
true_ate = data["ate"]
|
||||||
ate_estimate = estimator_ate.estimate_effect()
|
ate_estimate = estimator_ate.estimate_effect()
|
||||||
str(ate_estimate) # checking if str output is correctly created
|
str(ate_estimate) # checking if str output is correctly created
|
||||||
|
|
|
@ -28,7 +28,7 @@ class TestGeneralizedLinearModelEstimator(object):
|
||||||
confidence_intervals=[True,],
|
confidence_intervals=[True,],
|
||||||
test_significance=[True,],
|
test_significance=[True,],
|
||||||
method_params={
|
method_params={
|
||||||
'num_ci_simulations': 10,
|
'num_simulations': 10,
|
||||||
'num_null_simulations': 10,
|
'num_null_simulations': 10,
|
||||||
'glm_family': sm.families.Binomial(),
|
'glm_family': sm.families.Binomial(),
|
||||||
'predict_score': True
|
'predict_score': True
|
||||||
|
|
|
@ -32,7 +32,7 @@ class TestInstrumentalVariableEstimator(object):
|
||||||
configs = [dict(zip(keys, v)) for v in itertools.product(*values)]
|
configs = [dict(zip(keys, v)) for v in itertools.product(*values)]
|
||||||
for cfg in configs:
|
for cfg in configs:
|
||||||
print("\nConfig:", cfg)
|
print("\nConfig:", cfg)
|
||||||
cfg['method_params']= None
|
cfg['method_params']= {}
|
||||||
if cfg["num_instruments"] >= cfg["num_treatments"]:
|
if cfg["num_instruments"] >= cfg["num_treatments"]:
|
||||||
estimator_tester.average_treatment_effect_test(**cfg)
|
estimator_tester.average_treatment_effect_test(**cfg)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -37,7 +37,7 @@ class TestLinearRegressionEstimator(object):
|
||||||
confidence_intervals=[True,],
|
confidence_intervals=[True,],
|
||||||
test_significance=[True,],
|
test_significance=[True,],
|
||||||
method_params={
|
method_params={
|
||||||
'num_ci_simulations': 10,
|
'num_simulations': 10,
|
||||||
'num_null_simulations': 10
|
'num_null_simulations': 10
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -25,7 +25,7 @@ class TestPropensityScoreMatchingEstimator(object):
|
||||||
confidence_intervals=[False,],
|
confidence_intervals=[False,],
|
||||||
test_significance=[False,],
|
test_significance=[False,],
|
||||||
method_params={
|
method_params={
|
||||||
'num_ci_simulations': 10,
|
'num_simulations': 10,
|
||||||
'num_null_simulations': 10
|
'num_null_simulations': 10
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -25,7 +25,7 @@ class TestPropensityScoreStratificationEstimator(object):
|
||||||
confidence_intervals=[True,],
|
confidence_intervals=[True,],
|
||||||
test_significance=[True,],
|
test_significance=[True,],
|
||||||
method_params={
|
method_params={
|
||||||
'num_ci_simulations': 10,
|
'num_simulations': 10,
|
||||||
'num_null_simulations': 10
|
'num_null_simulations': 10
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -26,10 +26,10 @@ class TestPropensityScoreWeightingEstimator(object):
|
||||||
num_treatments=num_treatments,
|
num_treatments=num_treatments,
|
||||||
treatment_is_binary=treatment_is_binary,
|
treatment_is_binary=treatment_is_binary,
|
||||||
outcome_is_binary=outcome_is_binary,
|
outcome_is_binary=outcome_is_binary,
|
||||||
confidence_intervals=[True,],
|
confidence_intervals=[True, ],
|
||||||
test_significance=[True,],
|
test_significance=[True, ],
|
||||||
method_params={
|
method_params={
|
||||||
'num_ci_simulations': 10,
|
'num_simulations': 1,
|
||||||
'num_null_simulations': 10
|
'num_null_simulations': 1
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -28,7 +28,7 @@ class TestTwoStageRegressionEstimator(object):
|
||||||
confidence_intervals=[True,],
|
confidence_intervals=[True,],
|
||||||
test_significance=[False,],
|
test_significance=[False,],
|
||||||
method_params={
|
method_params={
|
||||||
'num_ci_simulations': 10,
|
'num_simulations': 10,
|
||||||
'num_null_simulations': 10
|
'num_null_simulations': 10
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче