Better docs: updated the method signature and docstrings for estimator classes (#389)

* updated the method signature and docstrings for estimator classes

* bug fixes

* updated the argument passing

* fixed the bug in refutation test calls for num_ci_simulations

* used std init args

* updated num simulations to pass weighting test
This commit is contained in:
Amit Sharma 2022-03-14 15:12:27 +05:30 committed by GitHub
Parent 5ba0a4ab94
Commit 77906cd4ed
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 408 additions and 192 deletions

View file

@@ -197,3 +197,7 @@ epub_exclude_files = ['search.html']
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
+
+# init docstrings should also be included in class
+autoclass_content = "both"

View file

@@ -36,6 +36,14 @@ dowhy.causal\_refuters.dummy\_outcome\_refuter module
   :undoc-members:
   :show-inheritance:

+dowhy.causal\_refuters.graph\_refuter module
+--------------------------------------------
+
+.. automodule:: dowhy.causal_refuters.graph_refuter
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
dowhy.causal\_refuters.placebo\_treatment\_refuter module
---------------------------------------------------------

View file

@@ -12,6 +12,14 @@ dowhy.utils.api module
   :undoc-members:
   :show-inheritance:

+dowhy.utils.cit module
+----------------------
+
+.. automodule:: dowhy.utils.cit
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
dowhy.utils.cli\_helpers module
-------------------------------

View file

@@ -38,12 +38,21 @@ class CausalEstimator:
    DEFAULT_INTERPRET_METHOD = ["textual_effect_interpreter"]
+    # std args to be removed from locals() before being passed to args_dict
+    _STD_INIT_ARGS = ('self', '__class__', 'args', 'kwargs')

    def __init__(self, data, identified_estimand, treatment, outcome,
                 control_value=0, treatment_value=1,
                 test_significance=False, evaluate_effect_strength=False,
                 confidence_intervals=False,
                 target_units=None, effect_modifiers=None,
-                 params=None):
+                 num_null_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST,
+                 num_simulations=DEFAULT_NUMBER_OF_SIMULATIONS_CI,
+                 sample_size_fraction=DEFAULT_SAMPLE_SIZE_FRACTION,
+                 confidence_level=DEFAULT_CONFIDENCE_LEVEL,
+                 need_conditional_estimates='auto',
+                 num_quantiles_to_discretize_cont_cols=NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS,
+                 **kwargs):
        """Initializes an estimator with data and names of relevant variables.

        This method is called from the constructors of its child classes.
@@ -55,19 +64,29 @@ class CausalEstimator:
        :param outcome: name of the outcome variable
        :param control_value: Value of the treatment in the control group, for effect estimation. If treatment is multi-variate, this can be a list.
        :param treatment_value: Value of the treatment in the treated group, for effect estimation. If treatment is multi-variate, this can be a list.
-        :param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations that can be specified through the params dictionary. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
+        :param test_significance: Binary flag or a string indicating whether to test significance and by which method. All estimators support test_significance="bootstrap" that estimates a p-value for the obtained estimate using the bootstrap method. Individual estimators can override this to support custom testing methods. The bootstrap method supports an optional parameter, num_null_simulations. If False, no testing is done. If True, significance of the estimate is tested using the custom method if available, otherwise by bootstrap.
        :param evaluate_effect_strength: (Experimental) whether to evaluate the strength of effect
        :param confidence_intervals: Binary flag or a string indicating whether the confidence intervals should be computed and which method should be used. All methods support estimation of confidence intervals using the bootstrap method by using the parameter confidence_intervals="bootstrap". The bootstrap method takes in two arguments (num_simulations and sample_size_fraction) that can be optionally specified in the params dictionary. Estimators may also override this to implement their own confidence interval method. If this parameter is False, no confidence intervals are computed. If True, confidence intervals are computed by the estimator's specific method if available, otherwise through bootstrap.
        :param target_units: The units for which the treatment effect should be estimated. This can be a string for common specifications of target units (namely, "ate", "att" and "atc"). It can also be a lambda function that can be used as an index for the data (pandas DataFrame). Alternatively, it can be a new DataFrame that contains values of the effect_modifiers and effect will be estimated only for this new data.
-        :param effect_modifiers: Variables on which to compute separate effects, or return a heterogeneous effect function. Not all methods support this currently.
-        :param params: (optional) Additional method parameters
-            num_null_simulations: The number of simulations for testing the statistical significance of the estimator
-            num_simulations: The number of simulations for finding the confidence interval (and/or standard error) for a estimate
-            sample_size_fraction: The size of the sample for the bootstrap estimator
-            confidence_level: The confidence level of the confidence interval estimate
-            num_quantiles_to_discretize_cont_cols: The number of quantiles into which a numeric effect modifier is split, to enable estimation of conditional treatment effect over it.
+        :param effect_modifiers: Variables on which to compute separate
+            effects, or return a heterogeneous effect function. Not all
+            methods support this currently.
+        :param num_null_simulations: The number of simulations for testing the
+            statistical significance of the estimator
+        :param num_simulations: The number of simulations for finding the
+            confidence interval (and/or standard error) for an estimate
+        :param sample_size_fraction: The size of the sample for the bootstrap
+            estimator
+        :param confidence_level: The confidence level of the confidence
+            interval estimate
+        :param need_conditional_estimates: Boolean flag indicating whether
+            conditional estimates should be computed. Defaults to True if
+            there are effect modifiers in the graph
+        :param num_quantiles_to_discretize_cont_cols: The number of quantiles
+            into which a numeric effect modifier is split, to enable
+            estimation of conditional treatment effect over it.
+        :param kwargs: (optional) Additional estimator-specific parameters
        :returns: an instance of the estimator class.
        """
        self._data = data
        self._target_estimand = identified_estimand
@@ -84,14 +103,9 @@ class CausalEstimator:
        self._bootstrap_estimates = None  # for confidence intervals and std error
        self._bootstrap_null_estimates = None  # for significance test
        self._effect_modifiers = None
-        self.method_params = params
+        self.method_params = kwargs
        # Setting the default interpret method
        self.interpret_method = CausalEstimator.DEFAULT_INTERPRET_METHOD
-        # Unpacking the keyword arguments
-        if params is not None:
-            for key, value in params.items():
-                setattr(self, key, value)
        self.logger = logging.getLogger(__name__)
@@ -114,20 +128,17 @@ class CausalEstimator:
        else:
            self._effect_modifier_names = None
-        # Checking if some parameters were set, otherwise setting to default values
-        if not hasattr(self, 'num_null_simulations'):
-            self.num_null_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_STAT_TEST
-        if not hasattr(self, 'num_simulations'):
-            self.num_simulations = CausalEstimator.DEFAULT_NUMBER_OF_SIMULATIONS_CI
-        if not hasattr(self, 'sample_size_fraction'):
-            self.sample_size_fraction = CausalEstimator.DEFAULT_SAMPLE_SIZE_FRACTION
-        if not hasattr(self, 'confidence_level'):
-            self.confidence_level = CausalEstimator.DEFAULT_CONFIDENCE_LEVEL
-        if not hasattr(self, 'num_quantiles_to_discretize_cont_cols'):
-            self.num_quantiles_to_discretize_cont_cols = CausalEstimator.NUM_QUANTILES_TO_DISCRETIZE_CONT_COLS
+        # Check if some parameters were set, otherwise set to default values
+        self.num_null_simulations = num_null_simulations
+        self.num_simulations = num_simulations
+        self.sample_size_fraction = sample_size_fraction
+        self.confidence_level = confidence_level
+        self.num_quantiles_to_discretize_cont_cols = \
+            num_quantiles_to_discretize_cont_cols
        # Estimate conditional estimates by default
-        if not hasattr(self, 'need_conditional_estimates'):
-            self.need_conditional_estimates = bool(self._effect_modifier_names)
+        self.need_conditional_estimates = need_conditional_estimates \
+            if need_conditional_estimates != 'auto' \
+            else bool(self._effect_modifier_names)

    @staticmethod
    def get_estimator_object(new_data, identified_estimand, estimate):
@@ -158,7 +169,7 @@ class CausalEstimator:
            confidence_intervals=estimate.params["confidence_intervals"],
            target_units=estimate.params["target_units"],
            effect_modifiers=estimate.params["effect_modifiers"],
-            params=estimate.params["method_params"]
+            **estimate.params["method_params"]
        )
        return new_estimator
@@ -297,7 +308,6 @@ class CausalEstimator:
        # Perform the set number of simulations
        for index in range(num_bootstrap_simulations):
            new_data = resample(self._data, n_samples=sample_size)
            new_estimator = type(self)(
                new_data,
                self._target_estimand,
@@ -310,7 +320,7 @@ class CausalEstimator:
                confidence_intervals=False,
                target_units=self._target_units,
                effect_modifiers=self._effect_modifier_names,
-                params=self.method_params
+                **self.method_params
            )
            new_effect = new_estimator.estimate_effect()
            simulation_results[index] = new_effect.value
@@ -504,7 +514,7 @@ class CausalEstimator:
            confidence_intervals=False,
            target_units=self._target_units,
            effect_modifiers=self._effect_modifier_names,
-            params=self.method_params
+            **self.method_params
        )
        new_effect = new_estimator.estimate_effect()
        null_estimates[i] = new_effect.value
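A hedged usage sketch (not part of the diff): after this change, entries in method_params reach the estimator as plain keyword arguments rather than a single params dict. The dataset helper and method name below are dowhy's standard ones, shown here with illustrative values only.

```python
import dowhy.datasets
from dowhy import CausalModel

# Build a small synthetic dataset and model (illustrative parameters).
data = dowhy.datasets.linear_dataset(
    beta=10, num_common_causes=3, num_samples=1000,
    treatment_is_binary=True)
model = CausalModel(
    data=data["df"],
    treatment=data["treatment_name"],
    outcome=data["outcome_name"],
    graph=data["gml_graph"])
identified_estimand = model.identify_effect()

# These keys are now forwarded to CausalEstimator.__init__ as kwargs.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    confidence_intervals=True,
    method_params={
        "num_simulations": 10,       # bootstrap CI simulations
        "num_null_simulations": 10,  # significance-test simulations
    })
```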

View file

@@ -7,11 +7,27 @@ from importlib import import_module
import causalml

class Causalml(CausalEstimator):
+    """ Wrapper class for estimators from the causalml library.
+
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below. For specific
+    parameters of each estimator, refer to the CausalML docs.
+    """

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, causalml_methodname, **kwargs):
+        """
+        :param causalml_methodname: Fully qualified name of causalml estimator
+            class.
+        """
+        # Required to ensure that self.method_params contains all the information
+        # to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
+        self._causalml_methodname = causalml_methodname
        # Add the identification method used in the estimator
        self.identifier_method = self._target_estimand.identifier_method
        self.logger.debug("The identifier method used {}".format(self.identifier_method))
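The locals()-filtering idiom above is easiest to see in isolation. A minimal, self-contained sketch (class names here are illustrative, not dowhy's):

```python
class Base:
    _STD_INIT_ARGS = ('self', '__class__', 'args', 'kwargs')

    def __init__(self, *args, **kwargs):
        # Everything the child forwarded ends up in method_params, so the
        # object can later be re-created with the same settings.
        self.method_params = kwargs

class Child(Base):
    def __init__(self, *args, special_param=1, **kwargs):
        # The outermost iterable of a comprehension is evaluated in the
        # enclosing scope, so locals() here is this constructor's locals.
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self.special_param = special_param

c = Child(special_param=5)
print(c.method_params)  # {'special_param': 5}
```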

View file

@@ -5,12 +5,35 @@ import numpy as np
from dowhy.causal_estimator import CausalEstimate, CausalEstimator

class DistanceMatchingEstimator(CausalEstimator):
-    """ Simple matching estimator for binary treatments based on a distance metric.
-    """
+    """Simple matching estimator for binary treatments based on a distance
+    metric.
+
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
+    """
+    # allowed types of distance metric
    Valid_Dist_Metric_Params = ['p', 'V', 'VI', 'w']

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, num_matches_per_unit=1,
+                 distance_metric="minkowski", exact_match_cols=None, **kwargs):
+        """
+        :param num_matches_per_unit: The number of matches per data point.
+            Default=1.
+        :param distance_metric: Distance metric to use. Default="minkowski"
+            that corresponds to Euclidean distance metric with p=2.
+        :param exact_match_cols: List of column names whose values should be
+            exactly matched. Typically used for columns with discrete values.
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
            error_msg = str(self.__class__) + "cannot handle more than one treatment variable"
@@ -21,15 +44,9 @@ class DistanceMatchingEstimator(CausalEstimator):
            self.logger.error(error_msg)
            raise Exception(error_msg)

-        # Setting the number of matches per data point
-        if getattr(self, 'num_matches_per_unit', None) is None:
-            self.num_matches_per_unit = 1
-        # Default distance metric if not provided by the user
-        if getattr(self, 'distance_metric', None) is None:
-            self.distance_metric = 'minkowski'  # corresponds to euclidean metric with p=2
-        if getattr(self, 'exact_match_cols', None) is None:
-            self.exact_match_cols = None
+        self.num_matches_per_unit = num_matches_per_unit
+        self.distance_metric = distance_metric
+        self.exact_match_cols = exact_match_cols

        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.get_backdoor_variables()))

View file

@@ -11,9 +11,27 @@ import econml

class Econml(CausalEstimator):
+    """Wrapper class for estimators from the EconML library.
+
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below. For init and fit
+    parameters of each estimator, refer to the EconML docs.
+    """

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, econml_methodname, **kwargs):
+        """
+        :param econml_methodname: Fully qualified name of econml estimator
+            class. For example, 'econml.dml.DML'
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
+        self._econml_methodname = econml_methodname
        self.logger.info("INFO: Using EconML Estimator")
        self.identifier_method = self._target_estimand.identifier_method
        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
@@ -154,7 +172,7 @@ class Econml(CausalEstimator):
        expr += "+".join(var_list)
        expr += " | " + ",".join(self._effect_modifier_names)
        return expr

    def shap_values(self, df: pd.DataFrame, *args, **kwargs):
        return self.estimator.shap_values(
            df[self._effect_modifier_names].values, *args, **kwargs

View file

@@ -1,10 +1,9 @@
-import numpy as np
import pandas as pd
import statsmodels.api as sm
import itertools

from dowhy.causal_estimators.regression_estimator import RegressionEstimator

class GeneralizedLinearModelEstimator(RegressionEstimator):
    """Compute effect of treatment using a generalized linear model such as logistic regression.
@@ -13,16 +12,29 @@ class GeneralizedLinearModelEstimator(RegressionEstimator):
    """

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, glm_family=None, predict_score=True, **kwargs):
+        """For a list of args and kwargs, see documentation for
+        :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+        :param glm_family: statsmodels family for the generalized linear model.
+            For example, use statsmodels.api.families.Binomial() for logistic
+            regression or statsmodels.api.families.Poisson() for count data.
+        :param predict_score: For models that have a binary output, whether
+            to output the model's score or the binary output based on the score.
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters needed to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Generalized Linear Model Estimator")
-        if self.method_params is not None and 'glm_family' in self.method_params:
-            self.family = self.method_params['glm_family']
+        if glm_family is not None:
+            self.family = glm_family
        else:
            raise ValueError("Need to specify the family for the generalized linear model. Provide a 'glm_family' parameter in method_params, such as statsmodels.api.families.Binomial() for logistic regression.")
-        self.predict_score = True
-        if self.method_params is not None and 'predict_score' in self.method_params:
-            self.predict_score = self.method_params['predict_score']
+        self.predict_score = predict_score

        # Checking if Y is binary
        outcome_values = self._data[self._outcome_name].astype(int).unique()
        self.outcome_is_binary = all([v in [0,1] for v in outcome_values])
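Reusing `model` and `identified_estimand` from the first sketch, a hedged example of the GLM parameters above (assumes a binary outcome and the `backdoor.generalized_linear_model` method name):

```python
import statsmodels.api as sm

# glm_family and predict_score now arrive as keyword arguments.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.generalized_linear_model",
    method_params={
        "glm_family": sm.families.Binomial(),  # logistic regression
        "predict_score": True,
    })
```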

View file

@@ -14,20 +14,29 @@ class InstrumentalVariableEstimator(CausalEstimator):
    This is also a superclass that can be inherited by other specific methods.

-    Supports additional parameters that can be specified in the estimate_effect() method.
-
-    - 'iv_instrument_name': Name of the specific instrumental variable to be used. Needs to be one of the IVs identified in the identification step. Default is to use all the IV variables from the identification step.
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, iv_instrument_name=None, **kwargs):
+        """
+        :param iv_instrument_name: Name of the specific instrumental variable
+            to be used. Needs to be one of the IVs identified in the
+            identification step. Default is to use all the IV variables
+            from the identification step.
+        """
+        # Required to ensure that self.method_params contains all the
+        # information to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        # choosing the instrumental variable to use
-        if getattr(self, 'iv_instrument_name', None) is None:
-            self.estimating_instrument_names = self._target_estimand.instrumental_variables
-        else:
-            self.estimating_instrument_names = parse_state(self.iv_instrument_name)
+        self.estimating_instrument_names = self._target_estimand.instrumental_variables
+        if iv_instrument_name is not None:
+            self.estimating_instrument_names = parse_state(iv_instrument_name)
        self.logger.debug("Instrumental Variables used:" +
                          ",".join(self.estimating_instrument_names))
        if not self.estimating_instrument_names:
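Reusing `model` and `identified_estimand` from the first sketch, a hedged example of selecting a specific instrument ("Z0" is a placeholder instrument name; previously this went through params={'iv_instrument_name': ...}):

```python
estimate = model.estimate_effect(
    identified_estimand,
    method_name="iv.instrumental_variable",
    method_params={"iv_instrument_name": "Z0"})
```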

View file

@@ -1,11 +1,9 @@
-import numpy as np
import pandas as pd
import statsmodels.api as sm
-from sklearn import linear_model
import itertools

from dowhy.causal_estimators.regression_estimator import RegressionEstimator

class LinearRegressionEstimator(RegressionEstimator):
    """Compute effect of treatment using linear regression.
@@ -16,7 +14,16 @@ class LinearRegressionEstimator(RegressionEstimator):
    """

    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+        """For a list of args and kwargs, see documentation for
+        :class:`~dowhy.causal_estimator.CausalEstimator`.
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Linear Regression Estimator")
        self._linear_model = self.model

View file

@@ -3,24 +3,42 @@ import pandas as pd
from dowhy.causal_estimator import CausalEstimator

class PropensityScoreEstimator(CausalEstimator):
    """
-    Base class for estimators that estimate effects based on propensity of treatment assignment.
-
-    Supports additional parameters that can be specified in the estimate_effect() method.
-
-    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
-    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
-    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
+    Base class for estimators that estimate effects based on propensity of
+    treatment assignment.
+
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """

-    def __init__(self, *args, propensity_score_model=None, recalculate_propensity_score=True, propensity_score_column="propensity_score", **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, propensity_score_model=None,
+                 recalculate_propensity_score=True,
+                 propensity_score_column="propensity_score", **kwargs):
+        """
+        :param propensity_score_model: Model used to compute propensity score.
+            Can be any classification model that supports fit() and
+            predict_proba() methods. If None, LogisticRegression is used.
+        :param recalculate_propensity_score: Whether the propensity score
+            should be estimated. To use pre-computed propensity scores,
+            set this value to False. Default=True.
+        :param propensity_score_column: Column name that stores the
+            propensity score. Default='propensity_score'
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        # Enable the user to pass params for a custom propensity model
-        if not hasattr(self, "propensity_score_model"):
-            self.propensity_score_model = propensity_score_model
-        if not hasattr(self, "recalculate_propensity_score"):
-            self.recalculate_propensity_score = recalculate_propensity_score
-        if not hasattr(self, "propensity_score_column"):
-            self.propensity_score_column = propensity_score_column
+        self.propensity_score_model = propensity_score_model
+        self.recalculate_propensity_score = recalculate_propensity_score
+        self.propensity_score_column = propensity_score_column

        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
@@ -35,7 +53,7 @@ class PropensityScoreEstimator(CausalEstimator):
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.get_backdoor_variables()))

        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()

        if self._observed_common_causes_names:
@@ -52,7 +70,7 @@ class PropensityScoreEstimator(CausalEstimator):
    def construct_symbolic_estimator(self, estimand):
        '''
        A symbolic string that conveys what each estimator does.
        For instance, linear regression is expressed as
        y ~ bx + e
        '''
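Reusing `model` and `identified_estimand` from the first sketch, a hedged example of passing a custom propensity model to one of the propensity-based estimators (any classifier with fit() and predict_proba() should work):

```python
from sklearn.linear_model import LogisticRegression

estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
    method_params={
        "propensity_score_model": LogisticRegression(max_iter=1000),
        "propensity_score_column": "propensity_score",
    })
```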

View file

@@ -6,26 +6,37 @@ from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimators.propensity_score_estimator import PropensityScoreEstimator

class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
-    """ Estimate effect of treatment by finding matching treated and control units based on propensity score.
+    """ Estimate effect of treatment by finding matching treated and control
+    units based on propensity score.

    Straightforward application of the back-door criterion.

-    Supports additional parameters that can be specified in the estimate_effect() method.
-
-    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
-    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
-    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """

    def __init__(
            self,
            *args,
            propensity_score_model=None,
            recalculate_propensity_score=True,
            propensity_score_column="propensity_score",
            **kwargs):
+        """
+        :param propensity_score_model: Model used to compute propensity score.
+            Can be any classification model that supports fit() and
+            predict_proba() methods. If None, LogisticRegression is used.
+        :param recalculate_propensity_score: Whether the propensity score
+            should be estimated. To use pre-computed propensity scores,
+            set this value to False. Default=True.
+        :param propensity_score_column: Column name that stores the
+            propensity score. Default='propensity_score'
+        """
        super().__init__(
            *args,
            propensity_score_model=propensity_score_model,
            recalculate_propensity_score=recalculate_propensity_score,
            propensity_score_column=propensity_score_column,

View file

@@ -11,13 +11,10 @@ class PropensityScoreStratificationEstimator(PropensityScoreEstimator):
    Straightforward application of the back-door criterion.

-    Supports additional parameters that can be specified in the estimate_effect() method.
-
-    - 'num_strata': Number of bins by which data will be stratified. Default=50
-    - 'clipping_threshold': Minimum number of treated or control units per strata. Default=10
-    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
-    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
-    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """
@@ -30,20 +27,42 @@ class PropensityScoreStratificationEstimator(PropensityScoreEstimator):
            recalculate_propensity_score=True,
            propensity_score_column="propensity_score",
            **kwargs):
+        """
+        :param num_strata: Number of bins by which data will be stratified.
+            Default is automatically determined.
+        :param clipping_threshold: Minimum number of treated or control units
+            per strata. Default=10
+        :param propensity_score_model: The model used to compute propensity
+            score. Can be any classification model that supports fit() and
+            predict_proba() methods. If None, use
+            LogisticRegression model as the default.
+        :param recalculate_propensity_score: If true, force the estimator to
+            estimate the propensity score. To use pre-computed propensity
+            scores, set this value to False. Default=True
+        :param propensity_score_column: Column name that stores the propensity
+            score. Default='propensity_score'
+        """
+        # Required to ensure that self.method_params contains all the
+        # information to create an object of this class
+        args_dict = kwargs
+        args_dict.update({
+            'num_strata': num_strata,
+            'clipping_threshold': clipping_threshold
+        })
        super().__init__(
            *args,
            propensity_score_model=propensity_score_model,
            recalculate_propensity_score=recalculate_propensity_score,
            propensity_score_column=propensity_score_column,
-            **kwargs)
+            **args_dict)
        self.logger.info("Using Propensity Score Stratification Estimator")

        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
        self.logger.info(self.symbolic_estimator)
-        if not hasattr(self, 'num_strata'):
-            self.num_strata = num_strata
-        if not hasattr(self, 'clipping_threshold'):
-            self.clipping_threshold = clipping_threshold
+        # setting method-specific parameters
+        self.num_strata = num_strata
+        self.clipping_threshold = clipping_threshold

    def _estimate_effect(self):
def _estimate_effect(self): def _estimate_effect(self):

View file

@@ -12,45 +12,64 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
    Straightforward application of the back-door criterion.

-    Supports additional parameters that can be specified in the estimate_effect() method.
-
-    - 'min_ps_score': Lower bound used to clip the propensity score. Default=0.05
-    - 'max_ps_score': Upper bound used to clip the propensity score. Default=0.95
-    - 'weighting_scheme': This is the name of weighting method to use. Can be inverse propensity score ("ips_weight", default), stabilized IPS score ("ips_stabilized_weight"), or normalized IPS score ("ips_normalized_weight")
-    - 'propensity_score_model': The model used to compute propensity score. Could be any classification model that supports fit() and predict_proba() methods. If None, use LogisticRegression model as the default. Default=None
-    - 'recalculate_propensity_score': If true, force the estimator to calculate the propensity score. To use pre-computed propensity score, set this value to false. Default=True
-    - 'propensity_score_column': column name that stores the propensity score. Default='propensity_score'
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """

    def __init__(
            self,
            *args,
            min_ps_score=0.05,
            max_ps_score=0.95,
            weighting_scheme='ips_weight',
            propensity_score_model=None,
            recalculate_propensity_score=True,
            propensity_score_column="propensity_score",
            **kwargs):
+        """
+        :param min_ps_score: Lower bound used to clip the propensity score.
+            Default=0.05
+        :param max_ps_score: Upper bound used to clip the propensity score.
+            Default=0.95
+        :param weighting_scheme: Weighting method to use. Can be inverse
+            propensity score ("ips_weight", default), stabilized IPS score
+            ("ips_stabilized_weight"), or normalized IPS score
+            ("ips_normalized_weight").
+        :param propensity_score_model: The model used to compute propensity
+            score. Can be any classification model that supports fit() and
+            predict_proba() methods. If None, use LogisticRegression model as
+            the default. Default=None
+        :param recalculate_propensity_score: If true, force the estimator to
+            estimate the propensity score. To use pre-computed propensity
+            scores, set this value to false. Default=True
+        :param propensity_score_column: Column name that stores the
+            propensity score. Default='propensity_score'
+        """
+        # Required to ensure that self.method_params contains all the
+        # information to create an object of this class
+        args_dict = kwargs
+        args_dict.update({
+            'min_ps_score': min_ps_score,
+            'max_ps_score': max_ps_score,
+            'weighting_scheme': weighting_scheme
+        })
        super().__init__(
            *args,
            propensity_score_model=propensity_score_model,
            recalculate_propensity_score=recalculate_propensity_score,
            propensity_score_column=propensity_score_column,
-            **kwargs)
+            **args_dict)
        self.logger.info("INFO: Using Propensity Score Weighting Estimator")

        self.symbolic_estimator = self.construct_symbolic_estimator(
            self._target_estimand)
        self.logger.info(self.symbolic_estimator)
-        if not hasattr(self, "weighting_scheme"):
-            # 'ips_weight', 'ips_normalized_weight', 'ips_stabilized_weight'
-            self.weighting_scheme = weighting_scheme
-        if not hasattr(self, "min_ps_score"):
-            self.min_ps_score = min_ps_score
-        if not hasattr(self, "max_ps_score"):
-            self.max_ps_score = max_ps_score
+        # Setting method specific parameters
+        self.weighting_scheme = weighting_scheme
+        self.min_ps_score = min_ps_score
+        self.max_ps_score = max_ps_score

    def _estimate_effect(self):
        if self.recalculate_propensity_score is True:
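Reusing `model` and `identified_estimand` from the first sketch, a hedged example of the weighting options documented above:

```python
# Stabilized inverse-propensity weights with clipped scores.
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
    method_params={
        "weighting_scheme": "ips_stabilized_weight",
        "min_ps_score": 0.05,
        "max_ps_score": 0.95,
    })
```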

View file

@@ -11,17 +11,34 @@ class RegressionDiscontinuityEstimator(CausalEstimator):
    Estimates effect by transforming the problem to an instrumental variables
    problem.

-    Supports additional parameters that can be specified in the estimate_effect() method.
-
-    * 'rd_variable_name': name of the variable on which the discontinuity occurs. This is the instrument.
-    * 'rd_threshold_value': Threshold at which the discontinuity occurs.
-    * 'rd_bandwidth': Distance from the threshold within which confounders can be considered the same between treatment and control. Considered band is (threshold +- bandwidth)
+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, rd_variable_name=None,
+                 rd_threshold_value=None, rd_bandwidth=None, **kwargs):
+        """
+        :param rd_variable_name: Name of the variable on which the
+            discontinuity occurs. This is the instrument.
+        :param rd_threshold_value: Threshold at which the discontinuity occurs.
+        :param rd_bandwidth: Distance from the threshold within which
+            confounders can be considered the same between treatment and
+            control. Considered band is (threshold +- bandwidth)
+        """
+        # Required to ensure that self.method_params contains all the
+        # information to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        self.logger.info("Using Regression Discontinuity Estimator")
+        self.rd_variable_name = rd_variable_name
+        self.rd_threshold_value = rd_threshold_value
+        self.rd_bandwidth = rd_bandwidth
        self.rd_variable = self._data[self.rd_variable_name]

        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
@@ -46,7 +63,7 @@ class RegressionDiscontinuityEstimator(CausalEstimator):
            ['local_treatment'],
            ['local_outcome'],
            test_significance=self._significance_test,
-            params={'iv_instrument_name': 'local_rd_variable'}
+            iv_instrument_name='local_rd_variable'
        )
        est = iv_estimator.estimate_effect()
        return est
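Reusing `model` and `identified_estimand` from the first sketch, a hedged example of the discontinuity parameters above ("Z0" as the running variable and the numeric values are placeholders):

```python
estimate = model.estimate_effect(
    identified_estimand,
    method_name="iv.regression_discontinuity",
    method_params={
        "rd_variable_name": "Z0",
        "rd_threshold_value": 0.5,
        "rd_bandwidth": 0.1,
    })
```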

View file

@@ -8,11 +8,20 @@ from dowhy.causal_estimator import CausalEstimator

class RegressionEstimator(CausalEstimator):
    """Compute effect of treatment using some regression function.

-    Fits a regression model for estimating the outcome using treatment(s) and confounders.
+    Fits a regression model for estimating the outcome using treatment(s) and
+    confounders.
+
+    Base class for all regression models, inherited by
+    LinearRegressionEstimator and GeneralizedLinearModelEstimator.
    """

    def __init__(self, *args, **kwargs):
+        """For a list of standard args and kwargs, see documentation for
+        :class:`~dowhy.causal_estimator.CausalEstimator`.
+        """
        super().__init__(*args, **kwargs)
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.get_backdoor_variables()))

View file

@@ -8,16 +8,39 @@ from dowhy.causal_identifier import CausalIdentifier
from dowhy.causal_estimators.linear_regression_estimator import LinearRegressionEstimator
from dowhy.utils.api import parse_state

class TwoStageRegressionEstimator(CausalEstimator):
-    """Compute treatment effect whenever the effect is fully mediated by another variable (front-door) or when there is an instrument available.
+    """Compute treatment effect whenever the effect is fully mediated by
+    another variable (front-door) or when there is an instrument available.

    Currently only supports a linear model for the effects.

+    For a list of standard args and kwargs, see documentation for
+    :class:`~dowhy.causal_estimator.CausalEstimator`.
+
+    Supports additional parameters as listed below.
    """
+    # First stage statistical model
    DEFAULT_FIRST_STAGE_MODEL = LinearRegressionEstimator
+    # Second stage statistical model
    DEFAULT_SECOND_STAGE_MODEL = LinearRegressionEstimator

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, first_stage_model=None,
+                 second_stage_model=None, **kwargs):
+        """
+        :param first_stage_model: First stage estimator to be used. Default is
+            linear regression.
+        :param second_stage_model: Second stage estimator to be used. Default
+            is linear regression.
+        """
+        # Required to ensure that self.method_params contains all the
+        # parameters needed to create an object of this class
+        args_dict = {k: v for k, v in locals().items()
+                     if k not in type(self)._STD_INIT_ARGS}
+        args_dict.update(kwargs)
+        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Two Stage Regression Estimator")
        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
@@ -58,29 +81,18 @@ class TwoStageRegressionEstimator(CausalEstimator):
            error_msg = "No instrumental variable present. Two stage regression is not applicable"
            self.logger.error(error_msg)

-        if 'first_stage_model' in self.method_params:
-            self.first_stage_model = self.method_params['first_stage_model']
+        if first_stage_model is not None:
+            self.first_stage_model = first_stage_model
        else:
            self.first_stage_model = self.__class__.DEFAULT_FIRST_STAGE_MODEL
            self.logger.warning("First stage model not provided. Defaulting to sklearn.linear_model.LinearRegression.")
-        if 'second_stage_model' in self.method_params:
-            self.second_stage_model = self.method_params['second_stage_model']
+        if second_stage_model is not None:
+            self.second_stage_model = second_stage_model
        else:
            self.second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL
            self.logger.warning("Second stage model not provided. Defaulting to backdoor.linear_regression.")

    def _estimate_effect(self):
-        #first_stage_features = self.build_first_stage_features()
-        #fs_model = self.first_stage_model()
-        #if self._target_estimand.identifier_method=="frontdoor":
-        #    first_stage_outcome = self._frontdoor_variables
-        #elif self._target_estimand.identifier_method=="mediation":
-        #    first_stage_outcome = self._mediators
-        #fs_model.fit(first_stage_features, self._frontdoor_variables)
-        #self.logger.debug("Coefficients of the fitted model: " +
-        #                  ",".join(map(str, fs_model.coef_)))
-        #residuals = self._frontdoor_variables - fs_model.predict(first_stage_features)
-        #self._data["residual"] = residuals
        estimate_value = None
        # First stage
        modified_target_estimand = copy.deepcopy(self._target_estimand)
@@ -102,7 +114,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
            confidence_intervals = self._confidence_intervals,
            target_units=self._target_units,
            effect_modifiers=self._effect_modifier_names,
-            params=self.method_params)._estimate_effect()
+            **self.method_params)._estimate_effect()
        # Second Stage
        modified_target_estimand = copy.deepcopy(self._target_estimand)
@@ -124,7 +136,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
            confidence_intervals = self._confidence_intervals,
            target_units=self._target_units,
            effect_modifiers=self._effect_modifier_names,
-            params=self.method_params)._estimate_effect()
+            **self.method_params)._estimate_effect()
        # Combining the two estimates
        natural_indirect_effect = first_stage_estimate.value * second_stage_estimate.value
        # This same estimate is valid for frontdoor as well as mediation (NIE)
@@ -149,7 +161,7 @@ class TwoStageRegressionEstimator(CausalEstimator):
            confidence_intervals = self._confidence_intervals,
            target_units=self._target_units,
            effect_modifiers=self._effect_modifier_names,
-            params=self.method_params)._estimate_effect()
+            **self.method_params)._estimate_effect()
        natural_direct_effect = total_effect_estimate.value - natural_indirect_effect
        estimate_value = natural_direct_effect
        self.symbolic_estimator = self.construct_symbolic_estimator(
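Reusing `model` and `identified_estimand` from the first sketch, a hedged example of overriding the stage models (both default to LinearRegressionEstimator, as documented above; a front-door estimand is assumed):

```python
from dowhy.causal_estimators.linear_regression_estimator import (
    LinearRegressionEstimator)

estimate = model.estimate_effect(
    identified_estimand,
    method_name="frontdoor.two_stage_regression",
    method_params={
        "first_stage_model": LinearRegressionEstimator,
        "second_stage_model": LinearRegressionEstimator,
    })
```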

View file

@@ -10,14 +10,10 @@ from dowhy.utils.api import parse_state

class CausalIdentifier:
    """Class that implements different identification methods.

    Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.

-    Other specific ways of identification, such as the ID* algorithm, minimal adjustment criteria, etc. will be added in the future.
-
-    If you'd like to contribute, please raise an issue or a pull request on Github.
    """
    NONPARAMETRIC_ATE="nonparametric-ate"
    NONPARAMETRIC_NDE="nonparametric-nde"

View file

@@ -268,8 +268,8 @@ class CausalModel:
            if method_params is None:
                method_params = {}
            # Define the third-party estimation method to be used
-            method_params["_" + third_party_estimator_package + "_methodname"] = estimator_name
+            method_params[third_party_estimator_package + "_methodname"] = estimator_name
        else:  # For older dowhy methods
            # Process the dowhy estimators
            causal_estimator_class = causal_estimators.get_class_object(estimator_name + "_estimator")
        if identified_estimand.no_directed_path:
@@ -291,6 +291,8 @@ class CausalModel:
                extra_args = method_params.get("init_params", {})
            else:
                extra_args = {}
+            if method_params is None:
+                method_params = {}
            self.causal_estimator = causal_estimator_class(
                self._data,
                identified_estimand,
@@ -302,7 +304,7 @@ class CausalModel:
                confidence_intervals = confidence_intervals,
                target_units = target_units,
                effect_modifiers = effect_modifiers,
-                params=method_params,
+                **method_params,
                **extra_args)
        else:
            # Estimator had been computed in a previous call
@@ -334,9 +336,11 @@ class CausalModel:
        :param identified_estimand: a probability expression
            that represents the effect to be estimated. Output of
            CausalModel.identify_effect method
-        :param method_name: any of the estimation method to be used. See docs for estimate_effect method for a list of supported estimation methods.
+        :param method_name: any of the estimation method to be used. See docs
+            for estimate_effect method for a list of supported estimation methods.
        :param fit_estimator: Boolean flag on whether to fit the estimator.
-            Setting it to False is useful to compute the do-operation on new data using a previously fitted estimator.
+            Setting it to False is useful to compute the do-operation on new
+            data using a previously fitted estimator.
        :param method_params: Dictionary containing any method-specific parameters. These are passed directly to the estimating method.

        :returns: an instance of the CausalEstimate class, containing the causal effect estimate
@@ -369,7 +373,7 @@ class CausalModel:
                identified_estimand,
                self._treatment, self._outcome,
                test_significance=False,
-                params=method_params
+                **method_params
            )
        else:
            # Estimator had been computed in a previous call
@@ -462,26 +466,25 @@ class CausalModel:
    def refute_graph(self, k=1, independence_test=None, independence_constraints=None):
        """
        Check if the dependencies in the input graph match the dataset -
        ( X ⫫ Y ) | Z
        where X and Y are currently treated as singleton sets;
        Z can have multiple variables

        :param k: number of covariates in set Z
        :param independence_test: dictionary containing methods to test conditional independence in data
        :param independence_constraints: list of implications to be tested, input by the user in the format
            [(x, y, (z1, z2)),
             (x, y, (z3,))]
        :returns: an instance of GraphRefuter class
        """
        if independence_test is not None:
            test_for_continuous = independence_test['test_for_continuous']
            test_for_discrete = independence_test['test_for_discrete']
            refuter = GraphRefuter(data=self._data, method_name_continuous=test_for_continuous, method_name_discrete=test_for_discrete)
        else:
            refuter = GraphRefuter(data=self._data)
        if independence_constraints is None:
            all_nodes = list(self._graph.get_all_nodes(include_unobserved=False))
@@ -510,7 +513,7 @@ class CausalModel:
        res = refuter.refute_model(independence_constraints=independence_constraints)
        self.logger.info(refuter._refutation_passed)
        return res
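Continuing the earlier sketch: with the renamed key, a third-party estimator is selected by a fully qualified method name, and the package prefix now maps to method_params["econml_methodname"] without the old leading underscore. The init_params values below are illustrative; real EconML classes require their own constructor arguments.

```python
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LassoCV

estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.dml.DML",
    method_params={
        "init_params": {
            "model_y": GradientBoostingRegressor(),
            "model_t": GradientBoostingRegressor(),
            "model_final": LassoCV(fit_intercept=False),
        },
        "fit_params": {},
    })
```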

View file

@@ -65,8 +65,9 @@ class TestEstimator(object):
                confidence_intervals = confidence_intervals,
                target_units = "ate",
                effect_modifiers = data["effect_modifier_names"],
-                params=method_params
+                **method_params
            )
        true_ate = data["ate"]
        ate_estimate = estimator_ate.estimate_effect()
        str(ate_estimate)  # checking if str output is correctly created

View file

@@ -28,7 +28,7 @@ class TestGeneralizedLinearModelEstimator(object):
            confidence_intervals=[True,],
            test_significance=[True,],
            method_params={
-                'num_ci_simulations': 10,
+                'num_simulations': 10,
                'num_null_simulations': 10,
                'glm_family': sm.families.Binomial(),
                'predict_score': True

View file

@@ -32,7 +32,7 @@ class TestInstrumentalVariableEstimator(object):
        configs = [dict(zip(keys, v)) for v in itertools.product(*values)]
        for cfg in configs:
            print("\nConfig:", cfg)
-            cfg['method_params'] = None
+            cfg['method_params'] = {}
            if cfg["num_instruments"] >= cfg["num_treatments"]:
                estimator_tester.average_treatment_effect_test(**cfg)
            else:

View file

@@ -37,7 +37,7 @@ class TestLinearRegressionEstimator(object):
            confidence_intervals=[True,],
            test_significance=[True,],
            method_params={
-                'num_ci_simulations': 10,
+                'num_simulations': 10,
                'num_null_simulations': 10
            }
        )

View file

@@ -25,7 +25,7 @@ class TestPropensityScoreMatchingEstimator(object):
            confidence_intervals=[False,],
            test_significance=[False,],
            method_params={
-                'num_ci_simulations': 10,
+                'num_simulations': 10,
                'num_null_simulations': 10
            }
        )

View file

@@ -25,7 +25,7 @@ class TestPropensityScoreStratificationEstimator(object):
            confidence_intervals=[True,],
            test_significance=[True,],
            method_params={
-                'num_ci_simulations': 10,
+                'num_simulations': 10,
                'num_null_simulations': 10
            }
        )

View file

@@ -26,10 +26,10 @@ class TestPropensityScoreWeightingEstimator(object):
            num_treatments=num_treatments,
            treatment_is_binary=treatment_is_binary,
            outcome_is_binary=outcome_is_binary,
-            confidence_intervals=[True,],
-            test_significance=[True,],
+            confidence_intervals=[True, ],
+            test_significance=[True, ],
            method_params={
-                'num_ci_simulations': 10,
-                'num_null_simulations': 10
+                'num_simulations': 1,
+                'num_null_simulations': 1
            }
        )

View file

@@ -28,7 +28,7 @@ class TestTwoStageRegressionEstimator(object):
            confidence_intervals=[True,],
            test_significance=[False,],
            method_params={
-                'num_ci_simulations': 10,
+                'num_simulations': 10,
                'num_null_simulations': 10
            }
        )