Added type checking of the treatment variable for methods that expect a bool treatment. The user is expected to specify the right dtype for each column.

This commit is contained in:
Amit Sharma 2019-12-02 18:23:09 +05:30
Родитель 98b6ed2a82
Коммит 04e2bff070
8 изменённых файлов: 136 добавлений и 195 удалений

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -4,6 +4,7 @@ import pandas as pd
from dowhy.causal_estimator import CausalEstimate from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator from dowhy.causal_estimator import CausalEstimator
import statsmodels.api as sm
class LinearRegressionEstimator(CausalEstimator): class LinearRegressionEstimator(CausalEstimator):
"""Compute effect of treatment using linear regression. """Compute effect of treatment using linear regression.
@ -30,21 +31,24 @@ class LinearRegressionEstimator(CausalEstimator):
self._linear_model = None self._linear_model = None
def _estimate_effect(self): def _estimate_effect(self):
treatment_2d = self._treatment.values.reshape(len(self._treatment), -1) if self._effect_modifiers is None:
if len(self._observed_common_causes_names)>0: treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
features = np.concatenate((treatment_2d, self._observed_common_causes), if len(self._observed_common_causes_names)>0:
axis=1) features = np.concatenate((treatment_2d, self._observed_common_causes),
else: axis=1)
features = treatment_2d else:
self._linear_model = linear_model.LinearRegression() features = treatment_2d
self._linear_model.fit(features, self._outcome) self._linear_model = linear_model.LinearRegression()
coefficients = self._linear_model.coef_ self._linear_model.fit(features, self._outcome)
self.logger.debug("Coefficients of the fitted linear model: " + coefficients = self._linear_model.coef_
",".join(map(str, coefficients))) self.logger.debug("Coefficients of the fitted linear model: " +
estimate = CausalEstimate(estimate=coefficients[0], ",".join(map(str, coefficients)))
estimate = CausalEstimate(estimate=coefficients[0],
target_estimand=self._target_estimand, target_estimand=self._target_estimand,
realized_estimand_expr=self.symbolic_estimator, realized_estimand_expr=self.symbolic_estimator,
intercept=self._linear_model.intercept_) intercept=self._linear_model.intercept_)
else:
pass #TODO
return estimate return estimate
def construct_symbolic_estimator(self, estimand): def construct_symbolic_estimator(self, estimand):

Просмотреть файл

@ -10,6 +10,12 @@ class PropensityScoreMatchingEstimator(CausalEstimator):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# Checking if treatment is binary
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
error_msg = "Propensity Score Matching method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
raise Exception(error_msg)
self.logger.debug("Back-door variables used:" + self.logger.debug("Back-door variables used:" +
",".join(self._target_estimand.backdoor_variables)) ",".join(self._target_estimand.backdoor_variables))
self._observed_common_causes_names = self._target_estimand.backdoor_variables self._observed_common_causes_names = self._target_estimand.backdoor_variables
@ -22,6 +28,8 @@ class PropensityScoreMatchingEstimator(CausalEstimator):
self.logger.error(error_msg) self.logger.error(error_msg)
raise Exception(error_msg) raise Exception(error_msg)
self.logger.info("INFO: Using Propensity Score Matching Estimator") self.logger.info("INFO: Using Propensity Score Matching Estimator")
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand) self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
self.logger.info(self.symbolic_estimator) self.logger.info(self.symbolic_estimator)

Просмотреть файл

@ -14,6 +14,11 @@ class PropensityScoreStratificationEstimator(CausalEstimator):
def __init__(self, *args, num_strata=50, clipping_threshold=10, **kwargs): def __init__(self, *args, num_strata=50, clipping_threshold=10, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# Checking if treatment is binary
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
error_msg = "Propensity Score Stratification method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
raise Exception(error_msg)
self.logger.debug("Back-door variables used:" + self.logger.debug("Back-door variables used:" +
",".join(self._target_estimand.backdoor_variables)) ",".join(self._target_estimand.backdoor_variables))
self._observed_common_causes_names = self._target_estimand.backdoor_variables self._observed_common_causes_names = self._target_estimand.backdoor_variables

Просмотреть файл

@ -15,6 +15,11 @@ class PropensityScoreWeightingEstimator(CausalEstimator):
def __init__(self, *args, min_ps_score=0.05, max_ps_score=0.95, **kwargs): def __init__(self, *args, min_ps_score=0.05, max_ps_score=0.95, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# Checking if treatment is binary
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
error_msg = "Propensity Score Weighting method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
raise Exception(error_msg)
self.logger.debug("Back-door variables used:" + self.logger.debug("Back-door variables used:" +
",".join(self._target_estimand.backdoor_variables)) ",".join(self._target_estimand.backdoor_variables))
self._observed_common_causes_names = self._target_estimand.backdoor_variables self._observed_common_causes_names = self._target_estimand.backdoor_variables

Просмотреть файл

@ -40,6 +40,9 @@ class AddUnobservedCommonCause(CausalRefuter):
if self.effect_on_t == "binary_flip": if self.effect_on_t == "binary_flip":
new_data['temp_rand_no'] = np.random.random(num_rows) new_data['temp_rand_no'] = np.random.random(num_rows)
new_data.loc[new_data['temp_rand_no'] <= self.kappa_t, self._treatment_name ] = 1- new_data[self._treatment_name] new_data.loc[new_data['temp_rand_no'] <= self.kappa_t, self._treatment_name ] = 1- new_data[self._treatment_name]
if pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
for tname in self._treatment_name:
new_data = new_data.astype({tname: 'bool'}, copy=False)
new_data.pop('temp_rand_no') new_data.pop('temp_rand_no')
elif self.effect_on_t == "linear": elif self.effect_on_t == "linear":
confounder_t_effect = self.kappa_t * w_random confounder_t_effect = self.kappa_t * w_random

Просмотреть файл

@ -51,6 +51,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
t += W @ c1 # + np.random.normal(0, 0.01) t += W @ c1 # + np.random.normal(0, 0.01)
if num_instruments > 0: if num_instruments > 0:
t += Z @ cz t += Z @ cz
# Converting treatment to binary if required
if treatment_is_binary: if treatment_is_binary:
t = np.vectorize(stochastically_convert_to_binary)(t) t = np.vectorize(stochastically_convert_to_binary)(t)
y = beta*t # + np.random.normal(0,0.01) y = beta*t # + np.random.normal(0,0.01)
@ -76,6 +77,11 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
other_variables = None other_variables = None
col_names = effect_modifiers + instruments + common_causes + [treatment, outcome] col_names = effect_modifiers + instruments + common_causes + [treatment, outcome]
data = pd.DataFrame(data, columns=col_names) data = pd.DataFrame(data, columns=col_names)
# Specifying the correct dtypes
if treatment_is_binary:
data = data.astype({treatment:'bool'}, copy=False)
# Now specifying the corresponding graph strings
dot_graph = ('digraph {{ {0} ->{1};' dot_graph = ('digraph {{ {0} ->{1};'
' U[label="Unobserved Confounders"];' ' U[label="Unobserved Confounders"];'
' U->{0}; U->{1};' ' U->{0}; U->{1};'