Added type checking of the treatment variable for methods that expect a bool dtype. The user is expected to specify the correct dtype for each column.
This commit is contained in:
Родитель
98b6ed2a82
Коммит
04e2bff070
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -4,6 +4,7 @@ import pandas as pd
|
||||||
from dowhy.causal_estimator import CausalEstimate
|
from dowhy.causal_estimator import CausalEstimate
|
||||||
from dowhy.causal_estimator import CausalEstimator
|
from dowhy.causal_estimator import CausalEstimator
|
||||||
|
|
||||||
|
import statsmodels.api as sm
|
||||||
|
|
||||||
class LinearRegressionEstimator(CausalEstimator):
|
class LinearRegressionEstimator(CausalEstimator):
|
||||||
"""Compute effect of treatment using linear regression.
|
"""Compute effect of treatment using linear regression.
|
||||||
|
@ -30,21 +31,24 @@ class LinearRegressionEstimator(CausalEstimator):
|
||||||
self._linear_model = None
|
self._linear_model = None
|
||||||
|
|
||||||
def _estimate_effect(self):
|
def _estimate_effect(self):
|
||||||
treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
|
if self._effect_modifiers is None:
|
||||||
if len(self._observed_common_causes_names)>0:
|
treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
|
||||||
features = np.concatenate((treatment_2d, self._observed_common_causes),
|
if len(self._observed_common_causes_names)>0:
|
||||||
axis=1)
|
features = np.concatenate((treatment_2d, self._observed_common_causes),
|
||||||
else:
|
axis=1)
|
||||||
features = treatment_2d
|
else:
|
||||||
self._linear_model = linear_model.LinearRegression()
|
features = treatment_2d
|
||||||
self._linear_model.fit(features, self._outcome)
|
self._linear_model = linear_model.LinearRegression()
|
||||||
coefficients = self._linear_model.coef_
|
self._linear_model.fit(features, self._outcome)
|
||||||
self.logger.debug("Coefficients of the fitted linear model: " +
|
coefficients = self._linear_model.coef_
|
||||||
",".join(map(str, coefficients)))
|
self.logger.debug("Coefficients of the fitted linear model: " +
|
||||||
estimate = CausalEstimate(estimate=coefficients[0],
|
",".join(map(str, coefficients)))
|
||||||
|
estimate = CausalEstimate(estimate=coefficients[0],
|
||||||
target_estimand=self._target_estimand,
|
target_estimand=self._target_estimand,
|
||||||
realized_estimand_expr=self.symbolic_estimator,
|
realized_estimand_expr=self.symbolic_estimator,
|
||||||
intercept=self._linear_model.intercept_)
|
intercept=self._linear_model.intercept_)
|
||||||
|
else:
|
||||||
|
pass #TODO
|
||||||
return estimate
|
return estimate
|
||||||
|
|
||||||
def construct_symbolic_estimator(self, estimand):
|
def construct_symbolic_estimator(self, estimand):
|
||||||
|
|
|
@ -10,6 +10,12 @@ class PropensityScoreMatchingEstimator(CausalEstimator):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
# Checking if treatment is binary
|
||||||
|
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
|
||||||
|
error_msg = "Propensity Score Matching method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
|
||||||
|
raise Exception(error_msg)
|
||||||
|
|
||||||
self.logger.debug("Back-door variables used:" +
|
self.logger.debug("Back-door variables used:" +
|
||||||
",".join(self._target_estimand.backdoor_variables))
|
",".join(self._target_estimand.backdoor_variables))
|
||||||
self._observed_common_causes_names = self._target_estimand.backdoor_variables
|
self._observed_common_causes_names = self._target_estimand.backdoor_variables
|
||||||
|
@ -22,6 +28,8 @@ class PropensityScoreMatchingEstimator(CausalEstimator):
|
||||||
self.logger.error(error_msg)
|
self.logger.error(error_msg)
|
||||||
raise Exception(error_msg)
|
raise Exception(error_msg)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
self.logger.info("INFO: Using Propensity Score Matching Estimator")
|
self.logger.info("INFO: Using Propensity Score Matching Estimator")
|
||||||
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
|
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
|
||||||
self.logger.info(self.symbolic_estimator)
|
self.logger.info(self.symbolic_estimator)
|
||||||
|
|
|
@ -14,6 +14,11 @@ class PropensityScoreStratificationEstimator(CausalEstimator):
|
||||||
|
|
||||||
def __init__(self, *args, num_strata=50, clipping_threshold=10, **kwargs):
|
def __init__(self, *args, num_strata=50, clipping_threshold=10, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
# Checking if treatment is binary
|
||||||
|
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
|
||||||
|
error_msg = "Propensity Score Stratification method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
|
||||||
|
raise Exception(error_msg)
|
||||||
|
|
||||||
self.logger.debug("Back-door variables used:" +
|
self.logger.debug("Back-door variables used:" +
|
||||||
",".join(self._target_estimand.backdoor_variables))
|
",".join(self._target_estimand.backdoor_variables))
|
||||||
self._observed_common_causes_names = self._target_estimand.backdoor_variables
|
self._observed_common_causes_names = self._target_estimand.backdoor_variables
|
||||||
|
|
|
@ -15,6 +15,11 @@ class PropensityScoreWeightingEstimator(CausalEstimator):
|
||||||
|
|
||||||
def __init__(self, *args, min_ps_score=0.05, max_ps_score=0.95, **kwargs):
|
def __init__(self, *args, min_ps_score=0.05, max_ps_score=0.95, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
# Checking if treatment is binary
|
||||||
|
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
|
||||||
|
error_msg = "Propensity Score Weighting method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
|
||||||
|
raise Exception(error_msg)
|
||||||
|
|
||||||
self.logger.debug("Back-door variables used:" +
|
self.logger.debug("Back-door variables used:" +
|
||||||
",".join(self._target_estimand.backdoor_variables))
|
",".join(self._target_estimand.backdoor_variables))
|
||||||
self._observed_common_causes_names = self._target_estimand.backdoor_variables
|
self._observed_common_causes_names = self._target_estimand.backdoor_variables
|
||||||
|
|
|
@ -40,6 +40,9 @@ class AddUnobservedCommonCause(CausalRefuter):
|
||||||
if self.effect_on_t == "binary_flip":
|
if self.effect_on_t == "binary_flip":
|
||||||
new_data['temp_rand_no'] = np.random.random(num_rows)
|
new_data['temp_rand_no'] = np.random.random(num_rows)
|
||||||
new_data.loc[new_data['temp_rand_no'] <= self.kappa_t, self._treatment_name ] = 1- new_data[self._treatment_name]
|
new_data.loc[new_data['temp_rand_no'] <= self.kappa_t, self._treatment_name ] = 1- new_data[self._treatment_name]
|
||||||
|
if pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
|
||||||
|
for tname in self._treatment_name:
|
||||||
|
new_data = new_data.astype({tname: 'bool'}, copy=False)
|
||||||
new_data.pop('temp_rand_no')
|
new_data.pop('temp_rand_no')
|
||||||
elif self.effect_on_t == "linear":
|
elif self.effect_on_t == "linear":
|
||||||
confounder_t_effect = self.kappa_t * w_random
|
confounder_t_effect = self.kappa_t * w_random
|
||||||
|
|
|
@ -51,6 +51,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
||||||
t += W @ c1 # + np.random.normal(0, 0.01)
|
t += W @ c1 # + np.random.normal(0, 0.01)
|
||||||
if num_instruments > 0:
|
if num_instruments > 0:
|
||||||
t += Z @ cz
|
t += Z @ cz
|
||||||
|
# Converting treatment to binary if required
|
||||||
if treatment_is_binary:
|
if treatment_is_binary:
|
||||||
t = np.vectorize(stochastically_convert_to_binary)(t)
|
t = np.vectorize(stochastically_convert_to_binary)(t)
|
||||||
y = beta*t # + np.random.normal(0,0.01)
|
y = beta*t # + np.random.normal(0,0.01)
|
||||||
|
@ -76,6 +77,11 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
||||||
other_variables = None
|
other_variables = None
|
||||||
col_names = effect_modifiers + instruments + common_causes + [treatment, outcome]
|
col_names = effect_modifiers + instruments + common_causes + [treatment, outcome]
|
||||||
data = pd.DataFrame(data, columns=col_names)
|
data = pd.DataFrame(data, columns=col_names)
|
||||||
|
# Specifying the correct dtypes
|
||||||
|
if treatment_is_binary:
|
||||||
|
data = data.astype({treatment:'bool'}, copy=False)
|
||||||
|
|
||||||
|
# Now specifying the corresponding graph strings
|
||||||
dot_graph = ('digraph {{ {0} ->{1};'
|
dot_graph = ('digraph {{ {0} ->{1};'
|
||||||
' U[label="Unobserved Confounders"];'
|
' U[label="Unobserved Confounders"];'
|
||||||
' U->{0}; U->{1};'
|
' U->{0}; U->{1};'
|
||||||
|
|
Загрузка…
Ссылка в новой задаче