Added type checking of the treatment variable for methods that expect a boolean treatment. The user is expected to specify the correct dtype for each column.

2019-12-02 18:23:09 +05:30 · 2019-12-02 18:23:09 +05:30 · 04e2bff070
--- a/docs/source/example_notebooks/dowhy-conditional-treatment-effects.ipynb
+++ b/docs/source/example_notebooks/dowhy-conditional-treatment-effects.ipynb
--- a/docs/source/example_notebooks/dowhy_simple_example.ipynb
+++ b/docs/source/example_notebooks/dowhy_simple_example.ipynb
--- a/dowhy/causal_estimators/linear_regression_estimator.py
+++ b/dowhy/causal_estimators/linear_regression_estimator.py
@ -4,6 +4,7 @@ import pandas as pd
 from dowhy.causal_estimator import CausalEstimate
 from dowhy.causal_estimator import CausalEstimator

+import statsmodels.api as sm

 class LinearRegressionEstimator(CausalEstimator):
    """Compute effect of treatment using linear regression.
@ -30,21 +31,24 @@ class LinearRegressionEstimator(CausalEstimator):
        self._linear_model = None

    def _estimate_effect(self):
-        treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
-        if len(self._observed_common_causes_names)>0:
-            features = np.concatenate((treatment_2d, self._observed_common_causes),
-                                  axis=1)
-        else:
-            features = treatment_2d
-        self._linear_model = linear_model.LinearRegression()
-        self._linear_model.fit(features, self._outcome)
-        coefficients = self._linear_model.coef_
-        self.logger.debug("Coefficients of the fitted linear model: " +
-                          ",".join(map(str, coefficients)))
-        estimate = CausalEstimate(estimate=coefficients[0],
+        if self._effect_modifiers is None:
+            treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
+            if len(self._observed_common_causes_names)>0:
+                features = np.concatenate((treatment_2d, self._observed_common_causes),
+                                      axis=1)
+            else:
+                features = treatment_2d
+            self._linear_model = linear_model.LinearRegression()
+            self._linear_model.fit(features, self._outcome)
+            coefficients = self._linear_model.coef_
+            self.logger.debug("Coefficients of the fitted linear model: " +
+                              ",".join(map(str, coefficients)))
+            estimate = CausalEstimate(estimate=coefficients[0],
                                  target_estimand=self._target_estimand,
                                  realized_estimand_expr=self.symbolic_estimator,
                                  intercept=self._linear_model.intercept_)
+        else:
+            pass #TODO
        return estimate

    def construct_symbolic_estimator(self, estimand):
--- a/dowhy/causal_estimators/propensity_score_matching_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_matching_estimator.py
@ -10,6 +10,12 @@ class PropensityScoreMatchingEstimator(CausalEstimator):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
+
+        # Checking that the treatment column has a boolean dtype
+        if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
+            error_msg = "Propensity Score Matching method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
+            raise Exception(error_msg)
+
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.backdoor_variables))
        self._observed_common_causes_names = self._target_estimand.backdoor_variables
@ -22,6 +28,8 @@ class PropensityScoreMatchingEstimator(CausalEstimator):
            self.logger.error(error_msg)
            raise Exception(error_msg)

+
+
        self.logger.info("INFO: Using Propensity Score Matching Estimator")
        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
        self.logger.info(self.symbolic_estimator)
--- a/dowhy/causal_estimators/propensity_score_stratification_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_stratification_estimator.py
@ -14,6 +14,11 @@ class PropensityScoreStratificationEstimator(CausalEstimator):

    def __init__(self, *args, num_strata=50, clipping_threshold=10, **kwargs):
        super().__init__(*args,  **kwargs)
+        # Checking that the treatment column has a boolean dtype
+        if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
+            error_msg = "Propensity Score Stratification method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
+            raise Exception(error_msg)
+
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.backdoor_variables))
        self._observed_common_causes_names = self._target_estimand.backdoor_variables
--- a/dowhy/causal_estimators/propensity_score_weighting_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_weighting_estimator.py
@ -15,6 +15,11 @@ class PropensityScoreWeightingEstimator(CausalEstimator):

    def __init__(self, *args, min_ps_score=0.05, max_ps_score=0.95, **kwargs):
        super().__init__(*args, **kwargs)
+        # Checking that the treatment column has a boolean dtype
+        if not pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
+            error_msg = "Propensity Score Weighting method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
+            raise Exception(error_msg)
+
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.backdoor_variables))
        self._observed_common_causes_names = self._target_estimand.backdoor_variables
--- a/dowhy/causal_refuters/add_unobserved_common_cause.py
+++ b/dowhy/causal_refuters/add_unobserved_common_cause.py
@ -40,6 +40,9 @@ class AddUnobservedCommonCause(CausalRefuter):
        if self.effect_on_t == "binary_flip":
            new_data['temp_rand_no'] = np.random.random(num_rows)
            new_data.loc[new_data['temp_rand_no'] <= self.kappa_t, self._treatment_name ]  = 1- new_data[self._treatment_name]
+            if pd.api.types.is_bool_dtype(self._data[self._treatment_name]):
+                for tname in self._treatment_name:
+                    new_data = new_data.astype({tname: 'bool'}, copy=False)
            new_data.pop('temp_rand_no')
        elif self.effect_on_t == "linear":
            confounder_t_effect = self.kappa_t * w_random
--- a/dowhy/datasets.py
+++ b/dowhy/datasets.py
@ -51,6 +51,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
        t += W @ c1  # + np.random.normal(0, 0.01)
    if num_instruments > 0:
        t += Z @ cz
+    # Converting treatment to binary if required
    if treatment_is_binary:
        t = np.vectorize(stochastically_convert_to_binary)(t)
    y =  beta*t  # + np.random.normal(0,0.01)
@ -76,6 +77,11 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    other_variables = None
    col_names = effect_modifiers + instruments + common_causes + [treatment, outcome]
    data = pd.DataFrame(data, columns=col_names)
+    # Specifying the correct dtypes
+    if treatment_is_binary:
+        data = data.astype({treatment:'bool'}, copy=False)
+
+    # Now specifying the corresponding graph strings
    dot_graph = ('digraph {{ {0} ->{1};'
                 ' U[label="Unobserved Confounders"];'
                 ' U->{0}; U->{1};'