added CATE estimator in linear regression to set up support for CATE

This commit is contained in:
Amit Sharma 2019-12-04 22:11:33 +05:30
Parent 04e2bff070
Commit b5d7405063
5 changed files with 244 additions and 89 deletions

Some file diffs are hidden because one or more lines are too long
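In outline: with treatment t, common causes W and effect modifiers X, the estimator now fits a linear model that includes treatment-by-modifier interaction terms, of the rough form y ≈ b0 + b1*t + b2·W + b3·(t*X). Under that assumed form, the conditional average treatment effect (CATE) at X = x is b1 + b3·x, and the average effect is obtained by contrasting predictions under do(t=1) and do(t=0), which is what the new _do(1) - _do(0) computation in the linear regression estimator below does.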

View file

@@ -50,6 +50,17 @@ class CausalEstimator:
        self.logger = logging.getLogger(__name__)

        # Setting more values
        if self._data is not None:
            self._treatment = self._data[self._treatment_name]
            self._outcome = self._data[self._outcome_name]

        # Now saving the effect modifiers
        if self._effect_modifier_names:
            self._effect_modifiers = self._data[self._effect_modifier_names]
            self.logger.debug("Effect modifiers: " +
                              ",".join(self._effect_modifier_names))

    def _estimate_effect(self):
        raise NotImplementedError
@@ -62,14 +73,6 @@ class CausalEstimator:
        :returns: point estimate of causal effect
        """
        self._treatment = self._data[self._treatment_name]
        self._outcome = self._data[self._outcome_name]

        # Now saving the effect modifiers
        if self._effect_modifier_names:
            self._effect_modifiers = self._data[self._effect_modifier_names]
            self.logger.debug("Effect modifiers: " +
                              ",".join(self._effect_modifier_names))

        est = self._estimate_effect()
        self._estimate = est
@@ -102,10 +105,7 @@
        :returns:
        """
        self._treatment = self._data[self._treatment_name]
        self._outcome = self._data[self._outcome_name]
        est = self._do(x)
        return est

    def construct_symbolic_estimator(self, estimand):

View file

@@ -31,24 +31,15 @@ class LinearRegressionEstimator(CausalEstimator):
        self._linear_model = None

    def _estimate_effect(self):
        if self._effect_modifiers is None:
            treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
            if len(self._observed_common_causes_names)>0:
                features = np.concatenate((treatment_2d, self._observed_common_causes),
                                          axis=1)
            else:
                features = treatment_2d
            self._linear_model = linear_model.LinearRegression()
            self._linear_model.fit(features, self._outcome)
            coefficients = self._linear_model.coef_
            self.logger.debug("Coefficients of the fitted linear model: " +
                              ",".join(map(str, coefficients)))
            estimate = CausalEstimate(estimate=coefficients[0],
                                      target_estimand=self._target_estimand,
                                      realized_estimand_expr=self.symbolic_estimator,
                                      intercept=self._linear_model.intercept_)
        else:
            pass #TODO
        features, self._linear_model = self._build_linear_model()
        coefficients = self._linear_model.coef_
        self.logger.debug("Coefficients of the fitted linear model: " +
                          ",".join(map(str, coefficients)))
        effect_estimate = self._do(1) - self._do(0)
        estimate = CausalEstimate(estimate=effect_estimate,
                                  target_estimand=self._target_estimand,
                                  realized_estimand_expr=self.symbolic_estimator,
                                  intercept=self._linear_model.intercept_)
        return estimate

    def construct_symbolic_estimator(self, estimand):
@@ -57,19 +48,31 @@ class LinearRegressionEstimator(CausalEstimator):
        expr += "+".join(var_list)
        return expr

    def _build_linear_model(self):
    def _build_features(self):
        treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
        features = np.concatenate((treatment_2d, self._observed_common_causes),
        if len(self._observed_common_causes_names)>0:
            features = np.concatenate((treatment_2d, self._observed_common_causes),
                                      axis=1)
        else:
            features = treatment_2d
        if self._effect_modifier_names:
            for i in range(treatment_2d.shape[1]):
                curr_treatment = treatment_2d[:,i]
                new_features = curr_treatment[:, np.newaxis] * self._effect_modifiers.to_numpy()
                features = np.concatenate((features, new_features), axis=1)
        return features

    def _build_linear_model(self):
        features = self._build_features()
        model = linear_model.LinearRegression()
        model.fit(features, self._outcome)
        self._linear_model = model
        return (features, model)

    def _do(self, x):
        if not self._linear_model:
            self._build_linear_model()
            _, self._linear_model = self._build_linear_model()
        interventional_treatment_2d = np.full(self._treatment.shape, x).reshape(len(self._treatment), -1)
        features = np.concatenate((interventional_treatment_2d, self._observed_common_causes),
                                  axis=1)
        interventional_outcomes = self._linear_model.predict(features)
        features = self._build_features()#np.concatenate((interventional_treatment_2d, self._observed_common_causes),axis=1)
        new_features = np.concatenate((interventional_treatment_2d, features[:,1: ]), axis=1)
        interventional_outcomes = self._linear_model.predict(new_features)
        return interventional_outcomes.mean()
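For reference, a minimal self-contained sketch of the same interaction-feature approach outside the estimator (illustrative names and simulated data; assumes a single binary treatment, numpy, and scikit-learn's LinearRegression):

import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
n = 5000
W = rng.normal(size=(n, 2))        # observed common causes
X = rng.normal(size=(n, 1))        # effect modifiers
t = rng.binomial(1, 0.5, size=n)   # binary treatment
y = 10*t + W @ np.array([1.0, -1.0]) + (X @ np.array([2.0]))*t + rng.normal(size=n)

def build_features(t_vals):
    # column order mirrors _build_features above: treatment, common causes,
    # then treatment-times-modifier interaction columns
    t2d = t_vals.reshape(len(t_vals), -1)
    feats = np.concatenate((t2d, W), axis=1)
    feats = np.concatenate((feats, t2d * X), axis=1)
    return feats

model = linear_model.LinearRegression()
model.fit(build_features(t), y)

# effect estimate as the contrast between do(t=1) and do(t=0) predictions
ate = (model.predict(build_features(np.ones(n))).mean()
       - model.predict(build_features(np.zeros(n))).mean())
print(ate)  # close to 10 + 2*X.mean() for this simulated data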

View file

@@ -16,6 +16,7 @@ def stochastically_convert_to_binary(x):
def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
                   num_effect_modifiers=0, treatment_is_binary=True):
    W, X, Z, c1, c2, ce, cz = [None]*7
    beta = float(beta)
    if num_common_causes > 0:
        range_c1 = beta*0.5
@@ -46,7 +47,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
        ce = np.random.uniform(0, range_ce, num_effect_modifiers)
    # TODO - test all our methods with random noise added to covariates (instead of the stochastic treatment assignment)
    t = np.random.normal(0, 1)
    t = np.random.normal(0, 1, num_samples)
    if num_common_causes > 0:
        t += W @ c1 # + np.random.normal(0, 0.01)
    if num_instruments > 0:
@@ -54,11 +55,15 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    # Converting treatment to binary if required
    if treatment_is_binary:
        t = np.vectorize(stochastically_convert_to_binary)(t)

    y = beta*t # + np.random.normal(0,0.01)
    if num_common_causes > 0:
        y += W @ c2
    if num_effect_modifiers > 0:
        y += X @ ce

    def _compute_y(t, W, X, beta, c2, ce):
        y = beta*t # + np.random.normal(0,0.01)
        if num_common_causes > 0:
            y += W @ c2
        if num_effect_modifiers > 0:
            y += (X @ ce) * t
        return y
    y = _compute_y(t, W, X, beta, c2, ce)

    data = np.column_stack((t, y))
    if num_common_causes > 0:
@@ -71,7 +76,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    treatment = "v"
    outcome = "y"
    common_causes = [("W" + str(i)) for i in range(0, num_common_causes)]
    ate = beta
    ate = np.mean(_compute_y(np.ones(num_samples), W, X, beta, c2, ce) - _compute_y(np.zeros(num_samples), W, X, beta, c2, ce))
    instruments = [("Z" + str(i)) for i in range(0, num_instruments)]
    effect_modifiers =[("X" + str(i)) for i in range(0, num_effect_modifiers)]
    other_variables = None
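The switch from ate = beta to the empirical np.mean(...) follows because the modifier term now scales with treatment, so the unit-level contrast is beta + X @ ce rather than beta alone. A small numpy check of that arithmetic (hypothetical values; the helper here takes only t for brevity but mirrors the _compute_y structure above):

import numpy as np

rng = np.random.default_rng(0)
num_samples, beta = 4, 10.0
W, c2 = rng.normal(size=(num_samples, 2)), np.array([1.0, -1.0])
X, ce = rng.uniform(size=(num_samples, 1)), np.array([2.0])

def _compute_y(t):
    return beta*t + W @ c2 + (X @ ce) * t

ate = np.mean(_compute_y(np.ones(num_samples)) - _compute_y(np.zeros(num_samples)))
assert np.isclose(ate, beta + (X @ ce).mean())  # the W @ c2 term cancels in the difference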
@@ -118,7 +123,6 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    }
    return ret_dict

def xy_dataset(num_samples, effect=True, sd_error=1):
    treatment = 'Treatment'
    outcome = 'Outcome'

View file

@@ -1,5 +1,4 @@
import unittest
import pytest
from dowhy.causal_estimator import CausalEstimator