added CATE estimator in linear regression to set up support for CATE

This commit is contained in:
Amit Sharma 2019-12-04 22:11:33 +05:30
Parent 04e2bff070
Commit b5d7405063
5 changed files with 244 additions and 89 deletions

Some file diffs are hidden because one or more lines are too long
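In outline: with treatment t, common causes W and effect modifiers X, the estimator now fits a linear model that includes treatment-by-modifier interaction terms, of the rough form y ≈ b0 + b1*t + b2·W + b3·(t*X). Under that assumed form, the conditional average treatment effect (CATE) at X = x is b1 + b3·x, and the average effect is obtained by contrasting predictions under do(t=1) and do(t=0), which is what the new _do(1) - _do(0) computation in the linear regression estimator below does.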

View file

@@ -50,6 +50,17 @@ class CausalEstimator:
        self.logger = logging.getLogger(__name__)

        # Setting more values
        if self._data is not None:
            self._treatment = self._data[self._treatment_name]
            self._outcome = self._data[self._outcome_name]

        # Now saving the effect modifiers
        if self._effect_modifier_names:
            self._effect_modifiers = self._data[self._effect_modifier_names]
            self.logger.debug("Effect modifiers: " +
                              ",".join(self._effect_modifier_names))

    def _estimate_effect(self):
        raise NotImplementedError
@@ -62,14 +73,6 @@ class CausalEstimator:
        :returns: point estimate of causal effect
        """
        self._treatment = self._data[self._treatment_name]
        self._outcome = self._data[self._outcome_name]

        # Now saving the effect modifiers
        if self._effect_modifier_names:
            self._effect_modifiers = self._data[self._effect_modifier_names]
            self.logger.debug("Effect modifiers: " +
                              ",".join(self._effect_modifier_names))

        est = self._estimate_effect()
        self._estimate = est
@@ -102,10 +105,7 @@
        :returns:
        """
        self._treatment = self._data[self._treatment_name]
        self._outcome = self._data[self._outcome_name]
        est = self._do(x)
        return est

    def construct_symbolic_estimator(self, estimand):

View file

@@ -31,24 +31,15 @@ class LinearRegressionEstimator(CausalEstimator):
        self._linear_model = None

    def _estimate_effect(self):
        if self._effect_modifiers is None:
            treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
            if len(self._observed_common_causes_names)>0:
                features = np.concatenate((treatment_2d, self._observed_common_causes),
                                          axis=1)
            else:
                features = treatment_2d
            self._linear_model = linear_model.LinearRegression()
            self._linear_model.fit(features, self._outcome)
            coefficients = self._linear_model.coef_
            self.logger.debug("Coefficients of the fitted linear model: " +
                              ",".join(map(str, coefficients)))
            estimate = CausalEstimate(estimate=coefficients[0],
                                      target_estimand=self._target_estimand,
                                      realized_estimand_expr=self.symbolic_estimator,
                                      intercept=self._linear_model.intercept_)
        else:
            pass #TODO
        features, self._linear_model = self._build_linear_model()
        coefficients = self._linear_model.coef_
        self.logger.debug("Coefficients of the fitted linear model: " +
                          ",".join(map(str, coefficients)))
        effect_estimate = self._do(1) - self._do(0)
        estimate = CausalEstimate(estimate=effect_estimate,
                                  target_estimand=self._target_estimand,
                                  realized_estimand_expr=self.symbolic_estimator,
                                  intercept=self._linear_model.intercept_)
        return estimate

    def construct_symbolic_estimator(self, estimand):
@@ -57,19 +48,31 @@ class LinearRegressionEstimator(CausalEstimator):
        expr += "+".join(var_list)
        return expr

    def _build_linear_model(self):
    def _build_features(self):
        treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
        features = np.concatenate((treatment_2d, self._observed_common_causes),
        if len(self._observed_common_causes_names)>0:
            features = np.concatenate((treatment_2d, self._observed_common_causes),
                                      axis=1)
        else:
            features = treatment_2d
        if self._effect_modifier_names:
            for i in range(treatment_2d.shape[1]):
                curr_treatment = treatment_2d[:,i]
                new_features = curr_treatment[:, np.newaxis] * self._effect_modifiers.to_numpy()
                features = np.concatenate((features, new_features), axis=1)
        return features

    def _build_linear_model(self):
        features = self._build_features()
        model = linear_model.LinearRegression()
        model.fit(features, self._outcome)
        self._linear_model = model
        return (features, model)

    def _do(self, x):
        if not self._linear_model:
            self._build_linear_model()
            _, self._linear_model = self._build_linear_model()
        interventional_treatment_2d = np.full(self._treatment.shape, x).reshape(len(self._treatment), -1)
        features = np.concatenate((interventional_treatment_2d, self._observed_common_causes),
                                  axis=1)
        interventional_outcomes = self._linear_model.predict(features)
        features = self._build_features()#np.concatenate((interventional_treatment_2d, self._observed_common_causes),axis=1)
        new_features = np.concatenate((interventional_treatment_2d, features[:,1: ]), axis=1)
        interventional_outcomes = self._linear_model.predict(new_features)
        return interventional_outcomes.mean()
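For reference, a minimal self-contained sketch of the same interaction-feature approach outside the estimator (illustrative names and simulated data; assumes a single binary treatment, numpy, and scikit-learn's LinearRegression):

import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
n = 5000
W = rng.normal(size=(n, 2))        # observed common causes
X = rng.normal(size=(n, 1))        # effect modifiers
t = rng.binomial(1, 0.5, size=n)   # binary treatment
y = 10*t + W @ np.array([1.0, -1.0]) + (X @ np.array([2.0]))*t + rng.normal(size=n)

def build_features(t_vals):
    # column order mirrors _build_features above: treatment, common causes,
    # then treatment-times-modifier interaction columns
    t2d = t_vals.reshape(len(t_vals), -1)
    feats = np.concatenate((t2d, W), axis=1)
    feats = np.concatenate((feats, t2d * X), axis=1)
    return feats

model = linear_model.LinearRegression()
model.fit(build_features(t), y)

# effect estimate as the contrast between do(t=1) and do(t=0) predictions
ate = (model.predict(build_features(np.ones(n))).mean()
       - model.predict(build_features(np.zeros(n))).mean())
print(ate)  # close to 10 + 2*X.mean() for this simulated data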

View file

@@ -16,6 +16,7 @@ def stochastically_convert_to_binary(x):
def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
                   num_effect_modifiers=0, treatment_is_binary=True):
    W, X, Z, c1, c2, ce, cz = [None]*7
    beta = float(beta)
    if num_common_causes > 0:
        range_c1 = beta*0.5
@@ -46,7 +47,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
        ce = np.random.uniform(0, range_ce, num_effect_modifiers)
    # TODO - test all our methods with random noise added to covariates (instead of the stochastic treatment assignment)
    t = np.random.normal(0, 1)
    t = np.random.normal(0, 1, num_samples)
    if num_common_causes > 0:
        t += W @ c1 # + np.random.normal(0, 0.01)
    if num_instruments > 0:
@@ -54,11 +55,15 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    # Converting treatment to binary if required
    if treatment_is_binary:
        t = np.vectorize(stochastically_convert_to_binary)(t)

    y = beta*t # + np.random.normal(0,0.01)
    if num_common_causes > 0:
        y += W @ c2
    if num_effect_modifiers > 0:
        y += X @ ce

    def _compute_y(t, W, X, beta, c2, ce):
        y = beta*t # + np.random.normal(0,0.01)
        if num_common_causes > 0:
            y += W @ c2
        if num_effect_modifiers > 0:
            y += (X @ ce) * t
        return y
    y = _compute_y(t, W, X, beta, c2, ce)

    data = np.column_stack((t, y))
    if num_common_causes > 0:
@@ -71,7 +76,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    treatment = "v"
    outcome = "y"
    common_causes = [("W" + str(i)) for i in range(0, num_common_causes)]
    ate = beta
    ate = np.mean(_compute_y(np.ones(num_samples), W, X, beta, c2, ce) - _compute_y(np.zeros(num_samples), W, X, beta, c2, ce))
    instruments = [("Z" + str(i)) for i in range(0, num_instruments)]
    effect_modifiers =[("X" + str(i)) for i in range(0, num_effect_modifiers)]
    other_variables = None
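The switch from ate = beta to the empirical np.mean(...) follows because the modifier term now scales with treatment, so the unit-level contrast is beta + X @ ce rather than beta alone. A small numpy check of that arithmetic (hypothetical values; the helper here takes only t for brevity but mirrors the _compute_y structure above):

import numpy as np

rng = np.random.default_rng(0)
num_samples, beta = 4, 10.0
W, c2 = rng.normal(size=(num_samples, 2)), np.array([1.0, -1.0])
X, ce = rng.uniform(size=(num_samples, 1)), np.array([2.0])

def _compute_y(t):
    return beta*t + W @ c2 + (X @ ce) * t

ate = np.mean(_compute_y(np.ones(num_samples)) - _compute_y(np.zeros(num_samples)))
assert np.isclose(ate, beta + (X @ ce).mean())  # the W @ c2 term cancels in the difference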
@@ -118,7 +123,6 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
    }
    return ret_dict

def xy_dataset(num_samples, effect=True, sd_error=1):
    treatment = 'Treatment'
    outcome = 'Outcome'

View file

@@ -1,5 +1,4 @@
import unittest
import pytest
from dowhy.causal_estimator import CausalEstimator