Added CATE estimator in linear regression to set up support for CATE
This commit is contained in:
Родитель
04e2bff070
Коммит
b5d7405063
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -50,6 +50,17 @@ class CausalEstimator:
|
|||
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Setting more values
|
||||
if self._data is not None:
|
||||
self._treatment = self._data[self._treatment_name]
|
||||
self._outcome = self._data[self._outcome_name]
|
||||
|
||||
# Now saving the effect modifiers
|
||||
if self._effect_modifier_names:
|
||||
self._effect_modifiers = self._data[self._effect_modifier_names]
|
||||
self.logger.debug("Effect modifiers: " +
|
||||
",".join(self._effect_modifier_names))
|
||||
|
||||
def _estimate_effect(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
@ -62,14 +73,6 @@ class CausalEstimator:
|
|||
:returns: point estimate of causal effect
|
||||
|
||||
"""
|
||||
self._treatment = self._data[self._treatment_name]
|
||||
self._outcome = self._data[self._outcome_name]
|
||||
|
||||
# Now saving the effect modifiers
|
||||
if self._effect_modifier_names:
|
||||
self._effect_modifiers = self._data[self._effect_modifier_names]
|
||||
self.logger.debug("Effect modifiers: " +
|
||||
",".join(self._effect_modifier_names))
|
||||
|
||||
est = self._estimate_effect()
|
||||
self._estimate = est
|
||||
|
@ -102,10 +105,7 @@ class CausalEstimator:
|
|||
:returns:
|
||||
|
||||
"""
|
||||
self._treatment = self._data[self._treatment_name]
|
||||
self._outcome = self._data[self._outcome_name]
|
||||
est = self._do(x)
|
||||
|
||||
return est
|
||||
|
||||
def construct_symbolic_estimator(self, estimand):
|
||||
|
|
|
@ -31,24 +31,15 @@ class LinearRegressionEstimator(CausalEstimator):
|
|||
self._linear_model = None
|
||||
|
||||
def _estimate_effect(self):
|
||||
if self._effect_modifiers is None:
|
||||
treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
|
||||
if len(self._observed_common_causes_names)>0:
|
||||
features = np.concatenate((treatment_2d, self._observed_common_causes),
|
||||
axis=1)
|
||||
else:
|
||||
features = treatment_2d
|
||||
self._linear_model = linear_model.LinearRegression()
|
||||
self._linear_model.fit(features, self._outcome)
|
||||
coefficients = self._linear_model.coef_
|
||||
self.logger.debug("Coefficients of the fitted linear model: " +
|
||||
",".join(map(str, coefficients)))
|
||||
estimate = CausalEstimate(estimate=coefficients[0],
|
||||
target_estimand=self._target_estimand,
|
||||
realized_estimand_expr=self.symbolic_estimator,
|
||||
intercept=self._linear_model.intercept_)
|
||||
else:
|
||||
pass #TODO
|
||||
features, self._linear_model = self._build_linear_model()
|
||||
coefficients = self._linear_model.coef_
|
||||
self.logger.debug("Coefficients of the fitted linear model: " +
|
||||
",".join(map(str, coefficients)))
|
||||
effect_estimate = self._do(1) - self._do(0)
|
||||
estimate = CausalEstimate(estimate=effect_estimate,
|
||||
target_estimand=self._target_estimand,
|
||||
realized_estimand_expr=self.symbolic_estimator,
|
||||
intercept=self._linear_model.intercept_)
|
||||
return estimate
|
||||
|
||||
def construct_symbolic_estimator(self, estimand):
|
||||
|
@ -57,19 +48,31 @@ class LinearRegressionEstimator(CausalEstimator):
|
|||
expr += "+".join(var_list)
|
||||
return expr
|
||||
|
||||
def _build_linear_model(self):
|
||||
def _build_features(self):
|
||||
treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
|
||||
features = np.concatenate((treatment_2d, self._observed_common_causes),
|
||||
if len(self._observed_common_causes_names)>0:
|
||||
features = np.concatenate((treatment_2d, self._observed_common_causes),
|
||||
axis=1)
|
||||
else:
|
||||
features = treatment_2d
|
||||
if self._effect_modifier_names:
|
||||
for i in range(treatment_2d.shape[1]):
|
||||
curr_treatment = treatment_2d[:,i]
|
||||
new_features = curr_treatment[:, np.newaxis] * self._effect_modifiers.to_numpy()
|
||||
features = np.concatenate((features, new_features), axis=1)
|
||||
return features
|
||||
|
||||
def _build_linear_model(self):
|
||||
features = self._build_features()
|
||||
model = linear_model.LinearRegression()
|
||||
model.fit(features, self._outcome)
|
||||
self._linear_model = model
|
||||
return (features, model)
|
||||
|
||||
def _do(self, x):
|
||||
if not self._linear_model:
|
||||
self._build_linear_model()
|
||||
_, self._linear_model = self._build_linear_model()
|
||||
interventional_treatment_2d = np.full(self._treatment.shape, x).reshape(len(self._treatment), -1)
|
||||
features = np.concatenate((interventional_treatment_2d, self._observed_common_causes),
|
||||
axis=1)
|
||||
interventional_outcomes = self._linear_model.predict(features)
|
||||
features = self._build_features()#np.concatenate((interventional_treatment_2d, self._observed_common_causes),axis=1)
|
||||
new_features = np.concatenate((interventional_treatment_2d, features[:,1: ]), axis=1)
|
||||
interventional_outcomes = self._linear_model.predict(new_features)
|
||||
return interventional_outcomes.mean()
|
||||
|
|
|
@ -16,6 +16,7 @@ def stochastically_convert_to_binary(x):
|
|||
|
||||
def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
||||
num_effect_modifiers=0, treatment_is_binary=True):
|
||||
W, X, Z, c1, c2, ce, cz = [None]*7
|
||||
beta = float(beta)
|
||||
if num_common_causes > 0:
|
||||
range_c1 = beta*0.5
|
||||
|
@ -46,7 +47,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
|||
ce = np.random.uniform(0, range_ce, num_effect_modifiers)
|
||||
# TODO - test all our methods with random noise added to covariates (instead of the stochastic treatment assignment)
|
||||
|
||||
t = np.random.normal(0, 1)
|
||||
t = np.random.normal(0, 1, num_samples)
|
||||
if num_common_causes > 0:
|
||||
t += W @ c1 # + np.random.normal(0, 0.01)
|
||||
if num_instruments > 0:
|
||||
|
@ -54,11 +55,15 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
|||
# Converting treatment to binary if required
|
||||
if treatment_is_binary:
|
||||
t = np.vectorize(stochastically_convert_to_binary)(t)
|
||||
y = beta*t # + np.random.normal(0,0.01)
|
||||
if num_common_causes > 0:
|
||||
y += W @ c2
|
||||
if num_effect_modifiers > 0:
|
||||
y += X @ ce
|
||||
|
||||
def _compute_y(t, W, X, beta, c2, ce):
|
||||
y = beta*t # + np.random.normal(0,0.01)
|
||||
if num_common_causes > 0:
|
||||
y += W @ c2
|
||||
if num_effect_modifiers > 0:
|
||||
y += (X @ ce) * t
|
||||
return y
|
||||
y = _compute_y(t, W, X, beta, c2, ce)
|
||||
|
||||
data = np.column_stack((t, y))
|
||||
if num_common_causes > 0:
|
||||
|
@ -71,7 +76,7 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
|||
treatment = "v"
|
||||
outcome = "y"
|
||||
common_causes = [("W" + str(i)) for i in range(0, num_common_causes)]
|
||||
ate = beta
|
||||
ate = np.mean(_compute_y(np.ones(num_samples), W, X, beta, c2, ce) - _compute_y(np.zeros(num_samples), W, X, beta, c2, ce))
|
||||
instruments = [("Z" + str(i)) for i in range(0, num_instruments)]
|
||||
effect_modifiers =[("X" + str(i)) for i in range(0, num_effect_modifiers)]
|
||||
other_variables = None
|
||||
|
@ -118,7 +123,6 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
|
|||
}
|
||||
return ret_dict
|
||||
|
||||
|
||||
def xy_dataset(num_samples, effect=True, sd_error=1):
|
||||
treatment = 'Treatment'
|
||||
outcome = 'Outcome'
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
from dowhy.causal_estimator import CausalEstimator
|
||||
|
|
Загрузка…
Ссылка в новой задаче