fixed weighting bug in stabilized propensity score weighting (#281)

This commit is contained in:
Amit Sharma 2021-06-06 21:15:17 +05:30 коммит произвёл GitHub
Родитель 21fccf133a
Коммит 1c7a72a3ee
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 136 добавлений и 71 удалений

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@@ -47,22 +47,22 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
num_units = len(self._data[self._treatment_name[0]])
num_treatment_units = sum(self._data[self._treatment_name[0]])
num_control_units = num_units - num_treatment_units
# Vanilla IPS estimator
self._data['ips_weight'] = (1/num_units) * (
# Vanilla IPS estimator
self._data['ips_weight'] = (
self._data[self._treatment_name[0]] / self._data['ps'] +
(1 - self._data[self._treatment_name[0]]) / (1 - self._data['ps'])
)
self._data['tips_weight'] = (1/num_treatment_units) * (
self._data['tips_weight'] = (
self._data[self._treatment_name[0]] +
(1 - self._data[self._treatment_name[0]]) * self._data['ps']/ (1 - self._data['ps'])
)
self._data['cips_weight'] = (1/num_control_units) * (
self._data['cips_weight'] = (
self._data[self._treatment_name[0]] * (1 - self._data['ps'])/ self._data['ps'] +
(1 - self._data[self._treatment_name[0]])
)
# Also known as the Hajek estimator
# The Hajek estimator (or the self-normalized estimator)
self._data['ips_normalized_weight'] = (
self._data[self._treatment_name[0]] / self._data['ps'] / ipst_sum +
(1 - self._data[self._treatment_name[0]]) / (1 - self._data['ps']) / ipsc_sum
@@ -80,31 +80,22 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
(1 - self._data[self._treatment_name[0]])/ipsc_for_atc_sum
)
# Stabilized weights
# Stabilized weights (from Robins, Hernan, Brumback (2000))
# Paper: Marginal Structural Models and Causal Inference in Epidemiology
p_treatment = sum(self._data[self._treatment_name[0]])/num_units
self._data['ips_stabilized_weight'] = (1/num_units) * (
self._data['ips_stabilized_weight'] = (
self._data[self._treatment_name[0]] / self._data['ps'] * p_treatment +
(1 - self._data[self._treatment_name[0]]) / (1 - self._data['ps']) * (1- p_treatment)
)
self._data['tips_stabilized_weight'] = (1/num_treatment_units) * (
self._data['tips_stabilized_weight'] = (
self._data[self._treatment_name[0]] * p_treatment +
(1 - self._data[self._treatment_name[0]]) * self._data['ps'] / (1 - self._data['ps']) * (1- p_treatment)
)
self._data['cips_stabilized_weight'] = (1/num_control_units) * (
self._data['cips_stabilized_weight'] = (
self._data[self._treatment_name[0]] * (1 - self._data['ps']) / self._data['ps'] * p_treatment +
(1 - self._data[self._treatment_name[0]])* (1-p_treatment)
)
# Simple normalized estimator (commented out for now)
#ips_sum = self._data['ips_weight'].sum()
#self._data['nips_weight'] = self._data['ips_weight'] / ips_sum
#self._data['ips2'] = self._data['ps'] / (1 - self._data['ps'])
#treated_ips_sum = (self._data['ips2'] * self._data[self._treatment_name[0]]).sum()
#control_ips_sum = (self._data['ips2'] * (1 - self._data[self._treatment_name[0]])).sum()
#self._data['itps_weight'] = self._data['ips2'] / treated_ips_sum
#self._data['icps_weight'] = self._data['ips2'] / control_ips_sum
if self._target_units == "ate":
weighting_scheme_name = self.weighting_scheme
elif self._target_units == "att":
@@ -125,7 +116,12 @@ class PropensityScoreWeightingEstimator(PropensityScoreEstimator):
(1 - self._data[self._treatment_name[0]]) *
self._data[self._outcome_name]
)
est = self._data['d_y'].sum() - self._data['dbar_y'].sum()
sum_dy_weights = np.sum(
self._data[self._treatment_name[0]] * self._data[weighting_scheme_name])
sum_dbary_weights = np.sum(
(1 - self._data[self._treatment_name[0]]) * self._data[weighting_scheme_name])
# Subtracting the weighted means
est = self._data['d_y'].sum() / sum_dy_weights - self._data['dbar_y'].sum() / sum_dbary_weights
# TODO - how can we add additional information into the returned estimate?