Vectorize operations for propensity score matching (#1179)

* Add vector operations

Added todo comment

Signed-off-by: Rahul Shrestha <rahulshrestha0101@gmail.com>

formatting fix

Signed-off-by: Rahul Shrestha <rahulshrestha0101@gmail.com>

bug fix with string name

Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>

* Vectorize remaining list

Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>

---------

Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
This commit is contained in:
Rahul Shrestha 2024-06-04 12:20:40 +02:00 коммит произвёл GitHub
Родитель 5d8fdd0992
Коммит 72e3ba055e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
1 изменённых файлов: 17 добавлений и 15 удалений

Просмотреть файл

@ -1,5 +1,6 @@
from typing import Any, List, Optional, Union
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
@ -120,7 +121,7 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
# TODO remove neighbors that are more than a given radius apart
# estimate ATT on treated by summing over difference between matched neighbors
# Estimating ATT on treated by summing over difference between matched neighbors
control_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
control[self.propensity_score_column].values.reshape(-1, 1)
)
@ -129,27 +130,28 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
self.logger.debug(distances)
att = 0
numtreatedunits = treated.shape[0]
for i in range(numtreatedunits):
treated_outcome = treated.iloc[i][self._target_estimand.outcome_variable[0]].item()
control_outcome = control.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
att += treated_outcome - control_outcome
outcome_variable = self._target_estimand.outcome_variable[0]
treated_outcomes = treated[outcome_variable]
control_outcomes = list(control.iloc[indices.flatten()][outcome_variable])
att /= numtreatedunits
att = (treated_outcomes - control_outcomes).mean()
# Estimating ATC
# Now computing ATC
treated_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
treated[self.propensity_score_column].values.reshape(-1, 1)
)
distances, indices = treated_neighbors.kneighbors(control[self.propensity_score_column].values.reshape(-1, 1))
atc = 0
numcontrolunits = control.shape[0]
for i in range(numcontrolunits):
control_outcome = control.iloc[i][self._target_estimand.outcome_variable[0]].item()
treated_outcome = treated.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
atc += treated_outcome - control_outcome
atc /= numcontrolunits
atc = 0
outcome_variable = self._target_estimand.outcome_variable[0]
control_outcomes = control[outcome_variable]
treated_outcomes = list(treated.iloc[indices.flatten()][outcome_variable])
atc = (treated_outcomes - control_outcomes).mean()
numtreatedunits = treated.shape[0]
numcontrolunits = control.shape[0]
if target_units == "att":
est = att