Vectorize operations for propensity score matching (#1179)
* Add vector operations
  Added TODO comment
  Signed-off-by: Rahul Shrestha <rahulshrestha0101@gmail.com>
  Formatting fix
  Signed-off-by: Rahul Shrestha <rahulshrestha0101@gmail.com>
  Bug fix with string name
  Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
* Vectorize remaining list
  Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
---------
Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
This commit is contained in:
Родитель
5d8fdd0992
Коммит
72e3ba055e
|
@ -1,5 +1,6 @@
|
||||||
from typing import Any, List, Optional, Union
|
from typing import Any, List, Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.neighbors import NearestNeighbors
|
from sklearn.neighbors import NearestNeighbors
|
||||||
|
|
||||||
|
@ -120,7 +121,7 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
|
||||||
|
|
||||||
# TODO remove neighbors that are more than a given radius apart
|
# TODO remove neighbors that are more than a given radius apart
|
||||||
|
|
||||||
# estimate ATT on treated by summing over difference between matched neighbors
|
# Estimating ATT (average treatment effect on the treated) as the mean outcome difference between each treated unit and its matched control neighbor
|
||||||
control_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
|
control_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
|
||||||
control[self.propensity_score_column].values.reshape(-1, 1)
|
control[self.propensity_score_column].values.reshape(-1, 1)
|
||||||
)
|
)
|
||||||
|
@ -129,27 +130,28 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
|
||||||
self.logger.debug(distances)
|
self.logger.debug(distances)
|
||||||
|
|
||||||
att = 0
|
att = 0
|
||||||
numtreatedunits = treated.shape[0]
|
outcome_variable = self._target_estimand.outcome_variable[0]
|
||||||
for i in range(numtreatedunits):
|
treated_outcomes = treated[outcome_variable]
|
||||||
treated_outcome = treated.iloc[i][self._target_estimand.outcome_variable[0]].item()
|
control_outcomes = list(control.iloc[indices.flatten()][outcome_variable])
|
||||||
control_outcome = control.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
|
|
||||||
att += treated_outcome - control_outcome
|
|
||||||
|
|
||||||
att /= numtreatedunits
|
att = (treated_outcomes - control_outcomes).mean()
|
||||||
|
|
||||||
|
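# Estimating ATC (average treatment effect on the controls) as the mean outcome difference between each control unit's matched treated neighbor and the control unit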
|
||||||
|
|
||||||
# Now computing ATC
|
|
||||||
treated_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
|
treated_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
|
||||||
treated[self.propensity_score_column].values.reshape(-1, 1)
|
treated[self.propensity_score_column].values.reshape(-1, 1)
|
||||||
)
|
)
|
||||||
distances, indices = treated_neighbors.kneighbors(control[self.propensity_score_column].values.reshape(-1, 1))
|
distances, indices = treated_neighbors.kneighbors(control[self.propensity_score_column].values.reshape(-1, 1))
|
||||||
atc = 0
|
|
||||||
numcontrolunits = control.shape[0]
|
|
||||||
for i in range(numcontrolunits):
|
|
||||||
control_outcome = control.iloc[i][self._target_estimand.outcome_variable[0]].item()
|
|
||||||
treated_outcome = treated.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
|
|
||||||
atc += treated_outcome - control_outcome
|
|
||||||
|
|
||||||
atc /= numcontrolunits
|
atc = 0
|
||||||
|
outcome_variable = self._target_estimand.outcome_variable[0]
|
||||||
|
control_outcomes = control[outcome_variable]
|
||||||
|
treated_outcomes = list(treated.iloc[indices.flatten()][outcome_variable])
|
||||||
|
|
||||||
|
atc = (treated_outcomes - control_outcomes).mean()
|
||||||
|
|
||||||
|
numtreatedunits = treated.shape[0]
|
||||||
|
numcontrolunits = control.shape[0]
|
||||||
|
|
||||||
if target_units == "att":
|
if target_units == "att":
|
||||||
est = att
|
est = att
|
||||||
|
|
Загрузка…
Ссылка в новой задаче