Vectorize operations for propensity score matching (#1179)

* Add vector operations
  Added todo comment
  Signed-off-by: Rahul Shrestha <rahulshrestha0101@gmail.com>
  formatting fix
  Signed-off-by: Rahul Shrestha <rahulshrestha0101@gmail.com>
  bug fix with string name
  Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
* Vectorize remaining list
  Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
---------
Signed-off-by: rahulbshrestha <rahulshrestha0101@gmail.com>
2024-06-04 12:20:40 +02:00 · 2024-06-04 12:20:40 +02:00 · 72e3ba055e
--- a/dowhy/causal_estimators/propensity_score_matching_estimator.py
+++ b/dowhy/causal_estimators/propensity_score_matching_estimator.py
@ -1,5 +1,6 @@
 from typing import Any, List, Optional, Union
 import numpy as np
 import pandas as pd
 from sklearn.neighbors import NearestNeighbors
@ -120,7 +121,7 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
        # TODO remove neighbors that are more than a given radius apart
-        # estimate ATT on treated by summing over difference between matched neighbors
+        # Estimating ATT on treated by summing over difference between matched neighbors
        control_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
            control[self.propensity_score_column].values.reshape(-1, 1)
        )
@ -129,27 +130,28 @@ class PropensityScoreMatchingEstimator(PropensityScoreEstimator):
        self.logger.debug(distances)
        att = 0
-        numtreatedunits = treated.shape[0]
+        outcome_variable = self._target_estimand.outcome_variable[0]
-        for i in range(numtreatedunits):
+        treated_outcomes = treated[outcome_variable]
-            treated_outcome = treated.iloc[i][self._target_estimand.outcome_variable[0]].item()
+        control_outcomes = list(control.iloc[indices.flatten()][outcome_variable])
            control_outcome = control.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
            att += treated_outcome - control_outcome
-        att /= numtreatedunits
+        att = (treated_outcomes - control_outcomes).mean()
        # Estimating ATC
        # Now computing ATC
        treated_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
            treated[self.propensity_score_column].values.reshape(-1, 1)
        )
        distances, indices = treated_neighbors.kneighbors(control[self.propensity_score_column].values.reshape(-1, 1))
        atc = 0
        numcontrolunits = control.shape[0]
        for i in range(numcontrolunits):
            control_outcome = control.iloc[i][self._target_estimand.outcome_variable[0]].item()
            treated_outcome = treated.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
            atc += treated_outcome - control_outcome
-        atc /= numcontrolunits
+        atc = 0
        outcome_variable = self._target_estimand.outcome_variable[0]
        control_outcomes = control[outcome_variable]
        treated_outcomes = list(treated.iloc[indices.flatten()][outcome_variable])
        atc = (treated_outcomes - control_outcomes).mean()
        numtreatedunits = treated.shape[0]
        numcontrolunits = control.shape[0]
        if target_units == "att":
            est = att