From 1fbeb76f821db7d61aceafe2b189a0f3f8838bbc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jul 2024 20:13:02 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...utomated Machine Learning For EconML.ipynb | 4 +-- notebooks/CATE validation.ipynb | 8 ++--- ...nd Orthogonal Random Forest Examples.ipynb | 20 +++++------ ...sal Model Selection with the RScorer.ipynb | 8 ++--- notebooks/Choosing First Stage Models.ipynb | 12 +++---- ...nline Media Company - EconML + DoWhy.ipynb | 14 ++++---- ...mentation at An Online Media Company.ipynb | 2 +- ... A Software Company - EconML + DoWhy.ipynb | 12 +++---- ...line Travel Company - EconML + DoWhy.ipynb | 14 ++++---- ... Testing at An Online Travel Company.ipynb | 4 +-- ...f training program - Lalonde dataset.ipynb | 34 +++++++++---------- notebooks/Deep IV Examples.ipynb | 4 +-- .../Double Machine Learning Examples.ipynb | 12 +++---- ... Robust Learner and Interpretability.ipynb | 10 +++--- ...mic Double Machine Learning Examples.ipynb | 8 ++--- notebooks/Generalized Random Forests.ipynb | 10 +++--- notebooks/Interpretability with SHAP.ipynb | 2 +- notebooks/Metalearners Examples.ipynb | 12 +++---- ...licy Learning with Trees and Forests.ipynb | 4 +-- notebooks/Scaling EconML using Ray.ipynb | 4 +-- ...nterpretation for Ames Housing Price.ipynb | 20 +++++------ .../Treatment Featurization Examples.ipynb | 20 +++++------ ...ted Double Machine Learning Examples.ipynb | 4 +-- 23 files changed, 121 insertions(+), 121 deletions(-) diff --git a/notebooks/AutomatedML/Automated Machine Learning For EconML.ipynb b/notebooks/AutomatedML/Automated Machine Learning For EconML.ipynb index 98b67817..15fa876f 100644 --- a/notebooks/AutomatedML/Automated Machine Learning For EconML.ipynb +++ b/notebooks/AutomatedML/Automated Machine Learning For EconML.ipynb @@ -180,7 +180,7 @@ " return\n", "\n", " def fit(self, X, y, sample_weight = None, **fit_params):\n", - " self.best_ind_ = np.argmax([gcv.fit(X, y, sample_weight = sample_weight, **fit_params).best_score_ \n", + " self.best_ind_ = np.argmax([gcv.fit(X, y, sample_weight = sample_weight, **fit_params).best_score_\n", " for gcv in self._gcv_list])\n", " self.best_estimator_ = self._gcv_list[self.best_ind_].best_estimator_\n", " self.best_score_ = self._gcv_list[self.best_ind_].best_score_\n", @@ -934,7 +934,7 @@ "plt.plot(X_test, expected_te, 'b--', label='True effect')\n", "plt.ylabel('Treatment Effect')\n", "plt.xlabel('x')\n", - "plt.ylim(-0.5, 1.5) \n", + "plt.ylim(-0.5, 1.5)\n", "plt.legend()\n", "plt.show()" ] diff --git a/notebooks/CATE validation.ipynb b/notebooks/CATE validation.ipynb index ecf36672..5ac13bac 100644 --- a/notebooks/CATE validation.ipynb +++ b/notebooks/CATE validation.ipynb @@ -246,8 +246,8 @@ "source": [ "# Initialize DRTester and fit/predict nuisance models\n", "dml_tester = DRTester(\n", - " model_regression=model_regression, \n", - " model_propensity=model_propensity, \n", + " model_regression=model_regression,\n", + " model_propensity=model_propensity,\n", " cate=est_dm\n", ").fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)\n", "\n", @@ -607,8 +607,8 @@ "source": [ "# Initialize DRTester and fit/predict nuisance models\n", "t_tester = DRTester(\n", - " model_regression=model_regression, \n", - " model_propensity=model_propensity, \n", + " model_regression=model_regression,\n", + " model_propensity=model_propensity,\n", " cate=est_t\n", ").fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)\n", "\n", diff --git a/notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb b/notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb index 23a2b64d..5745ed67 100644 --- a/notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb +++ b/notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb @@ -125,11 +125,11 @@ "coefs_Y = np.random.uniform(0, 1, size=support_size)\n", "def epsilon_sample(n):\n", " return np.random.uniform(-1, 1, size=n)\n", - "# Treatment support \n", + "# Treatment support\n", "support_T = support_Y\n", "coefs_T = np.random.uniform(0, 1, size=support_size)\n", "def eta_sample(n):\n", - " return np.random.uniform(-1, 1, size=n) \n", + " return np.random.uniform(-1, 1, size=n)\n", "\n", "# Generate controls, covariates, treatments and outcomes\n", "W = np.random.normal(0, 1, size=(n, n_w))\n", @@ -558,7 +558,7 @@ "support_T = support_Y\n", "coefs_T = np.random.uniform(0, 1, size=support_size)\n", "def eta_sample(n):\n", - " return np.random.uniform(-1, 1, size=n) \n", + " return np.random.uniform(-1, 1, size=n)\n", "\n", "# Generate controls, covariates, treatments and outcomes\n", "W = np.random.normal(0, 1, size=(n, n_w))\n", @@ -595,7 +595,7 @@ " max_depth=30, subsample_ratio=subsample_ratio,\n", " propensity_model = LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'),\n", " model_Y = Lasso(alpha=lambda_reg),\n", - " propensity_model_final=LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'), \n", + " propensity_model_final=LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'),\n", " model_Y_final=WeightedLasso(alpha=lambda_reg)\n", ")" ] @@ -899,11 +899,11 @@ " coefs_Y = np.random.uniform(0, 1, size=support_size)\n", " def epsilon_sample(n):\n", " return np.random.uniform(-1, 1, size=n)\n", - " # Treatment support \n", + " # Treatment support\n", " support_T = support_Y\n", " coefs_T = np.random.uniform(0, 1, size=(support_size, n_treatments))\n", " def eta_sample(n):\n", - " return np.random.uniform(-1, 1, size=n) \n", + " return np.random.uniform(-1, 1, size=n)\n", " # Generate controls, covariates, treatments and outcomes\n", " W = np.random.normal(0, 1, size=(n, n_w))\n", " X = np.random.uniform(0, 1, size=(n, n_x))\n", @@ -1514,7 +1514,7 @@ "\n", "if not os.path.isfile(file_name):\n", " print(\"Downloading file (this might take a few seconds)...\")\n", - " urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\", \n", + " urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\",\n", " file_name)\n", "oj_data = pd.read_csv(file_name)\n", "oj_data.head()" @@ -1564,11 +1564,11 @@ "outputs": [], "source": [ "est = DMLOrthoForest(\n", - " n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth, \n", + " n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth,\n", " subsample_ratio=subsample_ratio,\n", " model_T=Lasso(alpha=0.1),\n", " model_Y=Lasso(alpha=0.1),\n", - " model_T_final=WeightedLassoCVWrapper(cv=3), \n", + " model_T_final=WeightedLassoCVWrapper(cv=3),\n", " model_Y_final=WeightedLassoCVWrapper(cv=3)\n", " )" ] @@ -1615,7 +1615,7 @@ "metadata": {}, "outputs": [], "source": [ - "min_income = 10.0 \n", + "min_income = 10.0\n", "max_income = 11.1\n", "delta = (max_income - min_income) / 100\n", "X_test = np.arange(min_income, max_income + delta - 0.001, delta).reshape(-1, 1)" diff --git a/notebooks/Causal Model Selection with the RScorer.ipynb b/notebooks/Causal Model Selection with the RScorer.ipynb index c1c1d16c..672f7bf9 100644 --- a/notebooks/Causal Model Selection with the RScorer.ipynb +++ b/notebooks/Causal Model Selection with the RScorer.ipynb @@ -47,7 +47,7 @@ "source": [ "## Ignore warnings\n", "import warnings\n", - "warnings.filterwarnings('ignore') " + "warnings.filterwarnings('ignore')" ] }, { @@ -123,7 +123,7 @@ "support_T = support_Y\n", "coefs_T = np.random.uniform(0, 1, size=support_size)\n", "def eta_sample(n):\n", - " return np.random.uniform(-1, 1, size=n) \n", + " return np.random.uniform(-1, 1, size=n)\n", "\n", "# Generate controls, covariates, treatments and outcomes\n", "X = np.random.uniform(0, 1, size=(n, n_x))\n", @@ -446,7 +446,7 @@ "outputs": [], "source": [ "X_train, X_val, T_train, T_val,\\\n", - "Y_train, Y_val, expected_te_train, expected_te_val = train_test_split(X, T, Y, expected_te, \n", + "Y_train, Y_val, expected_te_train, expected_te_val = train_test_split(X, T, Y, expected_te,\n", " test_size=.3, random_state=123)" ] }, @@ -632,7 +632,7 @@ "source": [ "# Visualization of bias distribution\n", "plt.figure(figsize=(15, 5))\n", - "plt.violinplot([np.abs(mdl.effect(X).flatten() - expected_te) for _, mdl in models] + \n", + "plt.violinplot([np.abs(mdl.effect(X).flatten() - expected_te) for _, mdl in models] +\n", " [np.abs(best.effect(X).flatten() - expected_te)] +\n", " [np.abs(ensemble.effect(X).flatten() - expected_te)], showmeans=True)\n", "plt.ylabel(\"Bias distribution\")\n", diff --git a/notebooks/Choosing First Stage Models.ipynb b/notebooks/Choosing First Stage Models.ipynb index 983b4f66..dcc431a2 100644 --- a/notebooks/Choosing First Stage Models.ipynb +++ b/notebooks/Choosing First Stage Models.ipynb @@ -158,8 +158,8 @@ ], "source": [ "def first_stage():\n", - " return GridSearchCV(estimator=GradientBoostingRegressor(), param_grid={\"max_depth\": [3, 5, None], \n", - " \"n_estimators\": (50, 100, 200)}, \n", + " return GridSearchCV(estimator=GradientBoostingRegressor(), param_grid={\"max_depth\": [3, 5, None],\n", + " \"n_estimators\": (50, 100, 200)},\n", " cv=2, n_jobs=-1)\n", "est = LinearDML(\n", " model_y=first_stage(),\n", @@ -379,10 +379,10 @@ ], "source": [ "def first_stage():\n", - " return GridSearchCVList([Lasso(max_iter=10000), GradientBoostingRegressor()], \n", - " param_grid_list=[{\"alpha\": [0.001, 0.01, 0.1, 1, 10]}, \n", - " {\"max_depth\": [3, 5, None], \n", - " \"n_estimators\": [50, 100, 200]}], \n", + " return GridSearchCVList([Lasso(max_iter=10000), GradientBoostingRegressor()],\n", + " param_grid_list=[{\"alpha\": [0.001, 0.01, 0.1, 1, 10]},\n", + " {\"max_depth\": [3, 5, None],\n", + " \"n_estimators\": [50, 100, 200]}],\n", " cv=2)" ] }, diff --git a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb index 5d57bfd4..32b54bf4 100644 --- a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb @@ -293,7 +293,7 @@ "Y = train_data[\"log_demand\"].values\n", "T = train_data[\"log_price\"].values\n", "X = train_data[[\"income\"]].values # features\n", - "confounder_names = [\"account_age\", \"age\", \"avg_hours\", \"days_visited\", \"friends_count\", \"has_membership\", \n", + "confounder_names = [\"account_age\", \"age\", \"avg_hours\", \"days_visited\", \"friends_count\", \"has_membership\",\n", " \"is_US\", \"songs_purchased\"]\n", "W = train_data[confounder_names].values" ] @@ -352,7 +352,7 @@ ], "source": [ "# fit through dowhy\n", - "est_dw = est.dowhy.fit(Y, T, X=X, W=W, \n", + "est_dw = est.dowhy.fit(Y, T, X=X, W=W,\n", " outcome_names=[\"log_demand\"], treatment_names=[\"log_price\"], feature_names=[\"income\"],\n", " confounder_names=confounder_names, inference=\"statsmodels\")" ] @@ -382,7 +382,7 @@ " )\n", "except Exception:\n", " # Fall back on default graph view\n", - " est_dw.view_model(layout=None) " + " est_dw.view_model(layout=None)" ] }, { @@ -707,7 +707,7 @@ "source": [ "# Get treatment effect and its confidence interval\n", "te_pred = est_nonparam_dw.effect(X_test).flatten()\n", - "te_pred_interval = est_nonparam_dw.effect_interval(X_test) " + "te_pred_interval = est_nonparam_dw.effect_interval(X_test)" ] }, { @@ -902,7 +902,7 @@ ], "source": [ "res_placebo = est_nonparam_dw.refute_estimate(\n", - " method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n", + " method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n", " num_simulations=3\n", ")\n", "print(res_placebo)" @@ -951,7 +951,7 @@ ], "source": [ "res_subset = est_nonparam_dw.refute_estimate(\n", - " method_name=\"data_subset_refuter\", subset_fraction=0.8, \n", + " method_name=\"data_subset_refuter\", subset_fraction=0.8,\n", " num_simulations=3)\n", "print(res_subset)" ] @@ -1079,7 +1079,7 @@ "policy_dic[\"Give No One Discount\"] = np.mean(revenue_fn(train_data, 0, 0.1, 1, np.ones(len(X))))\n", "\n", "## follow our policy, but give -10% discount for the group doesn't recommend to give discount\n", - "policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data, \n", + "policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data,\n", " -0.1, 0.1, 1, policy))\n", "\n", "## give everyone -10% discount\n", diff --git a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb index c2425388..3a704cc4 100644 --- a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb @@ -729,7 +729,7 @@ "policy_dic[\"Give No One Discount\"] = np.mean(revenue_fn(train_data, 0, 0.1, 1, np.ones(len(X))))\n", "\n", "## follow our policy, but give -10% discount for the group doesn't recommend to give discount\n", - "policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data, \n", + "policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data,\n", " -0.1, 0.1, 1, policy))\n", "\n", "## give everyone -10% discount\n", diff --git a/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company - EconML + DoWhy.ipynb b/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company - EconML + DoWhy.ipynb index 2a5033dd..e7ca1992 100644 --- a/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company - EconML + DoWhy.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company - EconML + DoWhy.ipynb @@ -78,7 +78,7 @@ "# EconML imports\n", "from econml.dr import LinearDRLearner\n", "\n", - "# DoWhy imports \n", + "# DoWhy imports\n", "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", @@ -284,7 +284,7 @@ "W = multi_data.drop(\n", " columns=[\"Tech Support\", \"Discount\", \"Revenue\", \"Size\"]\n", ") # controls\n", - "confounder_names = [\"Global Flag\", \"Major Flag\", \"SMC Flag\", \"Commercial Flag\", \n", + "confounder_names = [\"Global Flag\", \"Major Flag\", \"SMC Flag\", \"Commercial Flag\",\n", " \"IT Spend\", \"Employee Count\", \"PC Count\"]" ] }, @@ -487,7 +487,7 @@ "source": [ "# fit through dowhy\n", "test_customers = X.iloc[:1000].values\n", - "est_dw = est.dowhy.fit(Y, T, X=X, W=W, \n", + "est_dw = est.dowhy.fit(Y, T, X=X, W=W,\n", " outcome_names=[\"Revenue\"], treatment_names=[\"discrete_T\"], feature_names=[\"Size\"],\n", " confounder_names=confounder_names, target_units=test_customers)" ] @@ -537,7 +537,7 @@ " )\n", "except Exception:\n", " # Fall back on default graph view\n", - " est_dw.view_model(layout=None) " + " est_dw.view_model(layout=None)" ] }, { @@ -1327,7 +1327,7 @@ ], "source": [ "res_placebo = est_dw.refute_estimate(\n", - " method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n", + " method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n", " num_simulations=5\n", ")\n", "print(res_placebo)" @@ -1426,7 +1426,7 @@ "source": [ "# Removing a random subset of the data\n", "res_subset = est_dw.refute_estimate(\n", - " method_name=\"data_subset_refuter\", subset_fraction=0.8, \n", + " method_name=\"data_subset_refuter\", subset_fraction=0.8,\n", " num_simulations=3)\n", "print(res_subset)" ] diff --git a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb index dc10d41e..4e984096 100644 --- a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb @@ -86,7 +86,7 @@ "import lightgbm as lgb\n", "from sklearn.preprocessing import PolynomialFeatures\n", "\n", - "# DoWhy imports \n", + "# DoWhy imports\n", "\n", "# EconML imports\n", "from econml.iv.dr import LinearIntentToTreatDRIV\n", @@ -145,7 +145,7 @@ "outputs": [], "source": [ "# Import the sample AB data\n", - "file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n", + "file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\"\n", "ab_data = pd.read_csv(file_url)" ] }, @@ -363,7 +363,7 @@ }, "outputs": [], "source": [ - "# Define underlying treatment effect function \n", + "# Define underlying treatment effect function\n", "def TE_fn(X):\n", " return (0.2 + 0.3 * X['days_visited_free_pre'] - 0.2 * X['days_visited_hs_pre'] + X['os_type_osx']).values\n", "true_TE = TE_fn(X_data)\n", @@ -461,7 +461,7 @@ "source": [ "# Visualize causal graph\n", "plt.figure(figsize=(10,8))\n", - "est_dw.view_model(layout=None) " + "est_dw.view_model(layout=None)" ] }, { @@ -1094,7 +1094,7 @@ ], "source": [ "res_unobserved = est_dw.refute_estimate(method_name=\"add_unobserved_common_cause\",\n", - " confounders_effect_on_treatment=\"binary_flip\", \n", + " confounders_effect_on_treatment=\"binary_flip\",\n", " confounders_effect_on_outcome=\"linear\",\n", " effect_strength_on_treatment=0.05, effect_strength_on_outcome=0.5)\n", "print(res_unobserved)" @@ -1127,7 +1127,7 @@ } ], "source": [ - "res_placebo = est_dw.refute_estimate(method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n", + "res_placebo = est_dw.refute_estimate(method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n", " num_simulations=2)\n", "print(res_placebo)" ] @@ -1167,7 +1167,7 @@ ], "source": [ "# Removing a random subset of the data\n", - "res_subset = est_dw.refute_estimate(method_name=\"data_subset_refuter\", subset_fraction=0.8, \n", + "res_subset = est_dw.refute_estimate(method_name=\"data_subset_refuter\", subset_fraction=0.8,\n", " num_simulations=2)\n", "print(res_subset)" ] diff --git a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb index 783a70dc..9c2d7d5a 100644 --- a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb @@ -133,7 +133,7 @@ "outputs": [], "source": [ "# Import the sample AB data\n", - "file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n", + "file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\"\n", "ab_data = pd.read_csv(file_url)" ] }, @@ -351,7 +351,7 @@ }, "outputs": [], "source": [ - "# Define underlying treatment effect function \n", + "# Define underlying treatment effect function\n", "def TE_fn(X):\r\n", " return (0.2 + 0.3 * X[\"days_visited_free_pre\"] - 0.2 * X[\"days_visited_hs_pre\"] + X[\"os_type_osx\"]).values\n", "true_TE = TE_fn(X_data)\n", diff --git a/notebooks/CustomerScenarios/Case Study - Using EconML to evaluate the treatment effect of training program - Lalonde dataset.ipynb b/notebooks/CustomerScenarios/Case Study - Using EconML to evaluate the treatment effect of training program - Lalonde dataset.ipynb index 19920090..bc0242ed 100644 --- a/notebooks/CustomerScenarios/Case Study - Using EconML to evaluate the treatment effect of training program - Lalonde dataset.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Using EconML to evaluate the treatment effect of training program - Lalonde dataset.ipynb @@ -153,17 +153,17 @@ " W = reg_data[control_names].values\n", " # scale W\n", " scaler = StandardScaler()\n", - " W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n", + " W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n", " T = reg_data[\"treated\"]\n", " y = reg_data[outcome_name]\n", - " \n", + "\n", " # select the best nuisances model out of econml estimator\n", " model_y=first_stage_reg(W, y)\n", " model_t=first_stage_clf(W, T)\n", - " \n", + "\n", " if model_type=='dml':\n", " est = LinearDML(model_y=model_y,\n", - " model_t=model_t, \n", + " model_t=model_t,\n", " discrete_treatment=True, mc_iters=5,cv=5)\n", " elif model_type=='dr':\n", " est = LinearDRLearner(model_regression=model_y,\n", @@ -226,7 +226,7 @@ " # dml\n", " summ_dic1[\"method\"].append(\"DML full controls\")\n", " effect,se,lb,ub=econml_homo_model_wrapper(df_all, econml_controls,outcome_name,\"dml\",\n", - " cols_to_scale=cols_to_scale, print_summary=False) \n", + " cols_to_scale=cols_to_scale, print_summary=False)\n", " summ_dic[\"DML full controls\"].append([effect,se])\n", " summ_dic1[\"point_estimate\"].append(effect)\n", " summ_dic1[\"stderr\"].append(se)\n", @@ -235,13 +235,13 @@ " # dr\n", " summ_dic1[\"method\"].append(\"DR full controls\")\n", " effect,se,lb,ub=econml_homo_model_wrapper(df_all, econml_controls,outcome_name,\"dr\",\n", - " cols_to_scale=cols_to_scale, print_summary=False) \n", + " cols_to_scale=cols_to_scale, print_summary=False)\n", " summ_dic[\"DR full controls\"].append([effect,se])\n", " summ_dic1[\"point_estimate\"].append(effect)\n", " summ_dic1[\"stderr\"].append(se)\n", " summ_dic1[\"lower_bound\"].append(lb)\n", " summ_dic1[\"upper_bound\"].append(ub)\n", - " \n", + "\n", " return summ_dic,summ_dic1\n", "\n", "# error bar helper function\n", @@ -377,9 +377,9 @@ "complete_ols_columns_female=basic_ols_columns+[\"married\",\"re75\",\"re75_dummy\",\"re74\",\"re74_dummy\",\"re_diff_pre\",\n", " \"afdc75\",\"nchildren75\",\"haschild\"]\n", "# econml controls (exclude treatment)\n", - "econml_controls_male= ['age', 'age_2', 'educ', 're75','re74','re_diff_pre','nodegree', \n", + "econml_controls_male= ['age', 'age_2', 'educ', 're75','re74','re_diff_pre','nodegree',\n", " 'black', 'hisp', 'married','re75_dummy','re74_dummy']\n", - "econml_controls_female= ['age', 'age_2', 'educ','nchildren75', 're75','re74','re_diff_pre','nodegree', \n", + "econml_controls_female= ['age', 'age_2', 'educ','nchildren75', 're75','re74','re_diff_pre','nodegree',\n", " 'black', 'hisp', 'married','re75_dummy','re74_dummy','afdc75','haschild']" ] }, @@ -391,11 +391,11 @@ "source": [ "# preprocessing data\n", "male_control, male_treatment, male_psid1, male_psid3, male_cps1, male_cps3 = [\n", - " preprocessing(df,outcome_name_male,complete_ols_columns_male) \n", + " preprocessing(df,outcome_name_male,complete_ols_columns_male)\n", " for df in (male_control, male_treatment, male_psid1, male_psid3, male_cps1, male_cps3)\n", "]\n", "female_control, female_treatment, female_psid1, female_psid2 =[\n", - " preprocessing(df,outcome_name_female,complete_ols_columns_female) \n", + " preprocessing(df,outcome_name_female,complete_ols_columns_female)\n", " for df in (female_control, female_treatment, female_psid1, female_psid2)\n", "]" ] @@ -788,7 +788,7 @@ "# scale numeric features\n", "cols_to_scale=6\n", "scaler = StandardScaler()\n", - "X=np.hstack([scaler.fit_transform(X[:, :cols_to_scale]).astype(np.float32), X[:, cols_to_scale:]]) " + "X=np.hstack([scaler.fit_transform(X[:, :cols_to_scale]).astype(np.float32), X[:, cols_to_scale:]])" ] }, { @@ -813,7 +813,7 @@ "X = None\n", "W = df_cps[econml_controls_male].values\n", "# scale W\n", - "W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n", + "W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n", "T = df_cps[\"treated\"]\n", "y = df_cps[outcome_name_male]" ] @@ -837,7 +837,7 @@ "# train dml with sample weight 100 times\n", "p_value_with_weight=[]\n", "point_estimate_with_weight=[]\n", - "for _ in range(100): \n", + "for _ in range(100):\n", " est=LinearDML(model_t=model_t,model_y=model_y,discrete_treatment=True,mc_iters=10,cv=3)\n", " est.fit(y, T, X=None, W=W, sample_weight=weight,inference=\"statsmodels\")\n", " point_estimate_with_weight.append(est.intercept_)\n", @@ -853,7 +853,7 @@ "# train dml without sample weight 100 times\n", "p_value_without_weight=[]\n", "point_estimate_without_weight=[]\n", - "for _ in range(100): \n", + "for _ in range(100):\n", " est1=LinearDML(model_t=model_t,model_y=model_y,discrete_treatment=True,mc_iters=10,cv=3)\n", " est1.fit(y, T, X=None,W=W,inference=\"statsmodels\")\n", " point_estimate_without_weight.append(est1.intercept_)\n", @@ -969,7 +969,7 @@ "# scale W\n", "cols_to_scale=6\n", "scaler = StandardScaler()\n", - "W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n", + "W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n", "T = df[\"treated\"]\n", "y = df[outcome_name_male]" ] @@ -1051,7 +1051,7 @@ "# scale W\n", "cols_to_scale=6\n", "scaler = StandardScaler()\n", - "W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n", + "W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n", "T = df_cps[\"treated\"]\n", "y = df_cps[outcome_name_male]" ] diff --git a/notebooks/Deep IV Examples.ipynb b/notebooks/Deep IV Examples.ipynb index 38da0090..0df28955 100644 --- a/notebooks/Deep IV Examples.ipynb +++ b/notebooks/Deep IV Examples.ipynb @@ -169,7 +169,7 @@ } ], "source": [ - "# Outcome equation \n", + "# Outcome equation\n", "y = t*t / 10 - x*t / 10 + e\n", "\n", "# The endogeneity problem is clear, the latent error enters both treatment and outcome equally\n", @@ -246,7 +246,7 @@ " use_upper_bound_loss = False, # whether to use an approximation to the true loss\n", " n_gradient_samples = 1, # number of samples to use in second estimate of the response\n", " # (to make loss estimate unbiased)\n", - " optimizer='adam', # Keras optimizer to use for training - see https://keras.io/optimizers/ \n", + " optimizer='adam', # Keras optimizer to use for training - see https://keras.io/optimizers/\n", " first_stage_options = keras_fit_options, # options for training treatment model\n", " second_stage_options = keras_fit_options) # options for training response model" ] diff --git a/notebooks/Double Machine Learning Examples.ipynb b/notebooks/Double Machine Learning Examples.ipynb index 940bbee9..cdfc9f30 100644 --- a/notebooks/Double Machine Learning Examples.ipynb +++ b/notebooks/Double Machine Learning Examples.ipynb @@ -772,7 +772,7 @@ "support_T = support_Y\n", "coefs_T = np.random.uniform(0, 1, size=support_size)\n", "def eta_sample(n):\n", - " return np.random.uniform(-1, 1, size=n) \n", + " return np.random.uniform(-1, 1, size=n)\n", "\n", "# Generate controls, covariates, treatments and outcomes\n", "W = np.random.normal(0, 1, size=(n, n_w))\n", @@ -2288,8 +2288,8 @@ "source": [ "# Prepare data\n", "oj_data['price'] = np.log(oj_data[\"price\"])\n", - "# Transform dataset. \n", - "# For each store in each week, get a vector of logmove and a vector of logprice for each brand. \n", + "# Transform dataset.\n", + "# For each store in each week, get a vector of logmove and a vector of logprice for each brand.\n", "# Other features are store specific, will be the same for all brands.\n", "groupbylist = [\"store\", \"week\", \"AGE60\", \"EDUC\", \"ETHNIC\", \"INCOME\",\n", " \"HHLARGE\", \"WORKWOM\", \"HVAL150\",\n", @@ -2298,7 +2298,7 @@ " columns=oj_data.groupby(groupbylist).cumcount(),\n", " values=['logmove', 'price'],\n", " aggfunc='sum').reset_index()\n", - "oj_data1.columns = oj_data1.columns.map('{0[0]}{0[1]}'.format) \n", + "oj_data1.columns = oj_data1.columns.map('{0[0]}{0[1]}'.format)\n", "oj_data1 = oj_data1.rename(index=str,\n", " columns={\"logmove0\": \"logmove_T\",\n", " \"logmove1\": \"logmove_M\",\n", @@ -2441,9 +2441,9 @@ " plt.plot(X_test, te_pred[:, i, j],\n", " color=\"C{}\".format(str(3 * i + j)),\n", " label=\"OJ Elasticity {} to {}\".format(dic[j], dic[i]))\n", - " plt.fill_between(X_test.flatten(), \n", + " plt.fill_between(X_test.flatten(),\n", " te_pred_interval[0][:, i, j],\n", - " te_pred_interval[1][:, i,j], \n", + " te_pred_interval[1][:, i,j],\n", " color=\"C{}\".format(str(3*i+j)), alpha=.5, label=\"1-99% CI\")\n", " plt.xlabel(r'Scale(Income)')\n", " plt.ylabel('Orange Juice Elasticity')\n", diff --git a/notebooks/Doubly Robust Learner and Interpretability.ipynb b/notebooks/Doubly Robust Learner and Interpretability.ipynb index 2e0803f5..3fa11373 100644 --- a/notebooks/Doubly Robust Learner and Interpretability.ipynb +++ b/notebooks/Doubly Robust Learner and Interpretability.ipynb @@ -260,7 +260,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style(\"darkgrid\"):\n", - " fig, ax = plt.subplots(1,1) \n", + " fig, ax = plt.subplots(1,1)\n", " x = np.arange(len(point))\n", " plt.errorbar(x, point, yerr, fmt='o')\n", " ax.set_xticks(x)\n", @@ -561,7 +561,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style(\"darkgrid\"):\n", - " fig, ax = plt.subplots(1,1) \n", + " fig, ax = plt.subplots(1,1)\n", " x = np.arange(len(point))\n", " plt.errorbar(x, point, yerr, fmt='o')\n", " ax.set_xticks(x)\n", @@ -844,7 +844,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style(\"darkgrid\"):\n", - " fig, ax = plt.subplots(1,1) \n", + " fig, ax = plt.subplots(1,1)\n", " x = np.arange(len(point))\n", " plt.errorbar(x, point, yerr, fmt='o')\n", " ax.set_xticks(x)\n", @@ -902,7 +902,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style('darkgrid'):\n", - " fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n", + " fig, ax = plt.subplots(1,1, figsize=(20, 5))\n", " x = np.arange(len(point))\n", " stat_sig = (lower>0) | (upper<0)\n", " plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n", @@ -1254,7 +1254,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style('darkgrid'):\n", - " fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n", + " fig, ax = plt.subplots(1,1, figsize=(20, 5))\n", " x = np.arange(len(point))\n", " stat_sig = (lower>0) | (upper<0)\n", " plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n", diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb index 7d86378f..1addbf6b 100755 --- a/notebooks/Dynamic Double Machine Learning Examples.ipynb +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -171,8 +171,8 @@ "outputs": [], "source": [ "est = DynamicDML(\n", - " model_y=LassoCV(cv=3, max_iter=1000), \n", - " model_t=MultiTaskLassoCV(cv=3, max_iter=1000), \n", + " model_y=LassoCV(cv=3, max_iter=1000),\n", + " model_t=MultiTaskLassoCV(cv=3, max_iter=1000),\n", " cv=3)" ] }, @@ -444,8 +444,8 @@ "outputs": [], "source": [ "est = DynamicDML(\n", - " model_y=LassoCV(cv=3), \n", - " model_t=MultiTaskLassoCV(cv=3), \n", + " model_y=LassoCV(cv=3),\n", + " model_t=MultiTaskLassoCV(cv=3),\n", " cv=3)" ] }, diff --git a/notebooks/Generalized Random Forests.ipynb b/notebooks/Generalized Random Forests.ipynb index 0effa91a..a2ff6205 100644 --- a/notebooks/Generalized Random Forests.ipynb +++ b/notebooks/Generalized Random Forests.ipynb @@ -85,7 +85,7 @@ "# true_te = lambda X: np.hstack([X[:, [0]]**2 + 1, np.ones((X.shape[0], n_treatments - 1))])\n", "# true_te = lambda X: np.hstack([X[:, [0]]>0, np.ones((X.shape[0], n_treatments - 1))])\n", "def true_te(X):\n", - " return np.hstack([(X[:, [0]] > 0) * X[:, [0]], \n", + " return np.hstack([(X[:, [0]] > 0) * X[:, [0]],\n", " np.ones((X.shape[0], n_treatments - 1)) * np.arange(1, n_treatments).reshape(1, -1)])\n", "X = np.random.normal(0, 1, size=(n_samples, n_features))\n", "T = np.random.normal(0, 1, size=(n_samples, n_treatments))\n", @@ -811,7 +811,7 @@ "# true_te = lambda X: np.hstack([X[:, [0]]**2 + 1, np.ones((X.shape[0], n_treatments - 1))])\n", "# true_te = lambda X: np.hstack([X[:, [0]]>0, np.ones((X.shape[0], n_treatments - 1))])\n", "def true_te(X):\n", - " return np.hstack([(X[:, [0]] > 0) * X[:, [0]], \n", + " return np.hstack([(X[:, [0]] > 0) * X[:, [0]],\n", " np.ones((X.shape[0], n_treatments - 1)) * np.arange(1, n_treatments).reshape(1, -1)])\n", "X = np.random.normal(0, 1, size=(n_samples, n_features))\n", "W = np.random.normal(0, 1, size=(n_samples, n_features))\n", @@ -1485,7 +1485,7 @@ "from econml.utilities import cross_product\n", "\n", "class CustomGRF(BaseGRF):\n", - " \n", + "\n", " def _get_alpha_and_pointJ(self, X, T, y, *, Z):\n", " T = np.hstack([T, T**2])\n", " Z = np.hstack([Z, Z**2])\n", @@ -1493,8 +1493,8 @@ " T = np.hstack([T, np.ones((T.shape[0], 1))])\n", " Z = np.hstack([Z, np.ones((T.shape[0], 1))])\n", " return y * Z, cross_product(Z, T)\n", - " \n", - " \n", + "\n", + "\n", " def _get_n_outputs_decomposition(self, X, T, y, *, Z):\n", " n_relevant_outputs = T.shape[1] * 2\n", " n_outputs = n_relevant_outputs\n", diff --git a/notebooks/Interpretability with SHAP.ipynb b/notebooks/Interpretability with SHAP.ipynb index d44f66b5..d53ddc99 100644 --- a/notebooks/Interpretability with SHAP.ipynb +++ b/notebooks/Interpretability with SHAP.ipynb @@ -294,7 +294,7 @@ "n_treatments = 2\n", "n_outputs = 3\n", "def true_te(X):\n", - " return np.hstack([(X[:, [0]] > 0) * X[:, [0]], \n", + " return np.hstack([(X[:, [0]] > 0) * X[:, [0]],\n", " np.ones((X.shape[0], n_treatments - 1)) * np.arange(1, n_treatments).reshape(1, -1)])\n", "X = np.random.normal(0, 1, size=(n_samples, n_features))\n", "W = np.random.normal(0, 1, size=(n_samples, n_features))\n", diff --git a/notebooks/Metalearners Examples.ipynb b/notebooks/Metalearners Examples.ipynb index 876dc786..a5d28af7 100644 --- a/notebooks/Metalearners Examples.ipynb +++ b/notebooks/Metalearners Examples.ipynb @@ -58,7 +58,7 @@ "# Main imports\n", "from econml.metalearners import TLearner, SLearner, XLearner, DomainAdaptationLearner\n", "\n", - "# Helper imports \n", + "# Helper imports\n", "import numpy as np\n", "from numpy.random import binomial, multivariate_normal, normal, uniform\n", "from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor\n", @@ -215,7 +215,7 @@ "source": [ "# Instantiate X learner\n", "models = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n", - "propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6, \n", + "propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6,\n", " min_samples_leaf=int(n/100))\n", "X_learner = XLearner(models=models, propensity_model=propensity_model)\n", "# Train X_learner\n", @@ -233,7 +233,7 @@ "# Instantiate Domain Adaptation learner\n", "models = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n", "final_models = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n", - "propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6, \n", + "propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6,\n", " min_samples_leaf=int(n/100))\n", "DA_learner = DomainAdaptationLearner(models=models,\n", " final_models=final_models,\n", @@ -262,7 +262,7 @@ "from econml.dr import DRLearner\n", "outcome_model = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n", "pseudo_treatment_model = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n", - "propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6, \n", + "propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6,\n", " min_samples_leaf=int(n/100))\n", "\n", "DR_learner = DRLearner(model_regression=outcome_model, model_propensity=propensity_model,\n", @@ -334,7 +334,7 @@ "source": [ "# Visualization of bias distribution\n", "expected_te = np.apply_along_axis(treatment_effect, 1, X_test)\n", - "plt.violinplot([np.abs(T_te - expected_te), \n", + "plt.violinplot([np.abs(T_te - expected_te),\n", " np.abs(S_te - expected_te),\n", " np.abs(DA_te - expected_te),\n", " np.abs(X_te - expected_te),\n", @@ -478,7 +478,7 @@ ], "source": [ "# Visualization of bias distribution\n", - "plt.violinplot([np.abs(T_te - expected_te), \n", + "plt.violinplot([np.abs(T_te - expected_te),\n", " np.abs(S_te - expected_te),\n", " np.abs(DA_te - expected_te),\n", " np.abs(X_te - expected_te),\n", diff --git a/notebooks/Policy Learning with Trees and Forests.ipynb b/notebooks/Policy Learning with Trees and Forests.ipynb index 10ee9a43..7147a025 100644 --- a/notebooks/Policy Learning with Trees and Forests.ipynb +++ b/notebooks/Policy Learning with Trees and Forests.ipynb @@ -214,7 +214,7 @@ "source": [ "X = np.random.normal(size=(1000, 10))\n", "T = np.random.binomial(2, .5, size=(1000,))\n", - "y = (X[:, 0]) * (T==1) + (-X[:, 0]) * (T==2) " + "y = (X[:, 0]) * (T==1) + (-X[:, 0]) * (T==2)" ] }, { @@ -453,7 +453,7 @@ ], "source": [ "est = DRPolicyForest(n_estimators=1000,\n", - " max_depth=2, \n", + " max_depth=2,\n", " min_samples_leaf=50,\n", " max_samples=.8,\n", " honest=True,\n", diff --git a/notebooks/Scaling EconML using Ray.ipynb b/notebooks/Scaling EconML using Ray.ipynb index 22603b5d..45ed69e3 100644 --- a/notebooks/Scaling EconML using Ray.ipynb +++ b/notebooks/Scaling EconML using Ray.ipynb @@ -251,13 +251,13 @@ " ray_remote_func_options=ray_opts,\n", " cv=cv,\n", " mc_iters=1)\n", - " \n", + "\n", " start_time = time.time()\n", " est.fit(y, T, X=X, W=None)\n", " runtime = time.time() - start_time\n", " runtimes.append(runtime)\n", " return runtimes\n", - " \n" + "\n" ] }, { diff --git a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb index 90f19337..ea732627 100644 --- a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb +++ b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb @@ -542,28 +542,28 @@ " .assign(\n", " AgeAtSale = lambda df: df['YrSold'].sub(df['YearBuilt']), # add interpretable year columns\n", " YearsSinceRemodel = lambda df: df['YrSold'].sub(df['YearRemodAdd']).clip(lower = 0), # clip lower for outlier\n", - " \n", + "\n", " HasDeck = lambda df: df['WoodDeckSF'].gt(0).map(int),\n", - " HasPorch = lambda df: \n", + " HasPorch = lambda df:\n", " df[['OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch']]\n", " .gt(0)\n", " .max(axis = 1)\n", " .map(int),\n", - " \n", + "\n", " HasFireplace = lambda df: df['Fireplaces'].clip(upper = 1).map(int),\n", " HasFence = lambda df: df['Fence'].notna().map(int)\n", " )\n", - " \n", + "\n", " # drop year columns\n", " .drop(\n", " columns = [\n", - " 'GarageYrBlt', 'YearBuilt', 'YrSold', 'YearRemodAdd', \n", + " 'GarageYrBlt', 'YearBuilt', 'YrSold', 'YearRemodAdd',\n", " 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch',\n", " 'FireplaceQu', 'Fireplaces',\n", - " 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', \n", + " 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',\n", " '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GarageArea', 'PoolArea'\n", " ]\n", - " ) \n", + " )\n", " .assign(LotFrontage = lambda df: df['LotFrontage'].fillna(0)) # fill missing with 0\n", " .fillna('NA') # rest of missing values are in categorical columns, so fill with NA category\n", " .assign(Intercept = 1) # add constant column for OLS\n", @@ -2736,9 +2736,9 @@ "whatif_y = y_test.loc[whatif_df.index]\n", "\n", "cf = ca.whatif(\n", - " whatif_df, \n", - " whatif_df['HasFireplace'].add(1).clip(upper = 1), \n", - " 'HasFireplace', \n", + " whatif_df,\n", + " whatif_df['HasFireplace'].add(1).clip(upper = 1),\n", + " 'HasFireplace',\n", " whatif_y)\n", "print(\"Current average housing price on test set: \", whatif_y.mean())\n", "print(\n", diff --git a/notebooks/Treatment Featurization Examples.ipynb b/notebooks/Treatment Featurization Examples.ipynb index a249e4fa..c32fafb9 100644 --- a/notebooks/Treatment Featurization Examples.ipynb +++ b/notebooks/Treatment Featurization Examples.ipynb @@ -271,7 +271,7 @@ " test_T.squeeze(),\n", " lb.squeeze(),\n", " ub.squeeze(),\n", - " alpha = 0.4, \n", + " alpha = 0.4,\n", ")\n", "\n", "lb, ub = bad_est.effect_interval(T0 = np.zeros(shape=(100, 1)), T1=test_T)\n", @@ -319,7 +319,7 @@ "Z = np.random.normal(loc = 3, scale = 5, size = (n, 1))\n", "T = np.random.uniform(low = 0, high = 10, size = (n, 1)) + 0.5*W[:, [0]] + Z + unobserved_confounder\n", "epsilon = np.random.normal(size = (n, 1)) * 50\n", - "Y = 0.5*T**2 + W[:, [1]] + unobserved_confounder + epsilon \n", + "Y = 0.5*T**2 + W[:, [1]] + unobserved_confounder + epsilon\n", "\n", "test_T = np.arange(0, 10, step = 0.1).reshape(-1, 1)\n", "\n", @@ -394,8 +394,8 @@ ], "source": [ "est = OrthoIV(\n", - " model_t_xwz=RandomForestRegressor(), \n", - " projection=True, \n", + " model_t_xwz=RandomForestRegressor(),\n", + " projection=True,\n", " treatment_featurizer = featurizer\n", ")\n", "est.fit(Y=Y, T=T, W=W, Z=Z)\n", @@ -467,7 +467,7 @@ " test_T.squeeze(),\n", " lb.squeeze(),\n", " ub.squeeze(),\n", - " alpha = 0.4, \n", + " alpha = 0.4,\n", ")\n", "\n", "lb, ub = bad_est.effect_interval(T0 = np.zeros(shape=(100, 1)), T1=test_T)\n", @@ -659,7 +659,7 @@ "\n", "for x_val in x_vals:\n", " plt.figure(figsize=(10, 6))\n", - " \n", + "\n", " xtest = np.ones(test_T.shape) * x_val\n", " # Plot point estimates\n", " plt.plot(\n", @@ -691,7 +691,7 @@ " test_T.squeeze(),\n", " lb.squeeze(),\n", " ub.squeeze(),\n", - " alpha = 0.4, \n", + " alpha = 0.4,\n", " )\n", "\n", "\n", @@ -1209,7 +1209,7 @@ "df = (\n", " pd.DataFrame({'X': X[:ns, 0], 'marginal effect': eff[:, 0], 'lb': lb[:, 0], 'ub': ub[:, 0], 'true': true[:, 0]})\n", " .assign(\n", - " presciption = lambda df: df['marginal effect'].gt(0).map({True: 'Increase treatment', \n", + " presciption = lambda df: df['marginal effect'].gt(0).map({True: 'Increase treatment',\n", " False: 'Decrease treatment'})\n", " )\n", ")\n", @@ -1247,8 +1247,8 @@ "source": [ "plt.figure(figsize=(10, 6))\n", "plt.errorbar(\n", - " df['X'], df['marginal effect'], \n", - " yerr=[df['marginal effect'] - df['lb'], df['ub'] - df['marginal effect']], \n", + " df['X'], df['marginal effect'],\n", + " yerr=[df['marginal effect'] - df['lb'], df['ub'] - df['marginal effect']],\n", " fmt='o', alpha = 0.8, label = 'Estimated Marginal Effect')\n", "plt.scatter(df['X'], df['true'], marker='x', color='green', label = 'True Marginal Effect')\n", "\n", diff --git a/notebooks/Weighted Double Machine Learning Examples.ipynb b/notebooks/Weighted Double Machine Learning Examples.ipynb index ec30a78c..cfe03685 100644 --- a/notebooks/Weighted Double Machine Learning Examples.ipynb +++ b/notebooks/Weighted Double Machine Learning Examples.ipynb @@ -331,7 +331,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style('darkgrid'):\n", - " fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n", + " fig, ax = plt.subplots(1,1, figsize=(20, 5))\n", " x = np.arange(len(point))\n", " stat_sig = (lower>0) | (upper<0)\n", " plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n", @@ -426,7 +426,7 @@ "yerr[1, :] = upper - point\n", "\n", "with sns.axes_style('darkgrid'):\n", - " fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n", + " fig, ax = plt.subplots(1,1, figsize=(20, 5))\n", " x = np.arange(len(point))\n", " stat_sig = (lower>0) | (upper<0)\n", " plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n",