[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
This commit is contained in:
pre-commit-ci[bot] 2024-07-19 20:13:02 +00:00 committed by Keith Battocchi
Parent f52f42218e
Commit 1fbeb76f82
23 changed files: 121 additions and 121 deletions
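The diff below is whitespace-only: the hooks strip trailing spaces from the source lines of the repository's Jupyter notebooks. As a rough illustration of the kind of fix being applied (a hypothetical standalone script, not the actual hook implementation run by pre-commit.ci):

import json
import sys

def strip_trailing_whitespace(path):
    # Load the notebook JSON.
    with open(path, encoding="utf-8") as f:
        nb = json.load(f)
    for cell in nb.get("cells", []):
        # Each source line normally ends with "\n"; strip only the spaces
        # and tabs that precede it, leaving the line ending (including any
        # "\r") intact.
        cell["source"] = [
            line[:-1].rstrip(" \t") + "\n" if line.endswith("\n")
            else line.rstrip(" \t")
            for line in cell["source"]
        ]
    with open(path, "w", encoding="utf-8") as f:
        json.dump(nb, f, indent=1, ensure_ascii=False)
        f.write("\n")

if __name__ == "__main__":
    for p in sys.argv[1:]:
        strip_trailing_whitespace(p)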

View file

@@ -180,7 +180,7 @@
" return\n",
"\n",
" def fit(self, X, y, sample_weight = None, **fit_params):\n",
" self.best_ind_ = np.argmax([gcv.fit(X, y, sample_weight = sample_weight, **fit_params).best_score_ \n",
" self.best_ind_ = np.argmax([gcv.fit(X, y, sample_weight = sample_weight, **fit_params).best_score_\n",
" for gcv in self._gcv_list])\n",
" self.best_estimator_ = self._gcv_list[self.best_ind_].best_estimator_\n",
" self.best_score_ = self._gcv_list[self.best_ind_].best_score_\n",
@@ -934,7 +934,7 @@
"plt.plot(X_test, expected_te, 'b--', label='True effect')\n",
"plt.ylabel('Treatment Effect')\n",
"plt.xlabel('x')\n",
"plt.ylim(-0.5, 1.5) \n",
"plt.ylim(-0.5, 1.5)\n",
"plt.legend()\n",
"plt.show()"
]

View file

@@ -246,8 +246,8 @@
"source": [
"# Initialize DRTester and fit/predict nuisance models\n",
"dml_tester = DRTester(\n",
" model_regression=model_regression, \n",
" model_propensity=model_propensity, \n",
" model_regression=model_regression,\n",
" model_propensity=model_propensity,\n",
" cate=est_dm\n",
").fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)\n",
"\n",
@@ -607,8 +607,8 @@
"source": [
"# Initialize DRTester and fit/predict nuisance models\n",
"t_tester = DRTester(\n",
" model_regression=model_regression, \n",
" model_propensity=model_propensity, \n",
" model_regression=model_regression,\n",
" model_propensity=model_propensity,\n",
" cate=est_t\n",
").fit_nuisance(Xval, Dval, Yval, Xtrain, Dtrain, Ytrain)\n",
"\n",

View file

@@ -125,11 +125,11 @@
"coefs_Y = np.random.uniform(0, 1, size=support_size)\n",
"def epsilon_sample(n):\n",
" return np.random.uniform(-1, 1, size=n)\n",
"# Treatment support \n",
"# Treatment support\n",
"support_T = support_Y\n",
"coefs_T = np.random.uniform(0, 1, size=support_size)\n",
"def eta_sample(n):\n",
" return np.random.uniform(-1, 1, size=n) \n",
" return np.random.uniform(-1, 1, size=n)\n",
"\n",
"# Generate controls, covariates, treatments and outcomes\n",
"W = np.random.normal(0, 1, size=(n, n_w))\n",
@@ -558,7 +558,7 @@
"support_T = support_Y\n",
"coefs_T = np.random.uniform(0, 1, size=support_size)\n",
"def eta_sample(n):\n",
" return np.random.uniform(-1, 1, size=n) \n",
" return np.random.uniform(-1, 1, size=n)\n",
"\n",
"# Generate controls, covariates, treatments and outcomes\n",
"W = np.random.normal(0, 1, size=(n, n_w))\n",
@@ -595,7 +595,7 @@
" max_depth=30, subsample_ratio=subsample_ratio,\n",
" propensity_model = LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'),\n",
" model_Y = Lasso(alpha=lambda_reg),\n",
" propensity_model_final=LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'), \n",
" propensity_model_final=LogisticRegression(C=1/(X.shape[0]*lambda_reg), penalty='l1', solver='saga'),\n",
" model_Y_final=WeightedLasso(alpha=lambda_reg)\n",
")"
]
@@ -899,11 +899,11 @@
" coefs_Y = np.random.uniform(0, 1, size=support_size)\n",
" def epsilon_sample(n):\n",
" return np.random.uniform(-1, 1, size=n)\n",
" # Treatment support \n",
" # Treatment support\n",
" support_T = support_Y\n",
" coefs_T = np.random.uniform(0, 1, size=(support_size, n_treatments))\n",
" def eta_sample(n):\n",
" return np.random.uniform(-1, 1, size=n) \n",
" return np.random.uniform(-1, 1, size=n)\n",
" # Generate controls, covariates, treatments and outcomes\n",
" W = np.random.normal(0, 1, size=(n, n_w))\n",
" X = np.random.uniform(0, 1, size=(n, n_x))\n",
@@ -1514,7 +1514,7 @@
"\n",
"if not os.path.isfile(file_name):\n",
" print(\"Downloading file (this might take a few seconds)...\")\n",
" urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\", \n",
" urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\",\n",
" file_name)\n",
"oj_data = pd.read_csv(file_name)\n",
"oj_data.head()"
@@ -1564,11 +1564,11 @@
"outputs": [],
"source": [
"est = DMLOrthoForest(\n",
" n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth, \n",
" n_trees=n_trees, min_leaf_size=min_leaf_size, max_depth=max_depth,\n",
" subsample_ratio=subsample_ratio,\n",
" model_T=Lasso(alpha=0.1),\n",
" model_Y=Lasso(alpha=0.1),\n",
" model_T_final=WeightedLassoCVWrapper(cv=3), \n",
" model_T_final=WeightedLassoCVWrapper(cv=3),\n",
" model_Y_final=WeightedLassoCVWrapper(cv=3)\n",
" )"
]
@@ -1615,7 +1615,7 @@
"metadata": {},
"outputs": [],
"source": [
"min_income = 10.0 \n",
"min_income = 10.0\n",
"max_income = 11.1\n",
"delta = (max_income - min_income) / 100\n",
"X_test = np.arange(min_income, max_income + delta - 0.001, delta).reshape(-1, 1)"

View file

@@ -47,7 +47,7 @@
"source": [
"## Ignore warnings\n",
"import warnings\n",
"warnings.filterwarnings('ignore') "
"warnings.filterwarnings('ignore')"
]
},
{
@@ -123,7 +123,7 @@
"support_T = support_Y\n",
"coefs_T = np.random.uniform(0, 1, size=support_size)\n",
"def eta_sample(n):\n",
" return np.random.uniform(-1, 1, size=n) \n",
" return np.random.uniform(-1, 1, size=n)\n",
"\n",
"# Generate controls, covariates, treatments and outcomes\n",
"X = np.random.uniform(0, 1, size=(n, n_x))\n",
@@ -446,7 +446,7 @@
"outputs": [],
"source": [
"X_train, X_val, T_train, T_val,\\\n",
"Y_train, Y_val, expected_te_train, expected_te_val = train_test_split(X, T, Y, expected_te, \n",
"Y_train, Y_val, expected_te_train, expected_te_val = train_test_split(X, T, Y, expected_te,\n",
" test_size=.3, random_state=123)"
]
},
@@ -632,7 +632,7 @@
"source": [
"# Visualization of bias distribution\n",
"plt.figure(figsize=(15, 5))\n",
"plt.violinplot([np.abs(mdl.effect(X).flatten() - expected_te) for _, mdl in models] + \n",
"plt.violinplot([np.abs(mdl.effect(X).flatten() - expected_te) for _, mdl in models] +\n",
" [np.abs(best.effect(X).flatten() - expected_te)] +\n",
" [np.abs(ensemble.effect(X).flatten() - expected_te)], showmeans=True)\n",
"plt.ylabel(\"Bias distribution\")\n",

View file

@@ -158,8 +158,8 @@
],
"source": [
"def first_stage():\n",
" return GridSearchCV(estimator=GradientBoostingRegressor(), param_grid={\"max_depth\": [3, 5, None], \n",
" \"n_estimators\": (50, 100, 200)}, \n",
" return GridSearchCV(estimator=GradientBoostingRegressor(), param_grid={\"max_depth\": [3, 5, None],\n",
" \"n_estimators\": (50, 100, 200)},\n",
" cv=2, n_jobs=-1)\n",
"est = LinearDML(\n",
" model_y=first_stage(),\n",
@@ -379,10 +379,10 @@
],
"source": [
"def first_stage():\n",
" return GridSearchCVList([Lasso(max_iter=10000), GradientBoostingRegressor()], \n",
" param_grid_list=[{\"alpha\": [0.001, 0.01, 0.1, 1, 10]}, \n",
" {\"max_depth\": [3, 5, None], \n",
" \"n_estimators\": [50, 100, 200]}], \n",
" return GridSearchCVList([Lasso(max_iter=10000), GradientBoostingRegressor()],\n",
" param_grid_list=[{\"alpha\": [0.001, 0.01, 0.1, 1, 10]},\n",
" {\"max_depth\": [3, 5, None],\n",
" \"n_estimators\": [50, 100, 200]}],\n",
" cv=2)"
]
},

View file

@@ -293,7 +293,7 @@
"Y = train_data[\"log_demand\"].values\n",
"T = train_data[\"log_price\"].values\n",
"X = train_data[[\"income\"]].values # features\n",
"confounder_names = [\"account_age\", \"age\", \"avg_hours\", \"days_visited\", \"friends_count\", \"has_membership\", \n",
"confounder_names = [\"account_age\", \"age\", \"avg_hours\", \"days_visited\", \"friends_count\", \"has_membership\",\n",
" \"is_US\", \"songs_purchased\"]\n",
"W = train_data[confounder_names].values"
]
@@ -352,7 +352,7 @@
],
"source": [
"# fit through dowhy\n",
"est_dw = est.dowhy.fit(Y, T, X=X, W=W, \n",
"est_dw = est.dowhy.fit(Y, T, X=X, W=W,\n",
" outcome_names=[\"log_demand\"], treatment_names=[\"log_price\"], feature_names=[\"income\"],\n",
" confounder_names=confounder_names, inference=\"statsmodels\")"
]
@@ -382,7 +382,7 @@
" )\n",
"except Exception:\n",
" # Fall back on default graph view\n",
" est_dw.view_model(layout=None) "
" est_dw.view_model(layout=None)"
]
},
{
@@ -707,7 +707,7 @@
"source": [
"# Get treatment effect and its confidence interval\n",
"te_pred = est_nonparam_dw.effect(X_test).flatten()\n",
"te_pred_interval = est_nonparam_dw.effect_interval(X_test) "
"te_pred_interval = est_nonparam_dw.effect_interval(X_test)"
]
},
{
@@ -902,7 +902,7 @@
],
"source": [
"res_placebo = est_nonparam_dw.refute_estimate(\n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
" num_simulations=3\n",
")\n",
"print(res_placebo)"
@@ -951,7 +951,7 @@
],
"source": [
"res_subset = est_nonparam_dw.refute_estimate(\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.8, \n",
" method_name=\"data_subset_refuter\", subset_fraction=0.8,\n",
" num_simulations=3)\n",
"print(res_subset)"
]
@@ -1079,7 +1079,7 @@
"policy_dic[\"Give No One Discount\"] = np.mean(revenue_fn(train_data, 0, 0.1, 1, np.ones(len(X))))\n",
"\n",
"## follow our policy, but give -10% discount for the group doesn't recommend to give discount\n",
"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data, \n",
"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data,\n",
" -0.1, 0.1, 1, policy))\n",
"\n",
"## give everyone -10% discount\n",

View file

@@ -729,7 +729,7 @@
"policy_dic[\"Give No One Discount\"] = np.mean(revenue_fn(train_data, 0, 0.1, 1, np.ones(len(X))))\n",
"\n",
"## follow our policy, but give -10% discount for the group doesn't recommend to give discount\n",
"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data, \n",
"policy_dic[\"Our Policy + Give Negative Discount for No-Discount Group\"] = np.mean(revenue_fn(train_data,\n",
" -0.1, 0.1, 1, policy))\n",
"\n",
"## give everyone -10% discount\n",

View file

@@ -78,7 +78,7 @@
"# EconML imports\n",
"from econml.dr import LinearDRLearner\n",
"\n",
"# DoWhy imports \n",
"# DoWhy imports\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
@@ -284,7 +284,7 @@
"W = multi_data.drop(\n",
" columns=[\"Tech Support\", \"Discount\", \"Revenue\", \"Size\"]\n",
") # controls\n",
"confounder_names = [\"Global Flag\", \"Major Flag\", \"SMC Flag\", \"Commercial Flag\", \n",
"confounder_names = [\"Global Flag\", \"Major Flag\", \"SMC Flag\", \"Commercial Flag\",\n",
" \"IT Spend\", \"Employee Count\", \"PC Count\"]"
]
},
@@ -487,7 +487,7 @@
"source": [
"# fit through dowhy\n",
"test_customers = X.iloc[:1000].values\n",
"est_dw = est.dowhy.fit(Y, T, X=X, W=W, \n",
"est_dw = est.dowhy.fit(Y, T, X=X, W=W,\n",
" outcome_names=[\"Revenue\"], treatment_names=[\"discrete_T\"], feature_names=[\"Size\"],\n",
" confounder_names=confounder_names, target_units=test_customers)"
]
@@ -537,7 +537,7 @@
" )\n",
"except Exception:\n",
" # Fall back on default graph view\n",
" est_dw.view_model(layout=None) "
" est_dw.view_model(layout=None)"
]
},
{
@@ -1327,7 +1327,7 @@
],
"source": [
"res_placebo = est_dw.refute_estimate(\n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
" num_simulations=5\n",
")\n",
"print(res_placebo)"
@@ -1426,7 +1426,7 @@
"source": [
"# Removing a random subset of the data\n",
"res_subset = est_dw.refute_estimate(\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.8, \n",
" method_name=\"data_subset_refuter\", subset_fraction=0.8,\n",
" num_simulations=3)\n",
"print(res_subset)"
]

View file

@@ -86,7 +86,7 @@
"import lightgbm as lgb\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"\n",
"# DoWhy imports \n",
"# DoWhy imports\n",
"\n",
"# EconML imports\n",
"from econml.iv.dr import LinearIntentToTreatDRIV\n",
@@ -145,7 +145,7 @@
"outputs": [],
"source": [
"# Import the sample AB data\n",
"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\"\n",
"ab_data = pd.read_csv(file_url)"
]
},
@@ -363,7 +363,7 @@
},
"outputs": [],
"source": [
"# Define underlying treatment effect function \n",
"# Define underlying treatment effect function\n",
"def TE_fn(X):\n",
" return (0.2 + 0.3 * X['days_visited_free_pre'] - 0.2 * X['days_visited_hs_pre'] + X['os_type_osx']).values\n",
"true_TE = TE_fn(X_data)\n",
@@ -461,7 +461,7 @@
"source": [
"# Visualize causal graph\n",
"plt.figure(figsize=(10,8))\n",
"est_dw.view_model(layout=None) "
"est_dw.view_model(layout=None)"
]
},
{
@@ -1094,7 +1094,7 @@
],
"source": [
"res_unobserved = est_dw.refute_estimate(method_name=\"add_unobserved_common_cause\",\n",
" confounders_effect_on_treatment=\"binary_flip\", \n",
" confounders_effect_on_treatment=\"binary_flip\",\n",
" confounders_effect_on_outcome=\"linear\",\n",
" effect_strength_on_treatment=0.05, effect_strength_on_outcome=0.5)\n",
"print(res_unobserved)"
@@ -1127,7 +1127,7 @@
}
],
"source": [
"res_placebo = est_dw.refute_estimate(method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\", \n",
"res_placebo = est_dw.refute_estimate(method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
" num_simulations=2)\n",
"print(res_placebo)"
]
@@ -1167,7 +1167,7 @@
],
"source": [
"# Removing a random subset of the data\n",
"res_subset = est_dw.refute_estimate(method_name=\"data_subset_refuter\", subset_fraction=0.8, \n",
"res_subset = est_dw.refute_estimate(method_name=\"data_subset_refuter\", subset_fraction=0.8,\n",
" num_simulations=2)\n",
"print(res_subset)"
]

View file

@@ -133,7 +133,7 @@
"outputs": [],
"source": [
"# Import the sample AB data\n",
"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\"\n",
"ab_data = pd.read_csv(file_url)"
]
},
@@ -351,7 +351,7 @@
},
"outputs": [],
"source": [
"# Define underlying treatment effect function \n",
"# Define underlying treatment effect function\n",
"def TE_fn(X):\r\n",
" return (0.2 + 0.3 * X[\"days_visited_free_pre\"] - 0.2 * X[\"days_visited_hs_pre\"] + X[\"os_type_osx\"]).values\n",
"true_TE = TE_fn(X_data)\n",

View file

@@ -153,17 +153,17 @@
" W = reg_data[control_names].values\n",
" # scale W\n",
" scaler = StandardScaler()\n",
" W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n",
" W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n",
" T = reg_data[\"treated\"]\n",
" y = reg_data[outcome_name]\n",
" \n",
"\n",
" # select the best nuisances model out of econml estimator\n",
" model_y=first_stage_reg(W, y)\n",
" model_t=first_stage_clf(W, T)\n",
" \n",
"\n",
" if model_type=='dml':\n",
" est = LinearDML(model_y=model_y,\n",
" model_t=model_t, \n",
" model_t=model_t,\n",
" discrete_treatment=True, mc_iters=5,cv=5)\n",
" elif model_type=='dr':\n",
" est = LinearDRLearner(model_regression=model_y,\n",
@@ -226,7 +226,7 @@
" # dml\n",
" summ_dic1[\"method\"].append(\"DML full controls\")\n",
" effect,se,lb,ub=econml_homo_model_wrapper(df_all, econml_controls,outcome_name,\"dml\",\n",
" cols_to_scale=cols_to_scale, print_summary=False) \n",
" cols_to_scale=cols_to_scale, print_summary=False)\n",
" summ_dic[\"DML full controls\"].append([effect,se])\n",
" summ_dic1[\"point_estimate\"].append(effect)\n",
" summ_dic1[\"stderr\"].append(se)\n",
@@ -235,13 +235,13 @@
" # dr\n",
" summ_dic1[\"method\"].append(\"DR full controls\")\n",
" effect,se,lb,ub=econml_homo_model_wrapper(df_all, econml_controls,outcome_name,\"dr\",\n",
" cols_to_scale=cols_to_scale, print_summary=False) \n",
" cols_to_scale=cols_to_scale, print_summary=False)\n",
" summ_dic[\"DR full controls\"].append([effect,se])\n",
" summ_dic1[\"point_estimate\"].append(effect)\n",
" summ_dic1[\"stderr\"].append(se)\n",
" summ_dic1[\"lower_bound\"].append(lb)\n",
" summ_dic1[\"upper_bound\"].append(ub)\n",
" \n",
"\n",
" return summ_dic,summ_dic1\n",
"\n",
"# error bar helper function\n",
@@ -377,9 +377,9 @@
"complete_ols_columns_female=basic_ols_columns+[\"married\",\"re75\",\"re75_dummy\",\"re74\",\"re74_dummy\",\"re_diff_pre\",\n",
" \"afdc75\",\"nchildren75\",\"haschild\"]\n",
"# econml controls (exclude treatment)\n",
"econml_controls_male= ['age', 'age_2', 'educ', 're75','re74','re_diff_pre','nodegree', \n",
"econml_controls_male= ['age', 'age_2', 'educ', 're75','re74','re_diff_pre','nodegree',\n",
" 'black', 'hisp', 'married','re75_dummy','re74_dummy']\n",
"econml_controls_female= ['age', 'age_2', 'educ','nchildren75', 're75','re74','re_diff_pre','nodegree', \n",
"econml_controls_female= ['age', 'age_2', 'educ','nchildren75', 're75','re74','re_diff_pre','nodegree',\n",
" 'black', 'hisp', 'married','re75_dummy','re74_dummy','afdc75','haschild']"
]
},
@@ -391,11 +391,11 @@
"source": [
"# preprocessing data\n",
"male_control, male_treatment, male_psid1, male_psid3, male_cps1, male_cps3 = [\n",
" preprocessing(df,outcome_name_male,complete_ols_columns_male) \n",
" preprocessing(df,outcome_name_male,complete_ols_columns_male)\n",
" for df in (male_control, male_treatment, male_psid1, male_psid3, male_cps1, male_cps3)\n",
"]\n",
"female_control, female_treatment, female_psid1, female_psid2 =[\n",
" preprocessing(df,outcome_name_female,complete_ols_columns_female) \n",
" preprocessing(df,outcome_name_female,complete_ols_columns_female)\n",
" for df in (female_control, female_treatment, female_psid1, female_psid2)\n",
"]"
]
@@ -788,7 +788,7 @@
"# scale numeric features\n",
"cols_to_scale=6\n",
"scaler = StandardScaler()\n",
"X=np.hstack([scaler.fit_transform(X[:, :cols_to_scale]).astype(np.float32), X[:, cols_to_scale:]]) "
"X=np.hstack([scaler.fit_transform(X[:, :cols_to_scale]).astype(np.float32), X[:, cols_to_scale:]])"
]
},
{
@@ -813,7 +813,7 @@
"X = None\n",
"W = df_cps[econml_controls_male].values\n",
"# scale W\n",
"W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n",
"W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n",
"T = df_cps[\"treated\"]\n",
"y = df_cps[outcome_name_male]"
]
@@ -837,7 +837,7 @@
"# train dml with sample weight 100 times\n",
"p_value_with_weight=[]\n",
"point_estimate_with_weight=[]\n",
"for _ in range(100): \n",
"for _ in range(100):\n",
" est=LinearDML(model_t=model_t,model_y=model_y,discrete_treatment=True,mc_iters=10,cv=3)\n",
" est.fit(y, T, X=None, W=W, sample_weight=weight,inference=\"statsmodels\")\n",
" point_estimate_with_weight.append(est.intercept_)\n",
@@ -853,7 +853,7 @@
"# train dml without sample weight 100 times\n",
"p_value_without_weight=[]\n",
"point_estimate_without_weight=[]\n",
"for _ in range(100): \n",
"for _ in range(100):\n",
" est1=LinearDML(model_t=model_t,model_y=model_y,discrete_treatment=True,mc_iters=10,cv=3)\n",
" est1.fit(y, T, X=None,W=W,inference=\"statsmodels\")\n",
" point_estimate_without_weight.append(est1.intercept_)\n",
@@ -969,7 +969,7 @@
"# scale W\n",
"cols_to_scale=6\n",
"scaler = StandardScaler()\n",
"W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n",
"W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n",
"T = df[\"treated\"]\n",
"y = df[outcome_name_male]"
]
@@ -1051,7 +1051,7 @@
"# scale W\n",
"cols_to_scale=6\n",
"scaler = StandardScaler()\n",
"W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]]) \n",
"W = np.hstack([scaler.fit_transform(W[:, :cols_to_scale]).astype(np.float32), W[:, cols_to_scale:]])\n",
"T = df_cps[\"treated\"]\n",
"y = df_cps[outcome_name_male]"
]

View file

@@ -169,7 +169,7 @@
}
],
"source": [
"# Outcome equation \n",
"# Outcome equation\n",
"y = t*t / 10 - x*t / 10 + e\n",
"\n",
"# The endogeneity problem is clear, the latent error enters both treatment and outcome equally\n",
@@ -246,7 +246,7 @@
" use_upper_bound_loss = False, # whether to use an approximation to the true loss\n",
" n_gradient_samples = 1, # number of samples to use in second estimate of the response\n",
" # (to make loss estimate unbiased)\n",
" optimizer='adam', # Keras optimizer to use for training - see https://keras.io/optimizers/ \n",
" optimizer='adam', # Keras optimizer to use for training - see https://keras.io/optimizers/\n",
" first_stage_options = keras_fit_options, # options for training treatment model\n",
" second_stage_options = keras_fit_options) # options for training response model"
]

View file

@@ -772,7 +772,7 @@
"support_T = support_Y\n",
"coefs_T = np.random.uniform(0, 1, size=support_size)\n",
"def eta_sample(n):\n",
" return np.random.uniform(-1, 1, size=n) \n",
" return np.random.uniform(-1, 1, size=n)\n",
"\n",
"# Generate controls, covariates, treatments and outcomes\n",
"W = np.random.normal(0, 1, size=(n, n_w))\n",
@@ -2288,8 +2288,8 @@
"source": [
"# Prepare data\n",
"oj_data['price'] = np.log(oj_data[\"price\"])\n",
"# Transform dataset. \n",
"# For each store in each week, get a vector of logmove and a vector of logprice for each brand. \n",
"# Transform dataset.\n",
"# For each store in each week, get a vector of logmove and a vector of logprice for each brand.\n",
"# Other features are store specific, will be the same for all brands.\n",
"groupbylist = [\"store\", \"week\", \"AGE60\", \"EDUC\", \"ETHNIC\", \"INCOME\",\n",
" \"HHLARGE\", \"WORKWOM\", \"HVAL150\",\n",
@@ -2298,7 +2298,7 @@
" columns=oj_data.groupby(groupbylist).cumcount(),\n",
" values=['logmove', 'price'],\n",
" aggfunc='sum').reset_index()\n",
"oj_data1.columns = oj_data1.columns.map('{0[0]}{0[1]}'.format) \n",
"oj_data1.columns = oj_data1.columns.map('{0[0]}{0[1]}'.format)\n",
"oj_data1 = oj_data1.rename(index=str,\n",
" columns={\"logmove0\": \"logmove_T\",\n",
" \"logmove1\": \"logmove_M\",\n",
@@ -2441,9 +2441,9 @@
" plt.plot(X_test, te_pred[:, i, j],\n",
" color=\"C{}\".format(str(3 * i + j)),\n",
" label=\"OJ Elasticity {} to {}\".format(dic[j], dic[i]))\n",
" plt.fill_between(X_test.flatten(), \n",
" plt.fill_between(X_test.flatten(),\n",
" te_pred_interval[0][:, i, j],\n",
" te_pred_interval[1][:, i,j], \n",
" te_pred_interval[1][:, i,j],\n",
" color=\"C{}\".format(str(3*i+j)), alpha=.5, label=\"1-99% CI\")\n",
" plt.xlabel(r'Scale(Income)')\n",
" plt.ylabel('Orange Juice Elasticity')\n",

View file

@@ -260,7 +260,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style(\"darkgrid\"):\n",
" fig, ax = plt.subplots(1,1) \n",
" fig, ax = plt.subplots(1,1)\n",
" x = np.arange(len(point))\n",
" plt.errorbar(x, point, yerr, fmt='o')\n",
" ax.set_xticks(x)\n",
@@ -561,7 +561,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style(\"darkgrid\"):\n",
" fig, ax = plt.subplots(1,1) \n",
" fig, ax = plt.subplots(1,1)\n",
" x = np.arange(len(point))\n",
" plt.errorbar(x, point, yerr, fmt='o')\n",
" ax.set_xticks(x)\n",
@@ -844,7 +844,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style(\"darkgrid\"):\n",
" fig, ax = plt.subplots(1,1) \n",
" fig, ax = plt.subplots(1,1)\n",
" x = np.arange(len(point))\n",
" plt.errorbar(x, point, yerr, fmt='o')\n",
" ax.set_xticks(x)\n",
@@ -902,7 +902,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style('darkgrid'):\n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5))\n",
" x = np.arange(len(point))\n",
" stat_sig = (lower>0) | (upper<0)\n",
" plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n",
@@ -1254,7 +1254,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style('darkgrid'):\n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5))\n",
" x = np.arange(len(point))\n",
" stat_sig = (lower>0) | (upper<0)\n",
" plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n",

View file

@@ -171,8 +171,8 @@
"outputs": [],
"source": [
"est = DynamicDML(\n",
" model_y=LassoCV(cv=3, max_iter=1000), \n",
" model_t=MultiTaskLassoCV(cv=3, max_iter=1000), \n",
" model_y=LassoCV(cv=3, max_iter=1000),\n",
" model_t=MultiTaskLassoCV(cv=3, max_iter=1000),\n",
" cv=3)"
]
},
@@ -444,8 +444,8 @@
"outputs": [],
"source": [
"est = DynamicDML(\n",
" model_y=LassoCV(cv=3), \n",
" model_t=MultiTaskLassoCV(cv=3), \n",
" model_y=LassoCV(cv=3),\n",
" model_t=MultiTaskLassoCV(cv=3),\n",
" cv=3)"
]
},

View file

@@ -85,7 +85,7 @@
"# true_te = lambda X: np.hstack([X[:, [0]]**2 + 1, np.ones((X.shape[0], n_treatments - 1))])\n",
"# true_te = lambda X: np.hstack([X[:, [0]]>0, np.ones((X.shape[0], n_treatments - 1))])\n",
"def true_te(X):\n",
" return np.hstack([(X[:, [0]] > 0) * X[:, [0]], \n",
" return np.hstack([(X[:, [0]] > 0) * X[:, [0]],\n",
" np.ones((X.shape[0], n_treatments - 1)) * np.arange(1, n_treatments).reshape(1, -1)])\n",
"X = np.random.normal(0, 1, size=(n_samples, n_features))\n",
"T = np.random.normal(0, 1, size=(n_samples, n_treatments))\n",
@@ -811,7 +811,7 @@
"# true_te = lambda X: np.hstack([X[:, [0]]**2 + 1, np.ones((X.shape[0], n_treatments - 1))])\n",
"# true_te = lambda X: np.hstack([X[:, [0]]>0, np.ones((X.shape[0], n_treatments - 1))])\n",
"def true_te(X):\n",
" return np.hstack([(X[:, [0]] > 0) * X[:, [0]], \n",
" return np.hstack([(X[:, [0]] > 0) * X[:, [0]],\n",
" np.ones((X.shape[0], n_treatments - 1)) * np.arange(1, n_treatments).reshape(1, -1)])\n",
"X = np.random.normal(0, 1, size=(n_samples, n_features))\n",
"W = np.random.normal(0, 1, size=(n_samples, n_features))\n",
@@ -1485,7 +1485,7 @@
"from econml.utilities import cross_product\n",
"\n",
"class CustomGRF(BaseGRF):\n",
" \n",
"\n",
" def _get_alpha_and_pointJ(self, X, T, y, *, Z):\n",
" T = np.hstack([T, T**2])\n",
" Z = np.hstack([Z, Z**2])\n",
@@ -1493,8 +1493,8 @@
" T = np.hstack([T, np.ones((T.shape[0], 1))])\n",
" Z = np.hstack([Z, np.ones((T.shape[0], 1))])\n",
" return y * Z, cross_product(Z, T)\n",
" \n",
" \n",
"\n",
"\n",
" def _get_n_outputs_decomposition(self, X, T, y, *, Z):\n",
" n_relevant_outputs = T.shape[1] * 2\n",
" n_outputs = n_relevant_outputs\n",

View file

@@ -294,7 +294,7 @@
"n_treatments = 2\n",
"n_outputs = 3\n",
"def true_te(X):\n",
" return np.hstack([(X[:, [0]] > 0) * X[:, [0]], \n",
" return np.hstack([(X[:, [0]] > 0) * X[:, [0]],\n",
" np.ones((X.shape[0], n_treatments - 1)) * np.arange(1, n_treatments).reshape(1, -1)])\n",
"X = np.random.normal(0, 1, size=(n_samples, n_features))\n",
"W = np.random.normal(0, 1, size=(n_samples, n_features))\n",

View file

@@ -58,7 +58,7 @@
"# Main imports\n",
"from econml.metalearners import TLearner, SLearner, XLearner, DomainAdaptationLearner\n",
"\n",
"# Helper imports \n",
"# Helper imports\n",
"import numpy as np\n",
"from numpy.random import binomial, multivariate_normal, normal, uniform\n",
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor\n",
@@ -215,7 +215,7 @@
"source": [
"# Instantiate X learner\n",
"models = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n",
"propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6, \n",
"propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6,\n",
" min_samples_leaf=int(n/100))\n",
"X_learner = XLearner(models=models, propensity_model=propensity_model)\n",
"# Train X_learner\n",
@@ -233,7 +233,7 @@
"# Instantiate Domain Adaptation learner\n",
"models = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n",
"final_models = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n",
"propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6, \n",
"propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6,\n",
" min_samples_leaf=int(n/100))\n",
"DA_learner = DomainAdaptationLearner(models=models,\n",
" final_models=final_models,\n",
@@ -262,7 +262,7 @@
"from econml.dr import DRLearner\n",
"outcome_model = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n",
"pseudo_treatment_model = GradientBoostingRegressor(n_estimators=100, max_depth=6, min_samples_leaf=int(n/100))\n",
"propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6, \n",
"propensity_model = RandomForestClassifier(n_estimators=100, max_depth=6,\n",
" min_samples_leaf=int(n/100))\n",
"\n",
"DR_learner = DRLearner(model_regression=outcome_model, model_propensity=propensity_model,\n",
@@ -334,7 +334,7 @@
"source": [
"# Visualization of bias distribution\n",
"expected_te = np.apply_along_axis(treatment_effect, 1, X_test)\n",
"plt.violinplot([np.abs(T_te - expected_te), \n",
"plt.violinplot([np.abs(T_te - expected_te),\n",
" np.abs(S_te - expected_te),\n",
" np.abs(DA_te - expected_te),\n",
" np.abs(X_te - expected_te),\n",
@@ -478,7 +478,7 @@
],
"source": [
"# Visualization of bias distribution\n",
"plt.violinplot([np.abs(T_te - expected_te), \n",
"plt.violinplot([np.abs(T_te - expected_te),\n",
" np.abs(S_te - expected_te),\n",
" np.abs(DA_te - expected_te),\n",
" np.abs(X_te - expected_te),\n",

View file

@@ -214,7 +214,7 @@
"source": [
"X = np.random.normal(size=(1000, 10))\n",
"T = np.random.binomial(2, .5, size=(1000,))\n",
"y = (X[:, 0]) * (T==1) + (-X[:, 0]) * (T==2) "
"y = (X[:, 0]) * (T==1) + (-X[:, 0]) * (T==2)"
]
},
{
@@ -453,7 +453,7 @@
],
"source": [
"est = DRPolicyForest(n_estimators=1000,\n",
" max_depth=2, \n",
" max_depth=2,\n",
" min_samples_leaf=50,\n",
" max_samples=.8,\n",
" honest=True,\n",

View file

@@ -251,13 +251,13 @@
" ray_remote_func_options=ray_opts,\n",
" cv=cv,\n",
" mc_iters=1)\n",
" \n",
"\n",
" start_time = time.time()\n",
" est.fit(y, T, X=X, W=None)\n",
" runtime = time.time() - start_time\n",
" runtimes.append(runtime)\n",
" return runtimes\n",
" \n"
"\n"
]
},
{

View file

@@ -542,28 +542,28 @@
" .assign(\n",
" AgeAtSale = lambda df: df['YrSold'].sub(df['YearBuilt']), # add interpretable year columns\n",
" YearsSinceRemodel = lambda df: df['YrSold'].sub(df['YearRemodAdd']).clip(lower = 0), # clip lower for outlier\n",
" \n",
"\n",
" HasDeck = lambda df: df['WoodDeckSF'].gt(0).map(int),\n",
" HasPorch = lambda df: \n",
" HasPorch = lambda df:\n",
" df[['OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch']]\n",
" .gt(0)\n",
" .max(axis = 1)\n",
" .map(int),\n",
" \n",
"\n",
" HasFireplace = lambda df: df['Fireplaces'].clip(upper = 1).map(int),\n",
" HasFence = lambda df: df['Fence'].notna().map(int)\n",
" )\n",
" \n",
"\n",
" # drop year columns\n",
" .drop(\n",
" columns = [\n",
" 'GarageYrBlt', 'YearBuilt', 'YrSold', 'YearRemodAdd', \n",
" 'GarageYrBlt', 'YearBuilt', 'YrSold', 'YearRemodAdd',\n",
" 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch',\n",
" 'FireplaceQu', 'Fireplaces',\n",
" 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', \n",
" 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',\n",
" '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GarageArea', 'PoolArea'\n",
" ]\n",
" ) \n",
" )\n",
" .assign(LotFrontage = lambda df: df['LotFrontage'].fillna(0)) # fill missing with 0\n",
" .fillna('NA') # rest of missing values are in categorical columns, so fill with NA category\n",
" .assign(Intercept = 1) # add constant column for OLS\n",
@@ -2736,9 +2736,9 @@
"whatif_y = y_test.loc[whatif_df.index]\n",
"\n",
"cf = ca.whatif(\n",
" whatif_df, \n",
" whatif_df['HasFireplace'].add(1).clip(upper = 1), \n",
" 'HasFireplace', \n",
" whatif_df,\n",
" whatif_df['HasFireplace'].add(1).clip(upper = 1),\n",
" 'HasFireplace',\n",
" whatif_y)\n",
"print(\"Current average housing price on test set: \", whatif_y.mean())\n",
"print(\n",

View file

@@ -271,7 +271,7 @@
" test_T.squeeze(),\n",
" lb.squeeze(),\n",
" ub.squeeze(),\n",
" alpha = 0.4, \n",
" alpha = 0.4,\n",
")\n",
"\n",
"lb, ub = bad_est.effect_interval(T0 = np.zeros(shape=(100, 1)), T1=test_T)\n",
@@ -319,7 +319,7 @@
"Z = np.random.normal(loc = 3, scale = 5, size = (n, 1))\n",
"T = np.random.uniform(low = 0, high = 10, size = (n, 1)) + 0.5*W[:, [0]] + Z + unobserved_confounder\n",
"epsilon = np.random.normal(size = (n, 1)) * 50\n",
"Y = 0.5*T**2 + W[:, [1]] + unobserved_confounder + epsilon \n",
"Y = 0.5*T**2 + W[:, [1]] + unobserved_confounder + epsilon\n",
"\n",
"test_T = np.arange(0, 10, step = 0.1).reshape(-1, 1)\n",
"\n",
@@ -394,8 +394,8 @@
],
"source": [
"est = OrthoIV(\n",
" model_t_xwz=RandomForestRegressor(), \n",
" projection=True, \n",
" model_t_xwz=RandomForestRegressor(),\n",
" projection=True,\n",
" treatment_featurizer = featurizer\n",
")\n",
"est.fit(Y=Y, T=T, W=W, Z=Z)\n",
@@ -467,7 +467,7 @@
" test_T.squeeze(),\n",
" lb.squeeze(),\n",
" ub.squeeze(),\n",
" alpha = 0.4, \n",
" alpha = 0.4,\n",
")\n",
"\n",
"lb, ub = bad_est.effect_interval(T0 = np.zeros(shape=(100, 1)), T1=test_T)\n",
@@ -659,7 +659,7 @@
"\n",
"for x_val in x_vals:\n",
" plt.figure(figsize=(10, 6))\n",
" \n",
"\n",
" xtest = np.ones(test_T.shape) * x_val\n",
" # Plot point estimates\n",
" plt.plot(\n",
@@ -691,7 +691,7 @@
" test_T.squeeze(),\n",
" lb.squeeze(),\n",
" ub.squeeze(),\n",
" alpha = 0.4, \n",
" alpha = 0.4,\n",
" )\n",
"\n",
"\n",
@@ -1209,7 +1209,7 @@
"df = (\n",
" pd.DataFrame({'X': X[:ns, 0], 'marginal effect': eff[:, 0], 'lb': lb[:, 0], 'ub': ub[:, 0], 'true': true[:, 0]})\n",
" .assign(\n",
" presciption = lambda df: df['marginal effect'].gt(0).map({True: 'Increase treatment', \n",
" presciption = lambda df: df['marginal effect'].gt(0).map({True: 'Increase treatment',\n",
" False: 'Decrease treatment'})\n",
" )\n",
")\n",
@@ -1247,8 +1247,8 @@
"source": [
"plt.figure(figsize=(10, 6))\n",
"plt.errorbar(\n",
" df['X'], df['marginal effect'], \n",
" yerr=[df['marginal effect'] - df['lb'], df['ub'] - df['marginal effect']], \n",
" df['X'], df['marginal effect'],\n",
" yerr=[df['marginal effect'] - df['lb'], df['ub'] - df['marginal effect']],\n",
" fmt='o', alpha = 0.8, label = 'Estimated Marginal Effect')\n",
"plt.scatter(df['X'], df['true'], marker='x', color='green', label = 'True Marginal Effect')\n",
"\n",

View file

@@ -331,7 +331,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style('darkgrid'):\n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5))\n",
" x = np.arange(len(point))\n",
" stat_sig = (lower>0) | (upper<0)\n",
" plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n",
@@ -426,7 +426,7 @@
"yerr[1, :] = upper - point\n",
"\n",
"with sns.axes_style('darkgrid'):\n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5)) \n",
" fig, ax = plt.subplots(1,1, figsize=(20, 5))\n",
" x = np.arange(len(point))\n",
" stat_sig = (lower>0) | (upper<0)\n",
" plt.errorbar(x[stat_sig], point[stat_sig], yerr[:, stat_sig], fmt='o', label='stat_sig')\n",