From d6d1af847ddc419755c5d1885a263e5842f3b20d Mon Sep 17 00:00:00 2001 From: Brian Quistorff Date: Wed, 4 Nov 2020 16:43:36 -0800 Subject: [PATCH] EOD --- .gitignore | 2 +- SparseSC.sln | 1 + replication/sc2010.py | 43 +++++++++++++++++++++++++++---------------- test/test.pyproj | 3 --- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index ac67dd6..d47f885 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # ignore testing data **/data/ -test/*.pkl +replication/*.pkl # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/SparseSC.sln b/SparseSC.sln index 4def15c..b2f93ae 100644 --- a/SparseSC.sln +++ b/SparseSC.sln @@ -20,6 +20,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Misc", "Misc", "{AA17F266-A docs\overview.md = docs\overview.md docs\performance-notes.md = docs\performance-notes.md README.md = README.md + replication\sc2010.py = replication\sc2010.py EndProjectSection EndProject Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}" diff --git a/replication/sc2010.py b/replication/sc2010.py index ffda0e5..1c67697 100644 --- a/replication/sc2010.py +++ b/replication/sc2010.py @@ -1,5 +1,7 @@ import pickle +import random +import scipy import numpy as np import pandas as pd @@ -8,21 +10,21 @@ import SparseSC as SC random.seed(12345) np.random.seed(101101001) -pkl_file = "smoking_fits.pkl" +pkl_file = "../replication/smoking_fits.pkl" smoking_df = pd.read_stata("../replication/smoking.dta") smoking_df['year'] = smoking_df['year'].astype('int') smoking_df = smoking_df.set_index(['state', 'year']) +Y = smoking_df[['cigsale']].unstack('year') T0 = 19 i_t = 2 #unit 3, but zero-index treated_units = [i_t] control_units = [u for u in range(Y.shape[0]) if u not in treated_units] -Y = smoking_df[['cigsale']].unstack('year') Y_names = Y.columns.get_level_values('year') Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]] -Y.isnull().sum().sum() #0 +print(Y.isnull().sum().sum()) #0 Y = Y.values T = Y.shape[1] T1 = T-T0 @@ -53,15 +55,15 @@ X_Y_pre_names = X_full_names + Y_pre_names X_Y_pre_names_arr = np.array(X_Y_pre_names) fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t]) -print(len(np.diag(fast_fit.V))) -print(np.diag(fast_fit.V)) -Y_post_sc = fast_fit.predict(Y_post) -Y_pre_sc = fast_fit.predict(Y_pre) -post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2)) -pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2)) -print(pre_mse) #192.210632448 -print(post_mse) #129.190437803 -X_Y_pre_names_arr[fast_fit.match_space_desc>0] +#print(len(np.diag(fast_fit.V))) +#print(np.diag(fast_fit.V)) +#Y_post_sc = fast_fit.predict(Y_post) +#Y_pre_sc = fast_fit.predict(Y_pre) +#post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2)) +#pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2)) +#print(pre_mse) #192.210632448 +#print(post_mse) #129.190437803 +#print(X_Y_pre_names_arr[fast_fit.match_space_desc>0]) full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t]) @@ -70,14 +72,23 @@ print(np.diag(full_fit.V)[np.diag(full_fit.V)>0]) print(sum(np.diag(full_fit.V>0))) full_Y_post_sc = full_fit.predict(Y_post) full_Y_pre_sc = full_fit.predict(Y_pre) -full_post_mse = np.mean(np.power(Y_post[control_units, :] - full_Y_post_sc[control_units, :], 2)) -full_pre_mse = np.mean(np.power(Y_pre[control_units, :] - full_Y_pre_sc[control_units, :], 2)) +full_Y_pre_effect = Y_pre - full_Y_pre_sc +full_Y_post_effect = Y_post - full_Y_post_sc + +print(full_Y_pre_effect[control_units, :].mean()) #0.0775621971136 +print(scipy.stats.ttest_1samp(full_Y_pre_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.15231397793811494, pvalue=0.87898191261344316) +full_pre_mse = np.mean(np.power(full_Y_pre_effect[control_units, :], 2)) print(full_pre_mse) #186.969136939 + +print(full_Y_post_effect[control_units, :].mean()) #0.036591103495529125 +print(scipy.stats.ttest_1samp(full_Y_post_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.065402542511047407, pvalue=0.94788222532433364) +full_post_mse = np.mean(np.power(full_Y_post_effect[control_units, :], 2)) print(full_post_mse) #142.422049057 -X_Y_pre_names_arr[np.diag(full_fit.V)>0] + +print(X_Y_pre_names_arr[np.diag(full_fit.V)>0]) with open(pkl_file, "wb" ) as output_file: pickle.dump( (fast_fit, full_fit), output_file) with open(pkl_file, "rb" ) as input_file: - fast_fit, full_fit = pickle.dump(input_file) + fast_fit, full_fit = pickle.load(input_file) diff --git a/test/test.pyproj b/test/test.pyproj index 1b32808..aa9c53f 100644 --- a/test/test.pyproj +++ b/test/test.pyproj @@ -23,9 +23,6 @@ false - - Code - Code