This commit is contained in:
Brian Quistorff 2020-11-04 16:43:36 -08:00
Родитель 09a813da2d
Коммит d6d1af847d
4 изменённых файлов: 29 добавлений и 20 удалений

2
.gitignore поставляемый
Просмотреть файл

@ -1,6 +1,6 @@
# ignore testing data # ignore testing data
**/data/ **/data/
test/*.pkl replication/*.pkl
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

Просмотреть файл

@ -20,6 +20,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Misc", "Misc", "{AA17F266-A
docs\overview.md = docs\overview.md docs\overview.md = docs\overview.md
docs\performance-notes.md = docs\performance-notes.md docs\performance-notes.md = docs\performance-notes.md
README.md = README.md README.md = README.md
replication\sc2010.py = replication\sc2010.py
EndProjectSection EndProjectSection
EndProject EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}" Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}"

Просмотреть файл

@ -1,5 +1,7 @@
import pickle import pickle
import random
import scipy
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -8,21 +10,21 @@ import SparseSC as SC
random.seed(12345) random.seed(12345)
np.random.seed(101101001) np.random.seed(101101001)
pkl_file = "smoking_fits.pkl" pkl_file = "../replication/smoking_fits.pkl"
smoking_df = pd.read_stata("../replication/smoking.dta") smoking_df = pd.read_stata("../replication/smoking.dta")
smoking_df['year'] = smoking_df['year'].astype('int') smoking_df['year'] = smoking_df['year'].astype('int')
smoking_df = smoking_df.set_index(['state', 'year']) smoking_df = smoking_df.set_index(['state', 'year'])
Y = smoking_df[['cigsale']].unstack('year')
T0 = 19 T0 = 19
i_t = 2 #unit 3, but zero-index i_t = 2 #unit 3, but zero-index
treated_units = [i_t] treated_units = [i_t]
control_units = [u for u in range(Y.shape[0]) if u not in treated_units] control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
Y = smoking_df[['cigsale']].unstack('year')
Y_names = Y.columns.get_level_values('year') Y_names = Y.columns.get_level_values('year')
Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]] Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]]
Y.isnull().sum().sum() #0 print(Y.isnull().sum().sum()) #0
Y = Y.values Y = Y.values
T = Y.shape[1] T = Y.shape[1]
T1 = T-T0 T1 = T-T0
@ -53,15 +55,15 @@ X_Y_pre_names = X_full_names + Y_pre_names
X_Y_pre_names_arr = np.array(X_Y_pre_names) X_Y_pre_names_arr = np.array(X_Y_pre_names)
fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t]) fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
print(len(np.diag(fast_fit.V))) #print(len(np.diag(fast_fit.V)))
print(np.diag(fast_fit.V)) #print(np.diag(fast_fit.V))
Y_post_sc = fast_fit.predict(Y_post) #Y_post_sc = fast_fit.predict(Y_post)
Y_pre_sc = fast_fit.predict(Y_pre) #Y_pre_sc = fast_fit.predict(Y_pre)
post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2)) #post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2)) #pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
print(pre_mse) #192.210632448 #print(pre_mse) #192.210632448
print(post_mse) #129.190437803 #print(post_mse) #129.190437803
X_Y_pre_names_arr[fast_fit.match_space_desc>0] #print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])
full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t]) full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
@ -70,14 +72,23 @@ print(np.diag(full_fit.V)[np.diag(full_fit.V)>0])
print(sum(np.diag(full_fit.V>0))) print(sum(np.diag(full_fit.V>0)))
full_Y_post_sc = full_fit.predict(Y_post) full_Y_post_sc = full_fit.predict(Y_post)
full_Y_pre_sc = full_fit.predict(Y_pre) full_Y_pre_sc = full_fit.predict(Y_pre)
full_post_mse = np.mean(np.power(Y_post[control_units, :] - full_Y_post_sc[control_units, :], 2)) full_Y_pre_effect = Y_pre - full_Y_pre_sc
full_pre_mse = np.mean(np.power(Y_pre[control_units, :] - full_Y_pre_sc[control_units, :], 2)) full_Y_post_effect = Y_post - full_Y_post_sc
print(full_Y_pre_effect[control_units, :].mean()) #0.0775621971136
print(scipy.stats.ttest_1samp(full_Y_pre_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.15231397793811494, pvalue=0.87898191261344316)
full_pre_mse = np.mean(np.power(full_Y_pre_effect[control_units, :], 2))
print(full_pre_mse) #186.969136939 print(full_pre_mse) #186.969136939
print(full_Y_post_effect[control_units, :].mean()) #0.036591103495529125
print(scipy.stats.ttest_1samp(full_Y_post_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.065402542511047407, pvalue=0.94788222532433364)
full_post_mse = np.mean(np.power(full_Y_post_effect[control_units, :], 2))
print(full_post_mse) #142.422049057 print(full_post_mse) #142.422049057
X_Y_pre_names_arr[np.diag(full_fit.V)>0]
print(X_Y_pre_names_arr[np.diag(full_fit.V)>0])
with open(pkl_file, "wb" ) as output_file: with open(pkl_file, "wb" ) as output_file:
pickle.dump( (fast_fit, full_fit), output_file) pickle.dump( (fast_fit, full_fit), output_file)
with open(pkl_file, "rb" ) as input_file: with open(pkl_file, "rb" ) as input_file:
fast_fit, full_fit = pickle.dump(input_file) fast_fit, full_fit = pickle.load(input_file)

Просмотреть файл

@ -23,9 +23,6 @@
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging> <EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<Compile Include="sc2010.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="test_simulation.py"> <Compile Include="test_simulation.py">
<SubType>Code</SubType> <SubType>Code</SubType>
</Compile> </Compile>