This commit is contained in:
Brian Quistorff 2020-11-04 16:43:36 -08:00
Родитель 09a813da2d
Коммит d6d1af847d
4 изменённых файлов: 29 добавлений и 20 удалений

2
.gitignore поставляемый
Просмотреть файл

@ -1,6 +1,6 @@
# ignore testing data
**/data/
test/*.pkl
replication/*.pkl
# Byte-compiled / optimized / DLL files
__pycache__/

Просмотреть файл

@ -20,6 +20,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Misc", "Misc", "{AA17F266-A
docs\overview.md = docs\overview.md
docs\performance-notes.md = docs\performance-notes.md
README.md = README.md
replication\sc2010.py = replication\sc2010.py
EndProjectSection
EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}"

Просмотреть файл

@ -1,5 +1,7 @@
import pickle
import random
import scipy
import numpy as np
import pandas as pd
@ -8,21 +10,21 @@ import SparseSC as SC
random.seed(12345)
np.random.seed(101101001)
pkl_file = "smoking_fits.pkl"
pkl_file = "../replication/smoking_fits.pkl"
smoking_df = pd.read_stata("../replication/smoking.dta")
smoking_df['year'] = smoking_df['year'].astype('int')
smoking_df = smoking_df.set_index(['state', 'year'])
Y = smoking_df[['cigsale']].unstack('year')
T0 = 19
i_t = 2 #unit 3, but zero-index
treated_units = [i_t]
control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
Y = smoking_df[['cigsale']].unstack('year')
Y_names = Y.columns.get_level_values('year')
Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]]
Y.isnull().sum().sum() #0
print(Y.isnull().sum().sum()) #0
Y = Y.values
T = Y.shape[1]
T1 = T-T0
@ -53,15 +55,15 @@ X_Y_pre_names = X_full_names + Y_pre_names
X_Y_pre_names_arr = np.array(X_Y_pre_names)
fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
print(len(np.diag(fast_fit.V)))
print(np.diag(fast_fit.V))
Y_post_sc = fast_fit.predict(Y_post)
Y_pre_sc = fast_fit.predict(Y_pre)
post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
print(pre_mse) #192.210632448
print(post_mse) #129.190437803
X_Y_pre_names_arr[fast_fit.match_space_desc>0]
#print(len(np.diag(fast_fit.V)))
#print(np.diag(fast_fit.V))
#Y_post_sc = fast_fit.predict(Y_post)
#Y_pre_sc = fast_fit.predict(Y_pre)
#post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
#pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
#print(pre_mse) #192.210632448
#print(post_mse) #129.190437803
#print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])
full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
@ -70,14 +72,23 @@ print(np.diag(full_fit.V)[np.diag(full_fit.V)>0])
print(sum(np.diag(full_fit.V>0)))
full_Y_post_sc = full_fit.predict(Y_post)
full_Y_pre_sc = full_fit.predict(Y_pre)
full_post_mse = np.mean(np.power(Y_post[control_units, :] - full_Y_post_sc[control_units, :], 2))
full_pre_mse = np.mean(np.power(Y_pre[control_units, :] - full_Y_pre_sc[control_units, :], 2))
full_Y_pre_effect = Y_pre - full_Y_pre_sc
full_Y_post_effect = Y_post - full_Y_post_sc
print(full_Y_pre_effect[control_units, :].mean()) #0.0775621971136
print(scipy.stats.ttest_1samp(full_Y_pre_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.15231397793811494, pvalue=0.87898191261344316)
full_pre_mse = np.mean(np.power(full_Y_pre_effect[control_units, :], 2))
print(full_pre_mse) #186.969136939
print(full_Y_post_effect[control_units, :].mean()) #0.036591103495529125
print(scipy.stats.ttest_1samp(full_Y_post_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.065402542511047407, pvalue=0.94788222532433364)
full_post_mse = np.mean(np.power(full_Y_post_effect[control_units, :], 2))
print(full_post_mse) #142.422049057
X_Y_pre_names_arr[np.diag(full_fit.V)>0]
print(X_Y_pre_names_arr[np.diag(full_fit.V)>0])
with open(pkl_file, "wb" ) as output_file:
pickle.dump( (fast_fit, full_fit), output_file)
with open(pkl_file, "rb" ) as input_file:
fast_fit, full_fit = pickle.dump(input_file)
fast_fit, full_fit = pickle.load(input_file)

Просмотреть файл

@ -23,9 +23,6 @@
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
</PropertyGroup>
<ItemGroup>
<Compile Include="sc2010.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="test_simulation.py">
<SubType>Code</SubType>
</Compile>