зеркало из https://github.com/microsoft/SparseSC.git
This commit is contained in:
Родитель
09a813da2d
Коммит
d6d1af847d
|
@ -1,6 +1,6 @@
|
||||||
# ignore testing data
|
# ignore testing data
|
||||||
**/data/
|
**/data/
|
||||||
test/*.pkl
|
replication/*.pkl
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
|
@ -20,6 +20,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Misc", "Misc", "{AA17F266-A
|
||||||
docs\overview.md = docs\overview.md
|
docs\overview.md = docs\overview.md
|
||||||
docs\performance-notes.md = docs\performance-notes.md
|
docs\performance-notes.md = docs\performance-notes.md
|
||||||
README.md = README.md
|
README.md = README.md
|
||||||
|
replication\sc2010.py = replication\sc2010.py
|
||||||
EndProjectSection
|
EndProjectSection
|
||||||
EndProject
|
EndProject
|
||||||
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}"
|
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}"
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
import pickle
|
import pickle
|
||||||
|
import random
|
||||||
|
|
||||||
|
import scipy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
@ -8,21 +10,21 @@ import SparseSC as SC
|
||||||
random.seed(12345)
|
random.seed(12345)
|
||||||
np.random.seed(101101001)
|
np.random.seed(101101001)
|
||||||
|
|
||||||
pkl_file = "smoking_fits.pkl"
|
pkl_file = "../replication/smoking_fits.pkl"
|
||||||
|
|
||||||
smoking_df = pd.read_stata("../replication/smoking.dta")
|
smoking_df = pd.read_stata("../replication/smoking.dta")
|
||||||
smoking_df['year'] = smoking_df['year'].astype('int')
|
smoking_df['year'] = smoking_df['year'].astype('int')
|
||||||
smoking_df = smoking_df.set_index(['state', 'year'])
|
smoking_df = smoking_df.set_index(['state', 'year'])
|
||||||
|
Y = smoking_df[['cigsale']].unstack('year')
|
||||||
T0 = 19
|
T0 = 19
|
||||||
i_t = 2 #unit 3, but zero-index
|
i_t = 2 #unit 3, but zero-index
|
||||||
treated_units = [i_t]
|
treated_units = [i_t]
|
||||||
control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
|
control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
|
||||||
|
|
||||||
|
|
||||||
Y = smoking_df[['cigsale']].unstack('year')
|
|
||||||
Y_names = Y.columns.get_level_values('year')
|
Y_names = Y.columns.get_level_values('year')
|
||||||
Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]]
|
Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]]
|
||||||
Y.isnull().sum().sum() #0
|
print(Y.isnull().sum().sum()) #0
|
||||||
Y = Y.values
|
Y = Y.values
|
||||||
T = Y.shape[1]
|
T = Y.shape[1]
|
||||||
T1 = T-T0
|
T1 = T-T0
|
||||||
|
@ -53,15 +55,15 @@ X_Y_pre_names = X_full_names + Y_pre_names
|
||||||
X_Y_pre_names_arr = np.array(X_Y_pre_names)
|
X_Y_pre_names_arr = np.array(X_Y_pre_names)
|
||||||
|
|
||||||
fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
|
fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
|
||||||
print(len(np.diag(fast_fit.V)))
|
#print(len(np.diag(fast_fit.V)))
|
||||||
print(np.diag(fast_fit.V))
|
#print(np.diag(fast_fit.V))
|
||||||
Y_post_sc = fast_fit.predict(Y_post)
|
#Y_post_sc = fast_fit.predict(Y_post)
|
||||||
Y_pre_sc = fast_fit.predict(Y_pre)
|
#Y_pre_sc = fast_fit.predict(Y_pre)
|
||||||
post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
|
#post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
|
||||||
pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
|
#pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
|
||||||
print(pre_mse) #192.210632448
|
#print(pre_mse) #192.210632448
|
||||||
print(post_mse) #129.190437803
|
#print(post_mse) #129.190437803
|
||||||
X_Y_pre_names_arr[fast_fit.match_space_desc>0]
|
#print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])
|
||||||
|
|
||||||
|
|
||||||
full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
|
full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
|
||||||
|
@ -70,14 +72,23 @@ print(np.diag(full_fit.V)[np.diag(full_fit.V)>0])
|
||||||
print(sum(np.diag(full_fit.V>0)))
|
print(sum(np.diag(full_fit.V>0)))
|
||||||
full_Y_post_sc = full_fit.predict(Y_post)
|
full_Y_post_sc = full_fit.predict(Y_post)
|
||||||
full_Y_pre_sc = full_fit.predict(Y_pre)
|
full_Y_pre_sc = full_fit.predict(Y_pre)
|
||||||
full_post_mse = np.mean(np.power(Y_post[control_units, :] - full_Y_post_sc[control_units, :], 2))
|
full_Y_pre_effect = Y_pre - full_Y_pre_sc
|
||||||
full_pre_mse = np.mean(np.power(Y_pre[control_units, :] - full_Y_pre_sc[control_units, :], 2))
|
full_Y_post_effect = Y_post - full_Y_post_sc
|
||||||
|
|
||||||
|
print(full_Y_pre_effect[control_units, :].mean()) #0.0775621971136
|
||||||
|
print(scipy.stats.ttest_1samp(full_Y_pre_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.15231397793811494, pvalue=0.87898191261344316)
|
||||||
|
full_pre_mse = np.mean(np.power(full_Y_pre_effect[control_units, :], 2))
|
||||||
print(full_pre_mse) #186.969136939
|
print(full_pre_mse) #186.969136939
|
||||||
|
|
||||||
|
print(full_Y_post_effect[control_units, :].mean()) #0.036591103495529125
|
||||||
|
print(scipy.stats.ttest_1samp(full_Y_post_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.065402542511047407, pvalue=0.94788222532433364)
|
||||||
|
full_post_mse = np.mean(np.power(full_Y_post_effect[control_units, :], 2))
|
||||||
print(full_post_mse) #142.422049057
|
print(full_post_mse) #142.422049057
|
||||||
X_Y_pre_names_arr[np.diag(full_fit.V)>0]
|
|
||||||
|
print(X_Y_pre_names_arr[np.diag(full_fit.V)>0])
|
||||||
|
|
||||||
with open(pkl_file, "wb" ) as output_file:
|
with open(pkl_file, "wb" ) as output_file:
|
||||||
pickle.dump( (fast_fit, full_fit), output_file)
|
pickle.dump( (fast_fit, full_fit), output_file)
|
||||||
|
|
||||||
with open(pkl_file, "rb" ) as input_file:
|
with open(pkl_file, "rb" ) as input_file:
|
||||||
fast_fit, full_fit = pickle.dump(input_file)
|
fast_fit, full_fit = pickle.load(input_file)
|
||||||
|
|
|
@ -23,9 +23,6 @@
|
||||||
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
|
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Compile Include="sc2010.py">
|
|
||||||
<SubType>Code</SubType>
|
|
||||||
</Compile>
|
|
||||||
<Compile Include="test_simulation.py">
|
<Compile Include="test_simulation.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
|
Загрузка…
Ссылка в новой задаче