EOD

2020-11-04 16:43:36 -08:00 · 2020-11-04 16:43:36 -08:00 · d6d1af847d
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,6 @@
 # ignore testing data
 **/data/
-test/*.pkl
+replication/*.pkl

 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/SparseSC.sln
+++ b/SparseSC.sln
@ -20,6 +20,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Misc", "Misc", "{AA17F266-A
 		docs\overview.md = docs\overview.md
 		docs\performance-notes.md = docs\performance-notes.md
 		README.md = README.md
+		replication\sc2010.py = replication\sc2010.py
 	EndProjectSection
 EndProject
 Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}"
--- a/replication/sc2010.py
+++ b/replication/sc2010.py
@ -1,5 +1,7 @@
 import pickle
+import random

+import scipy
 import numpy as np
 import pandas as pd

@ -8,21 +10,21 @@ import SparseSC as SC
 random.seed(12345)
 np.random.seed(101101001)

-pkl_file = "smoking_fits.pkl"
+pkl_file = "../replication/smoking_fits.pkl"

 smoking_df = pd.read_stata("../replication/smoking.dta")
 smoking_df['year'] = smoking_df['year'].astype('int')
 smoking_df = smoking_df.set_index(['state', 'year'])
+Y = smoking_df[['cigsale']].unstack('year')
 T0 = 19
 i_t = 2 #unit 3, but zero-index
 treated_units = [i_t]
 control_units = [u for u in range(Y.shape[0]) if u not in treated_units]


-Y = smoking_df[['cigsale']].unstack('year')
 Y_names = Y.columns.get_level_values('year')
 Y_pre_names = ["cigsale(" + str(i) + ")" for i in Y_names[:T0]]
-Y.isnull().sum().sum() #0
+print(Y.isnull().sum().sum()) #0
 Y = Y.values
 T = Y.shape[1]
 T1 = T-T0
@ -53,15 +55,15 @@ X_Y_pre_names = X_full_names + Y_pre_names
 X_Y_pre_names_arr = np.array(X_Y_pre_names)

 fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
-print(len(np.diag(fast_fit.V)))
-print(np.diag(fast_fit.V))
-Y_post_sc = fast_fit.predict(Y_post)
-Y_pre_sc = fast_fit.predict(Y_pre)
-post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
-pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
-print(pre_mse) #192.210632448
-print(post_mse) #129.190437803
-X_Y_pre_names_arr[fast_fit.match_space_desc>0]
+#print(len(np.diag(fast_fit.V)))
+#print(np.diag(fast_fit.V))
+#Y_post_sc = fast_fit.predict(Y_post)
+#Y_pre_sc = fast_fit.predict(Y_pre)
+#post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
+#pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
+#print(pre_mse) #192.210632448
+#print(post_mse) #129.190437803
+#print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])


 full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
@ -70,14 +72,23 @@ print(np.diag(full_fit.V)[np.diag(full_fit.V)>0])
 print(sum(np.diag(full_fit.V>0)))
 full_Y_post_sc = full_fit.predict(Y_post)
 full_Y_pre_sc = full_fit.predict(Y_pre)
-full_post_mse = np.mean(np.power(Y_post[control_units, :] - full_Y_post_sc[control_units, :], 2))
-full_pre_mse = np.mean(np.power(Y_pre[control_units, :] - full_Y_pre_sc[control_units, :], 2))
+full_Y_pre_effect = Y_pre - full_Y_pre_sc
+full_Y_post_effect = Y_post - full_Y_post_sc
+
+print(full_Y_pre_effect[control_units, :].mean()) #0.0775621971136
+print(scipy.stats.ttest_1samp(full_Y_pre_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.15231397793811494, pvalue=0.87898191261344316)
+full_pre_mse = np.mean(np.power(full_Y_pre_effect[control_units, :], 2))
 print(full_pre_mse) #186.969136939
+
+print(full_Y_post_effect[control_units, :].mean()) #0.036591103495529125
+print(scipy.stats.ttest_1samp(full_Y_post_effect[control_units, :].flatten(), popmean=0)) #Ttest_1sampResult(statistic=0.065402542511047407, pvalue=0.94788222532433364)
+full_post_mse = np.mean(np.power(full_Y_post_effect[control_units, :], 2))
 print(full_post_mse) #142.422049057
-X_Y_pre_names_arr[np.diag(full_fit.V)>0]
+
+print(X_Y_pre_names_arr[np.diag(full_fit.V)>0])

 with open(pkl_file, "wb" ) as output_file:
    pickle.dump( (fast_fit, full_fit),  output_file)

 with open(pkl_file, "rb" ) as input_file:
-    fast_fit, full_fit = pickle.dump(input_file)
+    fast_fit, full_fit = pickle.load(input_file)
--- a/test/test.pyproj
+++ b/test/test.pyproj
@ -23,9 +23,6 @@
    <EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
  </PropertyGroup>
  <ItemGroup>
-    <Compile Include="sc2010.py">
-      <SubType>Code</SubType>
-    </Compile>
    <Compile Include="test_simulation.py">
      <SubType>Code</SubType>
    </Compile>