This commit is contained in:
Brian Quistorff 2020-12-23 14:47:56 -05:00
Родитель d78c72cbaa
Коммит 2979638871
3 изменённых файлов: 678 добавлений и 143 удалений

Просмотреть файл

@ -20,7 +20,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Misc", "Misc", "{AA17F266-A
docs\overview.md = docs\overview.md
docs\performance-notes.md = docs\performance-notes.md
README.md = README.md
replication\sc2010.py = replication\sc2010.py
EndProjectSection
EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "test", "test\test.pyproj", "{01446F94-F552-4EE1-90DC-E93A0DB22B4C}"

678
replication/sc2010.ipynb Normal file
Просмотреть файл

@ -0,0 +1,678 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import random\n",
"\n",
"import scipy\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"try:\n",
" import SparseSC as SC\n",
"except ImportError:\n",
" raise RuntimeError(\"SparseSC is not installed. Use 'pip install -e .' or 'conda develop .' from repo root to install in dev mode\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"random.seed(12345)\n",
"np.random.seed(101101001)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"pkl_file = \"../replication/smoking_fits.pkl\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"smoking_df = pd.read_stata(\"../replication/smoking.dta\")\n",
"smoking_df['year'] = smoking_df['year'].astype('int')\n",
"smoking_df = smoking_df.set_index(['state', 'year'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(39, 31)\n"
]
}
],
"source": [
"Y = smoking_df[['cigsale']].unstack('year')\n",
"T0 = 19\n",
"i_t = 2 #unit 3, but zero-index\n",
"treated_units = [i_t]\n",
"control_units = [u for u in range(Y.shape[0]) if u not in treated_units]\n",
"print(Y.shape)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0\n"
]
}
],
"source": [
"Y_names = Y.columns.get_level_values('year')\n",
"Y_pre_names = [\"cigsale(\" + str(i) + \")\" for i in Y_names[:T0]]\n",
"print(Y.isnull().sum().sum()) #0\n",
"Y = Y.values\n",
"T = Y.shape[1]\n",
"T1 = T-T0\n",
"Y_pre,Y_post = Y[:,:T0], Y[:,T0:]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Stata: synth cigsale beer(1984(1)1988) lnincome retprice age15to24 cigsale(1988) cigsale(1980) cigsale(1975), xperiod(1980(1)1988) trunit(3) trperiod(1989) \n",
"\n",
"year_ind = smoking_df.index.get_level_values('year')\n",
"beer_pre = smoking_df.loc[np.logical_and(year_ind>=1984, year_ind<=1988),[\"beer\"]]\n",
"Xother_pre = smoking_df.loc[np.logical_and(year_ind>=1980, year_ind<=1988), ['lnincome', 'retprice', 'age15to24']]\n",
"X_avgs = pd.concat((beer_pre.groupby('state').mean(), \n",
" Xother_pre.groupby('state').mean())\n",
" , axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"#X_spot = pd.DataFrame({'cigsale_75':smoking_df.xs(1975, level='year')[\"cigsale\"], \n",
"# 'cigsale_80':smoking_df.xs(1980, level='year')[\"cigsale\"], \n",
"# 'cigsale_88':smoking_df.xs(1988, level='year')[\"cigsale\"]})\n",
"#X_orig = pd.concat((X_avgs, X_spot), axis=1)\n",
"#X_orig.isnull().sum().sum() #0"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"X_full = pd.concat((X_avgs, beer_pre.unstack('year'), Xother_pre.unstack('year')), axis=1)\n",
"X_full_names = [c[0] + \"(\" + str(c[1]) + \")\" if len(c)==2 else c for c in X_full.columns]\n",
"X_full.isnull().sum().sum() #0\n",
"X_full = X_full.values\n",
"X_Y_pre = np.concatenate((X_full, Y_pre), axis=1)\n",
"X_Y_pre_names = X_full_names + Y_pre_names\n",
"X_Y_pre_names_arr = np.array(X_Y_pre_names)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def print_summary(full_fit, Y_pre, Y_post, Y_sc):\n",
" full_Y_pre_sc,full_Y_post_sc = Y_sc[:,:T0], Y_sc[:,T0:]\n",
" print(\"V: \" + str(np.diag(full_fit.V)))\n",
" print(\"V>0: \" + str(np.diag(full_fit.V)[np.diag(full_fit.V)>0]))\n",
" print(\"#V>0: \" + str(sum(np.diag(full_fit.V>0))))\n",
" full_Y_pre_effect = Y_pre - full_Y_pre_sc\n",
" full_Y_post_effect = Y_post - full_Y_post_sc\n",
"\n",
" print(\"Avg bias pre: \" + str(full_Y_pre_effect[control_units, :].mean()))\n",
" print(scipy.stats.ttest_1samp(full_Y_pre_effect[control_units, :].flatten(), popmean=0)) \n",
" full_pre_mse = np.mean(np.power(full_Y_pre_effect[control_units, :], 2))\n",
" print(\"Avg MSE pre: \" + str(full_pre_mse) )\n",
"\n",
" print(\"Avg effect post: \" + str(full_Y_post_effect[control_units, :].mean()))\n",
" print(scipy.stats.ttest_1samp(full_Y_post_effect[control_units, :].flatten(), popmean=0)) \n",
" full_post_mse = np.mean(np.power(full_Y_post_effect[control_units, :], 2))\n",
" print(\"Avg MSE post: \" + str(full_post_mse) )\n",
"\n",
" print(X_Y_pre_names_arr[np.diag(full_fit.V)>0])"
]
},
{
"source": [
"Fast"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"if False:\n",
" fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])\n",
" #print(len(np.diag(fast_fit.V)))\n",
" #print(np.diag(fast_fit.V))\n",
" #Y_post_sc = fast_fit.predict(Y_post)\n",
" #Y_pre_sc = fast_fit.predict(Y_pre)\n",
" #post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))\n",
" #pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))\n",
" #print(pre_mse) #192.210632448\n",
" #print(post_mse) #129.190437803\n",
" #print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])"
]
},
{
"source": [
"Full"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"tags": [
"outputPrepend"
]
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"mental R^2:: 0.035163, learning rate: 0.37634, alpha: 7.39737, zeros: 43\n",
"[STOP ITERATION: alpha is None] i: 8, grad: [ 7116440.58580422 6148010.63086454 19808390.19043673 6014178.31106925\n",
" 7209057.53591002 7160593.65541474 7120106.9391149 7068657.08419721\n",
" 7023872.99104069 6145447.33909881 6146854.74783401 6147288.1917992\n",
" 6148121.37201822 6148464.17076636 6149032.27686649 6149000.35596599\n",
" 6148981.0436949 6148918.44998959 11907623.61242215 12771335.7824121\n",
" 13854688.89545191 16957597.71550036 21625881.36699763 23631655.60836795\n",
" 24240335.24134829 27545460.09614398 33254341.26738478 6014189.71453357\n",
" 6014186.7375416 6014183.8446364 6014181.03581291 6014178.31106925\n",
" 6014175.67041432 6014173.11383363 6014170.64133792 6014168.25292671\n",
" 11572198.31839551 9284757.35135063 7993259.45809541 5812600.58213996\n",
" 4704479.54744229 3869620.39815495 2710559.15838274 817724.83691314\n",
" 2598129.26761536 5028154.74820869 4530436.91170173 7133098.93086807\n",
" 4876862.93235536 3982335.60786063 4937130.90564412 2504528.20560773\n",
" 357628.97571059 -701174.07780308 -4029140.43811078], step: [-3.29572805e-05 -2.84723393e-05 -9.17355612e-05 -2.78525421e-05\n",
" -3.33862031e-05 -3.31617598e-05 -3.29742599e-05 -3.27359881e-05\n",
" -3.25285863e-05 -2.84604683e-05 -2.84669862e-05 -2.84689935e-05\n",
" -2.84728521e-05 -2.84744397e-05 -2.84770706e-05 -2.84769228e-05\n",
" -2.84768334e-05 -2.84765435e-05 -5.51459520e-05 -5.91459298e-05\n",
" -6.41630970e-05 -7.85331230e-05 -1.00152630e-04 -1.09441664e-04\n",
" -1.12260549e-04 -1.27567067e-04 -1.54005733e-04 -2.78525950e-05\n",
" -2.78525812e-05 -2.78525678e-05 -2.78525548e-05 -2.78525421e-05\n",
" -2.78525299e-05 -2.78525181e-05 -2.78525066e-05 -2.78524956e-05\n",
" -5.35925483e-05 -4.29990735e-05 -3.70179573e-05 -2.69190061e-05\n",
" -2.17871350e-05 -1.79207798e-05 -1.25529971e-05 -3.78700369e-06\n",
" -1.20323178e-05 -2.32861224e-05 -2.09811180e-05 -3.30344277e-05\n",
" -2.25854678e-05 -1.84427805e-05 -2.28645776e-05 -1.15988376e-05\n",
" -1.65623226e-06 3.24724003e-06 1.86595405e-05]\n",
"Fold 9,w_pen: 354.6808, value 13 of 20, time elapsed: 2.5606 sec.\n",
"[Path] i: 0, In Sample R^2: 0.164265, incremental R^2:: 0.164265, learning rate: 0.22222, alpha: 1.00000, zeros: 36\n",
"[Path] i: 1, In Sample R^2: 0.308744, incremental R^2:: 0.144479, learning rate: 0.24691, alpha: 1.00000, zeros: 36\n",
"[Path] i: 2, In Sample R^2: 0.429675, incremental R^2:: 0.120931, learning rate: 0.27435, alpha: 1.00000, zeros: 36\n",
"[Path] i: 3, In Sample R^2: 0.521946, incremental R^2:: 0.092271, learning rate: 0.30483, alpha: 1.00000, zeros: 36\n",
"[Path] i: 4, In Sample R^2: 0.578036, incremental R^2:: 0.056090, learning rate: 0.33870, alpha: 1.00000, zeros: 36\n",
"[Path] i: 5, In Sample R^2: 0.597065, incremental R^2:: 0.019030, learning rate: 0.30483, alpha: 0.79276, zeros: 36\n",
"[Path] i: 6, In Sample R^2: 0.625186, incremental R^2:: 0.028120, learning rate: 0.33870, alpha: 5.00737, zeros: 39\n",
"[STOP ITERATION: alpha is None] i: 7, grad: [ 6.65773513e+06 6.08475434e+06 1.33252338e+07 6.01415938e+06\n",
" 6.73084207e+06 6.69256374e+06 6.65525778e+06 6.61961273e+06\n",
" 6.59035901e+06 6.08359478e+06 6.08456286e+06 6.08477015e+06\n",
" 6.08511885e+06 6.08504998e+06 6.08525270e+06 6.08498876e+06\n",
" 6.08482614e+06 6.08461492e+06 9.04762995e+06 9.43861679e+06\n",
" 1.00136330e+07 1.16322938e+07 1.43805595e+07 1.54779006e+07\n",
" 1.56742215e+07 1.75490725e+07 2.10561109e+07 6.01416617e+06\n",
" 6.01416440e+06 6.01416267e+06 6.01416100e+06 6.01415938e+06\n",
" 6.01415782e+06 6.01415630e+06 6.01415485e+06 6.01415344e+06\n",
" 9.27760969e+06 7.93170070e+06 7.93916993e+06 6.52128680e+06\n",
" 5.63677183e+06 5.28320283e+06 4.69769179e+06 2.99237346e+06\n",
" 3.61226686e+06 4.91912018e+06 3.98357134e+06 5.71352322e+06\n",
" 4.08801094e+06 3.23824543e+06 3.49024856e+06 1.54879727e+06\n",
" 1.91232797e+04 -8.52251635e+05 -3.11556985e+06], step: [-6.09620486e-05 -5.57155073e-05 -1.22013498e-04 -5.50690992e-05\n",
" -6.16314577e-05 -6.12809593e-05 -6.09393646e-05 -6.06129780e-05\n",
" -6.03451141e-05 -5.57048896e-05 -5.57137539e-05 -5.57156520e-05\n",
" -5.57188449e-05 -5.57182143e-05 -5.57200705e-05 -5.57176537e-05\n",
" -5.57161647e-05 -5.57142306e-05 -8.28452988e-05 -8.64253991e-05\n",
" -9.16905779e-05 -1.06511966e-04 -1.31676666e-04 -1.41724552e-04\n",
" -1.43522179e-04 -1.60689392e-04 -1.92801851e-04 -5.50691614e-05\n",
" -5.50691451e-05 -5.50691293e-05 -5.50691140e-05 -5.50690992e-05\n",
" -5.50690849e-05 -5.50690710e-05 -5.50690577e-05 -5.50690448e-05\n",
" -8.49511255e-05 -7.26272094e-05 -7.26956020e-05 -5.97126492e-05\n",
" -5.16135220e-05 -4.83760410e-05 -4.30147654e-05 -2.73998909e-05\n",
" -3.30759910e-05 -4.50422910e-05 -3.64758682e-05 -5.23163018e-05\n",
" -3.74321773e-05 -2.96512359e-05 -3.19587214e-05 -1.41816778e-05\n",
" -1.75103738e-07 7.80370570e-06 2.85279478e-05]\n",
"Fold 9,w_pen: 733.8836, value 14 of 20, time elapsed: 3.1382 sec.\n",
"[Path] i: 0, In Sample R^2: 0.162277, incremental R^2:: 0.162277, learning rate: 0.22222, alpha: 1.00000, zeros: 36\n",
"[Path] i: 1, In Sample R^2: 0.303340, incremental R^2:: 0.141063, learning rate: 0.24691, alpha: 1.00000, zeros: 36\n",
"[Path] i: 2, In Sample R^2: 0.417742, incremental R^2:: 0.114403, learning rate: 0.27435, alpha: 1.00000, zeros: 36\n",
"[Path] i: 3, In Sample R^2: 0.496592, incremental R^2:: 0.078850, learning rate: 0.30483, alpha: 1.00000, zeros: 36\n",
"[Path] i: 4, In Sample R^2: 0.529934, incremental R^2:: 0.033342, learning rate: 0.33870, alpha: 1.00000, zeros: 36\n",
"[Path] i: 5, In Sample R^2: 0.540898, incremental R^2:: 0.010964, learning rate: 0.37634, alpha: 1.69210, zeros: 36\n",
"[STOP ITERATION: alpha is None] i: 6, grad: [ 6475397.65208361 6066415.75144976 11642467.23297486 6014153.89796574\n",
" 6539017.06148542 6503567.23541421 6471898.97119262 6442424.22802768\n",
" 6420142.64264573 6065794.08630641 6066541.85809675 6066633.82274914\n",
" 6066813.13093028 6066648.75515064 6066704.77519997 6066404.30738484\n",
" 6066217.59716975 6065966.14551919 8332624.23603594 8616720.45477556\n",
" 9057235.00499859 10327392.89126889 12438766.60215778 13378151.19514647\n",
" 13428021.67585828 14905341.29746386 17710587.0935341 6014159.17642876\n",
" 6014157.79378899 6014156.4531664 6014155.15455952 6014153.89796574\n",
" 6014152.68339114 6014151.5108265 6014150.38027869 6014149.29174742\n",
" 5712261.12136504 4459411.34978612 4112261.92059962 2800136.1895333\n",
" 2028314.78430143 1598859.12935089 702023.53441315 -459907.37509773\n",
" 174992.05312653 1396969.31778017 597422.97363261 2066914.93818627\n",
" 764794.86116744 350272.46188471 924144.14388958 -464809.50989923\n",
" -1575022.28584632 -2074885.58752528 -3928013.99552939], step: [-1.08299316e-04 -1.01459202e-04 -1.94717191e-04 -1.00585136e-04\n",
" -1.09363334e-04 -1.08770445e-04 -1.08240802e-04 -1.07747845e-04\n",
" -1.07375191e-04 -1.01448805e-04 -1.01461311e-04 -1.01462849e-04\n",
" -1.01465848e-04 -1.01463099e-04 -1.01464036e-04 -1.01459011e-04\n",
" -1.01455888e-04 -1.01451682e-04 -1.39360941e-04 -1.44112375e-04\n",
" -1.51479865e-04 -1.72722920e-04 -2.08035088e-04 -2.23746047e-04\n",
" -2.24580118e-04 -2.49287898e-04 -2.96204894e-04 -1.00585225e-04\n",
" -1.00585201e-04 -1.00585179e-04 -1.00585157e-04 -1.00585136e-04\n",
" -1.00585116e-04 -1.00585096e-04 -1.00585077e-04 -1.00585059e-04\n",
" -9.55360593e-05 -7.45824776e-05 -6.87764951e-05 -4.68315386e-05\n",
" -3.39230293e-05 -2.67404969e-05 -1.17411583e-05 7.69182944e-06\n",
" -2.92669589e-06 -2.33639431e-05 -9.99174153e-06 -3.45686067e-05\n",
" -1.27909922e-05 -5.85821446e-06 -1.54560669e-05 7.77381635e-06\n",
" 2.63418320e-05 3.47019137e-05 6.56949972e-05]\n",
"Fold 9,w_pen: 1518.5066, value 15 of 20, time elapsed: 2.3886 sec.\n",
"[Path] i: 0, In Sample R^2: 0.157100, incremental R^2:: 0.157100, learning rate: 0.22222, alpha: 1.00000, zeros: 36\n",
"[Path] i: 1, In Sample R^2: 0.288977, incremental R^2:: 0.131878, learning rate: 0.24691, alpha: 1.00000, zeros: 36\n",
"[Path] i: 2, In Sample R^2: 0.385531, incremental R^2:: 0.096553, learning rate: 0.27435, alpha: 1.00000, zeros: 36\n",
"[Path] i: 3, In Sample R^2: 0.433192, incremental R^2:: 0.047662, learning rate: 0.30483, alpha: 1.00000, zeros: 36\n",
"[Path] i: 4, In Sample R^2: 0.443202, incremental R^2:: 0.010010, learning rate: 0.33870, alpha: 1.11443, zeros: 36\n",
"[STOP ITERATION: alpha is None] i: 5, grad: [ 6306183.34364611 6051811.17764009 10257384.66914446 6014149.52262628\n",
" 6349121.4131662 6323306.35949591 6304309.70787138 6284143.25592647\n",
" 6270151.22677957 6051485.64056111 6051991.65810627 6052021.09559765\n",
" 6052092.42279695 6051972.5715623 6051970.82928547 6051743.75796167\n",
" 6051603.67341065 6051403.6657994 7787538.10320892 8012376.74892391\n",
" 8338054.82305892 9322692.79268365 10800656.96706634 11568006.66320152\n",
" 11587943.71170875 12693439.59413664 14694306.41888458 6014153.25849369\n",
" 6014152.28036914 6014151.33168377 6014150.41243669 6014149.52262628\n",
" 6014148.66225682 6014147.83132163 6014147.02982547 6014146.25776803\n",
" 3112862.24044684 1991446.78227508 1109854.2469119 134694.81512341\n",
" -332960.06859801 -727554.07691491 -1754276.70696162 -2192434.55843725\n",
" -1391877.62567418 -250840.39740154 -599278.64753078 507463.19643904\n",
" -320786.75129262 -325351.95668861 470421.97347935 -206624.48918801\n",
" -861416.99404356 -1036666.20093607 -2271055.41938827], step: [-1.40091260e-04 -1.34440407e-04 -2.27866820e-04 -1.33603757e-04\n",
" -1.41045125e-04 -1.40471647e-04 -1.40049638e-04 -1.39601642e-04\n",
" -1.39290810e-04 -1.34433175e-04 -1.34444416e-04 -1.34445070e-04\n",
" -1.34446655e-04 -1.34443992e-04 -1.34443954e-04 -1.34438909e-04\n",
" -1.34435797e-04 -1.34431354e-04 -1.72999415e-04 -1.77994184e-04\n",
" -1.85229091e-04 -2.07102729e-04 -2.39935562e-04 -2.56982163e-04\n",
" -2.57425062e-04 -2.81983548e-04 -3.26432613e-04 -1.33603840e-04\n",
" -1.33603818e-04 -1.33603797e-04 -1.33603777e-04 -1.33603757e-04\n",
" -1.33603738e-04 -1.33603719e-04 -1.33603702e-04 -1.33603684e-04\n",
" -6.91519372e-05 -4.42398000e-05 -2.46553060e-05 -2.99223245e-06\n",
" 7.39667611e-06 1.61625443e-05 3.89710894e-05 4.87047242e-05\n",
" 3.09204284e-05 5.57239546e-06 1.33129179e-05 -1.12732464e-05\n",
" 7.12624702e-06 7.22766263e-06 -1.04503792e-05 4.59014328e-06\n",
" 1.91362962e-05 2.30294406e-05 5.04512791e-05]\n",
"Fold 9,w_pen: 3141.9998, value 16 of 20, time elapsed: 1.6716 sec.\n",
"[Path] i: 0, In Sample R^2: 0.141598, incremental R^2:: 0.141598, learning rate: 0.22222, alpha: 1.00000, zeros: 36\n",
"[Path] i: 1, In Sample R^2: 0.246451, incremental R^2:: 0.104852, learning rate: 0.24691, alpha: 1.00000, zeros: 36\n",
"[Path] i: 2, In Sample R^2: 0.300983, incremental R^2:: 0.054533, learning rate: 0.27435, alpha: 1.00000, zeros: 36\n",
"[Path] i: 3, In Sample R^2: 0.318474, incremental R^2:: 0.017491, learning rate: 0.30483, alpha: 2.29762, zeros: 36\n",
"[STOP ITERATION: alpha is None] i: 4, grad: [ 6170289.23111453 6036153.733176 8597440.78701287 6014144.78160391\n",
" 6194920.89410277 6179400.38970386 6169493.53052807 6157654.21827988\n",
" 6150068.03466743 6036008.88738855 6036306.60271792 6036313.76441342\n",
" 6036331.19712583 6036250.56779605 6036229.97210601 6036080.86321833\n",
" 6035991.92151329 6035859.24674445 7100822.96721399 7241995.74696301\n",
" 7433775.69272211 8044878.59396305 8910229.58709567 9403214.45617723\n",
" 9396442.50507382 10073586.33406701 11264537.65381921 6014146.96582382\n",
" 6014146.39400886 6014145.83936762 6014145.30189958 6014144.78160391\n",
" 6014144.27848316 6014143.79253327 6014143.32375702 6014142.87215419\n",
" 3020772.87294314 2238820.04719329 1444501.44737167 853152.43968287\n",
" 607388.0077964 339062.63312032 -447641.42791603 -558757.70956229\n",
" 69841.0861821 879919.75928709 762804.93118302 1455030.02870612\n",
" 1000252.6118736 1120878.5365233 1766016.26246754 1488842.66196894\n",
" 1150482.609808 1114161.77859069 420196.02332575], step: [-1.83537710e-04 -1.79547796e-04 -2.55734300e-04 -1.78893131e-04\n",
" -1.84270389e-04 -1.83808725e-04 -1.83514042e-04 -1.83161877e-04\n",
" -1.82936223e-04 -1.79543488e-04 -1.79552344e-04 -1.79552557e-04\n",
" -1.79553075e-04 -1.79550677e-04 -1.79550064e-04 -1.79545629e-04\n",
" -1.79542983e-04 -1.79539037e-04 -2.11216807e-04 -2.15416047e-04\n",
" -2.21120618e-04 -2.39298117e-04 -2.65038327e-04 -2.79702358e-04\n",
" -2.79500924e-04 -2.99642837e-04 -3.35068158e-04 -1.78893196e-04\n",
" -1.78893179e-04 -1.78893163e-04 -1.78893147e-04 -1.78893131e-04\n",
" -1.78893116e-04 -1.78893102e-04 -1.78893088e-04 -1.78893075e-04\n",
" -8.98540920e-05 -6.65945938e-05 -4.29672708e-05 -2.53773590e-05\n",
" -1.80669982e-05 -1.00855530e-05 1.33152725e-05 1.66204706e-05\n",
" -2.07745092e-06 -2.61735636e-05 -2.26899364e-05 -4.32804476e-05\n",
" -2.97529122e-05 -3.33409784e-05 -5.25308570e-05 -4.42862178e-05\n",
" -3.42215634e-05 -3.31411858e-05 -1.24988980e-05]\n",
"Fold 9,w_pen: 6501.2315, value 17 of 20, time elapsed: 1.7904 sec.\n",
"[Path] i: 0, In Sample R^2: 0.093320, incremental R^2:: 0.093320, learning rate: 0.22222, alpha: 1.00000, zeros: 36\n",
"[Path] i: 1, In Sample R^2: 0.139228, incremental R^2:: 0.045908, learning rate: 0.20000, alpha: 0.93559, zeros: 36\n",
"[Path] i: 2, In Sample R^2: 0.168784, incremental R^2:: 0.029555, learning rate: 0.22222, alpha: 3.23011, zeros: 36\n",
"[STOP ITERATION: alpha is None] i: 3, grad: [ 6086933.12292972 6026513.90397286 7688763.69587233 6014141.86312938\n",
" 6102392.19172146 6091865.42802261 6086740.98126443 6078919.7618119\n",
" 6074828.12964729 6026495.49852332 6026691.84561053 6026690.16998821\n",
" 6026665.32066503 6026581.81346307 6026536.0958339 6026403.75192337\n",
" 6026332.6370975 6026218.57211893 6722849.19452673 6820694.03070837\n",
" 6929775.96048434 7343520.46621154 7877488.4148071 8236874.51050237\n",
" 8181037.77654913 8634281.57590663 9406282.40045992 6014143.16133891\n",
" 6014142.82117422 6014142.49141795 6014142.17206981 6014141.86312938\n",
" 6014141.56459839 6014141.27647397 6014140.99875798 6014140.73145022\n",
" 2038264.33337919 1334183.1401064 404153.37511412 -32378.33817579\n",
" -177803.50204467 -420335.82483711 -1209696.64330092 -1094632.88391374\n",
" -456451.22049609 283101.48534087 323365.347223 839487.15616863\n",
" 587587.04950857 860403.10807732 1556359.41780936 1539260.94605255\n",
" 1386043.51992963 1465149.3093677 1059829.51668771], step: [-1.79973793e-04 -1.78187363e-04 -2.27335496e-04 -1.77821556e-04\n",
" -1.80430875e-04 -1.80119627e-04 -1.79968112e-04 -1.79736860e-04\n",
" -1.79615882e-04 -1.78186819e-04 -1.78192625e-04 -1.78192575e-04\n",
" -1.78191840e-04 -1.78189371e-04 -1.78188020e-04 -1.78184106e-04\n",
" -1.78182004e-04 -1.78178631e-04 -1.98776074e-04 -2.01669075e-04\n",
" -2.04894326e-04 -2.17127608e-04 -2.32915565e-04 -2.43541619e-04\n",
" -2.41890681e-04 -2.55291847e-04 -2.78117779e-04 -1.77821595e-04\n",
" -1.77821585e-04 -1.77821575e-04 -1.77821565e-04 -1.77821556e-04\n",
" -1.77821547e-04 -1.77821539e-04 -1.77821531e-04 -1.77821523e-04\n",
" -6.02658441e-05 -3.94481088e-05 -1.19496985e-05 9.57337990e-07\n",
" 5.25715824e-06 1.24281688e-05 3.57673870e-05 3.23652696e-05\n",
" 1.34960013e-05 -8.37052864e-06 -9.56101978e-06 -2.48213155e-05\n",
" -1.73733254e-05 -2.54397424e-05 -4.60172474e-05 -4.55116928e-05\n",
" -4.09814769e-05 -4.33204165e-05 -3.13362302e-05]\n",
"Fold 9,w_pen: 13451.9457, value 18 of 20, time elapsed: 1.9315 sec.\n",
"[STOP ITERATION: alpha is None] i: 0, grad: [ 5994321.29434768 6014227.25586741 6741378.89521603 6014138.11919597\n",
" 6004730.42517278 5996971.5052699 5994340.89073791 5988537.97554522\n",
" 5987099.63641015 6014382.76029146 6014520.96287477 6014525.90076626\n",
" 6014441.1280347 6014285.19213224 6014184.19229764 6013999.85697604\n",
" 6013917.85542329 6013775.75061043 6314065.65034707 6368214.49707111\n",
" 6370748.31517289 6589018.59676402 6824673.78226106 7072423.761421\n",
" 6878462.85880315 7127043.3718084 7568194.35334082 6014138.48622833\n",
" 6014138.38836971 6014138.29457824 6014138.20485369 6014138.11919597\n",
" 6014138.03760636 6014137.96008256 6014137.88662599 6014137.81723635\n",
" -577929.57114723 -1422919.3887777 -2767487.01544875 -3214559.7328572\n",
" -3361316.64055665 -3680082.2052055 -4719224.87091999 -4388809.1443151\n",
" -3584176.50292202 -2706913.29448894 -2522043.06334772 -2053789.8315418\n",
" -2155639.77243763 -1598353.05061206 -608955.2868524 -370656.55623618\n",
" -354680.0643211 -105559.17344876 -295531.98189821], step: [-1.98780759e-04 -1.99440871e-04 -2.23554319e-04 -1.99437915e-04\n",
" -1.99125942e-04 -1.98868644e-04 -1.98781409e-04 -1.98588975e-04\n",
" -1.98541278e-04 -1.99446028e-04 -1.99450611e-04 -1.99450774e-04\n",
" -1.99447963e-04 -1.99442792e-04 -1.99439443e-04 -1.99433330e-04\n",
" -1.99430611e-04 -1.99425898e-04 -2.09383965e-04 -2.11179623e-04\n",
" -2.11263648e-04 -2.18501821e-04 -2.26316503e-04 -2.34532267e-04\n",
" -2.28100230e-04 -2.36343536e-04 -2.50972770e-04 -1.99437927e-04\n",
" -1.99437924e-04 -1.99437921e-04 -1.99437918e-04 -1.99437915e-04\n",
" -1.99437912e-04 -1.99437910e-04 -1.99437907e-04 -1.99437905e-04\n",
" 1.91650186e-05 4.71861587e-05 9.17740546e-05 1.06599662e-04\n",
" 1.11466343e-04 1.22037091e-04 1.56496633e-04 1.45539548e-04\n",
" 1.18856713e-04 8.97653384e-05 8.36347619e-05 6.81067766e-05\n",
" 7.14842747e-05 5.30038043e-05 2.01938782e-05 1.22915319e-05\n",
" 1.17617273e-05 3.50050182e-06 9.80028744e-06]\n",
"Fold 9,w_pen: 27833.9331, value 19 of 20, time elapsed: 0.7101 sec.\n",
"[STOP ITERATION: gradient is zero] i: 0\n",
"Fold 9,w_pen: 57592.2509, value 20 of 20, time elapsed: 0.1840 sec.\n",
"[Path] i: 0, In Sample R^2: 0.165019, incremental R^2:: 0.165019, learning rate: 0.22222, alpha: 1.00000, zeros: 36\n",
"[Path] i: 1, In Sample R^2: 0.310785, incremental R^2:: 0.145766, learning rate: 0.24691, alpha: 1.00000, zeros: 36\n",
"[Path] i: 2, In Sample R^2: 0.434118, incremental R^2:: 0.123333, learning rate: 0.27435, alpha: 1.00000, zeros: 36\n",
"[Path] i: 3, In Sample R^2: 0.531418, incremental R^2:: 0.097300, learning rate: 0.30483, alpha: 1.00000, zeros: 36\n",
"[Path] i: 4, In Sample R^2: 0.598476, incremental R^2:: 0.067057, learning rate: 0.33870, alpha: 1.00000, zeros: 36\n",
"[Path] i: 5, In Sample R^2: 0.635258, incremental R^2:: 0.036782, learning rate: 0.30483, alpha: 0.99602, zeros: 36\n",
"[Path] i: 6, In Sample R^2: 0.663457, incremental R^2:: 0.028200, learning rate: 0.33870, alpha: 1.33651, zeros: 37\n",
"[Path] i: 7, In Sample R^2: 0.694674, incremental R^2:: 0.031216, learning rate: 0.37634, alpha: 7.99569, zeros: 42\n",
"[STOP ITERATION: alpha is None] i: 8, grad: [ 7152849.47437815 6138643.15414512 19252474.55861977 6014172.55330414\n",
" 7211816.90535016 7166815.01183565 7159494.70052793 7124531.35543984\n",
" 7101152.50571489 6136679.28858423 6137676.33533028 6137807.32020297\n",
" 6138617.50780037 6138894.47289813 6139410.33918284 6139542.90580352\n",
" 6139614.83877758 6139554.55783494 11684394.75891286 12359815.61962566\n",
" 13346750.27433854 16117390.4684952 20552023.70071043 23234134.23189435\n",
" 23732626.74506076 27093924.40044427 32786283.21931704 6014182.4270597\n",
" 6014179.84869175 6014177.34361284 6014174.91181735 6014172.55330414\n",
" 6014170.26807934 6014168.05613351 6014165.91747447 6014163.85210112\n",
" 15190325.55296857 12662062.1360392 12043539.25658304 9920475.84216331\n",
" 9497865.69989249 8254271.42934691 6114368.44371823 3417853.59423569\n",
" 4890849.82538435 6876221.60103873 5951121.99450876 7482299.83622411\n",
" 5003151.58808499 4629066.42465672 4626455.2507812 3266672.95487842\n",
" 751100.48247361 35439.56412174 -3076521.07294256], step: [-3.45758985e-05 -2.96733635e-05 -9.30638353e-05 -2.90716896e-05\n",
" -3.48609390e-05 -3.46434060e-05 -3.46080206e-05 -3.44390125e-05\n",
" -3.43260024e-05 -2.96638704e-05 -2.96686900e-05 -2.96693232e-05\n",
" -2.96732395e-05 -2.96745783e-05 -2.96770719e-05 -2.96777128e-05\n",
" -2.96780605e-05 -2.96777691e-05 -5.64807701e-05 -5.97456624e-05\n",
" -6.45163698e-05 -7.79092665e-05 -9.93456785e-05 -1.12310635e-04\n",
" -1.14720280e-04 -1.30968334e-04 -1.58484420e-04 -2.90717373e-05\n",
" -2.90717248e-05 -2.90717127e-05 -2.90717010e-05 -2.90716896e-05\n",
" -2.90716785e-05 -2.90716678e-05 -2.90716575e-05 -2.90716475e-05\n",
" -7.34279612e-05 -6.12066808e-05 -5.82168256e-05 -4.79542267e-05\n",
" -4.59113869e-05 -3.99000219e-05 -2.95560228e-05 -1.65214380e-05\n",
" -2.36417008e-05 -3.32387171e-05 -2.87669118e-05 -3.61684166e-05\n",
" -2.41845522e-05 -2.23762755e-05 -2.23636535e-05 -1.57906514e-05\n",
" -3.63071726e-06 -1.71310018e-07 1.48714831e-05]\n"
]
}
],
"source": [
"use_est_fn = True\n",
"#if use_est_fn:\n",
"# est_fit = SC.estimate_effects(\n",
"# outcomes=Y,\n",
"# unit_treatment_periods=X,\n",
"# covariates=None,\n",
"# fast = False,\n",
"# cf_folds = len(control_units), #sync with helper\n",
"# **kwargs\n",
"# )\n",
"# full_fit = \n",
"#else: \n",
"full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t], verbose=0, progress=False)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"V: [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 5.62581428e-06 1.67298271e-04\n 1.25447705e-04 2.83218864e-05 1.20684653e-04 2.90538206e-05\n 1.68471635e-04 1.91157505e-04 1.95644283e-04 2.87258604e-04\n 4.25739528e-04 4.62630652e-04 6.40482407e-04]\nV>0: [5.62581428e-06 1.67298271e-04 1.25447705e-04 2.83218864e-05\n 1.20684653e-04 2.90538206e-05 1.68471635e-04 1.91157505e-04\n 1.95644283e-04 2.87258604e-04 4.25739528e-04 4.62630652e-04\n 6.40482407e-04]\n#V>0: 13\nAvg bias pre: 0.0775621971135751\nTtest_1sampResult(statistic=0.15231397793810575, pvalue=0.8789819126134504)\nAvg MSE pre: 186.96913693932575\nAvg effect post: 0.036591103495526384\nTtest_1sampResult(statistic=0.06540254251104247, pvalue=0.9478822253243375)\nAvg MSE post: 142.42204905739652\n['cigsale(1976)' 'cigsale(1977)' 'cigsale(1978)' 'cigsale(1979)'\n 'cigsale(1980)' 'cigsale(1981)' 'cigsale(1982)' 'cigsale(1983)'\n 'cigsale(1984)' 'cigsale(1985)' 'cigsale(1986)' 'cigsale(1987)'\n 'cigsale(1988)']\n"
]
}
],
"source": [
"full_Y_sc = full_fit.predict(Y)\n",
"print_summary(full_fit, Y_pre, Y_post, full_Y_sc)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" |>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
]
}
],
"source": [
"honest_predictions, cf_fits = SC.get_c_predictions_honest(X_Y_pre[control_units,:], Y_post[control_units,:], Y[control_units,:], \n",
" w_pen=full_fit.initial_w_pen, v_pen=full_fit.initial_v_pen, cf_folds=38, verbose=1, progress=False)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"V: [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 5.62581428e-06 1.67298271e-04\n 1.25447705e-04 2.83218864e-05 1.20684653e-04 2.90538206e-05\n 1.68471635e-04 1.91157505e-04 1.95644283e-04 2.87258604e-04\n 4.25739528e-04 4.62630652e-04 6.40482407e-04]\nV>0: [5.62581428e-06 1.67298271e-04 1.25447705e-04 2.83218864e-05\n 1.20684653e-04 2.90538206e-05 1.68471635e-04 1.91157505e-04\n 1.95644283e-04 2.87258604e-04 4.25739528e-04 4.62630652e-04\n 6.40482407e-04]\n#V>0: 13\nAvg bias pre: -1.6187760995035356\nTtest_1sampResult(statistic=-3.4419861964346423, pvalue=0.0006107737173349461)\nAvg MSE pre: 162.0946915406102\nAvg effect post: -0.15200904377719812\nTtest_1sampResult(statistic=-0.30910995445207323, pvalue=0.7573793806908214)\nAvg MSE post: 110.05643980497922\n['cigsale(1976)' 'cigsale(1977)' 'cigsale(1978)' 'cigsale(1979)'\n 'cigsale(1980)' 'cigsale(1981)' 'cigsale(1982)' 'cigsale(1983)'\n 'cigsale(1984)' 'cigsale(1985)' 'cigsale(1986)' 'cigsale(1987)'\n 'cigsale(1988)']\n"
]
}
],
"source": [
"full_Y_sc[control_units,:] = honest_predictions\n",
"print_summary(full_fit, Y_pre, Y_post, full_Y_sc)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(39, 31)"
]
},
"metadata": {},
"execution_count": 20
}
],
"source": [
"full_Y_sc.shape"
]
},
{
"source": [
"# Full - flat\n",
"Since we don't fit v, we don't have to do out-of-sample refitting"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"full_fit_flat = SC._fit_fast_inner(X_Y_pre, X_Y_pre, Y_post, V=np.repeat(1,X_Y_pre.shape[1]), treated_units=[i_t])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"V: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]\nV>0: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]\n#V>0: 55\nAvg bias pre: 0.003513363039932098\nTtest_1sampResult(statistic=0.05033751319674089, pvalue=0.959867371950055)\nAvg MSE pre: 3.5123625231089566\nAvg effect post: -0.40206830732880827\nTtest_1sampResult(statistic=-0.7260495769207599, pvalue=0.46818168129036175)\nAvg MSE post: 139.69517126549485\n['beer' 'lnincome' 'retprice' 'age15to24' 'beer(1984)' 'beer(1985)'\n 'beer(1986)' 'beer(1987)' 'beer(1988)' 'lnincome(1980)' 'lnincome(1981)'\n 'lnincome(1982)' 'lnincome(1983)' 'lnincome(1984)' 'lnincome(1985)'\n 'lnincome(1986)' 'lnincome(1987)' 'lnincome(1988)' 'retprice(1980)'\n 'retprice(1981)' 'retprice(1982)' 'retprice(1983)' 'retprice(1984)'\n 'retprice(1985)' 'retprice(1986)' 'retprice(1987)' 'retprice(1988)'\n 'age15to24(1980)' 'age15to24(1981)' 'age15to24(1982)' 'age15to24(1983)'\n 'age15to24(1984)' 'age15to24(1985)' 'age15to24(1986)' 'age15to24(1987)'\n 'age15to24(1988)' 'cigsale(1970)' 'cigsale(1971)' 'cigsale(1972)'\n 'cigsale(1973)' 'cigsale(1974)' 'cigsale(1975)' 'cigsale(1976)'\n 'cigsale(1977)' 'cigsale(1978)' 'cigsale(1979)' 'cigsale(1980)'\n 'cigsale(1981)' 'cigsale(1982)' 'cigsale(1983)' 'cigsale(1984)'\n 'cigsale(1985)' 'cigsale(1986)' 'cigsale(1987)' 'cigsale(1988)']\n"
]
}
],
"source": [
"full_flat_Y_sc = full_fit_flat.predict(Y)\n",
"print_summary(full_fit_flat, Y_pre, Y_post, full_flat_Y_sc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"source": [
"write-out"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"#Write out\n",
"with open(pkl_file, \"wb\" ) as output_file:\n",
" pickle.dump( (full_fit), output_file) #full_fit_flat"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#Read back\n",
"with open(pkl_file, \"rb\" ) as input_file:\n",
" (full_fit) = pickle.load(input_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3.6.8 64-bit ('SparseSC_36': conda)",
"metadata": {
"interpreter": {
"hash": "a874c5cea27404d4932ec17617c98c8f4631437eb5052d1551a7b7bc79fe6733"
}
}
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8-final"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

Просмотреть файл

@ -1,142 +0,0 @@
import pickle
import random
import scipy
import numpy as np
import pandas as pd
try:
import SparseSC as SC
except ImportError:
raise RuntimeError("SparseSC is not installed. Use 'pip install -e .' or 'conda develop .' from repo root to install in dev mode")
# Reproducibility: seed both the stdlib and numpy RNGs.
random.seed(12345)
np.random.seed(101101001)

pkl_file = "../replication/smoking_fits.pkl"

# Load the California smoking panel (Abadie et al.) and index by (state, year).
smoking_df = pd.read_stata("../replication/smoking.dta")
smoking_df['year'] = smoking_df['year'].astype('int')
smoking_df = smoking_df.set_index(['state', 'year'])

# Outcome matrix: one row per state, one column per year.
Y = smoking_df[['cigsale']].unstack('year')
T0 = 19                # number of pre-treatment periods
i_t = 2                # treated unit (state 3, zero-indexed)
treated_units = [i_t]
control_units = [unit for unit in range(Y.shape[0]) if unit != i_t]
Y_names = Y.columns.get_level_values('year')
Y_pre_names = ["cigsale(%s)" % year for year in Y_names[:T0]]
print(Y.isnull().sum().sum())  # sanity check: expect 0 missing outcomes
Y = Y.values
T = Y.shape[1]
T1 = T - T0
Y_pre = Y[:, :T0]
Y_post = Y[:, T0:]

# Stata: synth cigsale beer(1984(1)1988) lnincome retprice age15to24 cigsale(1988) cigsale(1980) cigsale(1975), xperiod(1980(1)1988) trunit(3) trperiod(1989)
year_ind = smoking_df.index.get_level_values('year')
beer_pre = smoking_df.loc[(year_ind >= 1984) & (year_ind <= 1988), ["beer"]]
Xother_pre = smoking_df.loc[(year_ind >= 1980) & (year_ind <= 1988), ['lnincome', 'retprice', 'age15to24']]

# Per-state averages of the covariates over their respective windows.
X_avgs = pd.concat(
    (beer_pre.groupby('state').mean(), Xother_pre.groupby('state').mean()),
    axis=1)
#X_spot = pd.DataFrame({'cigsale_75':smoking_df.xs(1975, level='year')["cigsale"],
#                       'cigsale_80':smoking_df.xs(1980, level='year')["cigsale"],
#                       'cigsale_88':smoking_df.xs(1988, level='year')["cigsale"]})
#X_orig = pd.concat((X_avgs, X_spot), axis=1)
#X_orig.isnull().sum().sum() #0

# Wide covariate matrix: averages plus each (covariate, year) column.
X_full = pd.concat((X_avgs, beer_pre.unstack('year'), Xother_pre.unstack('year')), axis=1)
X_full_names = [
    "%s(%s)" % (col[0], col[1]) if len(col) == 2 else col
    for col in X_full.columns
]
X_full.isnull().sum().sum()  # 0 (sanity check; value is discarded)
X_full = X_full.values

# Final feature matrix: covariates followed by pre-period outcomes.
X_Y_pre = np.concatenate((X_full, Y_pre), axis=1)
X_Y_pre_names = X_full_names + Y_pre_names
X_Y_pre_names_arr = np.array(X_Y_pre_names)
def print_summary(fit, Y_pre, Y_post, Y_sc, t0=None, controls=None, names=None):
    """Print fit diagnostics for a synthetic-control fit and return them.

    Reports the diagonal of the fitted V matrix, the average pre-period
    bias/MSE and post-period effect/MSE over the control units, and the
    names of the features receiving positive V weight.

    Parameters
    ----------
    fit : fitted SparseSC model exposing a square ``V`` weight matrix.
    Y_pre : (units x T0) array of pre-period outcomes.
    Y_post : (units x T1) array of post-period outcomes.
    Y_sc : (units x T) array of synthetic-control predictions (pre then post).
    t0 : number of pre-treatment periods; defaults to the module-level T0.
    controls : indices of control units; defaults to module-level control_units.
    names : array of feature names aligned with diag(V); defaults to the
        module-level X_Y_pre_names_arr.

    Returns
    -------
    dict with keys 'n_v_pos', 'pre_bias', 'pre_mse', 'post_effect',
    'post_mse' (everything is also printed); returned for programmatic use.
    """
    # Fall back to the script's globals so existing call sites keep working.
    if t0 is None:
        t0 = T0
    if controls is None:
        controls = control_units
    if names is None:
        names = X_Y_pre_names_arr

    Y_pre_sc, Y_post_sc = Y_sc[:, :t0], Y_sc[:, t0:]
    v_diag = np.diag(fit.V)  # hoisted: the original recomputed this 4x
    print("V: " + str(v_diag))
    print("V>0: " + str(v_diag[v_diag > 0]))
    # BUG FIX: original wrote sum(np.diag(fit.V>0)) -- misplaced parenthesis;
    # it printed the right count only because diag(V > 0) happens to equal
    # diag(V) > 0 elementwise. Intent made explicit here.
    n_v_pos = int(np.sum(v_diag > 0))
    print("#V>0: " + str(n_v_pos))

    pre_effect = Y_pre - Y_pre_sc
    post_effect = Y_post - Y_post_sc
    pre_bias = pre_effect[controls, :].mean()
    print("Avg bias pre: " + str(pre_bias))
    print(scipy.stats.ttest_1samp(pre_effect[controls, :].flatten(), popmean=0))
    pre_mse = np.mean(np.power(pre_effect[controls, :], 2))
    print("Avg MSE pre: " + str(pre_mse))
    post_mean = post_effect[controls, :].mean()
    print("Avg effect post: " + str(post_mean))
    print(scipy.stats.ttest_1samp(post_effect[controls, :].flatten(), popmean=0))
    post_mse = np.mean(np.power(post_effect[controls, :], 2))
    print("Avg MSE post: " + str(post_mse))
    print(names[v_diag > 0])
    return {"n_v_pos": n_v_pos, "pre_bias": pre_bias, "pre_mse": pre_mse,
            "post_effect": post_mean, "post_mse": post_mse}
# Fast ----------------------#
# Disabled experiment: SC.fit_fast is the quicker (non-joint) fitting path.
# Guarded by `if False:` so only the "full" fit below actually runs.
# NOTE(review): because this branch never executes, `fast_fit` is undefined
# for the rest of the script -- anything that references it later will raise.
if False:
    fast_fit = SC.fit_fast(X_Y_pre, Y_post, treated_units=[i_t])
    # Reference diagnostics from a prior run of this path:
    #print(len(np.diag(fast_fit.V)))
    #print(np.diag(fast_fit.V))
    #Y_post_sc = fast_fit.predict(Y_post)
    #Y_pre_sc = fast_fit.predict(Y_pre)
    #post_mse = np.mean(np.power(Y_post[control_units, :] - Y_post_sc[control_units, :], 2))
    #pre_mse = np.mean(np.power(Y_pre[control_units, :] - Y_pre_sc[control_units, :], 2))
    #print(pre_mse) #192.210632448
    #print(post_mse) #129.190437803
    #print(X_Y_pre_names_arr[fast_fit.match_space_desc>0])
# Full ----------------------#
# Joint optimization of the V matrix and synthetic-control weights (the slow
# path). The commented block below sketches the higher-level
# SC.estimate_effects API as an alternative entry point; kept for reference.
#if use_est_fn:
#    est_fit = SC.estimate_effects(
#        outcomes=Y,
#        unit_treatment_periods=X,
#        covariates=None,
#        fast = False,
#        cf_folds = len(control_units), #sync with helper
#        **kwargs
#    )
#    full_fit =
#else:
full_fit = SC.fit(X_Y_pre, Y_post, treated_units=[i_t])
full_Y_sc = full_fit.predict(Y)
print_summary(full_fit, Y_pre, Y_post, full_Y_sc)
# Reference output from a prior run:
#Avg bias pre: 0.0775621971136
#Ttest_1sampResult(statistic=0.15231397793811494, pvalue=0.87898191261344316)
#Avg MSE pre: 186.969136939
#Avg effect post: 0.036591103495529125
#Ttest_1sampResult(statistic=0.065402542511047407, pvalue=0.94788222532433364)
#Avg MSE post: 142.422049057
# BUG FIX: removed (a) an exact duplicate print_summary(...) call, (b) the
# unused `use_est_fn` flag, and (c) a call to the undefined
# `get_c_preditions_honest` [sic] with undefined `cf_folds`/`cf_seed`, which
# raised NameError unconditionally at this point in the script.
# Full - Flat ----------------------#
# Same fit, but with V pinned to a flat (all-ones) weighting over every
# column of X_Y_pre instead of being optimized.
# NOTE(review): _fit_fast_inner is a private SparseSC entry point -- confirm
# its signature is stable before relying on it in replication scripts.
full_fit_flat = SC._fit_fast_inner(X_Y_pre, X_Y_pre, Y_post, V=np.repeat(1,X_Y_pre.shape[1]), treated_units=[i_t])
full_flat_Y_sc = full_fit_flat.predict(Y)
print_summary(full_fit_flat, Y_pre, Y_post, full_flat_Y_sc)
# Reference output from a prior run:
#Avg bias pre: 0.00351336303993
#Ttest_1sampResult(statistic=0.050337513196736801, pvalue=0.95986737195005822)
#Avg MSE pre: 3.51236252311
#Avg effect post: -0.402068307329
#Ttest_1sampResult(statistic=-0.72604957692076211, pvalue=0.46818168129036042)
#Avg MSE post: 139.695171265
# Persist the fitted models for later analysis, then read them back.
# BUG FIX: the original dumped (fast_fit, full_fit, full_fit_flat), but
# `fast_fit` is only created inside the `if False:` block above and is
# therefore undefined here, so the dump raised NameError. Persist only the
# two fits that are always computed, and load with a matching unpacking.
with open(pkl_file, "wb") as output_file:
    pickle.dump((full_fit, full_fit_flat), output_file)

# NOTE(review): pickle.load executes arbitrary code from the file -- only
# read pickles this script itself produced.
with open(pkl_file, "rb") as input_file:
    full_fit, full_fit_flat = pickle.load(input_file)