added a few notes in the intro notebook on CATE and target units

Amit Sharma 2020-01-07 18:03:51 +05:30
Parent e6476e32d2
Commit 49ebeadc63
4 changed files with 276 additions and 213 deletions

View file

@ -1,5 +1,12 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simple example on using Instrumental Variables method for estimation"
]
},
{
"cell_type": "code",
"execution_count": 1,

View file

@ -86,62 +86,62 @@
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>0.701982</td>\n",
" <td>0.024579</td>\n",
" <td>0.192484</td>\n",
" <td>1.453203</td>\n",
" <td>1.225925</td>\n",
" <td>-0.475766</td>\n",
" <td>0.856829</td>\n",
" <td>0.871424</td>\n",
" <td>-0.792461</td>\n",
" <td>-0.336331</td>\n",
" <td>0.386621</td>\n",
" <td>-0.068865</td>\n",
" <td>True</td>\n",
" <td>13.572196</td>\n",
" <td>9.124501</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.242941</td>\n",
" <td>1.225778</td>\n",
" <td>-1.566807</td>\n",
" <td>1.107805</td>\n",
" <td>1.132326</td>\n",
" <td>0.688376</td>\n",
" <td>1.0</td>\n",
" <td>0.491077</td>\n",
" <td>0.197358</td>\n",
" <td>-0.505399</td>\n",
" <td>-0.424140</td>\n",
" <td>0.367762</td>\n",
" <td>0.168461</td>\n",
" <td>True</td>\n",
" <td>13.946462</td>\n",
" <td>8.622930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>0.883972</td>\n",
" <td>-1.777568</td>\n",
" <td>-1.565806</td>\n",
" <td>0.001832</td>\n",
" <td>1.759653</td>\n",
" <td>0.634530</td>\n",
" <td>0.665795</td>\n",
" <td>0.945841</td>\n",
" <td>-0.288969</td>\n",
" <td>0.274395</td>\n",
" <td>-1.312587</td>\n",
" <td>2.382897</td>\n",
" <td>True</td>\n",
" <td>11.779975</td>\n",
" <td>17.977266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>0.918023</td>\n",
" <td>-0.648299</td>\n",
" <td>-0.682472</td>\n",
" <td>1.255655</td>\n",
" <td>2.117590</td>\n",
" <td>-0.085458</td>\n",
" <td>0.902905</td>\n",
" <td>1.268346</td>\n",
" <td>-0.059530</td>\n",
" <td>0.315513</td>\n",
" <td>-0.932715</td>\n",
" <td>-1.360252</td>\n",
" <td>True</td>\n",
" <td>14.588348</td>\n",
" <td>8.367090</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>0.942274</td>\n",
" <td>0.193453</td>\n",
" <td>-1.284952</td>\n",
" <td>-0.778548</td>\n",
" <td>0.330621</td>\n",
" <td>0.350299</td>\n",
" <td>0.104740</td>\n",
" <td>-1.342788</td>\n",
" <td>-1.935350</td>\n",
" <td>-0.649980</td>\n",
" <td>-0.852453</td>\n",
" <td>0.843568</td>\n",
" <td>True</td>\n",
" <td>8.194333</td>\n",
" <td>-1.326686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@ -158,62 +158,62 @@
" <tr>\n",
" <th>9995</th>\n",
" <td>1.0</td>\n",
" <td>0.448219</td>\n",
" <td>-0.717358</td>\n",
" <td>0.742045</td>\n",
" <td>1.596378</td>\n",
" <td>1.889145</td>\n",
" <td>-0.160641</td>\n",
" <td>0.577368</td>\n",
" <td>1.846929</td>\n",
" <td>0.755214</td>\n",
" <td>-2.979011</td>\n",
" <td>1.525415</td>\n",
" <td>-0.225743</td>\n",
" <td>True</td>\n",
" <td>18.115923</td>\n",
" <td>9.653687</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9996</th>\n",
" <td>1.0</td>\n",
" <td>0.691309</td>\n",
" <td>0.266874</td>\n",
" <td>-1.134911</td>\n",
" <td>1.726687</td>\n",
" <td>1.382415</td>\n",
" <td>-0.406323</td>\n",
" <td>0.131065</td>\n",
" <td>1.880914</td>\n",
" <td>-1.314365</td>\n",
" <td>-0.538280</td>\n",
" <td>-0.303415</td>\n",
" <td>0.863559</td>\n",
" <td>True</td>\n",
" <td>10.776816</td>\n",
" <td>11.305263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9997</th>\n",
" <td>0.0</td>\n",
" <td>0.940634</td>\n",
" <td>-1.437683</td>\n",
" <td>-1.950858</td>\n",
" <td>1.701739</td>\n",
" <td>1.891118</td>\n",
" <td>1.325125</td>\n",
" <td>1.0</td>\n",
" <td>0.739417</td>\n",
" <td>-0.974042</td>\n",
" <td>-0.707890</td>\n",
" <td>-0.028049</td>\n",
" <td>-1.371608</td>\n",
" <td>0.100693</td>\n",
" <td>True</td>\n",
" <td>15.831704</td>\n",
" <td>2.620035</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9998</th>\n",
" <td>1.0</td>\n",
" <td>0.849666</td>\n",
" <td>-0.753199</td>\n",
" <td>-0.198880</td>\n",
" <td>1.509888</td>\n",
" <td>0.106379</td>\n",
" <td>0.683263</td>\n",
" <td>0.489953</td>\n",
" <td>-0.363797</td>\n",
" <td>-0.590689</td>\n",
" <td>-1.905395</td>\n",
" <td>-0.374315</td>\n",
" <td>0.622429</td>\n",
" <td>True</td>\n",
" <td>13.269083</td>\n",
" <td>1.844830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9999</th>\n",
" <td>1.0</td>\n",
" <td>0.372705</td>\n",
" <td>-0.018488</td>\n",
" <td>-0.358214</td>\n",
" <td>-0.040396</td>\n",
" <td>2.855035</td>\n",
" <td>0.726370</td>\n",
" <td>0.484942</td>\n",
" <td>1.118425</td>\n",
" <td>-0.414818</td>\n",
" <td>-1.112958</td>\n",
" <td>0.608269</td>\n",
" <td>1.865714</td>\n",
" <td>True</td>\n",
" <td>21.026073</td>\n",
" <td>15.116874</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -222,30 +222,30 @@
],
"text/plain": [
" Z0 Z1 W0 W1 W2 W3 W4 v0 \\\n",
"0 1.0 0.701982 0.024579 0.192484 1.453203 1.225925 -0.475766 True \n",
"1 0.0 0.242941 1.225778 -1.566807 1.107805 1.132326 0.688376 True \n",
"2 1.0 0.883972 -1.777568 -1.565806 0.001832 1.759653 0.634530 True \n",
"3 1.0 0.918023 -0.648299 -0.682472 1.255655 2.117590 -0.085458 True \n",
"4 1.0 0.942274 0.193453 -1.284952 -0.778548 0.330621 0.350299 True \n",
"0 1.0 0.856829 0.871424 -0.792461 -0.336331 0.386621 -0.068865 True \n",
"1 1.0 0.491077 0.197358 -0.505399 -0.424140 0.367762 0.168461 True \n",
"2 1.0 0.665795 0.945841 -0.288969 0.274395 -1.312587 2.382897 True \n",
"3 1.0 0.902905 1.268346 -0.059530 0.315513 -0.932715 -1.360252 True \n",
"4 1.0 0.104740 -1.342788 -1.935350 -0.649980 -0.852453 0.843568 True \n",
"... ... ... ... ... ... ... ... ... \n",
"9995 1.0 0.448219 -0.717358 0.742045 1.596378 1.889145 -0.160641 True \n",
"9996 1.0 0.691309 0.266874 -1.134911 1.726687 1.382415 -0.406323 True \n",
"9997 0.0 0.940634 -1.437683 -1.950858 1.701739 1.891118 1.325125 True \n",
"9998 1.0 0.849666 -0.753199 -0.198880 1.509888 0.106379 0.683263 True \n",
"9999 1.0 0.372705 -0.018488 -0.358214 -0.040396 2.855035 0.726370 True \n",
"9995 1.0 0.577368 1.846929 0.755214 -2.979011 1.525415 -0.225743 True \n",
"9996 1.0 0.131065 1.880914 -1.314365 -0.538280 -0.303415 0.863559 True \n",
"9997 1.0 0.739417 -0.974042 -0.707890 -0.028049 -1.371608 0.100693 True \n",
"9998 1.0 0.489953 -0.363797 -0.590689 -1.905395 -0.374315 0.622429 True \n",
"9999 1.0 0.484942 1.118425 -0.414818 -1.112958 0.608269 1.865714 True \n",
"\n",
" y \n",
"0 13.572196 \n",
"1 13.946462 \n",
"2 11.779975 \n",
"3 14.588348 \n",
"4 8.194333 \n",
"0 9.124501 \n",
"1 8.622930 \n",
"2 17.977266 \n",
"3 8.367090 \n",
"4 -1.326686 \n",
"... ... \n",
"9995 18.115923 \n",
"9996 10.776816 \n",
"9997 15.831704 \n",
"9998 13.269083 \n",
"9999 21.026073 \n",
"9995 9.653687 \n",
"9996 11.305263 \n",
"9997 2.620035 \n",
"9998 1.844830 \n",
"9999 15.116874 \n",
"\n",
"[10000 rows x 9 columns]"
]
@ -359,7 +359,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['Unobserved Confounders', 'W4', 'W0', 'W1', 'W3', 'W2']\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W1', 'W4', 'W0', 'W2', 'Unobserved Confounders', 'W3']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n"
]
},
@ -386,9 +386,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -423,7 +423,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W4+W0+W1+W3+W2\n"
"INFO:dowhy.causal_estimator:b: y~v0+W1+W4+W0+W2+W3\n"
]
},
{
@ -438,9 +438,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -449,14 +449,14 @@
"Estimand assumption 2, Exclusion: If we remove {Z0,Z1}→{v0}, then ¬({Z0,Z1}→y)\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W4+W0+W1+W3+W2\n",
"b: y~v0+W1+W4+W0+W2+W3\n",
"## Estimate\n",
"Value: 9.99999999999974\n",
"Value: 10.000000000000021\n",
"\n",
"## Statistical Significance\n",
"p-value: <0.001\n",
"\n",
"Causal Estimate is 9.99999999999974\n"
"Causal Estimate is 10.000000000000021\n"
]
}
],
@ -487,7 +487,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W4+W0+W1+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W1+W4+W0+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -504,9 +504,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -515,11 +515,11 @@
"Estimand assumption 2, Exclusion: If we remove {Z0,Z1}→{v0}, then ¬({Z0,Z1}→y)\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W4+W0+W1+W3+W2\n",
"b: y~v0+W1+W4+W0+W2+W3\n",
"## Estimate\n",
"Value: 10.064504732274713\n",
"Value: 10.173499320316472\n",
"\n",
"Causal Estimate is 10.064504732274713\n"
"Causal Estimate is 10.173499320316472\n"
]
}
],
@ -550,13 +550,11 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W4+W0+W1+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W1+W4+W0+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/mnt/c/Users/amshar/code/dowhy/dowhy/causal_estimators/propensity_score_matching_estimator.py:62: FutureWarning: `item` has been deprecated and will be removed in a future version\n",
" control_outcome = control.iloc[indices[i]][self._outcome_name].item()\n",
"/mnt/c/Users/amshar/code/dowhy/dowhy/causal_estimators/propensity_score_matching_estimator.py:77: FutureWarning: `item` has been deprecated and will be removed in a future version\n",
" treated_outcome = treated.iloc[indices[i]][self._outcome_name].item()\n"
" control_outcome = control.iloc[indices[i]][self._outcome_name].item()\n"
]
},
{
@ -571,9 +569,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -582,11 +580,19 @@
"Estimand assumption 2, Exclusion: If we remove {Z0,Z1}→{v0}, then ¬({Z0,Z1}→y)\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W4+W0+W1+W3+W2\n",
"b: y~v0+W1+W4+W0+W2+W3\n",
"## Estimate\n",
"Value: 9.856834069883842\n",
"Value: 10.036816324727294\n",
"\n",
"Causal Estimate is 9.856834069883842\n"
"Causal Estimate is 10.036816324727294\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/mnt/c/Users/amshar/code/dowhy/dowhy/causal_estimators/propensity_score_matching_estimator.py:77: FutureWarning: `item` has been deprecated and will be removed in a future version\n",
" treated_outcome = treated.iloc[indices[i]][self._outcome_name].item()\n"
]
}
],
@ -620,7 +626,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W4+W0+W1+W3+W2\n"
"INFO:dowhy.causal_estimator:b: y~v0+W1+W4+W0+W2+W3\n"
]
},
{
@ -635,9 +641,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -646,11 +652,11 @@
"Estimand assumption 2, Exclusion: If we remove {Z0,Z1}→{v0}, then ¬({Z0,Z1}→y)\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W4+W0+W1+W3+W2\n",
"b: y~v0+W1+W4+W0+W2+W3\n",
"## Estimate\n",
"Value: 15.103825856686212\n",
"Value: 10.722320441623154\n",
"\n",
"Causal Estimate is 15.103825856686212\n"
"Causal Estimate is 10.722320441623154\n"
]
},
{
@ -716,9 +722,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -738,9 +744,9 @@
"Estimand assumption 4, outcome_effect_homogeneity: Each unit's outcome y is affected in the same way by common causes of ['v0'] and y\n",
"\n",
"## Estimate\n",
"Value: 8.431207181421312\n",
"Value: 6.7777521025251435\n",
"\n",
"Causal Estimate is 8.431207181421312\n"
"Causal Estimate is 6.7777521025251435\n"
]
}
],
@ -770,33 +776,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:Using Regression Discontinuity Estimator\n",
"INFO:dowhy.causal_estimator:\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" local_rd_variable local_treatment local_outcome\n",
"6 0.597719 True 19.666240\n",
"11 0.426837 True 8.465613\n",
"19 0.539785 True 16.948250\n",
"22 0.431814 True 11.478121\n",
"25 0.411029 True 17.837136\n",
"... ... ... ...\n",
"9977 0.413834 True 32.515201\n",
"9978 0.457739 True 1.210606\n",
"9979 0.594694 True 9.426116\n",
"9986 0.583982 True 17.739097\n",
"9995 0.448219 True 18.115923\n",
"\n",
"[2026 rows x 3 columns]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:\n",
"INFO:dowhy.causal_estimator:INFO: Using Instrumental Variable Estimator\n",
"INFO:dowhy.causal_estimator:Realized estimand: Wald Estimator\n",
"Realized estimand type: nonparametric-ate\n",
@ -817,6 +797,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
" local_rd_variable local_treatment local_outcome\n",
"1 0.491077 True 8.622930\n",
"17 0.526249 True 1.107572\n",
"18 0.557455 True 4.576484\n",
"24 0.416279 True -5.730869\n",
"25 0.554845 True 1.196812\n",
"... ... ... ...\n",
"9974 0.531890 True 1.377569\n",
"9982 0.575699 True 14.511282\n",
"9995 0.577368 True 9.653687\n",
"9998 0.489953 True 1.844830\n",
"9999 0.484942 True 15.116874\n",
"\n",
"[1924 rows x 3 columns]\n",
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
@ -825,9 +819,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W4,W0,W1,W3,W2))\n",
"─────(Expectation(y|W1,W4,W0,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W4,W0,W1,W3,W2,U) = P(y|v0,W4,W0,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W1,W4,W0,W2,W3,U) = P(y|v0,W1,W4,W0,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -850,9 +844,9 @@
"Estimand assumption 4, outcome_effect_homogeneity: Each unit's outcome local_outcome is affected in the same way by common causes of ['local_treatment'] and local_outcome\n",
"\n",
"## Estimate\n",
"Value: 25.965733858996124\n",
"Value: 22.999383345332262\n",
"\n",
"Causal Estimate is 25.965733858996124\n"
"Causal Estimate is 22.999383345332262\n"
]
}
],

View file

@ -81,18 +81,18 @@
"output_type": "stream",
"text": [
" X0 Z0 Z1 W0 W1 W2 W3 W4 \\\n",
"0 0.262340 0.0 0.970873 -0.931370 1.534707 0.212227 0.656675 -0.163708 \n",
"1 1.357854 1.0 0.809297 0.418757 -0.368615 0.550052 1.382726 -0.073969 \n",
"2 0.319958 0.0 0.483138 1.101497 -0.700813 0.321933 0.356162 0.361954 \n",
"3 0.309237 0.0 0.262257 -0.008878 0.921562 1.102873 1.271079 -2.435455 \n",
"4 0.404030 0.0 0.179699 2.122864 1.004447 1.222506 0.880357 -1.621326 \n",
"0 2.583198 1.0 0.183011 -2.490799 -0.270172 -0.013538 0.979027 -0.033464 \n",
"1 0.712234 1.0 0.150498 -0.892550 0.878099 0.137869 0.872547 1.138340 \n",
"2 0.431597 0.0 0.680770 -1.194290 -0.743686 0.409159 -0.410003 0.378746 \n",
"3 0.715099 1.0 0.129770 -0.260848 -0.779302 1.038961 1.078510 -0.430214 \n",
"4 -0.285593 1.0 0.480503 0.933601 -0.287256 0.524709 0.602599 -0.139902 \n",
"\n",
" v0 y \n",
"0 True 11.076007 \n",
"1 True 18.942833 \n",
"2 True 16.081703 \n",
"3 False 2.038722 \n",
"4 True 21.821949 \n",
" v0 y \n",
"0 True 2.320453 \n",
"1 True 13.069987 \n",
"2 True 2.154850 \n",
"3 True 14.552760 \n",
"4 True 17.646749 \n",
"digraph { U[label=\"Unobserved Confounders\"]; U->y;v0->y; U->v0;W0-> v0; W1-> v0; W2-> v0; W3-> v0; W4-> v0;Z0-> v0; Z1-> v0;W0-> y; W1-> y; W2-> y; W3-> y; W4-> y;X0-> y;}\n",
"\n",
"\n",
@ -221,7 +221,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W0', 'Unobserved Confounders', 'W4', 'W1', 'W3', 'W2']\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W0', 'W1', 'W4', 'Unobserved Confounders', 'W2', 'W3']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n"
]
},
@ -248,9 +248,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W0,W4,W1,W3,W2))\n",
"─────(Expectation(y|W0,W1,W4,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W4,W1,W3,W2,U) = P(y|v0,W0,W4,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W1,W4,W2,W3,U) = P(y|v0,W0,W1,W4,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -282,7 +282,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W0', 'Unobserved Confounders', 'W4', 'W1', 'W3', 'W2']\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W0', 'W1', 'W4', 'Unobserved Confounders', 'W2', 'W3']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n",
"INFO:dowhy.causal_identifier:Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True.\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:['Z1', 'Z0']\n"
@ -297,9 +297,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W0,W4,W1,W3,W2))\n",
"─────(Expectation(y|W0,W1,W4,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W4,W1,W3,W2,U) = P(y|v0,W0,W4,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W1,W4,W2,W3,U) = P(y|v0,W0,W1,W4,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -332,7 +332,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W4+W1+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -349,9 +349,9 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W0,W4,W1,W3,W2))\n",
"─────(Expectation(y|W0,W1,W4,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W4,W1,W3,W2,U) = P(y|v0,W0,W4,W1,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W1,W4,W2,W3,U) = P(y|v0,W0,W1,W4,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
@ -360,11 +360,11 @@
"Estimand assumption 2, Exclusion: If we remove {Z1,Z0}→{v0}, then ¬({Z1,Z0}→y)\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W0+W4+W1+W3+W2\n",
"b: y~v0+W0+W1+W4+W2+W3\n",
"## Estimate\n",
"Value: 10.646781689585207\n",
"Value: 9.86863892333762\n",
"\n",
"Causal Estimate is 10.646781689585207\n"
"Causal Estimate is 9.86863892333762\n"
]
}
],
@ -379,12 +379,74 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interface 2: Specify common causes and instruments"
"You can input additional parameters to the estimate_effect method. For instance, to estimate the effect on any subset of the units, you can specify the \"target_units\" parameter which can be a string (\"ate\", \"att\", or \"atc\"), lambda function that filters rows of the data frame, or a new dataframe on which to compute the effect. You can also specify \"effect modifiers\" to estimate heterogeneous effects across these variables. See `help(CausalModel.estimate_effect)`. "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
"Estimand type: nonparametric-ate\n",
"### Estimand : 1\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W0,W1,W4,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W1,W4,W2,W3,U) = P(y|v0,W0,W1,W4,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, [Z1, Z0])*Derivative([v0], [Z1, Z0])**(-1))\n",
"Estimand assumption 1, As-if-random: If U→→y then ¬(U →→{Z1,Z0})\n",
"Estimand assumption 2, Exclusion: If we remove {Z1,Z0}→{v0}, then ¬({Z1,Z0}→y)\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W0+W1+W4+W2+W3\n",
"## Estimate\n",
"Value: 10.280769761076874\n",
"\n",
"Causal Estimate is 10.280769761076874\n"
]
}
],
"source": [
"# Causal effect on the control group (ATC)\n",
"causal_estimate_att = model.estimate_effect(identified_estimand,\n",
" method_name=\"backdoor.propensity_score_stratification\",\n",
" target_units = \"atc\")\n",
"print(causal_estimate_att)\n",
"print(\"Causal Estimate is \" + str(causal_estimate_att.value))"
]
},
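The markdown note above also says that "target_units" can be a lambda that filters rows of the data frame. As a minimal sketch (not part of this commit), assuming the `model` and `identified_estimand` objects defined earlier in the notebook and using `W0` purely as an illustrative column, such calls might look like this:

```python
# Sketch only: target_units given as a string and as a row-filtering lambda,
# per the note above. Assumes `model` and `identified_estimand` from this notebook.
estimate_att = model.estimate_effect(identified_estimand,
                                     method_name="backdoor.propensity_score_stratification",
                                     target_units="att")  # effect on the treated units
estimate_subset = model.estimate_effect(identified_estimand,
                                        method_name="backdoor.propensity_score_stratification",
                                        target_units=lambda df: df["W0"] > 1)  # effect on a filtered subset
print(estimate_att.value, estimate_subset.value)
```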
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interface 2: Specify common causes and instruments"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
@ -411,7 +473,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@ -420,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
@ -448,14 +510,14 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['U', 'W0', 'W1', 'W4', 'W3', 'W2']\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W0', 'W1', 'W4', 'U', 'W2', 'W3']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n",
"INFO:dowhy.causal_identifier:Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True.\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
@ -475,7 +537,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@ -483,7 +545,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -500,19 +562,19 @@
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"─────(Expectation(y|W0,W1,W4,W3,W2))\n",
"─────(Expectation(y|W0,W1,W4,W2,W3))\n",
"d[v₀] \n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W1,W4,W3,W2,U) = P(y|v0,W0,W1,W4,W3,W2)\n",
"Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W0,W1,W4,W2,W3,U) = P(y|v0,W0,W1,W4,W2,W3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"No such variable found!\n",
"\n",
"## Realized estimand\n",
"b: y~v0+W0+W1+W4+W3+W2\n",
"b: y~v0+W0+W1+W4+W2+W3\n",
"## Estimate\n",
"Value: 10.646781689585207\n",
"Value: 9.86863892333762\n",
"\n",
"Causal Estimate is 10.646781689585207\n"
"Causal Estimate is 9.86863892333762\n"
]
}
],
@ -541,7 +603,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"outputs": [
{
@ -549,7 +611,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W3+W2+w_random\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3+w_random\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -559,8 +621,8 @@
"output_type": "stream",
"text": [
"Refute: Add a Random Common Cause\n",
"Estimated effect:(10.646781689585207,)\n",
"New effect:(10.644077917244749,)\n",
"Estimated effect:(9.86863892333762,)\n",
"New effect:(9.92981702371751,)\n",
"\n"
]
}
@ -579,7 +641,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [
{
@ -587,7 +649,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -597,8 +659,8 @@
"output_type": "stream",
"text": [
"Refute: Add an Unobserved Common Cause\n",
"Estimated effect:(10.646781689585207,)\n",
"New effect:(9.937718916281279,)\n",
"Estimated effect:(9.86863892333762,)\n",
"New effect:(8.764065117773479,)\n",
"\n"
]
}
@ -619,7 +681,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@ -627,7 +689,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~placebo+W0+W1+W4+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~placebo+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -637,8 +699,8 @@
"output_type": "stream",
"text": [
"Refute: Use a Placebo Treatment\n",
"Estimated effect:(10.646781689585207,)\n",
"New effect:(-0.01541340253555656,)\n",
"Estimated effect:(9.86863892333762,)\n",
"New effect:(0.786272901449524,)\n",
"\n"
]
}
@ -658,7 +720,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"metadata": {},
"outputs": [
{
@ -666,7 +728,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -676,8 +738,8 @@
"output_type": "stream",
"text": [
"Refute: Use a subset of data\n",
"Estimated effect:(10.646781689585207,)\n",
"New effect:(10.644229843118644,)\n",
"Estimated effect:(9.86863892333762,)\n",
"New effect:(10.302546340220411,)\n",
"\n"
]
}
@ -698,7 +760,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"metadata": {},
"outputs": [
{
@ -706,7 +768,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W3+W2\n",
"INFO:dowhy.causal_estimator:b: y~v0+W0+W1+W4+W2+W3\n",
"/home/amshar/python-environments/vpy36/lib/python3.6/site-packages/sklearn/utils/validation.py:744: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
@ -716,8 +778,8 @@
"output_type": "stream",
"text": [
"Refute: Use a subset of data\n",
"Estimated effect:(10.646781689585207,)\n",
"New effect:(10.740850048405411,)\n",
"Estimated effect:(9.86863892333762,)\n",
"New effect:(9.915759961476407,)\n",
"\n"
]
}

View file

@ -40,7 +40,7 @@ class PropensityScoreWeightingEstimator(CausalEstimator):
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
self.logger.info(self.symbolic_estimator)
if not hasattr(self, "weighting_scheme"):
self.weighting_scheme = 'ips_weight' # 'itps_weight' 'ips_weight' 'nips_weight'
self.weighting_scheme = 'ips_weight' # 'ips_weight', 'ips_normalized_weight', 'ips_stabilized_weight'
self.min_ps_score = min_ps_score
self.max_ps_score = max_ps_score
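For context, the three scheme names in the updated comment correspond to standard inverse-propensity-score weights. The following is an illustrative sketch of those textbook definitions (not DoWhy's actual implementation), where `t` is a 0/1 treatment array and `ps` the estimated propensity scores:

```python
import numpy as np

def ips_weights(t, ps, scheme="ips_weight"):
    # Plain inverse propensity weights: 1/ps for treated, 1/(1-ps) for control.
    w = t / ps + (1 - t) / (1 - ps)
    if scheme == "ips_stabilized_weight":
        # Stabilized: scale by the marginal treatment probability to reduce variance.
        p_t = t.mean()
        w = t * p_t / ps + (1 - t) * (1 - p_t) / (1 - ps)
    elif scheme == "ips_normalized_weight":
        # Normalized (Hajek): weights sum to 1 within the treated and control groups.
        w = np.where(t == 1, w / w[t == 1].sum(), w / w[t == 0].sum())
    return w
```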