removed dependency on pygraphviz, falls back to matplotlib

This commit is contained in:
Amit Sharma 2018-09-02 20:41:15 +05:30
Родитель 8a5722ea34
Коммит 43be2b18b6
8 изменённых файлов: 237 добавлений и 314 удалений

Просмотреть файл

@ -105,7 +105,6 @@ DoWhy supports Python 3+. It requires the following packages:
* scipy
* scikit-learn
* pandas
* pygraphviz (for plotting causal graphs)
* networkx (for analyzing causal graphs)
* matplotlib (for general plotting)
* sympy (for rendering symbolic expressions)
@ -116,23 +115,18 @@ the repo::
If you face any problems, try installing dependencies manually::
pip3 install numpy
pip3 install sklearn
pip3 install pandas
pip install numpy scipy pandas
pip install networkx matplotlib
pip install sympy
For better-looking graphs, you can optionally install pygraphviz. To proceed,
first install graphviz and then pygraphviz (on Ubuntu and Ubuntu WSL)::
sudo apt install graphviz libgraphviz-dev graphviz-dev pkg-config
## from https://github.com/pygraphviz/pygraphviz/issues/71
pip3 install pygraphviz --install-option="--include-path=/usr/include/graphviz" \
pip install pygraphviz --install-option="--include-path=/usr/include/graphviz" \
--install-option="--library-path=/usr/lib/graphviz/"
pip3 install networkx
pip3 install matplotlib
pip3 install sympy
Installing pygraphviz can be problematic. It is not a
required library, so you can skip installing it.
Otherwise, to install, use (on Ubuntu or Ubuntu WSL):
sudo apt install graphviz graphviz-dev libgraphviz-dev pkg-config
pip3 install pygraphviz --install-option="--include-path=/usr/include/graphviz" --install-option="--library-path=/usr/lib/graphviz/"
Keep in mind that pygraphviz installation can be problematic on the latest versions of Python 3. It has been tested to work with Python 3.5.
Graphical Models and Potential Outcomes: Best of both worlds
------------------------------------------------------------

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -82,7 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -107,7 +107,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -130,20 +130,25 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'X3', 'Z0', 'Z1', 'X0', 'X2', 'X4', 'U', 'X1'}\n"
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'X1', 'Z0', 'X2', 'X4', 'Unobserved Confounders', 'X0', 'X3', 'Z1'}\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'no'}\n",
"There are unobserved common causes. Causal effect cannot be identified.\n",
"WARN: Do you want to continue by ignoring these unobserved confounders? [y/n] y\n"
]
@ -152,7 +157,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:['Z0', 'Z1']\n"
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
]
},
{
@ -161,18 +166,15 @@
"text": [
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"──(Expectation(y|X1,Z0,X2,X4,X0,X3,Z1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X1,Z0,X2,X4,X0,X3,Z1,U) = P(y|v,X1,Z0,X2,X4,X0,X3,Z1)\n",
"\n"
]
}
@ -193,44 +195,47 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LinearRegressionEstimator\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n"
"INFO:dowhy.causal_estimator:b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"LinearRegressionEstimator\n",
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"──(Expectation(y|X1,Z0,X2,X4,X0,X3,Z1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X1,Z0,X2,X4,X0,X3,Z1,U) = P(y|v,X1,Z0,X2,X4,X0,X3,Z1)\n",
"\n",
"## Realized estimand\n",
"b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n",
"b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n",
"## Estimate\n",
"Value: 9.999999999998463\n",
"Value: 10.000000000000018\n",
"\n",
"## Statistical Significance\n",
"p-value: 0.0\n",
@ -258,7 +263,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -266,7 +271,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n"
"INFO:dowhy.causal_estimator:b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n"
]
},
{
@ -279,25 +284,22 @@
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"──(Expectation(y|X1,Z0,X2,X4,X0,X3,Z1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X1,Z0,X2,X4,X0,X3,Z1,U) = P(y|v,X1,Z0,X2,X4,X0,X3,Z1)\n",
"\n",
"## Realized estimand\n",
"b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n",
"b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n",
"## Estimate\n",
"Value: 10.195255519721503\n",
"Value: 10.238528243078772\n",
"\n",
"Causal Estimate is 10.1952555197\n"
"Causal Estimate is 10.2385282431\n"
]
}
],
@ -319,46 +321,49 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PropensityScoreMatchingEstimator\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n"
"INFO:dowhy.causal_estimator:b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PropensityScoreMatchingEstimator\n",
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"──(Expectation(y|X1,Z0,X2,X4,X0,X3,Z1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X1,Z0,X2,X4,X0,X3,Z1,U) = P(y|v,X1,Z0,X2,X4,X0,X3,Z1)\n",
"\n",
"## Realized estimand\n",
"b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n",
"b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n",
"## Estimate\n",
"Value: 11.097109391311678\n",
"Value: 8.991647331885279\n",
"\n",
"Causal Estimate is 11.097109391311678\n"
"Causal Estimate is 8.991647331885279\n"
]
}
],
@ -380,7 +385,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -388,7 +393,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n"
"INFO:dowhy.causal_estimator:b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n"
]
},
{
@ -401,25 +406,22 @@
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"──(Expectation(y|X1,Z0,X2,X4,X0,X3,Z1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X1,Z0,X2,X4,X0,X3,Z1,U) = P(y|v,X1,Z0,X2,X4,X0,X3,Z1)\n",
"\n",
"## Realized estimand\n",
"b: y~v+X3+Z0+Z1+X0+X2+X4+X1\n",
"b: y~v+X1+Z0+X2+X4+X0+X3+Z1\n",
"## Estimate\n",
"Value: 181561.38151979883\n",
"Value: 17.074885660950077\n",
"\n",
"Causal Estimate is 181561.38152\n"
"Causal Estimate is 17.074885661\n"
]
}
],
@ -441,24 +443,14 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Instrumental Variable Estimator\n",
"INFO:dowhy.causal_estimator:Realized estimand: Wald Estimator\n",
"Realized estimand type: ate\n",
"Estimand expression:\n",
" -1\n",
"Expectation(Derivative(y, Z0))⋅Expectation(Derivative(v, Z0)) \n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, treatment_effect_homogeneity: Each unit's treatment v isaffected in the same way by common causes of v and y\n",
"Estimand assumption 3, outcome_effect_homogeneity: Each unit's outcome y isaffected in the same way by common causes of v and y\n",
"Estimand assumption 4, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"\n"
"WARNING:dowhy.do_why:No valid identified estimand for using instrumental variables method\n"
]
},
{
@ -469,36 +461,13 @@
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"\n",
"None\n",
"## Realized estimand\n",
"Realized estimand: Wald Estimator\n",
"Realized estimand type: ate\n",
"Estimand expression:\n",
" -1\n",
"Expectation(Derivative(y, Z0))⋅Expectation(Derivative(v, Z0)) \n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, treatment_effect_homogeneity: Each unit's treatment v isaffected in the same way by common causes of v and y\n",
"Estimand assumption 3, outcome_effect_homogeneity: Each unit's outcome y isaffected in the same way by common causes of v and y\n",
"Estimand assumption 4, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"\n",
"None\n",
"## Estimate\n",
"Value: 16.13954841227219\n",
"Value: None\n",
"\n",
"Causal Estimate is 16.1395484123\n"
"Causal Estimate is None\n"
]
}
],
@ -520,26 +489,14 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:Using Regression Discontinuity Estimator\n",
"INFO:dowhy.causal_estimator:\n",
"INFO:dowhy.causal_estimator:INFO: Using Instrumental Variable Estimator\n",
"INFO:dowhy.causal_estimator:Realized estimand: Wald Estimator\n",
"Realized estimand type: ate\n",
"Estimand expression:\n",
" -1\n",
"Expectation(Derivative(y, Z0))⋅Expectation(Derivative(v, Z0)) \n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, treatment_effect_homogeneity: Each unit's treatment local_treatment isaffected in the same way by common causes of local_treatment and local_outcome\n",
"Estimand assumption 3, outcome_effect_homogeneity: Each unit's outcome local_outcome isaffected in the same way by common causes of local_treatment and local_outcome\n",
"Estimand assumption 4, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"\n"
"WARNING:dowhy.do_why:No valid identified estimand for using instrumental variables method\n"
]
},
{
@ -550,36 +507,13 @@
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X3,Z0,Z1,X0,X2,X4,X1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,Z1,X0,X2,X4,X1,U) = P(y|v,X3,Z0,Z1,X0,X2,X4,X1)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z0)/Derivative(v, Z0))\n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"\n",
"None\n",
"## Realized estimand\n",
"Realized estimand: Wald Estimator\n",
"Realized estimand type: ate\n",
"Estimand expression:\n",
" -1\n",
"Expectation(Derivative(y, Z0))⋅Expectation(Derivative(v, Z0)) \n",
"Estimand assumption 1, Exclusion: If we remove {Z0,Z1}→v, then ¬(Z0,Z1→y)\n",
"Estimand assumption 2, treatment_effect_homogeneity: Each unit's treatment local_treatment isaffected in the same way by common causes of local_treatment and local_outcome\n",
"Estimand assumption 3, outcome_effect_homogeneity: Each unit's outcome local_outcome isaffected in the same way by common causes of local_treatment and local_outcome\n",
"Estimand assumption 4, As-if-random: If U→→y then ¬(U →→Z0,Z1)\n",
"\n",
"None\n",
"## Estimate\n",
"Value: 16.25986844895645\n",
"Value: None\n",
"\n",
"Causal Estimate is 16.259868449\n"
"Causal Estimate is None\n"
]
}
],

Просмотреть файл

@ -46,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -54,19 +54,19 @@
"output_type": "stream",
"text": [
" Z0 Z1 X0 X1 X2 X3 X4 v \\\n",
"0 0.0 0.619129 2.226492 1.392440 -0.289724 -1.023950 0.743092 1.0 \n",
"1 1.0 0.386653 1.151686 -1.725829 1.129189 -1.206542 0.476580 1.0 \n",
"2 0.0 0.191009 0.263892 0.284951 -0.707140 -0.210546 -2.181717 0.0 \n",
"3 1.0 0.485629 -1.028494 -0.732026 -2.171436 -0.931260 0.205507 0.0 \n",
"4 0.0 0.399060 -0.965090 1.671290 0.036712 -0.786400 0.135370 1.0 \n",
"0 0.0 0.682129 0.619275 -1.338917 -1.344954 -1.205516 -0.380428 0.0 \n",
"1 0.0 0.780360 1.489020 0.224417 -0.925444 -1.178246 -1.462796 1.0 \n",
"2 0.0 0.749278 0.338573 0.200353 0.144210 0.006207 -0.311147 1.0 \n",
"3 0.0 0.604209 0.104159 -0.659282 0.186016 -0.054680 -0.065804 1.0 \n",
"4 0.0 0.966614 0.441749 -1.867424 -0.851817 -0.843792 -1.123326 0.0 \n",
"\n",
" y \n",
"0 13.508124 \n",
"1 9.905194 \n",
"2 -4.491918 \n",
"3 -18.170923 \n",
"4 8.134593 \n",
"digraph { v ->y; U[label=\"Unobserved Confounders\"]; U->v; U->y;X0-> v; X1-> v; X2-> v; X3-> v; X4-> v;X0-> y; X1-> y; X2-> y; X3-> y; X4-> y;Z0-> v; Z1-> v;}\n"
"0 -9.677161 \n",
"1 4.996252 \n",
"2 11.352700 \n",
"3 9.375556 \n",
"4 -10.086670 \n",
"graph[node[ id \"v\" label \"v\"]node[ id \"y\" label \"y\"]node[ id \"Unobserved Confounders\" label \"Unobserved Confounders\"]edge[source \"v\" target \"y\"]edge[source \"Unobserved Confounders\" target \"v\"]edge[source \"Unobserved Confounders\" target \"y\"]node[ id \"X0\" label \"X0\"] edge[ source \"X0\" target \"v\"] node[ id \"X1\" label \"X1\"] edge[ source \"X1\" target \"v\"] node[ id \"X2\" label \"X2\"] edge[ source \"X2\" target \"v\"] node[ id \"X3\" label \"X3\"] edge[ source \"X3\" target \"v\"] node[ id \"X4\" label \"X4\"] edge[ source \"X4\" target \"v\"]edge[ source \"X0\" target \"y\"] edge[ source \"X1\" target \"y\"] edge[ source \"X2\" target \"y\"] edge[ source \"X3\" target \"y\"] edge[ source \"X4\" target \"y\"]node[ id \"Z0\" label \"Z0\"] edge[ source \"Z0\" target \"v\"] node[ id \"Z1\" label \"Z1\"] edge[ source \"Z1\" target \"v\"]]\n"
]
}
],
@ -104,7 +104,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -127,7 +127,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -150,20 +150,25 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'X2', 'Z0', 'U', 'X1', 'X4', 'X0', 'Z1', 'X3'}\n"
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'X3', 'Z0', 'X4', 'Z1', 'Unobserved Confounders', 'X2', 'X0', 'X1'}\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'no'}\n",
"There are unobserved common causes. Causal effect cannot be identified.\n",
"WARN: Do you want to continue by ignoring these unobserved confounders? [y/n] y\n"
]
@ -172,7 +177,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:['Z1', 'Z0']\n"
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
]
},
{
@ -181,18 +186,15 @@
"text": [
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X2,Z0,X1,X4,X0,Z1,X3))\n",
"──(Expectation(y|X3,Z0,X4,Z1,X2,X0,X1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X2,Z0,X1,X4,X0,Z1,X3,U) = P(y|v,X2,Z0,X1,X4,X0,Z1,X3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z1)/Derivative(v, Z1))\n",
"Estimand assumption 1, Exclusion: If we remove {Z1,Z0}→v, then ¬(Z1,Z0→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z1,Z0)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,X4,Z1,X2,X0,X1,U) = P(y|v,X3,Z0,X4,Z1,X2,X0,X1)\n",
"\n"
]
}
@ -219,7 +221,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X2+Z0+X1+X4+X0+Z1+X3\n"
"INFO:dowhy.causal_estimator:b: y~v+X3+Z0+X4+Z1+X2+X0+X1\n"
]
},
{
@ -231,23 +233,20 @@
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X2,Z0,X1,X4,X0,Z1,X3))\n",
"──(Expectation(y|X3,Z0,X4,Z1,X2,X0,X1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X2,Z0,X1,X4,X0,Z1,X3,U) = P(y|v,X2,Z0,X1,X4,X0,Z1,X3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"Estimand expression:\n",
"Expectation(Derivative(y, Z1)/Derivative(v, Z1))\n",
"Estimand assumption 1, Exclusion: If we remove {Z1,Z0}→v, then ¬(Z1,Z0→y)\n",
"Estimand assumption 2, As-if-random: If U→→y then ¬(U →→Z1,Z0)\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,Z0,X4,Z1,X2,X0,X1,U) = P(y|v,X3,Z0,X4,Z1,X2,X0,X1)\n",
"\n",
"## Realized estimand\n",
"b: y~v+X2+Z0+X1+X4+X0+Z1+X3\n",
"b: y~v+X3+Z0+X4+Z1+X2+X0+X1\n",
"## Estimate\n",
"Value: 9.999999999999167\n",
"Value: 9.999999999999842\n",
"\n",
"Causal Estimate is 10.0\n"
]
@ -278,7 +277,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:dowhy.do_why:WARN: Causal Graph not provided. DoWhy will construct a graph based on data inputs.\n"
"WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.\n"
]
},
{
@ -343,13 +342,19 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'X2', 'U', 'X1', 'X0', 'X4', 'X3'}\n"
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'X3', 'X4', 'X2', 'X0', 'X1', 'U'}\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"There are unobserved common causes. Causal effect cannot be identified.\n",
"WARN: Do you want to continue by ignoring these unobserved confounders? [y/n] y\n"
]
@ -383,7 +388,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X2+X1+X0+X4+X3\n"
"INFO:dowhy.causal_estimator:b: y~v+X3+X4+X2+X0+X1\n"
]
},
{
@ -396,20 +401,20 @@
"## Target estimand\n",
"Estimand type: ate\n",
"### Estimand : 1\n",
"Estimand name: iv\n",
"No such variable found!\n",
"### Estimand : 2\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
"d \n",
"──(Expectation(y|X2,X1,X0,X4,X3))\n",
"──(Expectation(y|X3,X4,X2,X0,X1))\n",
"dv \n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X2,X1,X0,X4,X3,U) = P(y|v,X2,X1,X0,X4,X3)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"No such variable found!\n",
"Estimand assumption 1, Unconfoundedness: If U→v and U→y then P(y|v,X3,X4,X2,X0,X1,U) = P(y|v,X3,X4,X2,X0,X1)\n",
"\n",
"## Realized estimand\n",
"b: y~v+X2+X1+X0+X4+X3\n",
"b: y~v+X3+X4+X2+X0+X1\n",
"## Estimate\n",
"Value: 9.999999999999115\n",
"Value: 9.999999999999849\n",
"\n",
"## Statistical Significance\n",
"p-value: 0.0\n",
@ -453,7 +458,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X2+X1+X0+X4+X3+w_random\n"
"INFO:dowhy.causal_estimator:b: y~v+X3+X4+X2+X0+X1+w_random\n"
]
},
{
@ -461,8 +466,8 @@
"output_type": "stream",
"text": [
"Refute: Add a Random Common Cause\n",
"Estimated effect:(9.9999999999991154,)\n",
"New effect:(9.99999999999911,)\n",
"Estimated effect:(9.999999999999849,)\n",
"New effect:(9.9999999999998561,)\n",
"\n"
]
}
@ -489,7 +494,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~placebo+X2+X1+X0+X4+X3\n"
"INFO:dowhy.causal_estimator:b: y~placebo+X3+X4+X2+X0+X1\n"
]
},
{
@ -497,8 +502,8 @@
"output_type": "stream",
"text": [
"Refute: Use a Placebo Treatment\n",
"Estimated effect:(9.9999999999991154,)\n",
"New effect:(0.10873156114653899,)\n",
"Estimated effect:(9.999999999999849,)\n",
"New effect:(-0.039298742866320742,)\n",
"\n"
]
}
@ -526,7 +531,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: y~v+X2+X1+X0+X4+X3\n"
"INFO:dowhy.causal_estimator:b: y~v+X3+X4+X2+X0+X1\n"
]
},
{
@ -534,8 +539,8 @@
"output_type": "stream",
"text": [
"Refute: Use a subset of data\n",
"Estimated effect:(9.9999999999991154,)\n",
"New effect:(10.00000000000092,)\n",
"Estimated effect:(9.999999999999849,)\n",
"New effect:(9.9999999999998206,)\n",
"\n"
]
}

Просмотреть файл

@ -11,52 +11,26 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Installing package into /home/amit/R/x86_64-pc-linux-gnu-library/3.4\n",
"(as lib is unspecified)\n",
"\n",
" warnings.warn(x, RRuntimeWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The rpy2.ipython extension is already loaded. To reload it, use:\n",
" %reload_ext rpy2.ipython\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: trying URL 'https://cloud.r-project.org/src/contrib/Matching_4.9-3.tar.gz'\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Loading required package: MASS\n",
"\n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Content type 'application/x-gzip'\n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: length 302135 bytes (295 KB)\n",
"\n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: =\n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: \n",
"\n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: downloaded 295 KB\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: ## \n",
"## Matching (Version 4.9-3, Build Date: 2018-05-03)\n",
"## See http://sekhon.berkeley.edu/matching for additional documentation.\n",
"## Please cite software as:\n",
"## Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching\n",
"## Software with Automated Balance Optimization: The Matching package for R.''\n",
"## Journal of Statistical Software, 42(7): 1-52. \n",
"##\n",
"\n",
"\n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: \n",
" warnings.warn(x, RRuntimeWarning)\n",
"/usr/local/lib/python3.5/dist-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: The downloaded source packages are in\n",
"\t/tmp/RtmpVW644g/downloaded_packages\n",
" warnings.warn(x, RRuntimeWarning)\n"
]
},
@ -68,7 +42,7 @@
" dtype='<U9')"
]
},
"execution_count": 10,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@ -95,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -112,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -120,7 +94,7 @@
"output_type": "stream",
"text": [
"WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'educ', 'nodegr', 'hisp', 'married', 'black', 'age', 'U'}\n"
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'black', 'educ', 'U', 'nodegr', 'age', 'married', 'hisp'}\n"
]
},
{
@ -128,6 +102,9 @@
"output_type": "stream",
"text": [
"Model to find the causal effect of treatment treat on outcome re78\n",
"{'observed': 'yes'}\n",
"{'observed': 'yes'}\n",
"{'observed': 'no', 'label': 'Unobserved Confounders'}\n",
"There are unobserved common causes. Causal effect cannot be identified.\n",
"WARN: Do you want to continue by ignoring these unobserved confounders? [y/n] y\n"
]
@ -151,7 +128,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator\n",
"INFO:dowhy.causal_estimator:b: re78~treat+educ+nodegr+hisp+married+black+age\n"
"INFO:dowhy.causal_estimator:b: re78~treat+black+educ+nodegr+age+married+hisp\n"
]
},
{
@ -185,7 +162,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 4,
"metadata": {},
"outputs": [
{

Просмотреть файл

@ -1,7 +1,7 @@
import logging
import networkx as nx
import pygraphviz as pgv
class CausalGraph:
@ -19,20 +19,29 @@ class CausalGraph:
str(common_cause_names),
str(instrument_names)])
if graph is None:
self._graph = pgv.AGraph(strict=True, directed=True)
self._graph = nx.DiGraph()
self._graph = self.build_graph(common_cause_names,
instrument_names)
else:
self._graph = pgv.AGraph(graph, strict=True, directed=True)
self._graph = nx.DiGraph(nx.parse_gml(graph))
self._graph = nx.drawing.nx_agraph.from_agraph(self._graph)
self._graph = self.add_node_attributes(observed_node_names)
self._graph = self.add_unobserved_common_cause(observed_node_names)
self.logger = logging.getLogger(__name__)
def view_graph(self, layout="dot"):
agraph = nx.drawing.nx_agraph.to_agraph(self._graph)
agraph.draw("causal_model.png", format="png", prog=layout)
try:
import pygraphviz as pgv
agraph = nx.drawing.nx_agraph.to_agraph(self._graph)
agraph.draw("causal_model.png", format="png", prog=layout)
except:
print("Error in loading pygraphviz library. Ensure that graphviz and pygraphviz are installed.")
print("Using Matplotlib for plotting")
import matplotlib.pyplot as plt
plt.ion()
nx.draw_networkx(self._graph, pos=nx.shell_layout(self._graph))
plt.draw()
#plt.show()
def build_graph(self, common_cause_names, instrument_names):
self._graph.add_node(self.treatment_name, observed="yes")
@ -71,7 +80,7 @@ class CausalGraph:
if create_new_common_cause:
uc_label = "Unobserved Confounders"
self._graph.add_node('U', label=uc_label)
self._graph.add_node('U', label=uc_label, observed="no")
self._graph.add_edge('U', self.treatment_name)
self._graph.add_edge('U', self.outcome_name)
return self._graph
@ -105,18 +114,17 @@ class CausalGraph:
return set(nx.descendants(self._graph, node_name))
def all_observed(self, node_names):
agraph = nx.drawing.nx_agraph.to_agraph(self._graph)
for node_name in node_names:
if agraph.get_node(node_name).attr["observed"] != "yes":
print(self._graph.nodes[node_name])
if self._graph.nodes[node_name]["observed"] != "yes":
return False
return True
def filter_unobserved_variables(self, node_names):
observed_node_names = list()
agraph = nx.drawing.nx_agraph.to_agraph(self._graph)
for node_name in node_names:
if agraph.get_node(node_name).attr["observed"] == "yes":
if self._graph.nodes[node_name]["observed"] == "yes":
observed_node_names.append(node_name)
return observed_node_names

Просмотреть файл

@ -68,14 +68,26 @@ def linear_dataset(beta, num_common_causes, num_samples, num_instruments=0,
dot_graph = dot_graph + " ".join([v + "-> " + outcome + ";" for v in common_causes])
dot_graph = dot_graph + " ".join([v + "-> " + treatment + ";" for v in instruments])
dot_graph = dot_graph + "}"
gml_graph = ('graph['
'node[ id "{0}" label "{0}"]'
'node[ id "{1}" label "{1}"]'
'node[ id "{2}" label "{2}"]'
'edge[source "{0}" target "{1}"]'
'edge[source "{2}" target "{0}"]'
'edge[source "{2}" target "{1}"]'
).format(treatment, outcome, "Unobserved Confounders")
gml_graph = gml_graph + " ".join(['node[ id "{0}" label "{0}"] edge[ source "{0}" target "{1}"]'.format(v, treatment) for v in common_causes])
gml_graph = gml_graph + " ".join(['edge[ source "{0}" target "{1}"]'.format(v, outcome) for v in common_causes])
gml_graph = gml_graph + " ".join(['node[ id "{0}" label "{0}"] edge[ source "{0}" target "{1}"]'.format(v, treatment) for v in instruments])
gml_graph = gml_graph + ']'
ret_dict = {
"df": data,
"treatment_name": treatment,
"outcome_name": outcome,
"common_causes_names": common_causes,
"instrument_names": instruments,
"dot_graph": dot_graph,
"dot_graph": gml_graph,
"ate": ate
}
return ret_dict

Просмотреть файл

@ -45,6 +45,6 @@ setup(
python_requires='>=3.0',
install_requires=['numpy', 'scikit-learn', 'matplotlib', 'scipy',
'pandas', 'pygraphviz', 'networkx', 'sympy'],
'pandas', 'networkx', 'sympy'],
)