Added tests for notebooks and fixed confidence interval bug for econml (#204)

* fixed econml bug for confidence intervals

* removed input prompt from notebooks to enable automatic testing

* added tests for notebooks

* added nbformat, jupyter as req for github actions

* adding jupyter as dependency for workflow runs

* modified readme to be compliant with twine requirements
This commit is contained in:
Amit Sharma 2020-12-06 19:03:30 +05:30 коммит произвёл GitHub
Родитель 04a640628e
Коммит b3234704e8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
26 изменённых файлов: 34687 добавлений и 3706 удалений

2
.github/workflows/python-package.yml поставляемый
Просмотреть файл

@ -28,7 +28,7 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest twine
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install econml causalml
pip install econml causalml nbformat jupyter
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names

Просмотреть файл

@ -482,7 +482,7 @@ Bibtex::
Roadmap
=======
The `projects <https://github.com/microsoft/dowhy/projects>`_ page lists the next steps for DoWhy. If you would like to contribute, have a look at the current projects. If you have a specific request for DoWhy, please raise an issue `here <https://github.com/microsoft/dowhy/issues>`_.
The `projects <https://github.com/microsoft/dowhy/projects>`_ page lists the next steps for DoWhy. If you would like to contribute, have a look at the current projects. If you have a specific request for DoWhy, please `raise an issue <https://github.com/microsoft/dowhy/issues>`_.
Contributing
============

Просмотреть файл

@ -104,12 +104,12 @@
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAANMAAAASCAYAAADBs+vIAAAABHNCSVQICAgIfAhkiAAABjtJREFUaIHtmnmIV1UUxz/mqNNi2qINkWUNLVOZVuRSaD8VhxYMs5XIbBkjsrJNssWYgkBMRLOokawowT/6Q0yyzMxyacNQ2jQt/Y2Kik2maepk0/THOY/f8/3ue+/etxDB7wuP++Oe713OOe/ed+65P6igggpywY3ALGAF8AfQDsxN2edwYD6wE2gFtgOLgWt8nJOABuX9DBwE9gIrgXuAo0L6LuocTc9OA//OCL73tKXge0hixyRtOgDjgK+A/cCfwGrgPsptltTGAKcBbyC+a0XsPgM4IYRfxM0vrrpAMnslmVcQt/vaNPgFVQHiM0BfVWYbcJ7lAGGYCkzUvt4DWoAewKVAAVikvJuAV4EdwDJgC3AKMBp4HbhaOe2GMfYijg1iv6FuLfBcyFwHA8OAD1LwPSSxY5I2c4HbgF3APOAAMAKx5eXAHT5uUhvXAp8DPYEFwHqgPzABuAq4AvjNMDcXv7jqAsnfVdd5+dELeFm5x8WRhwJnI7tEgXRfpnHa/i2gs0Heyfd7GDCS8h2oBnF6O3CDoY+iPlngCx3nugz4Sezo2uZ65WwCTvbVdwYWqmy0rz6pjRer7MFA/XStf83QpoibX1x1gWQ2dp2XHx2Aj4FfgBcxfJmiUCD5YuqC7DDNmBeSC57SecwyyIpks5j66BjbgI4Z8wu429GmzdvKGW+Q9VPZJ5bjhdm4Vus3U74Iu1IKx44NyIq4+SWtLgXyX0wTgH+AIUAjFmFeVhiBhHMzdALXAhcCh4CvkV3dFoe1/DtE3gWJY09HHPstsBzzWSYM92o5x7KdKz8P1Gi5ySDz6gYjm9lfMX2F2Xiolh8hfvRjH7AKqAcGAksDche/ZKlLHJK8L3XAFGCmcoeZSHktpsu0PASsQRaSH8uRA+SvMf1UUYqVPwzh1ADvBOo2A3cBn1nM9WjEuG3I2SFrfl5o0fJMg+wsLav09/qIfqJsfK6WG0LabkQW0zmULyYXv2Sliw1c35cq5W9BvuChiMrgpEFPLScin8PBSFhwEbLLDQHetehnCrIQFyGxexBvItnCGiTU6AM0Ab2RxEBfizFuBrojL9LWHPh54X0tHwVO9NV34sikSVjGzUOUjbtpuTekrVffPVDv6pesdIlDkvflWeBiJLN7MOnABZKfmZq07SFkon4cg7yE7cCgiD4eUs46jjSwDaZp2/kW3FXKHWnZtyu/QD5npo7IgvbSuk1IGPIDsBs5r7YDAyL6iLPxbKIP2i+o/MmIMfwI80taXQqkS5aFzWsAEvpODdQ3YrBLXl+mPVquofzAd4DSDtg/pP0DiDF/ROL23Y7jexmmITG8C5C06zZKafos+XmiDVnQk5Bweaw+G5E57lPerpD2Njb2vjzdDDJ//Z4QeRBhfkmrS1qY5lWFJEY2AJPTDlAg+Wq/W9ua7mCglFqcZJA9rLLvKIWLruhG6csYhZnKa7Ts15UP+X2ZolCNXK6GnUltbdygvKYQuZc2H245L1u/+BGnC6S3l2le3Ym/sPeeGZBfAmKpDnI+8vULZoK8hMTmQP0TSAy/FskItpAMA7U0ZYc8VANjkF1xjkWfrvz/Ercima95BpmLjZdpWU+5H7siF7YHgC8t52XjlyCidMkKpnm1Eu7nS5Bz1ErgJyyy0wXiV3stcvPcySBboO0fCdTXI075nSPDh8nKX43dGamO8vsNkDPaRu0rKvsyRjkLLcZKwvdQIL8v0/GGun7ILr4bODUgc7UxuF/aJvWLqy5+FIi3V9r3xY9GLO6ZRukDpdz/IORfDCC72OM+/lLgDCSlWQz0NR5ZvdORe6Y1yhuF7O4NlGLyscDzWr8CORgHUfTNA+AW4DEkzd6MxNW1OlY1cqaZZujHg3dXNDuCk5TvasekbZYgGabvEf3rEP0PImeQ7T5uEhsD3I
/8neglJJxbhxzMhyLniacD/KR+cdEF3O2V9n1xRiPRsWExwC9qfe+Q/nogt+rNyGVbC5IxCSYe4sZtBz4NtLkS+fSvRw7Ah5FdbAlyb9IhQs867XMrdv94cOU3xuhSzKjNROAbRP9WJEx5Bfljqmv/Jht76IWklXcgfmwm/I+uSf3ioouNPsWM5hU1tvXfiSqooIIKKqigggoq+N/iX2Wlomzc4KF+AAAAAElFTkSuQmCC\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAOAAAAASCAYAAABCd9LzAAAG+ElEQVR4nO3aeaweVRkG8F9Ja1txQVG4blhaNaIFQbQRpeUCgoGCtCraGBWNtBolilqEGNGLibF1qRVNsETjAgSjLAZji0tpbImGRilBU5eSLvRWEdpaLBQUKv7xnsn97nTmu7Pc5Z/vSW7Od8+cd9555rznnPc8ZyYNDAzooYceJgaH5f5/B76JDfg3nsL1LX2ciVvxAP6Dv+MXOHcUfE/CYtyFR/Aofo8PO5RbU5u67d+fnr3b38EJ4gLz8UsM4jFsxU9wyihxORIXiz6/L/l4GHfig12ea3sXHw8UtG8aq3X4w3Ksxc7Ufi824fOJaxkqxf3knNFn8RrRmYN4ZQVC3fBlXJbudRt24/k4Gf1Y3dL39Xg3HsSNOICzcA3eiPeNgk3d9vfgqpLnnYszsGaCuCzHp7EHPxX98TJcgLen9p1B3ITLhcn/P7AO9+NovA3fwTmpzVMF93wYKwvqHymoaxIvdfnDJ3A3fiXe8+F4AwawJP3embOpHPeTcino6cnoPpwmXuANeE8FcnksxrX4QXrQ/+auT8ETLXwvxC3YhjmCJDwNN+M88VJvaWHTxEc3/E502AWiY8aTSx924SGcIIIpw+m4I91rZksuZ4gg/Tn+11Hfh414iVi9bs7db3sqZ1T0XzdemvKfhscL7vdFfEZMNh/pqK8V9/l0YB22KJ6d6mBqesD7Sx6C4YOvie+FqfyaoeCTfF2Zfl/S0qaJjzIcLwJ2lwjONs/VxOalor/vMjz4iHe/X8zSVdCNyx34meGDj0jFvp1+91f00w1146Up/6LBBz9O5cs76mrHfT4FHS2cJcisFB0xH7MFmY1i9myLvlRuLbiW1c0VK0L2IuraNPFRhiWp/K5D903jwWVLKufgeYYP2nl4pkjLqqAbl27Igu/JkutTxQp2jNjP3ov1NX2UYTT5w/mpvLejrnbcj9UAfH0qHxcb1tm56+tFGvJQCx/ZCzy24FqWRkxOv//S0KaJjyJMF4F1UOyD8hgPLntxOVZgswi2PZiFt4o9zoe6cKjKpQyTDe1Jby9p04frcnXb8AH8poavIrTlvxTPwLPxOpwqBt+yjja1475MkWqLo1J5mUgR5ooZ5gShQM0TylMbZKnPJ/HcjvophgsHz2lh08RHEd6JI0Tg5TfsTf00sVkpxJDJYq9yhRBEduL7Dk3NijASlzIsEwG5WqiBeXxPKId9Yg95PFaJPeEaIbi0xUrN+S8VyuelYvDdjrMNX0Rqx/1YDcDsvk+K2eVOoVb9UexdBsXGuUz6rYIfiY6cJWa0VfiGUO7mijyc4XuRujZNfBQhS9lWTSAXQgG8SQTbLBHoJ4uU9Qah3o2EkbgU4WP4lFiJ31vS5iqxf/ynUHP/JI5TVohVd6CGvzK04d8njn36xCCeKVa513a0qR33YzUA96VykyF1K8MBQzPgnBY+Doo8/AoxC12U/rYICX5/avdgC5smPvJ4dWo7aPixy3hz6Rcy/G1i1dwq+uJuERy7xCDppoJW4ZLHJWJi2CzUxr0V7TJkws28mnZ59GvPn5ggbhWr35H4Yce1famsHPdjtQf8a+6B8vhXKqe39POEeKnLc/XThDq1W+wh2tg08dGJqoLFWHM5L5XrCnwfECLBQpykWNihvvhyKb4uVrMzVUtx88hSvMMb2HZiNPh3YoeYVE40JOrUjvuxWgHXihz4VSU+ss1pt8Btg0VC/btxDG2qtJ8mUq6DImibYLS4TE1l2VFDVl+m5tblcrkYfPeIla/J4COOO6g2KLqhLf8ivDCV2WRUO+7bDsBZ4guEKbn6HeIs6Bh8PHftbLxFzBJlalhVPKug7kR8Rcw2ywqu17Vp4iPDhUIEWWNkwWKsuWxI5RK8KGdzDt4k1LvfljxfHS5XJt9/ECvf7u7NHad4hZuBb6XfbT+JbML/FUL1zOMwcd53VG
qfrWy14z6fgi5IfwydM50iNq3Ei1za0X6tOOA81qE570fFcr5CnIdsSu0WiBnjYvHpUVPfhHT8mEhx9ouOnJ/qzhff3+VR16aJjwxZynZtlzbjxeUm/Bpvxp8Nfad4nEjPJon95J6WXC7CF0QfbxACTB7bDfUrvEvsv9aLIN4vJvf5YuVdja/m7rFAvXhpwv9cfEmIKdvStaOFkDIz2S/OPVetuM9/ijYgpNYy7DD8U6Htygcgsax/TihCLxAfzW5IpDbm2tb1Tci9i0RnTRcb6TXp/oMl96lr08QH0bGbU5sZRt4zjQeXKSJAFok06elCFNmIq4VU3pbLgO79SJzp9Xf8f5pQPE8ydAyxT6Sv16W//BcvI/kpipe6/Gen5zoVLxbHL4/ib+IY6GrFolLluM8PwB566GEcMVYiTA899FABvQHYQw8TiP8Dru0Pu59nR7AAAAAASUVORK5CYII=\n",
"text/latex": [
"$\\displaystyle 1.625771192059154$"
"$\\displaystyle 1.6199979987251836$"
],
"text/plain": [
"1.625771192059154"
"1.6199979987251836"
]
},
"execution_count": 4,
@ -172,27 +172,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['Z', 'U']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt) [y/n] y\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n",
"INFO:dowhy.causal_identifier:Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True.\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n",
"INFO:dowhy.causal_identifier:Frontdoor variables for treatment and outcome:[]\n"
]
}
],
"source": [
"identification = model.identify_effect()"
"identification = model.identify_effect(proceed_when_unidentifiable=True)"
]
},
{
@ -211,7 +199,6 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['Z', 'U']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n"
]
},
@ -227,6 +214,7 @@
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n",
"INFO:dowhy.causal_identifier:Frontdoor variables for treatment and outcome:[]\n",
"INFO:dowhy.do_sampler:Using WeightingSampler for do sampling.\n",
"INFO:dowhy.do_sampler:Caution: do samplers assume iid data.\n"
]
@ -238,7 +226,8 @@
"sampler = WeightingSampler(df,\n",
" causal_model=model,\n",
" keep_original_treatment=True,\n",
" variable_types={'D': 'b', 'Z': 'c', 'Y': 'c'})\n",
" variable_types={'D': 'b', 'Z': 'c', 'Y': 'c'}\n",
" )\n",
"\n"
]
},
@ -267,12 +256,12 @@
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAOAAAAASCAYAAABCd9LzAAAABHNCSVQICAgIfAhkiAAAB6NJREFUaIHt2m2wVlUVB/AfCokoWRnBVE4SQt7EAalAJ82LJTNJOGSvU5k1ZdmrNakUTcOdZprEHKayssxeJnP6UPmSmimZhZJWVkyhQlhckiGmCF8wEARvH9Y+8xzOc869zznPvU0fnv/MM/vetffaZ+393y9rr73poYce/m/wRlyBu/A4hvD9Lup7Ib6NbdiLQXwRz64ovxi3Yyv24G/4IU4Z5htNdPJ4h2jnEN5bkr8Sd+DhVP9O/BErcHRFnU10BnN2FH/bK3TG4Xz8Bk/gP7gPF+CQCp26nDSxq+k46pTLdw1jU/Y7UFJ/XV6OFmPiejyUdB7D3XiP6j6GVye97aKft+E2nJUvNK6gtA5zBJlbcTyuFYO0Lmbg13gebsQGzMdCbMQr8e9c+ZW4JMluwA4ch7MxHu/UTmITnTyOwZ9xKI4Ug/nqQpl9+AMewD9xBE7Gy0WnniwI7VZnEM8Sk6GIJ3B5ifxavC194yfYjTPRh2tE+/Ooy0lTu5qMozpczsXSinpOwxm4Ba8r5NXl5QJciX/gTvwdU3EOjsKP8SYx4fO4DBentt+a2jIFL8PPUztLsRAzxcTs190OeFvS/0hBvirJv56TTRMr1nYxOIo2DYnVUJc6eYwTnfFXfEH1DjixQv9zSedro6QzmH6d4vVabXxuTv4M3JTyzino1OGkqV3UH0fdcpnHPan82SV5dXk5A0u073TTxGQcwhsKeecn+XcFF0VMqLChDf2aT8AZSXezduMna7lLRyTZglT+xor6HseugqyJTh4X4mm8CgOqJ2AV5iSd1aOkM6jeQP9equtDJXlzU94vcrK6nDS1q4h+I4+jbrnMcGKqZ6vwajpFEy6XJ50rcrLDxM66Rfnka8NwPmw3WJjS28Ugz2MX1mKS2PJhk3AP5jt4NScmyGSxW+XRRCdDHy7Fl7Bm+KZUYklK/zSKOocJN225WCAWqh5I01JatjNkstO0BkJdTpra1QTdcJnH+1L6LeVnwCo04fKplO7Pyc4UruZ1oo8XY5nos9KYxPgaH6yDl6T0LxX5m7AIs8SheKcwdJXwz28QZ4EZwpVYjfcX6miiQ7T5GuFCLK/RpovEOfEocWY4VRB26SjqTEu25bEZ78avCvIdKZ1eUs+LUzo+/b1BfU6a2tUETbnM43CxSBzQfo4vogmXeWRnUvhZTv6KlD4pgjuzC3prRIDqX518pF9zF/Qqw7t0mc/9qYJ8qSAjH83aJAINVair81lBUn5FGhjBXuJ8kv/GreJAPhzq6KwQZ46pYieaLc5kT4vgypxC+benOh/Cc3LyCcKVy76ZtbMpJ3XtKqJf5+OoCf8Zzkvlb+6gbBMu87g86d1SkF+Z5PvFhD5VTPQTtc7fv+z0I/3+txPwEmH4KrFqT8I8LcMvK6mnrs6CVL4oHxjB3jymigDIRhE5mzdGOhkysq8vyA8Vq292HfAN4VLfLwbxlpS3IJVvOgHr2lVEv87GURP+81ibyi0ZoVweTXj5aPrOgw5e+AgOhsQOeGwhb5KIsOYXxWHRr/kEzKKKn6jI/0rK/0DhW9eVlJ0kDtUHtFyrJjrjRUc/IM40eQyoH4R5kbjfWT/GOscJ24rXA8Rut0xcpTyJR4X7dnz6xpCWi1qXk27syqPfyOMoK1OH/zxOSPoPa3Y27ZSXD6fv3K91Bs9jZcq/p0L/6pR/YSYYqyDMxpTOqsifmdLsPJLd19xZUnY3fitsPSknr6tzZLKnTwzWvAuyIpX5Zvq/7M6riC1iMp+gPXAwmjrZeaEYnSQCASuFizNR3NctFVHLmeKcuDmVrctJN3bVRRP+82gafMnQCS8fExHP9SIIVfYIIevjRyvqeCSlh2eCsQrCZB25SHRcPuo2WVz47sa9SZbtSFMq6svk+3Kyujp7BUFlmCfIvV
t0YtUKVsTzU1qH9Lo6WVSy03sweKuIfv4gJ6vLyVjYVYUm/GeYiHNFf1bx2wmG42WZCNCsE5HOHSVliODVEF6qvY9pBWU26wD9RnYdZgh3p+xysc6l75u1zjIvKJR/rWjIHgc/F2qiU4UB5S7oLBEpK+IQrTPT2lHQ6VO+kxwrghBDyiO2zyyRzRW7006tQZWh7kV8U7vy6DfyOOqGy3OT7k0j2NGEF/hMyrtP+5mvDFkA7OMF+SLRjkfydhR3wKVaT3wyH/cUcatPzPyLcuXvEP7zdO2XtR8Uz56+LN7FPSgCAguFm/PpXNkfiXue16Ry2Ru6PuGejMMnHXzeaKJTF2fh82Jn3JzqmorTxXlku3j50K3OW8TZbI1wh3aJxW2xWOF/qvzJ12oxMNcnnb6ks0cEI7YVytfhpBu76o6jbrjM3M+rSvLyaMLLeVpR87tEAKaIwVy7iIcRJ4lFbbG4jpgu+uOAWOQfqzJyQPuj1vxvsOTjQ9ojPhmOwXfEW7p9gsSqh78ThJ99r3j5sF+8KrhZrB5laKJThgHlO+BsEZxYJwbNftF5v0s6ZStiE53Thbu4QZwfnhK72Gpx31R8s5vhYvw+6ewV7uBXxYPrKtThpKldA+qNI5px2afz4EsTXkZqR9W1whRxXtwi+niHWFTmj2BjDz300EMPPfTQQw89jDX+C3WMMnzb7dZiAAAAAElFTkSuQmCC\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAANIAAAASCAYAAAAucYD2AAAHz0lEQVR4nO3ae7BXVRUH8A9IoamBYyHTk0dSZDapRVlCYEkJltK7RntMSI049EJpaBRwpgErGaSawswybLQkKhvIFzICPWxKqMgMFBCBNBEhDDBF+2PtMxzOPefe3zm/e//qfmfu7N/dZ+2z1ll7r70ee/eZPXu2XvSiF+2hb0nfy3AdduApbMECHFfz3X1wIe7Bk/gP/ojPVvCFibgd27Afm3AzTq+gvxIr8HCi34W1mIXjS+iPx2T8HA+kMXuwBp8ukeuTeK6Lv4MVsuVxfo5+cjfIlUddndWhb0cueEca+4hYSztwGyaU0G5RreNHuolHEd02L/0KA4fjtxiEX+J+jMLn8G68DY+3ICDcgI/hX7gR+3AWvoO34uMF+itxaXr/L7ATr8K5eH+iv6Ew5gu4F3ckPkfjLZiNKen3wzn6Dyb+/8RKbMUJeB+uxdmJ5rlEvw5zKr5vNM7ErzvRAbwc3xKbyTEVNHXlylBXZ3Xpm8oFX8MlwmBvSbxejNMwFstLxuwRm3YRT5b0NeWRoVvnpU8htLsN4zEN38z1zxeLdpHwKF1hEpZiszDEnan/+fgZzhETtzT1D8Z2PIbXC6PIMA53pXcNK/A5EgdK+H8VM4USLsr1nymMbRmezfUPxh+Ecj+QZOwKvxOGeq6YxDL0EUY+VHzrdOGlry3QNZGrrs6a6Lipvi7ENbhebGj/LTx/Hp4u9G1J7RCtoQmPDN0+L3nXPFwY0RZ8u/DCWSI0uyC9uCtMSu1VDhkR8bGXpd8X5/pfKdzkPQ6fYGIn2Ct2miLKjAh+mtoTC/134VcOVwoRFnw3/R5b8c48ThZGtF0ouQrTxGR8SuivCk3kqquzJjpuIld/sZFtVb7AqV7graJdHt0+L3lDGpfa20sG7sVv8AKxgLrC4NRuKnmW9Y0WHgo2CmWMwosK9GNwLO5sgW+G96T2LzXGZIp/pgXaKan9vuocaSTm4WqsqiFHq3LV1Vl367hKrrOEQS4V62giZoj0oCpvy9Bf5C0zE/04HFFC1w6PHpmXfI706tRuqBi4UXisESLB7wyZFxpa8iwLHfql3/eLIsEMEULeJ+L3x4WXfK9ww5/phN90EecOwBtxhjCieV3ImaGfQznbrV3QHiUm+6COoUD+fYvFjjmzRRnqylVXZ+3quFW53pTaA6Lw87rC81UiHHqs5L2Dhd7y2Cw8x93dwKPH5iVvSANSu6dicNY/sAVGy/BRfBE3iUkk4tZ88p6vBC4QYeV1Il7N8AB+qGM4ksd0kQRmuFVU3MomqwzzxGQsF3liZ/iQ0MEyhxcy8rgcpwiD3t+iDE3kWqCezurSN5FrUGovEQY7WhRthuIbYjO+WceQ8AdYjb+JCGiYCP+niILO6fhzmzx6bF66Kl82xU2JwXDxoYuEK10nPnprosuHkJdiiZjQ4SIXO02Egj8WFZoqDBYJ5GBRURkmdqpTW5B1Gr4kPOMFLdBnYd2iiudvFrvdVaIg0RStyFVXZ+3ouFW5sjX1jPB0a0Rl7K8id96Gt+sYgs0ROcmjosK7XhS25osoYHabPHp0XvKGlHmcAcqR9e9ugdlBkad8WXiFT6S/jaL0vTfRZTvgWFGavUV4sU1CmfcKxWxPwherdkU8Kmr+48UZwI+6oL9YGPh9Ih7f1Tm5k5L825SXVvslnhscKqo0QStyjVVPZ3Xpm8q1O7VrHarEZdjnkAcb1QmfPLKkfkwbPHp8XvKG9I/Ujqh4SVYBq8qhinhaTNzJokw9EOeJDz9R5FGbE+05qV1Z8p59otTYV7jlVvCQ+NiTdEysM3xelPjXC6V0duiXoasiwzFCfyNF/J4/VJyVaL6X/l/Qplx1ddaujluVK1tHuyueP5HaoyqeF5GF5/lqcV0ePT4v+RwpU/B4odB82HWsOIzdh99XMGoVHxHVuhtzff1TW1bizv
eXlTmr8JLUli34GSLOXScqQDtLaIo4Urjyg8KQyvBUJ89OFYt0jVgIZeFFHbnq6qwdHdeRa4VYkK/VcR1xqDCwWWvIqsT5CnBdHj0+L3mP9KAofQ/B1ALdHLEjLHZ43X04XiOKCEW8sKTvDfi62DHyFbXVqZ2ClxbGnC2M+IC4dZFhhPIwtK84YxiU6J8oPL8s8f6TuF7SihERJ9jHicS3qsiwX1wpKfvLDm2vT///pE256uqsiY6byPWQOHt5hShH5zEe7xKeJF/tG6n8fHKIuH3A4Tcu6vLo8XkpXhG6SChyYRr0d5GkjRMh3VcK9CvEQd9QHWPVO9IHrBc50UhR798v8qcdOdol4gzjnYlndndqpAhJ+oh8K389aQLmip1kc3p2gkgyh6Xx+coUkaddIbzKapE4FrFFJONFZGHdNSXP2kUTuerqrImOm+prqtjl54s5XyvWyHnpXZMdXh3+sMjPVgkj2Ss26YkiElguqnHt8GiClr+/aEgPinOYK8TdugnintHVwisVd/fOsESEceeLWHW7WIRzRbKex7OJ19Q0ZpI4/N0llLhQeMs87hT3xM4QCh0ovOUG4TkX6pgMZ+daR4iYtwx367gwRiY+VUWGdtFErro6a6LjpvraJqqBl4uq2hj8W3iRuSIfy2OlOMc8RXjGo4VHWSPmcrGO9/nq8miClr+/eNeuF73oRQP01DlSL3rxf4VeQ+pFL7oB/wOm+BUgbC39HwAAAABJRU5ErkJggg==\n",
"text/latex": [
"$\\displaystyle 1.0884335905187326$"
"$\\displaystyle 0.983274288265642$"
],
"text/plain": [
"1.0884335905187326"
"0.983274288265642"
]
},
"execution_count": 9,
@ -308,7 +297,20 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -268,7 +268,7 @@
}
],
"source": [
"identified_estimand = model.identify_effect()\n",
"identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)\n",
"print(identified_estimand)"
]
},

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -136,7 +136,7 @@
" treatment='treat',\n",
" outcome='re78',\n",
" common_causes='nodegr+black+hisp+age+educ+married'.split('+'))\n",
"identified_estimand = model.identify_effect()\n",
"identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)\n",
"estimate = model.estimate_effect(identified_estimand,\n",
" method_name=\"backdoor.propensity_score_weighting\",\n",
" method_params={\"weighting_scheme\":\"ips_weight\"})\n",

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -20,8 +20,6 @@
"metadata": {},
"outputs": [],
"source": [
"import os, sys\n",
"sys.path.append(os.path.abspath(\"../../../\"))\n",
"import pandas as pd\n",
"import numpy as np\n",
"import dowhy\n",
@ -418,27 +416,40 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['gestat10', 'U']\n",
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n"
"WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.\n",
"INFO:dowhy.causal_identifier:Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True.\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n",
"INFO:dowhy.causal_identifier:Frontdoor variables for treatment and outcome:[]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt) [y/n] y\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
"Estimand type: nonparametric-ate\n",
"\n",
"### Estimand : 1\n",
"Estimand name: backdoor1 (Default)\n",
"Estimand expression:\n",
" d \n",
"────────────(Expectation(outcome|gestat10))\n",
"d[treatment] \n",
"Estimand assumption 1, Unconfoundedness: If U→{treatment} and U→outcome then P(outcome|treatment,gestat10,U) = P(outcome|treatment,gestat10)\n",
"\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"No such variable found!\n",
"\n",
"### Estimand : 3\n",
"Estimand name: frontdoor\n",
"No such variable found!\n",
"\n"
]
}
],
"source": [
"identified_estimand = model.identify_effect()"
"identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)\n",
"print(identified_estimand)"
]
},
{
@ -464,8 +475,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n"
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n"
]
},
{
@ -474,29 +485,19 @@
"text": [
"*** Causal Estimate ***\n",
"\n",
"## Target estimand\n",
"## Identified estimand\n",
"Estimand type: nonparametric-ate\n",
"### Estimand : 1\n",
"Estimand name: backdoor\n",
"Estimand expression:\n",
" d \n",
"────────────(Expectation(outcome|gestat10))\n",
"d[treatment] \n",
"Estimand assumption 1, Unconfoundedness: If U→{treatment} and U→outcome then P(outcome|treatment,gestat10,U) = P(outcome|treatment,gestat10)\n",
"### Estimand : 2\n",
"Estimand name: iv\n",
"No such variable found!\n",
"\n",
"## Realized estimand\n",
"b: outcome~treatment+gestat10\n",
"## Estimate\n",
"Value: -0.025200267022696315\n",
"Target units: ate\n",
"\n",
"## Statistical Significance\n",
"p-value: <0.001\n",
"## Estimate\n",
"Mean value: -0.025200267022700423\n",
"p-value: [7.18902894e-08]\n",
"\n",
"ATE -0.025200267022696926\n",
"Causal Estimate is -0.025200267022696315\n"
"Causal Estimate is -0.025200267022700423\n"
]
}
],
@ -528,8 +529,8 @@
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/arshia/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n"
]
},
{
@ -576,8 +577,8 @@
"text": [
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10+w_random\n",
"/home/arshia/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n"
]
},
{
@ -585,8 +586,8 @@
"output_type": "stream",
"text": [
"Refute: Add a Random Common Cause\n",
"Estimated effect:(-0.012600133511348465,)\n",
"New effect:(-0.02891355140186916,)\n",
"Estimated effect:-0.012600133511348465\n",
"New effect:-0.025826101468624834\n",
"\n"
]
}
@ -613,10 +614,89 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_refuters.placebo_treatment_refuter:Refutation over 20 simulated datasets of permute treatment\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/arshia/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~placebo+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"WARNING:dowhy.causal_refuters.placebo_treatment_refuter:We assume a Normal Distribution as the sample has less than 100 examples.\n",
" Note: The underlying distribution may not be Normal. We assume that it approaches normal with the increase in sample size.\n"
]
},
{
@ -624,15 +704,17 @@
"output_type": "stream",
"text": [
"Refute: Use a Placebo Treatment\n",
"Estimated effect:(-0.012600133511348465,)\n",
"New effect:(-0.16384345794392524,)\n",
"Estimated effect:-0.012600133511348465\n",
"New effect:0.06444843124165553\n",
"p value:0.31698886663194925\n",
"\n"
]
}
],
"source": [
"res_placebo=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\")\n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\",\n",
" num_simulations=20) \n",
"print(res_placebo)"
]
},
@ -652,10 +734,89 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_refuters.data_subset_refuter:Refutation over 0.9 simulated datasets of size 21571.2 each\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/arshia/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator\n",
"INFO:dowhy.causal_estimator:b: outcome~treatment+gestat10\n",
"/home/amit/py-envs/env3.8/lib/python3.8/site-packages/sklearn/utils/validation.py:72: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" return f(**kwargs)\n",
"WARNING:dowhy.causal_refuters.data_subset_refuter:We assume a Normal Distribution as the sample has less than 100 examples.\n",
" Note: The underlying distribution may not be Normal. We assume that it approaches normal with the increase in sample size.\n"
]
},
{
@ -663,24 +824,19 @@
"output_type": "stream",
"text": [
"Refute: Use a subset of data\n",
"Estimated effect:(-0.012600133511348465,)\n",
"New effect:(0.15136062305873627,)\n",
"Estimated effect:-0.012600133511348465\n",
"New effect:-0.08120856705762365\n",
"p value:0.21986894766211718\n",
"\n"
]
}
],
"source": [
"res_subset=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.9)\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.9,\n",
" num_simulations=20)\n",
"print(res_subset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -739,10 +739,11 @@ class CausalEstimate:
s += "\n## Estimate\n"
s += "Mean value: {0}\n".format(self.value)
s += ""
if hasattr(self, "cate_estimates"):
s += "Effect estimates: {0}\n".format(self.cate_estimates)
if self.estimator._significance_test:
s += "p-value: {0}\n".format(self.estimator.signif_results_tostr(self.test_stat_significance()))
if self.estimator._confidence_intervals:
s += "Standard error: {0}\n".format(self.get_standard_error())
s += "{0}% confidence interval: {1}\n".format(100 * self.estimator.confidence_level, self.get_confidence_intervals())
if self.conditional_estimates is not None:
s += "### Conditional Estimates\n"

Просмотреть файл

@ -102,17 +102,24 @@ class Econml(CausalEstimator):
est = self.estimator.effect(X_test, T0=T0_test, T1=T1_test)
ate = np.mean(est)
est_interval = None
self.effect_intervals = None
if self._confidence_intervals:
est_interval = self.estimator.effect_interval(X_test, T0=T0_test, T1=T1_test)
self.effect_intervals = self.estimator.effect_interval(
X_test, T0=T0_test, T1=T1_test,
alpha=1-self.confidence_level)
estimate = CausalEstimate(estimate=ate,
target_estimand=self._target_estimand,
realized_estimand_expr=self.symbolic_estimator,
cate_estimates=est,
effect_intervals=est_interval,
effect_intervals=self.effect_intervals,
_estimator_object=self.estimator)
return estimate
def _estimate_confidence_intervals(self, confidence_level=None, method=None):
""" Returns None if the confidence interval has not been calculated.
"""
return self.effect_intervals
def _do(self, x):
raise NotImplementedError

Просмотреть файл

@ -11,31 +11,35 @@ from dowhy.utils.api import parse_state
class CausalIdentifier:
"""Class that implements different identification methods.
Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.
"""Class that implements different identification methods.
Other specific ways of identification, such as the ID* algorithm, minimal adjustment criteria, etc. will be added in the future.
If you'd like to contribute, please raise an issue or a pull request on Github.
Currently supports backdoor and instrumental variable identification methods. The identification is based on the causal graph provided.
Other specific ways of identification, such as the ID* algorithm, minimal adjustment criteria, etc. will be added in the future.
If you'd like to contribute, please raise an issue or a pull request on Github.
"""
NONPARAMETRIC_ATE="nonparametric-ate"
NONPARAMETRIC_NDE="nonparametric-nde"
NONPARAMETRIC_NIE="nonparametric-nie"
MAX_BACKDOOR_ITERATIONS = 100000
def __init__(self, graph, estimand_type, proceed_when_unidentifiable=False):
def __init__(self, graph, estimand_type,
method_name = None,
proceed_when_unidentifiable=False):
self._graph = graph
self.estimand_type = estimand_type
self.treatment_name = graph.treatment_name
self.outcome_name = graph.outcome_name
self.method_name = method_name
self._proceed_when_unidentifiable = proceed_when_unidentifiable
self.logger = logging.getLogger(__name__)
def identify_effect(self):
"""Main method that returns an identified estimand (if one exists).
"""Main method that returns an identified estimand (if one exists).
If estimand_type is non-parametric ATE, then uses backdoor, instrumental variable and frontdoor identification methods, to check if an identified estimand exists, based on the causal graph.
If estimand_type is non-parametric ATE, then uses backdoor, instrumental variable and frontdoor identification methods, to check if an identified estimand exists, based on the causal graph.
:param self: instance of the CausalEstimator class (or its subclass)
:returns: target estimand, an instance of the IdentifiedEstimand class
@ -226,8 +230,8 @@ class CausalIdentifier:
)
return estimand
def identify_backdoor(self, treatment_name, outcome_name):
backdoor_sets = []
@ -246,17 +250,39 @@ class CausalIdentifier:
- set(outcome_name) \
- set(self._graph.get_instruments(treatment_name, outcome_name))
eligible_variables -= self._graph.get_descendants(treatment_name)
for size_candidate_set in range(1, len(eligible_variables)+1):
for candidate_set in itertools.combinations(eligible_variables, size_candidate_set):
check = self._graph.check_valid_backdoor_set(treatment_name,
outcome_name, candidate_set, backdoor_paths=backdoor_paths)
self.logger.debug("Candidate backdoor set: {0}, is_dseparated: {1}, No. of paths blocked by observed_nodes: {2}".format(candidate_set, check["is_dseparated"], check["num_paths_blocked_by_observed_nodes"]))
if check["is_dseparated"]:
backdoor_sets.append({
'backdoor_set': candidate_set,
'num_paths_blocked_by_observed_nodes': check["num_paths_blocked_by_observed_nodes"]})
#causes_t = self._graph.get_causes(self.treatment_name)
num_iterations = 0
found_valid_adjustment_set = False
if self.method_name == "auto":
for size_candidate_set in range(len(eligible_variables), 0, -1):
for candidate_set in itertools.combinations(eligible_variables, size_candidate_set):
check = self._graph.check_valid_backdoor_set(treatment_name,
outcome_name, candidate_set, backdoor_paths=backdoor_paths)
self.logger.debug("Candidate backdoor set: {0}, is_dseparated: {1}, No. of paths blocked by observed_nodes: {2}".format(candidate_set, check["is_dseparated"], check["num_paths_blocked_by_observed_nodes"]))
if check["is_dseparated"]:
backdoor_sets.append({
'backdoor_set': candidate_set,
'num_paths_blocked_by_observed_nodes': check["num_paths_blocked_by_observed_nodes"]})
if self._graph.all_observed(candidate_set):
found_valid_adjustment_set = True
num_iterations += 1
if num_iterations > CausalIdentifier.MAX_BACKDOOR_ITERATIONS:
break
if found_valid_adjustment_set:
break
elif self.method_name == "exhaustive-search":
for size_candidate_set in range(1, len(eligible_variables)+1):
for candidate_set in itertools.combinations(eligible_variables, size_candidate_set):
check = self._graph.check_valid_backdoor_set(treatment_name,
outcome_name, candidate_set, backdoor_paths=backdoor_paths)
self.logger.debug("Candidate backdoor set: {0}, is_dseparated: {1}, No. of paths blocked by observed_nodes: {2}".format(candidate_set, check["is_dseparated"], check["num_paths_blocked_by_observed_nodes"]))
if check["is_dseparated"]:
backdoor_sets.append({
'backdoor_set': candidate_set,
'num_paths_blocked_by_observed_nodes': check["num_paths_blocked_by_observed_nodes"]})
else:
raise ValueError("Identifier method " + self.method_name + "not supported. Try 'default' or 'exhaustive-search'.")
#causes_t = self._graph.get_causes(self.treatment_name)
#causes_y = self._graph.get_causes(self.outcome_name, remove_edges={'sources':self.treatment_name, 'targets':self.outcome_name})
#common_causes = list(causes_t.intersection(causes_y))
#self.logger.info("Common causes of treatment and outcome:" + str(common_causes))
@ -265,7 +291,7 @@ class CausalIdentifier:
return backdoor_sets
else:
return observed_backdoor_sets
def get_default_backdoor_set_id(self, backdoor_sets_dict):
# Adding a None estimand if no backdoor set found
if len(backdoor_sets_dict) == 0:
@ -279,7 +305,7 @@ class CausalIdentifier:
default_key = key
return default_key
def build_backdoor_estimands_dict(self, treatment_name, outcome_name,
def build_backdoor_estimands_dict(self, treatment_name, outcome_name,
backdoor_sets, estimands_dict, proceed_when_unidentifiable=None):
backdoor_variables_dict = {}
if proceed_when_unidentifiable is None:
@ -321,11 +347,11 @@ class CausalIdentifier:
estimands_dict["backdoor"+str(i+1)] = backdoor_estimand_expr
backdoor_variables_dict["backdoor"+str(i+1)] = backdoor_sets_arr[i]
return estimands_dict, backdoor_variables_dict
def identify_frontdoor(self):
""" Find a valid frontdoor variable if it exists.
Currently only supports a single variable frontdoor set.
def identify_frontdoor(self):
""" Find a valid frontdoor variable if it exists.
Currently only supports a single variable frontdoor set.
"""
frontdoor_var = None
frontdoor_paths = self._graph.get_all_directed_paths(self.treatment_name, self.outcome_name)
@ -342,9 +368,9 @@ class CausalIdentifier:
return parse_state(frontdoor_var)
def identify_mediation(self):
""" Find a valid mediator if it exists.
""" Find a valid mediator if it exists.
Currently only supports a single variable mediator set.
Currently only supports a single variable mediator set.
"""
mediation_var = None
mediation_paths = self._graph.get_all_directed_paths(self.treatment_name, self.outcome_name)
@ -407,7 +433,7 @@ class CausalIdentifier:
max_set_length = len(bdoor_set)
default_key = key
return default_key
def construct_backdoor_estimand(self, estimand_type, treatment_name,
outcome_name, common_causes):
# TODO: outputs string for now, but ideally should do symbolic
@ -550,7 +576,7 @@ class CausalIdentifier:
class IdentifiedEstimand:
"""Class for storing a causal estimand, typically as a result of the identification step.
"""Class for storing a causal estimand, typically as a result of the identification step.
"""
@ -582,8 +608,8 @@ class IdentifiedEstimand:
def get_backdoor_variables(self, key=None):
""" Return a list containing the backdoor variables.
If the calling estimator method is a backdoor method, return the
backdoor variables corresponding to its target estimand.
If the calling estimator method is a backdoor method, return the
backdoor variables corresponding to its target estimand.
Otherwise, return the backdoor variables for the default backdoor estimand.
"""
if key is None:

Просмотреть файл

@ -126,7 +126,8 @@ class CausalModel:
self._other_variables = kwargs
self.summary()
def identify_effect(self, estimand_type=None, proceed_when_unidentifiable=None):
def identify_effect(self, estimand_type=None,
method_name="auto", proceed_when_unidentifiable=None):
"""Identify the causal effect to be estimated, using properties of the causal graph.
:param proceed_when_unidentifiable: Binary flag indicating whether identification should proceed in the presence of (potential) unobserved confounders.
@ -140,6 +141,7 @@ class CausalModel:
self.identifier = CausalIdentifier(self._graph,
estimand_type,
method_name,
proceed_when_unidentifiable=proceed_when_unidentifiable)
identified_estimand = self.identifier.identify_effect()

75
tests/test_notebooks.py Normal file
Просмотреть файл

@ -0,0 +1,75 @@
import os
import subprocess
import tempfile
import nbformat
import pytest
# Directory holding the example notebooks; the path is relative to the
# current working directory.
NOTEBOOKS_PATH = "docs/source/example_notebooks/"

# File names (not full paths) of every notebook found in that directory.
with os.scandir(NOTEBOOKS_PATH) as _entries:
    notebooks_list = [entry.name for entry in _entries
                      if entry.name.endswith(".ipynb")]

# Notebooks that are marked as skipped when the test parameters are built.
advanced_notebooks = [
    # needs stdin input for identify in the weighting sampler
    "do_sampler_demo.ipynb",
    # need Rpy2 for the lalonde dataset
    "dowhy_refutation_testing.ipynb",
    "dowhy_lalonde_example.ipynb",
    "lalonde_pandas_api.ipynb",
    # very slow to execute
    "dowhy-conditional-treatment-effects.ipynb",
    "dowhy_refuter_notebook.ipynb",
    # also needs xgboost
    "DoWhy-The Causal Story Behind Hotel Booking Cancellations.ipynb",
]

# Put the current working directory (expected to be the dowhy root folder) on
# PYTHONPATH so that the notebooks can import dowhy.
_repo_root = os.getcwd()
_python_path = os.environ.get('PYTHONPATH')
if _python_path is None:
    os.environ['PYTHONPATH'] = _repo_root
elif _repo_root not in _python_path.split(os.pathsep):
    os.environ['PYTHONPATH'] = _python_path + os.pathsep + _repo_root
def _notebook_run(filepath):
    """Execute a notebook via nbconvert and collect its error outputs.

    The notebook is executed in a subprocess (``jupyter nbconvert --execute``)
    and the executed copy is written into a temporary directory, which is
    removed before this function returns.

    :param filepath: path of the ``.ipynb`` file to execute
    :returns: (parsed nb object, execution errors), where the errors are the
        cell outputs whose ``output_type`` is ``"error"``
    :raises subprocess.CalledProcessError: if the nbconvert command exits with
        a non-zero status

    Source of this function: http://www.christianmoscardi.com/blog/2016/01/20/jupyter-testing.html
    """
    # Write into a temporary directory rather than an open NamedTemporaryFile:
    # nbconvert opens the output path from another process, which is not
    # possible on Windows while the creating handle is still open.
    with tempfile.TemporaryDirectory() as tmp_dir:
        executed_path = os.path.join(tmp_dir, "executed.ipynb")
        args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
                # "--ExecutePreprocessor.timeout=600",
                "-y", "--no-prompt",
                "--output", executed_path, filepath]
        subprocess.check_call(args)

        with open(executed_path, encoding="utf-8") as fin:
            nb = nbformat.read(fin, nbformat.current_nbformat)

    # Only code cells carry an "outputs" entry; keep the error outputs.
    errors = [output
              for cell in nb.cells if "outputs" in cell
              for output in cell["outputs"]
              if output.output_type == "error"]

    return nb, errors
"""
def test_getstarted_notebook():
nb, errors = _notebook_run(NOTEBOOKS_PATH+ "dowhy_simple_example.ipynb")
assert errors == []
def test_confounder_notebook():
nb, errors = _notebook_run(NOTEBOOKS_PATH+"dowhy_confounder_example.ipynb")
assert errors == []
"""
# One pytest parameter per notebook, using the file name as the test id.
# Notebooks listed in advanced_notebooks additionally get the "skip" and
# "advanced" marks.
parameter_list = [
    pytest.param(notebook_name,
                 marks=[pytest.mark.skip, pytest.mark.advanced],
                 id=notebook_name)
    if notebook_name in advanced_notebooks
    else pytest.param(notebook_name, id=notebook_name)
    for notebook_name in notebooks_list
]
@pytest.mark.parametrize("notebook_filename", parameter_list)
def test_notebook(notebook_filename):
    """Run one example notebook and require that no cell output is an error.

    :param notebook_filename: file name of the notebook inside NOTEBOOKS_PATH
    """
    notebook_path = f"{NOTEBOOKS_PATH}{notebook_filename}"
    _, errors = _notebook_run(notebook_path)
    assert errors == []