Add test for CNTK_202_Language_Understanding.ipynb

2016-12-08 21:42:46 +01:00 · 2016-12-08 21:42:46 +01:00 · f7d0150221
--- a/Tests/EndToEndTests/CNTKv2Python/Examples/CNTK_202_Language_Understanding_test.py
+++ b/Tests/EndToEndTests/CNTKv2Python/Examples/CNTK_202_Language_Understanding_test.py
@ -0,0 +1,34 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+# Licensed under the MIT license. See LICENSE.md file in the project root
+# for full license information.
+# ==============================================================================
+
+import os
+import re
+import numpy
+
+# Directory that contains this test module.
+abs_path = os.path.dirname(os.path.abspath(__file__))
+# Path of the tutorial notebook under test: the "Tutorials" directory four
+# levels above this module's directory.
+# NOTE(review): presumably read by the harness that provides the `nb`
+# fixture to locate the notebook -- confirm against the shared conftest.
+notebook = os.path.join(abs_path, "..", "..", "..", "..", "Tutorials", "CNTK_202_Language_Understanding.ipynb")
+
+def test_cntk_202_language_understanding_noErrors(nb):
+    """Fail if any cell output in the notebook has output type "error".
+
+    Args:
+        nb: notebook object exposing a ``cells`` sequence (supplied by the
+            test harness).
+    """
+    errors = []
+    for cell in nb.cells:
+        # Cells without an 'outputs' entry cannot carry errors.
+        if 'outputs' not in cell:
+            continue
+        for output in cell['outputs']:
+            if output.output_type == "error":
+                errors.append(output)
+    # Printed so the collected error outputs show up in the test log.
+    print(errors)
+    assert errors == []
+
+def test_cntk_202_language_understanding_trainerror(nb):
+    """Check the evaluation metrics printed by the notebook against reference values.
+
+    For every code cell, the text of its first output is searched for
+    ``[Evaluation] ... metric = <float>%``. The first match per cell is
+    collected, and the collected values must agree with the expected ones
+    within an absolute tolerance of 0.2.
+
+    Args:
+        nb: notebook object exposing a ``cells`` sequence (supplied by the
+            test harness).
+    """
+    # Raw string: '\[' , '\d' and '\.' are regex escapes, not string escapes.
+    metric_re = re.compile(r'\[Evaluation\].* metric = (?P<metric>\d+\.\d+)%')
+    metrics = []
+    for cell in nb.cells:
+        if cell.cell_type != 'code':
+            continue
+        try:
+            text = cell.outputs[0]['text']
+        except (IndexError, KeyError):
+            # The cell has no outputs, or its first output carries no text.
+            continue
+        m = metric_re.search(text)
+        if m:
+            metrics.append(float(m.group('metric')))
+    expectedMetrics = [2.7, 2.2, 2.3, 2.1]
+    # Compare the counts first: numpy.allclose broadcasts its arguments, so a
+    # single extracted metric would be compared against every expected value
+    # and an empty list would raise instead of failing the assertion.
+    assert len(metrics) == len(expectedMetrics), metrics
+    # TODO tighten tolerances
+    assert numpy.allclose(expectedMetrics, metrics, atol=0.2)
--- a/Tutorials/CNTK_202_Language_Understanding.ipynb
+++ b/Tutorials/CNTK_202_Language_Understanding.ipynb
@ -41,12 +41,12 @@
    "In this tutorial we are going to use a (lightly preprocessed) version of the ATIS dataset. You can download the data automatically by running the cells below or by executing the manual instructions.\n",
    "\n",
    "#### Fallback manual instructions\n",
-    "Download the ATIS [training](https://github.com/Microsoft/CNTK/blob/master/Tutorials/SLUHandsOn/atis.train.ctf) \n",
-    "and [test](https://github.com/Microsoft/CNTK/blob/master/Tutorials/SLUHandsOn/atis.test.ctf) \n",
+    "Download the ATIS [training](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/SLUHandsOn/atis.train.ctf) \n",
+    "and [test](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/SLUHandsOn/atis.test.ctf) \n",
    "files and put them at the same folder as this notebook. If you want to see how the model is \n",
    "predicting on new sentences you will also need the vocabulary files for \n",
-    "[queries](https://github.com/Microsoft/CNTK/blob/master/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl) and\n",
-    "[slots](https://github.com/Microsoft/CNTK/blob/master/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl)"
+    "[queries](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl) and\n",
+    "[slots](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl)"
   ]
  },
  {
@ -58,25 +58,38 @@
   "outputs": [],
   "source": [
    "import requests\n",
+    "import os\n",
    "\n",
    "def download(url, filename):\n",
-    "    \"\"\" utility to download necessary data \"\"\"\n",
+    "    \"\"\" utility function to download a file \"\"\"\n",
    "    response = requests.get(url, stream=True)\n",
    "    with open(filename, \"wb\") as handle:\n",
    "        for data in response.iter_content():\n",
    "            handle.write(data)\n",
    "\n",
-    "url1 = \"https://github.com/Microsoft/CNTK/blob/master/Tutorials/SLUHandsOn/atis.%s.ctf?raw=true\"\n",
-    "url2 = \"https://github.com/Microsoft/CNTK/blob/master/Examples/LanguageUnderstanding/ATIS/BrainScript/%s.wl?raw=true\"\n",
-    "urls = [url1%\"train\", url1%\"test\", url2%\"query\", url2%\"slots\"]\n",
+    "locations = ['Tutorials/SLUHandsOn', 'Examples/LanguageUnderstanding/ATIS/BrainScript']\n",
    "\n",
-    "for t in urls:\n",
-    "    filename = t.split('/')[-1].split('?')[0]\n",
-    "    try:\n",
-    "        f = open(filename)\n",
-    "        f.close()\n",
-    "    except IOError:\n",
-    "        download(t, filename)\n"
+    "data = {\n",
+    "  'train': { 'file': 'atis.train.ctf', 'location': 0 },\n",
+    "  'test': { 'file': 'atis.test.ctf', 'location': 0 },\n",
+    "  'query': { 'file': 'query.wl', 'location': 1 },\n",
+    "  'slots': { 'file': 'slots.wl', 'location': 1 }\n",
+    "}\n",
+    "\n",
+    "for item in data.values():\n",
+    "    location = locations[item['location']]\n",
+    "    path = os.path.join('..', location, item['file'])\n",
+    "    if os.path.exists(path):\n",
+    "        print(\"Reusing locally cached:\", item['file'])\n",
+    "        # Update path\n",
+    "        item['file'] = path\n",
+    "    elif os.path.exists(item['file']):\n",
+    "        print(\"Reusing locally cached:\", item['file'])\n",
+    "    else:\n",
+    "        print(\"Starting download:\", item['file'])\n",
+    "        url = \"https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/%s/%s?raw=true\"%(location, item['file'])\n",
+    "        download(url, item['file'])\n",
+    "        print(\"Download completed\")\n"
   ]
  },
  {
@ -307,7 +320,7 @@
   "outputs": [],
   "source": [
    "# peek\n",
-    "reader = create_reader(\"atis.train.ctf\", is_training=True)\n",
+    "reader = create_reader(data['train']['file'], is_training=True)\n",
    "reader.streams.keys()"
   ]
  },
@ -419,7 +432,7 @@
    "def do_train():\n",
    "    global model\n",
    "    model = create_model()\n",
-    "    reader = create_reader(\"atis.train.ctf\", is_training=True)\n",
+    "    reader = create_reader(data['train']['file'], is_training=True)\n",
    "    train(reader, model)\n",
    "do_train()"
   ]
@ -511,7 +524,7 @@
   "outputs": [],
   "source": [
    "def do_test():\n",
-    "    reader = create_reader(\"atis.test.ctf\", is_training=False)\n",
+    "    reader = create_reader(data['test']['file'], is_training=False)\n",
    "    evaluate(reader, model)\n",
    "do_test()\n",
    "model.layers[2].b.value"
@ -533,8 +546,8 @@
   "outputs": [],
   "source": [
    "# load dictionaries\n",
-    "query_wl = [line.rstrip('\\n') for line in open('query.wl')]\n",
-    "slots_wl = [line.rstrip('\\n') for line in open('slots.wl')]\n",
+    "query_wl = [line.rstrip('\\n') for line in open(data['query']['file'])]\n",
+    "slots_wl = [line.rstrip('\\n') for line in open(data['slots']['file'])]\n",
    "query_dict = {query_wl[i]:i for i in range(len(query_wl))}\n",
    "slots_dict = {slots_wl[i]:i for i in range(len(slots_wl))}\n",
    "\n",
@ -619,7 +632,7 @@
    "> Note: training with Batch Normalization is currently only supported on GPU.\n",
    "\n",
    "So your task will be to insert batch-normalization layers before and after the recurrent LSTM layer.\n",
-    "If you have completed the [hands-on labs on image processing](https://github.com/Microsoft/CNTK/blob/master/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb),\n",
+    "If you have completed the [hands-on labs on image processing](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb),\n",
    "you may remember that the [batch-normalization layer](https://www.cntk.ai/pythondocs/layerref.html#batchnormalization-layernormalization-stabilizer) has this form:\n",
    "```\n",
    "    BatchNormalization()\n",
@ -647,8 +660,9 @@
    "            Dense(num_labels)\n",
    "        ])\n",
    "\n",
-    "do_train()\n",
-    "do_test()"
+    "# Enable these when done:\n",
+    "#do_train()\n",
+    "#do_test()"
   ]
  },
  {
@ -691,8 +705,10 @@
    "            Recurrence(LSTM(hidden_dim), go_backwards=False),\n",
    "            Dense(num_labels)\n",
    "        ])\n",
-    "do_train()\n",
-    "do_test()"
+    "    \n",
+    "# Enable these when done:\n",
+    "#do_train()\n",
+    "#do_test()"
   ]
  },
  {
@ -776,8 +792,10 @@
    "            Recurrence(LSTM(hidden_dim), go_backwards=False),\n",
    "            Dense(num_labels)\n",
    "        ])\n",
-    "do_train()\n",
-    "do_test()"
+    "\n",
+    "# Enable these when done:\n",
+    "#do_train()\n",
+    "#do_test()"
   ]
  },
  {