Add test for CNTK_202_Language_Understanding.ipynb

Mark Hillebrand 2016-12-08 21:42:46 +01:00
Parent 2b1237cd5a
Commit f7d0150221
2 changed files with 79 additions and 27 deletions

View file

@ -0,0 +1,34 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re
import numpy
abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Tutorials", "CNTK_202_Language_Understanding.ipynb")

def test_cntk_202_language_understanding_noErrors(nb):
    errors = [output for cell in nb.cells if 'outputs' in cell
              for output in cell['outputs'] if output.output_type == "error"]
    print(errors)
    assert errors == []

def test_cntk_202_language_understanding_trainerror(nb):
    metrics = []
    for cell in nb.cells:
        try:
            if cell.cell_type == 'code':
                # pick up the "[Evaluation] ... metric = x.y%" line printed by the evaluation cells
                m = re.search(r'\[Evaluation\].* metric = (?P<metric>\d+\.\d+)%', cell.outputs[0]['text'])
                if m:
                    metrics.append(float(m.group('metric')))
        except IndexError:
            pass
        except KeyError:
            pass
    expectedMetrics = [2.7, 2.2, 2.3, 2.1]
    # TODO tighten tolerances
    assert numpy.allclose(expectedMetrics, metrics, atol=0.2)
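
Both tests take an `nb` argument, i.e. they expect a pytest fixture that hands them the already-executed notebook; that fixture is not part of this diff. Below is a minimal sketch of how it could be supplied from a shared conftest.py, assuming nbformat and nbconvert are used to execute the notebook (the fixture name, the timeout value, and reading the module-level `notebook` path are assumptions, not code from this commit):

```python
# Hypothetical conftest.py sketch -- not part of this commit.
# Executes the notebook named by the test module's `notebook` variable and
# hands the executed notebook object to the tests as the `nb` fixture.
import os
import pytest
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

@pytest.fixture(scope='module')
def nb(request):
    path = getattr(request.module, "notebook")   # defined in the test file above
    executed = nbformat.read(path, as_version=4)
    # allow_errors=True keeps execution errors in the cell outputs, where
    # test_cntk_202_language_understanding_noErrors looks for them
    ep = ExecutePreprocessor(timeout=3600, allow_errors=True)
    ep.preprocess(executed, {'metadata': {'path': os.path.dirname(path)}})
    return executed
```

With a fixture like this in place the tests run under plain pytest, e.g. `pytest CNTK_202_Language_Understanding_test.py` (the test file name is an assumption here).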

View file

@ -41,12 +41,12 @@
"In this tutorial we are going to use a (lightly preprocessed) version of the ATIS dataset. You can download the data automatically by running the cells below or by executing the manual instructions.\n",
"\n",
"#### Fallback manual instructions\n",
"Download the ATIS [training](https://github.com/Microsoft/CNTK/blob/master/Tutorials/SLUHandsOn/atis.train.ctf) \n",
"and [test](https://github.com/Microsoft/CNTK/blob/master/Tutorials/SLUHandsOn/atis.test.ctf) \n",
"Download the ATIS [training](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/SLUHandsOn/atis.train.ctf) \n",
"and [test](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/SLUHandsOn/atis.test.ctf) \n",
"files and put them at the same folder as this notebook. If you want to see how the model is \n",
"predicting on new sentences you will also need the vocabulary files for \n",
"[queries](https://github.com/Microsoft/CNTK/blob/master/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl) and\n",
"[slots](https://github.com/Microsoft/CNTK/blob/master/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl)"
"[queries](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl) and\n",
"[slots](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl)"
]
},
{
@ -58,25 +58,38 @@
"outputs": [],
"source": [
"import requests\n",
"import os\n",
"\n",
"def download(url, filename):\n",
" \"\"\" utility to download necessary data \"\"\"\n",
" \"\"\" utility function to download a file \"\"\"\n",
" response = requests.get(url, stream=True)\n",
" with open(filename, \"wb\") as handle:\n",
" for data in response.iter_content():\n",
" handle.write(data)\n",
"\n",
"url1 = \"https://github.com/Microsoft/CNTK/blob/master/Tutorials/SLUHandsOn/atis.%s.ctf?raw=true\"\n",
"url2 = \"https://github.com/Microsoft/CNTK/blob/master/Examples/LanguageUnderstanding/ATIS/BrainScript/%s.wl?raw=true\"\n",
"urls = [url1%\"train\", url1%\"test\", url2%\"query\", url2%\"slots\"]\n",
"locations = ['Tutorials/SLUHandsOn', 'Examples/LanguageUnderstanding/ATIS/BrainScript']\n",
"\n",
"for t in urls:\n",
" filename = t.split('/')[-1].split('?')[0]\n",
" try:\n",
" f = open(filename)\n",
" f.close()\n",
" except IOError:\n",
" download(t, filename)\n"
"data = {\n",
" 'train': { 'file': 'atis.train.ctf', 'location': 0 },\n",
" 'test': { 'file': 'atis.test.ctf', 'location': 0 },\n",
" 'query': { 'file': 'query.wl', 'location': 1 },\n",
" 'slots': { 'file': 'slots.wl', 'location': 1 }\n",
"}\n",
"\n",
"for item in data.values():\n",
" location = locations[item['location']]\n",
" path = os.path.join('..', location, item['file'])\n",
" if os.path.exists(path):\n",
" print(\"Reusing locally cached:\", item['file'])\n",
" # Update path\n",
" item['file'] = path\n",
" elif os.path.exists(item['file']):\n",
" print(\"Reusing locally cached:\", item['file'])\n",
" else:\n",
" print(\"Starting download:\", item['file'])\n",
" url = \"https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/%s/%s?raw=true\"%(location, item['file'])\n",
" download(url, item['file'])\n",
" print(\"Download completed\")\n"
]
},
{
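
The remaining hunks thread the cached paths from the `data` dictionary into the notebook's `create_reader` calls. For orientation, `create_reader` wraps a CTF file in a MinibatchSource roughly as sketched below; the stream names, the S0/S1/S2 field tags, and the epoch-size handling are recalled from the tutorial and should be read as an approximation, not the verbatim cell (vocab_size, num_intents and num_labels are defined elsewhere in the notebook):

```python
# Approximate sketch of the notebook's create_reader (not verbatim).
from cntk.io import (MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,
                     INFINITELY_REPEAT, FULL_DATA_SWEEP)

def create_reader(path, is_training):
    return MinibatchSource(
        CTFDeserializer(path, StreamDefs(
            query         = StreamDef(field='S0', shape=vocab_size,  is_sparse=True),
            intent_unused = StreamDef(field='S1', shape=num_intents, is_sparse=True),
            slot_labels   = StreamDef(field='S2', shape=num_labels,  is_sparse=True))),
        randomize=is_training,
        epoch_size=INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
```

Passing `data['train']['file']` / `data['test']['file']` here is what lets the notebook pick up either the repository copy of the data or the freshly downloaded one.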
@ -307,7 +320,7 @@
"outputs": [],
"source": [
"# peek\n",
"reader = create_reader(\"atis.train.ctf\", is_training=True)\n",
"reader = create_reader(data['train']['file'], is_training=True)\n",
"reader.streams.keys()"
]
},
@ -419,7 +432,7 @@
"def do_train():\n",
" global model\n",
" model = create_model()\n",
" reader = create_reader(\"atis.train.ctf\", is_training=True)\n",
" reader = create_reader(data['train']['file'], is_training=True)\n",
" train(reader, model)\n",
"do_train()"
]
@ -511,7 +524,7 @@
"outputs": [],
"source": [
"def do_test():\n",
" reader = create_reader(\"atis.test.ctf\", is_training=False)\n",
" reader = create_reader(data['test']['file'], is_training=False)\n",
" evaluate(reader, model)\n",
"do_test()\n",
"model.layers[2].b.value"
@ -533,8 +546,8 @@
"outputs": [],
"source": [
"# load dictionaries\n",
"query_wl = [line.rstrip('\\n') for line in open('query.wl')]\n",
"slots_wl = [line.rstrip('\\n') for line in open('slots.wl')]\n",
"query_wl = [line.rstrip('\\n') for line in open(data['query']['file'])]\n",
"slots_wl = [line.rstrip('\\n') for line in open(data['slots']['file'])]\n",
"query_dict = {query_wl[i]:i for i in range(len(query_wl))}\n",
"slots_dict = {slots_wl[i]:i for i in range(len(slots_wl))}\n",
"\n",
@ -619,7 +632,7 @@
"> Note: training with Batch Normalization is currently only supported on GPU.\n",
"\n",
"So your task will be to insert batch-normalization layers before and after the recurrent LSTM layer.\n",
"If you have completed the [hands-on labs on image processing](https://github.com/Microsoft/CNTK/blob/master/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb),\n",
"If you have completed the [hands-on labs on image processing](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb),\n",
"you may remember that the [batch-normalization layer](https://www.cntk.ai/pythondocs/layerref.html#batchnormalization-layernormalization-stabilizer) has this form:\n",
"```\n",
" BatchNormalization()\n",
@ -647,8 +660,9 @@
" Dense(num_labels)\n",
" ])\n",
"\n",
"do_train()\n",
"do_test()"
"# Enable these when done:\n",
"#do_train()\n",
"#do_test()"
]
},
{
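
For reference, the completed exercise would wrap the recurrence in the scaffold above with batch-normalization layers, roughly as sketched below. This is a hedged solution sketch rather than a cell from the notebook; it assumes the layer factories the notebook already imports (Embedding, BatchNormalization, Recurrence, LSTM, Dense) and, as noted above, a GPU for training:

```python
# Sketch of the exercise solution: BatchNormalization before and after the LSTM recurrence.
# Substitute this Sequential into the notebook's create_model scaffold.
Sequential([
    Embedding(emb_dim),
    BatchNormalization(),
    Recurrence(LSTM(hidden_dim), go_backwards=False),
    BatchNormalization(),
    Dense(num_labels)
])
```

After editing the cell, re-enable the commented-out `do_train()` / `do_test()` calls to check that the evaluation metric improves.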
@ -691,8 +705,10 @@
" Recurrence(LSTM(hidden_dim), go_backwards=False),\n",
" Dense(num_labels)\n",
" ])\n",
"do_train()\n",
"do_test()"
" \n",
"# Enable these when done:\n",
"#do_train()\n",
"#do_test()"
]
},
{
@ -776,8 +792,10 @@
" Recurrence(LSTM(hidden_dim), go_backwards=False),\n",
" Dense(num_labels)\n",
" ])\n",
"do_train()\n",
"do_test()"
"\n",
"# Enable these when done:\n",
"#do_train()\n",
"#do_test()"
]
},
{