Fix Tutorials/CNTK_{101,102} wrt learning rate schedule.

Add test for them.
Mark Hillebrand 2016-11-17 17:41:54 +01:00
Parent e8c9865977
Commit 3ec23a54d9
5 changed files: 87 additions and 23 deletions
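For context: the notebooks previously passed a bare float to sgd(), which newer CNTK v2 beta builds no longer accept; the learning rate must now be wrapped in a schedule that states its unit. A minimal sketch of the change, assuming the CNTK v2 Python API used in the notebooks below (the toy model here is illustrative only, not part of the commit):

# Sketch of the API change the tutorials are adapted to below.
# The tiny model is illustrative only, not part of the commit.
from cntk import learning_rate_schedule, UnitType
from cntk.learner import sgd
from cntk.ops import input_variable, parameter, times

features = input_variable(2)             # toy two-feature input
z = times(features, parameter((2, 2)))   # toy single-layer model

# Before: a bare float, interpreted per sample:
#   learner = sgd(z.parameters, lr=0.02)
# After: an explicit schedule, stated per minibatch:
lr_schedule = learning_rate_schedule(0.5, UnitType.minibatch)
learner = sgd(z.parameters, lr_schedule)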

View File

@@ -0,0 +1,24 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re

abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Tutorials", "CNTK_101_LogisticRegression.ipynb")

def test_cntk_101_logisticregression_noErrors(nb):
    errors = [output for cell in nb.cells if 'outputs' in cell
              for output in cell['outputs'] if output.output_type == "error"]
    assert errors == []

expectedEvalError = '0.12'

def test_cntk_101_logisticregression_evalCorrect(nb):
    testCell = [cell for cell in nb.cells
                if cell.cell_type == 'code' and re.search(r'trainer\.test_minibatch', cell.source)]
    assert len(testCell) == 1
    assert testCell[0].outputs[0]['data']['text/plain'] == expectedEvalError

View File

@@ -0,0 +1,24 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re

abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Tutorials", "CNTK_102_FeedForward.ipynb")

def test_cntk_102_feedforward_noErrors(nb):
    errors = [output for cell in nb.cells if 'outputs' in cell
              for output in cell['outputs'] if output.output_type == "error"]
    assert errors == []

expectedEvalError = '0.12'

def test_cntk_102_feedforward_evalCorrect(nb):
    testCell = [cell for cell in nb.cells
                if cell.cell_type == 'code' and re.search(r'trainer\.test_minibatch', cell.source)]
    assert len(testCell) == 1
    assert testCell[0].outputs[0]['data']['text/plain'] == expectedEvalError
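For reference, a minimal sketch of the notebook structure these asserts walk, built with the nbformat v4 helpers (the cell source and the '0.12' value are illustrative, not from a real run):

# Illustrative nbformat v4 node matching what the tests above expect:
# a code cell whose execute_result carries the eval error as text/plain.
import re
from nbformat.v4 import new_notebook, new_code_cell, new_output

cell = new_code_cell(source="trainer.test_minibatch({input : features, label : labels})")
cell.outputs = [new_output("execute_result", data={"text/plain": "0.12"})]
nb = new_notebook(cells=[cell])

# The same lookup the tests perform:
matches = [c for c in nb.cells
           if c.cell_type == 'code' and re.search(r'trainer\.test_minibatch', c.source)]
assert len(matches) == 1
assert matches[0].outputs[0]['data']['text/plain'] == '0.12'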

View File

@@ -1,6 +1,6 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
@@ -24,11 +24,11 @@ DEVICE_MAP = {
    'gpu': 0
}

def pytest_generate_tests(metafunc):
    if 'device_id' in metafunc.fixturenames:
        if (len(metafunc.config.option.deviceid)) > 1:
            del metafunc.config.option.deviceid[0]
        devices = set()
        for elem in metafunc.config.option.deviceid:
            try:
@@ -37,10 +37,10 @@ def pytest_generate_tests(metafunc):
                else:
                    devices.add(int(elem))
            except ValueError:
                raise RuntimeError(("invalid deviceid value '{0}', please "
                                    "use integer values or 'auto'").format(elem))
-        metafunc.parametrize("device_id", devices)
+        metafunc.parametrize("device_id", devices, scope='session')

    if 'is_1bit_sgd' in metafunc.fixturenames:
        if (len(metafunc.config.option.is1bitsgd)) > 1:
@@ -53,5 +53,19 @@ def pytest_generate_tests(metafunc):
            else:
                raise RuntimeError("invalid is1bitsgd value {}, only 0 or 1 allowed".format(elem))
-        metafunc.parametrize("is_1bit_sgd", is1bitsgd)
+        metafunc.parametrize("is_1bit_sgd", is1bitsgd, scope='session')
@pytest.fixture(scope='module')
def nb(tmpdir_factory, request, device_id):
    # TODO we need a way to inject device_id into the notebook
    import nbformat
    import os
    import subprocess
    inPath = getattr(request.module, "notebook")
    outPath = str(tmpdir_factory.mktemp('notebook').join('out.ipynb'))
    assert os.path.isfile(inPath)
    args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
            "--ExecutePreprocessor.timeout=60", "--output", outPath, inPath]
    subprocess.check_call(args)
    nb = nbformat.read(outPath, nbformat.current_nbformat)
    return nb
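As an aside, the subprocess call in this fixture is roughly equivalent to driving nbconvert in-process; a sketch of that alternative (not what the commit uses):

# In-process equivalent of the fixture's "jupyter nbconvert --execute"
# subprocess call; a sketch of an alternative, not the commit's approach.
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

def execute_notebook(in_path, timeout=60):
    nb = nbformat.read(in_path, as_version=4)
    ep = ExecutePreprocessor(timeout=timeout)
    ep.preprocess(nb, {'metadata': {'path': '.'}})  # executes all code cells in place
    return nb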

View File

@@ -70,7 +70,7 @@
"import numpy as np\n",
"import sys\n",
"import os\n",
"from cntk import Trainer, cntk_device, StreamConfiguration\n",
"from cntk import Trainer, cntk_device, StreamConfiguration, learning_rate_schedule, UnitType\n",
"from cntk.device import cpu, set_default_device\n",
"from cntk.learner import sgd\n",
"from cntk.ops import *"
@@ -385,8 +385,9 @@
"outputs": [],
"source": [
"# Instantiate the trainer object to drive the model training\n",
"learning_rate = 0.02\n",
"learner = sgd(z.parameters, lr=learning_rate)\n",
"learning_rate = 0.5\n",
"lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch) \n",
"learner = sgd(z.parameters, lr_schedule)\n",
"trainer = Trainer(z, loss, eval_error, [learner])"
]
},
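Worth noting: the new per-minibatch rate in the trainer cell above is numerically consistent with the old per-sample one, assuming a minibatch size of 25 (the size these tutorials use):

# Why 0.5 per minibatch lines up with the old 0.02 per sample,
# assuming a minibatch size of 25 (as these tutorials use).
minibatch_size = 25
lr_per_minibatch = 0.5
lr_per_sample = lr_per_minibatch / minibatch_size
assert lr_per_sample == 0.02  # the rate the notebooks used before this commit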

View File

@@ -16,7 +16,7 @@
"\n",
"**Problem** (recap from the CNTK 101):\n",
"\n",
"A cancer hospital has provided data and wants us to determine if a patient has a fatal [malignant][] cancer vs. a benign growth. This is known as a classification problem. To help classify each patient, we are given their age and the size of the tumor. Intuititely, one can imagine that younger patients and/or patient with small tumor size are less likely to have malignant cancer. The data set simulates this application where the each observation is a patient represented as a dot where red color indicates malignant and blue indicates a benign disease. Note: This is a toy example for learning, in real life there are large number of features from different tests/examination sources and doctors' experience that play into the diagnosis/treatment decision for a patient.\n",
"A cancer hospital has provided data and wants us to determine if a patient has a fatal [malignant][] cancer vs. a benign growth. This is known as a classification problem. To help classify each patient, we are given their age and the size of the tumor. Intuitively, one can imagine that younger patients and/or patient with small tumor size are less likely to have malignant cancer. The data set simulates this application where the each observation is a patient represented as a dot where red color indicates malignant and blue indicates a benign disease. Note: This is a toy example for learning, in real life there are large number of features from different tests/examination sources and doctors' experience that play into the diagnosis/treatment decision for a patient.\n",
"\n",
"<img src=\"https://www.cntk.ai/jup/cancer_data_plot.jpg\", width=400, height=400>\n",
"\n",
@@ -24,10 +24,10 @@
"**Goal**:\n",
"Our goal is to learn a classifier that classifies any patient into either benign or malignant category given two features (age, tumor size). \n",
"\n",
"In CNTK 101 tutorial, we learnt a linear classifer using Logistic Regression which misclassified some data points. Often in real world problems, linear classifiers cannot accurately model the data in situations where there is little to no knowledge of how to construct good features. This often results in accuracy limitations and requires models that have more complex decision boundaries. In this tutorial, we will combine multiple linear units (from the CNTK 101 tutorial - Logistic Regression) to a non-linear classifier. The other aspect of such classifiers where the feature encoders are automatically learnt from the data will be covered in later tutorials. \n",
"In CNTK 101 tutorial, we learnt a linear classifier using Logistic Regression which misclassified some data points. Often in real world problems, linear classifiers cannot accurately model the data in situations where there is little to no knowledge of how to construct good features. This often results in accuracy limitations and requires models that have more complex decision boundaries. In this tutorial, we will combine multiple linear units (from the CNTK 101 tutorial - Logistic Regression) to a non-linear classifier. The other aspect of such classifiers where the feature encoders are automatically learnt from the data will be covered in later tutorials. \n",
"\n",
"**Approach**:\n",
"Any learning algorithm has typically 5 stages namely, Data reading, Data rreprocessing, Creating a model, Learning the model parameters and Evaluating (a.k.a. testing/prediction) the model. \n",
"Any learning algorithm has typically five stages. These are Data reading, Data preprocessing, Creating a model, Learning the model parameters, and Evaluating (a.k.a. testing/prediction) the model. \n",
"\n",
"We keep everything same as CNTK 101 except for the third (Model creation) step where we use a feed forward network instead.\n",
" \n",
@@ -62,7 +62,7 @@
"import numpy as np\n",
"import sys\n",
"import os\n",
"from cntk import Trainer, cntk_device, StreamConfiguration\n",
"from cntk import Trainer, cntk_device, StreamConfiguration, learning_rate_schedule, UnitType\n",
"from cntk.device import cpu, set_default_device\n",
"from cntk.learner import sgd\n",
"from cntk.ops import *"
@@ -456,7 +456,7 @@
"\n",
"With minibatches we often sample observation from the larger training dataset. We repeat the process of model parameters update using different combination of training samples and over a period of time minimize the `loss` (and the error). When the incremental error rates are no longer changing significantly or after a preset number of maximum minibatches to train, we claim that our model is trained.\n",
"\n",
"One of the key parameter for optimization is called the learning rate. For now, we can think of it as a scaling factor that modulates how much we change the parameters in any iteration. We will be covering more details in later tutorial. \n",
"One of the key parameter for optimization is called the `learning_rate`. For now, we can think of it as a scaling factor that modulates how much we change the parameters in any iteration. We will be covering more details in later tutorial. \n",
"With this information, we are ready to create our trainer.\n",
"\n",
"[optimization]: https://en.wikipedia.org/wiki/Category:Convex_optimization\n",
@@ -473,8 +473,9 @@
"outputs": [],
"source": [
"# Instantiate the trainer object to drive the model training\n",
"learning_rate_per_sample = 0.02\n",
"learner = sgd(z.parameters, lr=learning_rate_per_sample)\n",
"learning_rate = 0.5\n",
"lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch) \n",
"learner = sgd(z.parameters, lr_schedule)\n",
"trainer = Trainer(z, loss, eval_error, [learner])"
]
},
@@ -495,7 +496,7 @@
"source": [
"from cntk.utils import get_train_eval_criterion, get_train_loss\n",
"\n",
"# Define a utiltiy function to compute moving average sum (\n",
"# Define a utility function to compute moving average sum (\n",
"# More efficient implementation is possible with np.cumsum() function\n",
"def moving_average(a, w=10) :\n",
" \n",
@@ -671,14 +672,14 @@
"test_minibatch_size = 25\n",
"features, labels = generate_random_data_sample(test_minibatch_size, input_dim, num_output_classes)\n",
"\n",
"trainer.test_minibatch({input : features, label : labels}) "
"trainer.test_minibatch({input : features, label : labels})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note, this error is very comparable to our training error indicating that our model has good \"out of sample\" error a.k.a generalization error. This implies that our model can very effectively deal with previously unseen observations (during the training process). This is key to avoid the phenomenon of overfitting. "
"Note, this error is very comparable to our training error indicating that our model has good \"out of sample\" error a.k.a generalization error. This implies that our model can very effectively deal with previously unseen observations (during the training process). This is key to avoid the phenomenon of overfitting."
]
},
{
@@ -691,7 +692,7 @@
"\n",
"<img src=\"http://cntk.ai/jup/feedforward_network.jpg\",width=200, height=200>\n",
"\n",
"The way we have configured the network includes the output of all the activation nodes (e.g., the green layer in the figure). The output nodes (the orange layer in the figure), converts the activations into a probability. A simple and effective way is to route the activations via a softmax function. "
"The way we have configured the network includes the output of all the activation nodes (e.g., the green layer in the figure). The output nodes (the orange layer in the figure), converts the activations into a probability. A simple and effective way is to route the activations via a softmax function."
]
},
{
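Finally, the softmax routing described in the last markdown cell, as a quick NumPy sketch (illustrative; the notebook itself uses CNTK's softmax op):

# NumPy sketch of the softmax step described above; the notebook
# itself uses CNTK's softmax op, this only illustrates the idea.
import numpy as np

def softmax(activations):
    e = np.exp(activations - np.max(activations))  # shift for numerical stability
    return e / e.sum()

print(softmax(np.array([2.0, 1.0])))  # -> approx. [0.731 0.269]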