Updated notebooks and added missing tests

This commit is contained in:
REDMOND\sayanpa 2017-05-03 19:23:25 -07:00
Родитель 0fc7be5720
Коммит 9b48ea7648
22 изменённых файлов: 1277 добавлений и 1739 удалений

Просмотреть файл

@ -0,0 +1,17 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re
import numpy as np
# Directory containing this test module; the notebook path below is built
# relative to it.
abs_path = os.path.dirname(os.path.abspath(__file__))

# Four levels above this module's directory, then into "Tutorials".
notebook = os.path.join(
    abs_path,
    "..", "..", "..", "..",
    "Tutorials",
    "CNTK_103A_MNIST_DataLoader.ipynb",
)
def test_cntk_103a_mnist_dataloader_noErrors(nb):
    """Assert that no cell output in the notebook is an error output.

    Only ``nb.cells`` is read. Cells without an ``'outputs'`` key are
    skipped; for the remaining cells every output whose ``output_type``
    equals ``"error"`` is collected, and the test fails if any were found.
    """
    # NOTE(review): ``nb`` is presumably the executed notebook injected by a
    # pytest fixture defined elsewhere (e.g. conftest.py) -- confirm.
    errors = [output for cell in nb.cells if 'outputs' in cell
              for output in cell['outputs'] if output.output_type == "error"]
    assert errors == []

Просмотреть файл

@ -17,7 +17,7 @@ def test_cntk_103d_mnist_convolutionalneuralnetwork_noErrors(nb):
assert errors == []
notebook_timeoutSeconds = 1500
expectedEvalErrorByDeviceId = { -1: [1.3, 1.05] , 0: [1.3, 1.05] }
expectedEvalErrorByDeviceId = { -1: [1.35, 1.05] , 0: [1.35, 1.05] }
def test_cntk_103d_mnist_convolutionalneuralnetwork_trainerror(nb, device_id):
metrics = []

Просмотреть файл

@ -0,0 +1,18 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re
import numpy as np
# Directory containing this test module; the notebook path below is built
# relative to it.
abs_path = os.path.dirname(os.path.abspath(__file__))

# Four levels above this module's directory, then into "Tutorials".
notebook = os.path.join(
    abs_path,
    "..", "..", "..", "..",
    "Tutorials",
    "CNTK_201A_CIFAR-10_DataLoader.ipynb",
)

# NOTE(review): presumably consumed by the notebook test harness as the
# execution timeout, in seconds -- confirm against the fixture code.
notebook_timeoutSeconds = 900
# NOTE(review): "cifat" in the function name looks like a typo for "cifar";
# the name is left unchanged so the pytest test id stays stable.
def test_cntk_201a_cifat_10_dataloader_noErrors(nb):
    """Assert that no cell output in the notebook is an error output.

    Only ``nb.cells`` is read. Cells without an ``'outputs'`` key are
    skipped; for the remaining cells every output whose ``output_type``
    equals ``"error"`` is collected, and the test fails if any were found.
    """
    # NOTE(review): ``nb`` is presumably the executed notebook injected by a
    # pytest fixture defined elsewhere (e.g. conftest.py) -- confirm.
    errors = [output for cell in nb.cells if 'outputs' in cell
              for output in cell['outputs'] if output.output_type == "error"]
    assert errors == []

Просмотреть файл

@ -0,0 +1,19 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re
import numpy as np
# Directory containing this test module; the notebook path below is built
# relative to it.
abs_path = os.path.dirname(os.path.abspath(__file__))

# Four levels above this module's directory, then into "Tutorials".
notebook = os.path.join(
    abs_path,
    "..", "..", "..", "..",
    "Tutorials",
    "CNTK_201B_CIFAR-10_ImageHandsOn.ipynb",
)

# NOTE(review): presumably read by the notebook test harness to restrict the
# devices this notebook is run on and to bound its runtime in seconds --
# confirm against the fixture code.
notebook_deviceIdsToRun = [0]
notebook_timeoutSeconds = 900
def test_cntk_201B_cifar_10_imagehandson_noErrors(nb):
    """Assert that no cell output in the notebook is an error output.

    Only ``nb.cells`` is read. Cells without an ``'outputs'`` key are
    skipped; for the remaining cells every output whose ``output_type``
    equals ``"error"`` is collected, and the test fails if any were found.
    """
    # NOTE(review): ``nb`` is presumably the executed notebook injected by a
    # pytest fixture defined elsewhere (e.g. conftest.py) -- confirm.
    errors = [output for cell in nb.cells if 'outputs' in cell
              for output in cell['outputs'] if output.output_type == "error"]
    assert errors == []

Просмотреть файл

@ -163,13 +163,31 @@
"import os\n",
"from cntk import *\n",
"\n",
"import cntk as C"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the block below, we check if we are running this notebook in the CNTK internal test machines by looking for environment variables defined there. We then select the right target device (GPU vs CPU) to test this notebook. In other cases, we use CNTK's default policy to use the best available device (GPU, if available, else CPU)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Select the right target device when this notebook is being tested:\n",
"if 'TEST_DEVICE' in os.environ:\n",
" import cntk\n",
" if os.environ['TEST_DEVICE'] == 'cpu':\n",
" cntk.device.try_set_default_device(cntk.device.cpu())\n",
" C.device.try_set_default_device(C.device.cpu())\n",
" else:\n",
" cntk.device.try_set_default_device(cntk.device.gpu(0))\n"
" C.device.try_set_default_device(C.device.gpu(0))"
]
},
{
@ -388,12 +406,12 @@
"def linear_layer(input_var, output_dim):\n",
" \n",
" input_dim = input_var.shape[0]\n",
" weight_param = parameter(shape=(input_dim, output_dim))\n",
" bias_param = parameter(shape=(output_dim))\n",
" weight_param = C.parameter(shape=(input_dim, output_dim))\n",
" bias_param = C.parameter(shape=(output_dim))\n",
" \n",
" mydict['w'], mydict['b'] = weight_param, bias_param\n",
"\n",
" return times(input_var, weight_param) + bias_param"
" return C.times(input_var, weight_param) + bias_param"
]
},
{
@ -457,8 +475,8 @@
},
"outputs": [],
"source": [
"label = input_variable((num_output_classes), np.float32)\n",
"loss = cross_entropy_with_softmax(z, label)"
"label = C.input_variable((num_output_classes), np.float32)\n",
"loss = C.cross_entropy_with_softmax(z, label)"
]
},
{
@ -478,7 +496,7 @@
},
"outputs": [],
"source": [
"eval_error = classification_error(z, label)"
"eval_error = C.classification_error(z, label)"
]
},
{
@ -511,9 +529,9 @@
"source": [
"# Instantiate the trainer object to drive the model training\n",
"learning_rate = 0.5\n",
"lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch) \n",
"learner = sgd(z.parameters, lr_schedule)\n",
"trainer = Trainer(z, (loss, eval_error), [learner])"
"lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch) \n",
"learner = C.sgd(z.parameters, lr_schedule)\n",
"trainer = C.Trainer(z, (loss, eval_error), [learner])"
]
},
{
@ -741,7 +759,7 @@
},
"outputs": [],
"source": [
"out = softmax(z)\n",
"out = C.softmax(z)\n",
"result = out.eval({feature : features})"
]
},
@ -861,7 +879,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -22,9 +22,7 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# Import the relevant modules to be used later\n",
@ -132,9 +130,7 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -181,9 +177,7 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -231,9 +225,7 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# Save the data files into a format compatible with CNTK text reader\n",
@ -259,9 +251,7 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -107,7 +107,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"In the block below, we check if we are running in the CNTK team internal build and test infra by looking for for environment variables defined there to select the right target device with this notebook is being tested. Otherwise device selection is default. The default policy is the at the best available device (GPU, if available, above CPU) is chosen."
"In the block below, we check if we are running this notebook in the CNTK internal test machines by looking for environment variables defined there. We then select the right target device (GPU vs CPU) to test this notebook. In other cases, we use CNTK's default policy to use the best available device (GPU, if available, else CPU)."
]
},
{

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -40,9 +40,7 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
@ -54,8 +52,6 @@
"import time\n",
"\n",
"import cntk as C\n",
"import cntk.axis\n",
"from cntk.layers import Dense, Dropout, Recurrence \n",
"\n",
"try:\n",
" from urllib.request import urlretrieve\n",
@ -81,17 +77,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Select the notebook runtime environment devices / settings\n",
"\n",
"Set the device to cpu / gpu for the test environment. If you have both CPU and GPU on your machine, you can optionally switch the devices. By default we choose the best available device."
"In the block below, we check if we are running this notebook in the CNTK internal test machines by looking for environment variables defined there. We then select the right target device (GPU vs CPU) to test this notebook. In other cases, we use CNTK's default policy to use the best available device (GPU, if available, else CPU)."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"if 'TEST_DEVICE' in os.environ:\n",
@ -175,9 +167,7 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"def generate_solar_data(input_url, time_steps, normalize=1, val_size=0.1, test_size=0.1):\n",
@ -267,9 +257,7 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -334,9 +322,7 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"data": {
@ -358,9 +344,7 @@
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"data": {
@ -436,9 +420,7 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# input sequences\n",
@ -479,9 +461,7 @@
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -528,9 +508,7 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"data": {
@ -574,9 +552,7 @@
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -596,9 +572,7 @@
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -627,7 +601,6 @@
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
@ -687,7 +660,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -705,5 +678,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}

Просмотреть файл

@ -14,7 +14,9 @@
"- Part A: Familiarizes you with the CIFAR-10 data and converts them into CNTK supported format. This data will be used later in the tutorial for image classification tasks.\n",
"- Part B: We will introduce image understanding tutorials.\n",
"\n",
"If you are curious about how well computers can perform on CIFAR-10 today, Rodrigo Benenson maintains a [blog](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#43494641522d3130) on the state-of-the-art performance of various algorithms.\n"
"If you are curious about how well computers can perform on CIFAR-10 today, Rodrigo Benenson maintains a [blog](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#43494641522d3130) on the state-of-the-art performance of various algorithms.\n",
"\n",
"**Note**: Please be patient since downloading the data and pre-processing it can take 10-15 minutes depending on the network speed.\n"
]
},
{
@ -25,7 +27,7 @@
},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"from __future__ import print_function # Use a function definition from future version (say 3.x from 2.7 interpreter)\n",
"\n",
"from PIL import Image\n",
"import getopt\n",
@ -264,9 +266,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -318,7 +318,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -52,9 +52,7 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -68,9 +66,7 @@
}
],
"source": [
"# Use a function definition from future version \n",
"# (say 3.x while running 2.7 interpreter)\n",
"from __future__ import print_function\n",
"from __future__ import print_function # Use a function definition from future version (say 3.x from 2.7 interpreter)\n",
"import requests\n",
"import os\n",
"\n",
@ -119,7 +115,7 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -133,7 +129,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"In the block below, we check if we are running in the CNTK team internal build and test infra by looking for for environment variables defined there to select the right target device with this notebook is being tested. Otherwise device selection is default. The default policy is the at the best available device (GPU, if available, above CPU) is chosen."
"In the block below, we check if we are running this notebook in the CNTK internal test machines by looking for environment variables defined there. We then select the right target device (GPU vs CPU) to test this notebook. In other cases, we use CNTK's default policy to use the best available device (GPU, if available, else CPU)."
]
},
{
@ -231,7 +227,7 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -273,9 +269,7 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -302,9 +296,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"metadata": {},
"source": [
"In our case we have input as one-hot encoded vector of length 943 and the output dimension `emb_dim` is set to 150. In the code below we pass the input variable `x` to our model `z`. This binds the model with input data of known shape. In this case, the input shape will be the size of the input vocabulary. With this modification, the parameter returned by the embed layer is completely specified (943, 150). **Note**: You can initialize the Embedding matrix with pre-computed vectors using [Word2Vec](https://en.wikipedia.org/wiki/Word2vec) or [GloVe](https://en.wikipedia.org/wiki/GloVe_(machine_learning))."
]
@ -312,9 +304,7 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -391,9 +381,7 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"data": {
@ -430,9 +418,7 @@
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"data": {
@ -481,7 +467,7 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -555,7 +541,6 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
@ -628,7 +613,7 @@
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -668,9 +653,7 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -733,9 +716,7 @@
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -869,7 +850,7 @@
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -916,7 +897,7 @@
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -1004,7 +985,7 @@
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@ -1043,9 +1024,7 @@
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -1094,7 +1073,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
@ -1148,9 +1126,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",

Просмотреть файл

@ -181,27 +181,9 @@
"from __future__ import print_function\n",
"import numpy as np\n",
"import os\n",
"from cntk import Trainer, Axis\n",
"from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT\n",
"from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType\n",
"from cntk import input_variable, cross_entropy_with_softmax, classification_error, sequence, \\\n",
" element_select, alias, hardmax, placeholder_variable, combine, parameter, times, plus\n",
"from cntk.ops.functions import CloneMethod, load_model, Function\n",
"from cntk.initializer import glorot_uniform\n",
"from cntk.logging import log_number_of_parameters, ProgressPrinter\n",
"from cntk.logging.graph import plot\n",
"from cntk.layers import *\n",
"from cntk.layers.sequence import *\n",
"from cntk.layers.models.attention import *\n",
"from cntk.layers.typing import *\n",
"\n",
"# Select the right target device when this notebook is being tested:\n",
"if 'TEST_DEVICE' in os.environ:\n",
" import cntk\n",
" if os.environ['TEST_DEVICE'] == 'cpu':\n",
" cntk.device.try_set_default_device(cntk.device.cpu())\n",
" else:\n",
" cntk.device.set_default_device(cntk.device.gpu(0))\n",
"import cntk as C\n",
"\n",
"\n",
"# Because of randomization in training we set a fixed random seed to ensure repeatable outputs\n",
"from _cntk_py import set_fixed_random_seed\n",
@ -460,10 +442,10 @@
"outputs": [],
"source": [
"# Source and target inputs to the model\n",
"inputAxis = Axis('inputAxis')\n",
"labelAxis = Axis('labelAxis')\n",
"InputSequence = SequenceOver[inputAxis]\n",
"LabelSequence = SequenceOver[labelAxis]"
"inputAxis = C.Axis('inputAxis')\n",
"labelAxis = C.Axis('labelAxis')\n",
"C.layers.InputSequence = C.layers.SequenceOver[inputAxis]\n",
"C.layers.LabelSequence = C.layers.SequenceOver[labelAxis]"
]
},
{
@ -532,41 +514,41 @@
"def create_model(): # :: (history*, input*) -> logP(w)*\n",
" \n",
" # Embedding: (input*) --> embedded_input*\n",
" embed = Embedding(embedding_dim, name='embed') if use_embedding else identity\n",
" embed = C.layers.Embedding(embedding_dim, name='embed') if use_embedding else identity\n",
" \n",
" # Encoder: (input*) --> (h0, c0)\n",
" # Create multiple layers of LSTMs by passing the output of the i-th layer\n",
" # to the (i+1)th layer as its input\n",
" # Note: We go_backwards for the plain model, but forward for the attention model.\n",
" with default_options(enable_self_stabilization=True, go_backwards=not use_attention):\n",
" LastRecurrence = Fold if not use_attention else Recurrence\n",
" encode = Sequential([\n",
" with C.layers.default_options(enable_self_stabilization=True, go_backwards=not use_attention):\n",
" LastRecurrence = C.layers.Fold if not use_attention else C.layers.Recurrence\n",
" encode = C.layers.Sequential([\n",
" embed,\n",
" Stabilizer(),\n",
" For(range(num_layers-1), lambda:\n",
" Recurrence(LSTM(hidden_dim))),\n",
" LastRecurrence(LSTM(hidden_dim), return_full_state=True),\n",
" (Label('encoded_h'), Label('encoded_c')),\n",
" C.layers.Stabilizer(),\n",
" C.layers.For(range(num_layers-1), lambda:\n",
" C.layers.Recurrence(C.layers.LSTM(hidden_dim))),\n",
" LastRecurrence(C.layers.LSTM(hidden_dim), return_full_state=True),\n",
" (C.layers.Label('encoded_h'), C.layers.Label('encoded_c')),\n",
" ])\n",
"\n",
" # Decoder: (history*, input*) --> unnormalized_word_logp*\n",
" # where history is one of these, delayed by 1 step and <s> prepended:\n",
" # - training: labels\n",
" # - testing: its own output hardmax(z) (greedy decoder)\n",
" with default_options(enable_self_stabilization=True):\n",
" with C.layers.default_options(enable_self_stabilization=True):\n",
" # sub-layers\n",
" stab_in = Stabilizer()\n",
" rec_blocks = [LSTM(hidden_dim) for i in range(num_layers)]\n",
" stab_out = Stabilizer()\n",
" proj_out = Dense(label_vocab_dim, name='out_proj')\n",
" stab_in = C.layers.Stabilizer()\n",
" rec_blocks = [C.layers.LSTM(hidden_dim) for i in range(num_layers)]\n",
" stab_out = C.layers.Stabilizer()\n",
" proj_out = C.layers.Dense(label_vocab_dim, name='out_proj')\n",
" # attention model\n",
" if use_attention: # maps a decoder hidden state and all the encoder states into an augmented state\n",
" attention_model = AttentionModel(attention_dim, \n",
" attention_model = C.layers.AttentionModel(attention_dim, \n",
" attention_span, \n",
" attention_axis, \n",
" name='attention_model') # :: (h_enc*, h_dec) -> (h_dec augmented)\n",
" # layer function\n",
" @Function\n",
" @C.Function\n",
" def decode(history, input):\n",
" encoded_input = encode(input)\n",
" r = history\n",
@ -576,20 +558,20 @@
" rec_block = rec_blocks[i] # LSTM(hidden_dim) # :: (dh, dc, x) -> (h, c)\n",
" if use_attention:\n",
" if i == 0:\n",
" @Function\n",
" @C.Function\n",
" def lstm_with_attention(dh, dc, x):\n",
" h_att = attention_model(encoded_input.outputs[0], dh)\n",
" x = splice(x, h_att)\n",
" x = C.splice(x, h_att)\n",
" return rec_block(dh, dc, x)\n",
" r = Recurrence(lstm_with_attention)(r)\n",
" r = C.layers.Recurrence(lstm_with_attention)(r)\n",
" else:\n",
" r = Recurrence(rec_block)(r)\n",
" r = C.layers.Recurrence(rec_block)(r)\n",
" else:\n",
" # unlike Recurrence(), the RecurrenceFrom() layer takes the initial hidden state as a data input\n",
" r = RecurrenceFrom(rec_block)(*(encoded_input.outputs + (r,))) # :: h0, c0, r -> h \n",
" r = C.layers.RecurrenceFrom(rec_block)(*(encoded_input.outputs + (r,))) # :: h0, c0, r -> h \n",
" r = stab_out(r)\n",
" r = proj_out(r)\n",
" r = Label('out_proj_out')(r)\n",
" r = C.layers.Label('out_proj_out')(r)\n",
" return r\n",
"\n",
" return decode"
@ -622,12 +604,12 @@
"def create_model_train(s2smodel):\n",
" # model used in training (history is known from labels)\n",
" # note: the labels must NOT contain the initial <s>\n",
" @Function\n",
" @C.Function\n",
" def model_train(input, labels): # (input*, labels*) --> (word_logp*)\n",
"\n",
" # The input to the decoder always starts with the special label sequence start token.\n",
" # Then, use the previous value of the label sequence (for training) or the output (for execution).\n",
" past_labels = Delay(initial_state=sentence_start)(labels)\n",
" past_labels = C.layers.Delay(initial_state=sentence_start)(labels)\n",
" return s2smodel(past_labels, input)\n",
" return model_train"
]
@ -651,14 +633,14 @@
"source": [
"def create_model_greedy(s2smodel):\n",
" # model used in (greedy) decoding (history is decoder's own output)\n",
" @Function\n",
" @Signature(InputSequence[Tensor[input_vocab_dim]])\n",
" @C.Function\n",
" @C.layers.Signature(C.layers.InputSequence[C.layers.Tensor[input_vocab_dim]])\n",
" def model_greedy(input): # (input*) --> (word_sequence*)\n",
"\n",
" # Decoding is an unfold() operation starting from sentence_start.\n",
" # We must transform s2smodel (history*, input* -> word_logp*) into a generator (history* -> output*)\n",
" # which holds 'input' in its closure.\n",
" unfold = UnfoldFrom(lambda history: s2smodel(history, input) >> hardmax,\n",
" unfold = C.layers.UnfoldFrom(lambda history: s2smodel(history, input) >> C.hardmax,\n",
" # stop once sentence_end_index was max-scoring output\n",
" until_predicate=lambda w: w[...,sentence_end_index],\n",
" length_increase=length_increase)\n",
@ -728,12 +710,12 @@
" # Instantiate the trainer object to drive the model training\n",
" minibatch_size = 72\n",
" lr = 0.001 if use_attention else 0.005\n",
" learner = fsadagrad(model_train.parameters,\n",
" lr = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),\n",
" momentum = momentum_as_time_constant_schedule(1100),\n",
" learner = C.fsadagrad(model_train.parameters,\n",
" lr = C.learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], C.UnitType.sample, epoch_size),\n",
" momentum = C.momentum_as_time_constant_schedule(1100),\n",
" gradient_clipping_threshold_per_sample=2.3,\n",
" gradient_clipping_with_truncation=True)\n",
" trainer = Trainer(None, criterion, learner)\n",
" trainer = C.Trainer(None, criterion, learner)\n",
"\n",
" # Get minibatches of sequences to train with and perform model training\n",
" total_samples = 0\n",
@ -741,8 +723,8 @@
" eval_freq = 100\n",
"\n",
" # print out some useful training information\n",
" log_number_of_parameters(model_train) ; print()\n",
" progress_printer = ProgressPrinter(freq=30, tag='Training') \n",
" C.logging.log_number_of_parameters(model_train) ; print()\n",
" progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training') \n",
"\n",
" # a hack to allow us to print sparse vectors\n",
" sparse_to_dense = create_sparse_to_dense(input_vocab_dim)\n",
@ -806,11 +788,11 @@
"source": [
"# dummy for printing the input sequence below. Currently needed because input is sparse.\n",
"def create_sparse_to_dense(input_vocab_dim):\n",
" I = Constant(np.eye(input_vocab_dim))\n",
" @Function\n",
" @Signature(InputSequence[SparseTensor[input_vocab_dim]])\n",
" I = C.Constant(np.eye(input_vocab_dim))\n",
" @C.Function\n",
" @C.layers.Signature(C.layers.InputSequence[C.layers.SparseTensor[input_vocab_dim]])\n",
" def no_op(input):\n",
" return times(input, I)\n",
" return C.times(input, I)\n",
" return no_op"
]
},
@ -1008,7 +990,7 @@
" \n",
" model_decoding = create_model_greedy(s2smodel) # wrap the greedy decoder around the model\n",
"\n",
" progress_printer = ProgressPrinter(tag='Evaluation')\n",
" progress_printer = C.logging.ProgressPrinter(tag='Evaluation')\n",
"\n",
" sparse_to_dense = create_sparse_to_dense(input_vocab_dim)\n",
"\n",
@ -1098,7 +1080,7 @@
" \n",
" model_decoding = create_model_greedy(s2smodel) # wrap the greedy decoder around the model\n",
"\n",
" progress_printer = ProgressPrinter(tag='Evaluation')\n",
" progress_printer = C.logging.ProgressPrinter(tag='Evaluation')\n",
"\n",
" sparse_to_dense = create_sparse_to_dense(input_vocab_dim)\n",
"\n",
@ -1211,7 +1193,7 @@
" \n",
" # show attention window (requires matplotlib, seaborn, and pandas)\n",
" if use_attention and show_attention: \n",
" q = combine([model_decoding.attention_model.attention_weights])\n",
" q = C.combine([model_decoding.attention_model.attention_weights])\n",
" att_value = q(query)\n",
"\n",
" # get the attention data up to the length of the output (subset of the full window)\n",

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны