Adapting examples; set MinibatchData.data to be a property
This commit is contained in:
Родитель
1368b2de3f
Коммит
9adade3311
|
@ -279,7 +279,7 @@ def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_si
|
|||
|
||||
# This decodes the test set and counts the string error rate.
|
||||
def evaluate_decoding(reader, s2smodel, i2w):
|
||||
|
||||
|
||||
model_decoding = create_model_greedy(s2smodel) # wrap the greedy decoder around the model
|
||||
|
||||
progress_printer = ProgressPrinter(tag='Evaluation')
|
||||
|
@ -301,7 +301,7 @@ def evaluate_decoding(reader, s2smodel, i2w):
|
|||
|
||||
num_total += len(outputs)
|
||||
num_wrong += sum([label != output for output, label in zip(outputs, labels)])
|
||||
|
||||
|
||||
rate = num_wrong / num_total
|
||||
print("string error rate of {:.1f}% in {} samples".format(100 * rate, num_total))
|
||||
return rate
|
||||
|
@ -321,7 +321,7 @@ def Evaluator(model, criterion):
|
|||
parameters |= set(model.parameters)
|
||||
if metric:
|
||||
parameters |= set(metric.parameters)
|
||||
dummy_learner = momentum_sgd(tuple(parameters),
|
||||
dummy_learner = momentum_sgd(tuple(parameters),
|
||||
lr = learning_rate_schedule(1, UnitType.minibatch),
|
||||
momentum = momentum_as_time_constant_schedule(0))
|
||||
return Trainer(model, (loss, metric), dummy_learner)
|
||||
|
@ -382,10 +382,10 @@ def translate(tokens, model_decoding, vocab, i2w, show_attention=False, max_labe
|
|||
# print out translation and stop at the sequence-end tag
|
||||
prediction = np.argmax(pred, axis=-1)
|
||||
translation = [i2w[i] for i in prediction]
|
||||
|
||||
|
||||
# show attention window (requires matplotlib, seaborn, and pandas)
|
||||
if use_attention and show_attention:
|
||||
|
||||
|
||||
#att_value = model_decoding.attention_model.attention_weights(query)
|
||||
# BUGBUG: fails with "Forward: Feature Not Implemented"
|
||||
q = combine([model_decoding.attention_model.attention_weights])
|
||||
|
@ -440,7 +440,7 @@ def get_vocab(path):
|
|||
vocab = [w.strip() for w in open(path).readlines()]
|
||||
i2w = { i:w for i,w in enumerate(vocab) }
|
||||
w2i = { w:i for i,w in enumerate(vocab) }
|
||||
|
||||
|
||||
return (vocab, i2w, w2i)
|
||||
|
||||
# Given a vocab and tensor, print the output
|
||||
|
@ -454,9 +454,9 @@ def debug_attention(model, input):
|
|||
words_p = q(input)
|
||||
words = words_p[0]
|
||||
p = words_p[1]
|
||||
len = words.shape[attention_axis-1]
|
||||
seq_len = words[0].shape[attention_axis-1]
|
||||
span = 7 #attention_span #7 # test sentence is 7 tokens long
|
||||
p_sq = np.squeeze(p[0,:len,:span,0,:]) # (batch, len, attention_span, 1, vector_dim)
|
||||
p_sq = np.squeeze(p[0][:seq_len,:span,0,:]) # (batch, len, attention_span, 1, vector_dim)
|
||||
opts = np.get_printoptions()
|
||||
np.set_printoptions(precision=5)
|
||||
print(p_sq)
|
||||
|
@ -477,7 +477,7 @@ if __name__ == '__main__':
|
|||
|
||||
# create inputs and create model
|
||||
model = create_model()
|
||||
|
||||
|
||||
# train
|
||||
train_reader = create_reader(os.path.join(DATA_DIR, TRAINING_DATA), True)
|
||||
valid_reader = create_reader(os.path.join(DATA_DIR, VALIDATION_DATA), True)
|
||||
|
@ -489,7 +489,7 @@ if __name__ == '__main__':
|
|||
# test string error rate on decoded output
|
||||
test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
|
||||
evaluate_decoding(test_reader, model, i2w)
|
||||
|
||||
|
||||
# test the same metric as in training on the test set
|
||||
test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
|
||||
evaluate_metric(test_reader, model)
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -707,11 +707,11 @@
|
|||
"onehot = np.zeros([len(w),len(query_dict)], np.float32)\n",
|
||||
"for t in range(len(w)):\n",
|
||||
" onehot[t,w[t]] = 1\n",
|
||||
"pred = model.eval({model.arguments[0]:[onehot]})\n",
|
||||
"pred = model.eval({model.arguments[0]:[onehot]})[0]\n",
|
||||
"print(pred.shape)\n",
|
||||
"best = np.argmax(pred,axis=2)\n",
|
||||
"print(best[0])\n",
|
||||
"list(zip(seq.split(),[slots_wl[s] for s in best[0]]))"
|
||||
"best = np.argmax(pred,axis=1)\n",
|
||||
"print(best)\n",
|
||||
"list(zip(seq.split(),[slots_wl[s] for s in best]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -842,9 +842,9 @@
|
|||
" words_p = q(input)\n",
|
||||
" words = words_p[0]\n",
|
||||
" p = words_p[1]\n",
|
||||
" len = words.shape[attention_axis-1]\n",
|
||||
" seq_len = words[0].shape[attention_axis-1]\n",
|
||||
" span = 7 #attention_span #7 # test sentence is 7 tokens long\n",
|
||||
" p_sq = np.squeeze(p[0,:len,:span,0,:]) # (batch, len, attention_span, 1, vector_dim)\n",
|
||||
" p_sq = np.squeeze(p[0][:seq_len,:span,0,:]) # (batch, len, attention_span, 1, vector_dim)\n",
|
||||
" opts = np.get_printoptions()\n",
|
||||
" np.set_printoptions(precision=5)\n",
|
||||
" print(p_sq)\n",
|
||||
|
@ -897,7 +897,7 @@
|
|||
" [ 0.1439 0.14321 0.14322 0.14308 0.14287 0.14216 0.14156]\n",
|
||||
" [ 0.1439 0.14321 0.14322 0.14308 0.14287 0.14216 0.14156]]\n",
|
||||
" Minibatch[ 181- 210]: loss = 3.143627 * 1565, metric = 82.30% * 1565;\n",
|
||||
" Minibatch[ 211- 240]: loss = 3.186274 * 1583, metric = 83.39% * 1583;\n",
|
||||
" Minibatch[ 211- 240]: loss = 3.186273 * 1583, metric = 83.39% * 1583;\n",
|
||||
" Minibatch[ 241- 270]: loss = 3.128010 * 1562, metric = 83.03% * 1562;\n",
|
||||
" Minibatch[ 271- 300]: loss = 3.152663 * 1551, metric = 83.69% * 1551;\n",
|
||||
"['<s> A B A D I </s>']\n",
|
||||
|
@ -921,7 +921,7 @@
|
|||
" [ 0.14417 0.14347 0.14339 0.14316 0.14279 0.14191 0.14111]\n",
|
||||
" [ 0.14417 0.14348 0.14339 0.14316 0.14279 0.14191 0.14111]]\n",
|
||||
" Minibatch[ 391- 420]: loss = 3.126911 * 1601, metric = 82.26% * 1601;\n",
|
||||
"Finished Epoch[1 of 300]: [Training] loss = 3.279741 * 22067, metric = 84.28% * 22067 92.880s (237.6 samples/s);\n",
|
||||
"Finished Epoch[1 of 300]: [Training] loss = 3.279741 * 22067, metric = 84.28% * 22067 260.163s ( 84.8 samples/s);\n",
|
||||
"Saving final model to 'model_0.cmf'\n",
|
||||
"1 epochs complete.\n"
|
||||
]
|
||||
|
@ -932,6 +932,15 @@
|
|||
"train(train_reader, valid_reader, vocab, i2w, model, max_epochs=1, epoch_size=25000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -1290,15 +1299,6 @@
|
|||
"\n",
|
||||
"With the above model, you have the basics for training a powerful sequence-to-sequence model with attention in a number of distinct domains. The only major changes required are preparing a dataset with pairs input and output sequences and in general the rest of the building blocks will remain the same. Good luck, and have fun!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -788,7 +788,7 @@
|
|||
" output = loaded_model.eval(arguments)\n",
|
||||
"\n",
|
||||
" # return softmax probabilities\n",
|
||||
" sm = softmax(output[0,0])\n",
|
||||
" sm = softmax(output[0][0])\n",
|
||||
" return sm.eval()\n",
|
||||
" except FileNotFoundError:\n",
|
||||
" print(\"Could not open (skipping file): \", image_path)\n",
|
||||
|
|
|
@ -227,7 +227,7 @@ class Value(cntk_py.Value):
|
|||
else:
|
||||
super(Value, self).__init__(ndav)
|
||||
|
||||
def as_sequences(self, variable):
|
||||
def as_sequences(self, variable=None):
|
||||
'''
|
||||
Convert a Value to a sequence of NumPy arrays that have their masked
|
||||
entries removed.
|
||||
|
@ -238,6 +238,9 @@ class Value(cntk_py.Value):
|
|||
returned. Otherwise, the arrays will be returned directly.
|
||||
'''
|
||||
if self.is_sparse():
|
||||
if variable is None:
|
||||
raise ValueError('cannot convert sparse value to sequences '
|
||||
'wihtout the corresponding variable')
|
||||
network = _sparse_to_dense_network_cache(variable.shape)
|
||||
|
||||
warnings.warn('converting Value object to CSR format might be slow')
|
||||
|
|
|
@ -3,13 +3,9 @@
|
|||
# for full license information.
|
||||
# ==============================================================================
|
||||
|
||||
import sys
|
||||
import numbers
|
||||
import collections
|
||||
import copy
|
||||
import numpy as np
|
||||
from numbers import Number
|
||||
from scipy import sparse
|
||||
|
||||
from .. import cntk_py
|
||||
from ..axis import Axis
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# for full license information.
|
||||
# ==============================================================================
|
||||
|
||||
import warnings
|
||||
from .. import cntk_py, Value
|
||||
from ..tensor import ArrayMixin
|
||||
from cntk.internal import typemap
|
||||
|
@ -49,6 +50,25 @@ class MinibatchData(cntk_py.MinibatchData, ArrayMixin):
|
|||
'''
|
||||
return self.data.as_sequences(variable)
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
'''
|
||||
The Value representation of the minibatch.
|
||||
'''
|
||||
return super(MinibatchData, self).data()
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
'''
|
||||
The value of the minibatch as a NumPy array.
|
||||
'''
|
||||
warnings.warn('the .value property is deprecated. Please use '
|
||||
'.asarray() or .as_sequences() to get the NumPy '
|
||||
'representations or .data to get the Value '
|
||||
'representation', RuntimeWarning)
|
||||
|
||||
return self.as_sequences()
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
'''
|
||||
|
|
|
@ -1,43 +1,43 @@
|
|||
:orphan:
|
||||
|
||||
Concepts
|
||||
Concepts
|
||||
========
|
||||
|
||||
There is a common property in key machine learning models, such as deep neural
|
||||
networks (DNNs), convolutional neural networks (CNNs), and recurrent neural
|
||||
networks (DNNs), convolutional neural networks (CNNs), and recurrent neural
|
||||
networks (RNNs). All of these models can be described as *computational networks*.
|
||||
|
||||
The directed edges of these *computational networks* are vectors, matrices, or in
|
||||
general n-dimensional arrays (tensors) which represent input data and model
|
||||
parameters. The vertices are *functions* (also called operations) that are
|
||||
performing a computation on these input tensors.
|
||||
The directed edges of these *computational networks* are vectors, matrices, or in
|
||||
general n-dimensional arrays (tensors) which represent input data and model
|
||||
parameters. The vertices are *functions* (also called operations) that are
|
||||
performing a computation on these input tensors.
|
||||
|
||||
|
||||
Tensors
|
||||
-------
|
||||
|
||||
The underlying data structure in CNTK is that of a *tensor*. It is a
|
||||
multidimensional array on which computations can be performed. Every dimension in
|
||||
these arrays is referred to as an *axis* to distinguish it from the scalar size
|
||||
of every axis. So, a matrix has two *axes* which both have a certain
|
||||
*dimension* corresponding to the number of rows and columns of the *axes*.
|
||||
The underlying data structure in CNTK is that of a *tensor*. It is a
|
||||
multidimensional array on which computations can be performed. Every dimension in
|
||||
these arrays is referred to as an *axis* to distinguish it from the scalar size
|
||||
of every axis. So, a matrix has two *axes* which both have a certain
|
||||
*dimension* corresponding to the number of rows and columns of the *axes*.
|
||||
|
||||
Using tensors makes the framework generic in that it can be used e.g. for
|
||||
classification problems where the inputs are vectors, black-and-white
|
||||
images (input is a matrix of points), color images (includes a separate dimension
|
||||
for r, g, and b) or videos (has an extra time dimension).
|
||||
Using tensors makes the framework generic in that it can be used e.g. for
|
||||
classification problems where the inputs are vectors, black-and-white
|
||||
images (input is a matrix of points), color images (includes a separate dimension
|
||||
for r, g, and b) or videos (has an extra time dimension).
|
||||
|
||||
- Tensors have a *shape* which describes the dimensions of its axes. E.g. a shape ``[2,3,4]``
|
||||
would refer to a tensor with three axes that have, respectively, 2, 3, and 4
|
||||
dimensions.
|
||||
- Tensors have a *shape* which describes the dimensions of their axes. E.g. a shape ``[2,3,4]``
|
||||
would refer to a tensor with three axes that have, respectively, 2, 3, and 4
|
||||
dimensions.
|
||||
|
||||
- CNTK allows for the last axis to be a *dynamic axis*, i.e. an axis whose size
|
||||
might vary between input samples. This allows for easily
|
||||
modelling sequences (for recurrent networks) without needing to introduce masks
|
||||
- CNTK allows for the last axis to be a *dynamic axis*, i.e. an axis whose size
|
||||
might vary between input samples. This allows for easily
|
||||
modelling sequences (for recurrent networks) without needing to introduce masks
|
||||
or padding. See below for a detailed explanation.
|
||||
|
||||
- All data inside of a tensor is of a certain data type. Right now, CNTK
|
||||
implements *float* (32 bit) and *double* (64 bit) precision floating point types,
|
||||
- All data inside of a tensor is of a certain data type. Right now, CNTK
|
||||
implements *float* (32 bit) and *double* (64 bit) precision floating point types,
|
||||
and all tensors in a network have the same type.
|
||||
|
||||
- Tensors come either in *dense* or *sparse* form. Sparse tensors should be used
|
||||
|
@ -45,39 +45,39 @@ for r, g, and b) or videos (has an extra time dimension).
|
|||
tensors, however, the data ingestion of sparse tensors is only supported via
|
||||
the reader framework and not yet through NumPy.
|
||||
|
||||
|
||||
|
||||
Tensors are introduced in CNTK in one of three places:
|
||||
|
||||
- **Inputs**: These represent data inputs to the computation which are usually
|
||||
bound to a data reader. Data inputs are organized as (mini) batches and
|
||||
therefore receive an extra minibatch dimension. In addition, inputs can have a
|
||||
"ragged" axis called "dynamic axis" which is used to model sequential data. See
|
||||
- **Inputs**: These represent data inputs to the computation which are usually
|
||||
bound to a data reader. Data inputs are organized as (mini) batches and
|
||||
therefore receive an extra minibatch dimension. In addition, inputs can have a
|
||||
"ragged" axis called "dynamic axis" which is used to model sequential data. See
|
||||
below for details.
|
||||
|
||||
- **Parameters**: Parameters are weight tensors that make up the bulk of the
|
||||
actual model. Parameters are initialized using a constant (e.g. all 0's,
|
||||
randomly generated data, or initialized from a file) and are updated during
|
||||
- **Parameters**: Parameters are weight tensors that make up the bulk of the
|
||||
actual model. Parameters are initialized using a constant (e.g. all 0's,
|
||||
randomly generated data, or initialized from a file) and are updated during
|
||||
*backpropagation* in a training run.
|
||||
|
||||
- **Constants**: Constants are very similar to parameters, but they are not
|
||||
- **Constants**: Constants are very similar to parameters, but they are not
|
||||
taking part in backpropagation.
|
||||
|
||||
All of these represent the *leaf nodes* in the network, or, in other words, the
|
||||
All of these represent the *leaf nodes* in the network, or, in other words, the
|
||||
input parameters of the function that the network represents.
|
||||
|
||||
To introduce a tensor, simply use one of the methods in the cntk namespace. Once
|
||||
To introduce a tensor, simply use one of the methods in the cntk namespace. Once
|
||||
introduced, overloaded operators can be applied to them to form an operator graph::
|
||||
|
||||
import cntk as C
|
||||
|
||||
# Create an input with the shape (2,3,*)
|
||||
>>> x = C.input_variable((2,3), name='features')
|
||||
>>> x = C.input_variable((2,3), name='features')
|
||||
|
||||
# Create a constant scalar with value 2
|
||||
>>> c = C.constant(value=2)
|
||||
|
||||
# Create a parameter of shape (2,3), randomly initialized
|
||||
>>> w = C.parameter((2,3))
|
||||
>>> w = C.parameter((2,3))
|
||||
|
||||
# Set up some test input data to check the operators.
|
||||
# We specify a full batch having a sequence with one element, which is a
|
||||
|
@ -85,30 +85,31 @@ introduced, overloaded operators can be applied to them to form an operator grap
|
|||
>>> test_input = [[ np.asarray([[10,20,30],[40,50,60]]) ]]
|
||||
|
||||
# Elementwise multiplication operation
|
||||
>>> op = x * c
|
||||
>>> op = x * c
|
||||
|
||||
# Evaluate the op using test_input
|
||||
>>> print(op.eval({ x: test_input }))
|
||||
[[[[ 20. 40. 60.]
|
||||
[ 80. 100. 120.]]]]
|
||||
|
||||
# Same as above (2 will be converted to constant)
|
||||
>>> op2 = x * 2
|
||||
>>> print(op2.eval({ x: test_input }))
|
||||
[[[[ 20. 40. 60.]
|
||||
[ 80. 100. 120.]]]]
|
||||
[array([[[ 20., 40., 60.],
|
||||
[ 80., 100., 120.]]], dtype=float32)]
|
||||
|
||||
# Elementwise multiplication of two 2x3 matrices
|
||||
>>> op3 = x * [[1,2,3], [4,5,6]]
|
||||
# Same as above (2 will be converted to constant)
|
||||
>>> op2 = x * 2
|
||||
>>> print(op2.eval({ x: test_input }))
|
||||
[array([[[ 20., 40., 60.],
|
||||
[ 80., 100., 120.]]], dtype=float32)]
|
||||
|
||||
# Elementwise multiplication of two 2x3 matrices
|
||||
>>> op3 = x * [[1,2,3], [4,5,6]]
|
||||
>>> print(op3.eval({ x: test_input}))
|
||||
[[[[ 10. 40. 90.]
|
||||
[ 160. 250. 360.]]]]
|
||||
[array([[[ 10., 40., 90.],
|
||||
[ 160., 250., 360.]]], dtype=float32)]
|
||||
|
||||
|
||||
Broadcasting
|
||||
~~~~~~~~~~~~
|
||||
|
||||
For operations that require the tensor dimensions of their arguments to match,
|
||||
*broadcasting* is applied automatically whenever a tensor dimension is 1.
|
||||
For operations that require the tensor dimensions of their arguments to match,
|
||||
*broadcasting* is applied automatically whenever a tensor dimension is 1.
|
||||
Examples are elementwise product or plus operations.
|
||||
E.g. the following are equivalent:
|
||||
|
||||
|
@ -117,5 +118,5 @@ E.g. the following are equivalent:
|
|||
|
||||
>>> C.element_times([2,3], [2,2]).eval()
|
||||
array([ 4., 6.], dtype=float32)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -22,24 +22,29 @@ more common case) is as follows:
|
|||
>>> x0 = np.asarray([[2., 1.]], dtype=np.float32)
|
||||
>>> y0 = np.asarray([[4., 6.]], dtype=np.float32)
|
||||
>>> cntk.squared_error(x, y).eval({x:x0, y:y0})
|
||||
array([[ 29.]], dtype=float32)
|
||||
[array([ 29.], dtype=float32)]
|
||||
|
||||
In the above example we are first setting up two input variables with shape ``(1, 2)``. We then set up a ``squared_error`` node with those two variables as
|
||||
inputs. Within the ``eval()`` method we can set up the input mapping of the data for those two variables. In this case we pass in two NumPy arrays.
|
||||
The squared error is then of course ``(2-4)**2 + (1-6)**2 = 29``.
|
||||
|
||||
As the graph nodes implement the NumPy array interface, you can easily access
|
||||
their content and use them in other NumPy operations:
|
||||
Most of the data containers like parameters, constants, values, etc. implement
|
||||
the ``asarray()`` method, which returns a NumPy array.
|
||||
|
||||
>>> import cntk as C
|
||||
>>> c = C.constant(3, shape=(2,3))
|
||||
>>> np.asarray(c)
|
||||
>>> c.asarray()
|
||||
array([[ 3., 3., 3.],
|
||||
[ 3., 3., 3.]], dtype=float32)
|
||||
>>> np.ones_like(c)
|
||||
>>> np.ones_like(c.asarray())
|
||||
array([[ 1., 1., 1.],
|
||||
[ 1., 1., 1.]], dtype=float32)
|
||||
|
||||
For values that have a sequence axis, ``asarray()`` cannot work, since it requires
|
||||
the shape to be rectangular and sequences most of the time have different
|
||||
lengths. In that case, ``as_sequences(var)`` returns a list of NumPy arrays,
|
||||
where every NumPy array has the shape of the static axes of ``var``.
|
||||
|
||||
Overview and first run
|
||||
----------------------
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче