Merge branch 'blis/ops14todo' of https://github.com/Microsoft/CNTK into blis/ops14todo

William Darling 2016-05-04 13:24:32 +02:00
Parent 14f5628950 46254d4b22
Commit f904d224c2
7 changed files with 119 additions and 100 deletions

View file

@@ -109,7 +109,7 @@ def seqcla():
# setup embedding matrix
embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0,
init='fromFile', init_from_file_path=embedding_file)
init_from_file_path=embedding_file)
# get the vector representing the word
sequence = C.times(embedding, features, name='sequence')
@@ -146,7 +146,7 @@ def seqcla():
acc = calc_accuracy(train_file, ctx.output_filename_base)
# and test for the same number...
TOLERANCE_ABSOLUTE = 1E-06
TOLERANCE_ABSOLUTE = 1E-06
assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
"""

View file

@@ -65,7 +65,7 @@ def train_eval_logistic_regression_from_file(criterion_name=None, eval_name=None
def test_logistic_regression_from_file():
result = train_eval_logistic_regression_from_file('crit_node', 'eval_node')
TOLERANCE_ABSOLUTE = 1E-06
TOLERANCE_ABSOLUTE = 1E-02
assert result['SamplesSeen'] == 500
assert np.allclose(result['Perplexity'], 1.5584637, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['crit_node'], 0.4437005, atol=TOLERANCE_ABSOLUTE)

View file

@@ -59,7 +59,7 @@ def train_eval_logistic_regression_with_numpy(criterion_name=None, eval_name=Non
def test_logistic_regression_with_numpy():
result = train_eval_logistic_regression_with_numpy('crit_node', 'eval_node')
TOLERANCE_ABSOLUTE = 1E-06
TOLERANCE_ABSOLUTE = 1E-02
assert result['SamplesSeen'] == 500
assert np.allclose(result['Perplexity'], 1.5575403, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['crit_node'], 0.44310782, atol=TOLERANCE_ABSOLUTE)
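Both regression tests above loosen the absolute tolerance from 1E-06 to 1E-02, so small numerical drift between builds no longer fails them. A quick illustration of how `numpy.allclose` treats the `atol` argument (plain NumPy, independent of CNTK; the drifted value is hypothetical):

    import numpy as np

    expected = 1.5575403          # reference value from the test above
    measured = 1.5581             # hypothetical result that drifted in the 4th decimal

    # fails with the old tight tolerance, passes with the relaxed one
    print(np.allclose(measured, expected, atol=1e-06))   # False
    print(np.allclose(measured, expected, atol=1e-02))   # True
    # note: np.allclose also applies a relative tolerance (rtol=1e-05 by default),
    # which adds rtol*|expected| to the allowed difference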

View file

@@ -4,75 +4,99 @@
# for full license information.
# ==============================================================================
# TODO: re-write the example using the new facade
"""
MNIST Example, one hidden layer neural network
MNIST Example, one hidden layer neural network using training and testing data
converted to the CNTKTextFormatReader format with `uci_to_cntk_text_format_converter.py
<https://github.com/Microsoft/CNTK/blob/master/Source/Readers/CNTKTextFormatReader/uci_to_cntk_text_format_converter.py>`_.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', '..'))
from cntk import *
import numpy as np
import cntk as C
def add_dnn_sigmoid_layer(in_dim, out_dim, x, param_scale):
W = LearnableParameter(out_dim, in_dim, initValueScale=param_scale)
b = LearnableParameter(out_dim, 1, initValueScale=param_scale)
t = Times(W, x)
z = Plus(t, b)
return Sigmoid(z)
W = C.parameter((out_dim, in_dim)) * param_scale
b = C.parameter((out_dim, 1)) * param_scale
t = C.times(W, x)
z = C.plus(t, b)
return C.sigmoid(z)
def add_dnn_layer(in_dim, out_dim, x, param_scale):
W = LearnableParameter(out_dim, in_dim, initValueScale=param_scale)
b = LearnableParameter(out_dim, 1, initValueScale=param_scale)
t = Times(W, x)
return Plus(t, b)
W = C.parameter((out_dim, in_dim)) * param_scale
b = C.parameter((out_dim, 1)) * param_scale
t = C.times(W, x)
return C.plus(t, b)
if (__name__ == "__main__"):
def train_eval_mnist_onelayer_from_file(criterion_name=None, eval_name=None):
# Network definition
feat_dim = 784
label_dim = 10
hidden_dim = 200
cur_dir = os.path.dirname(__file__)
training_filename = os.path.join("Data", "Train-28x28.txt")
test_filename = os.path.join("Data", "Test-28x28.txt")
training_filename = os.path.join(cur_dir, "Data", "Train-28x28_text.txt")
test_filename = os.path.join(cur_dir, "Data", "Test-28x28_text.txt")
features = Input(feat_dim)
features = C.input(feat_dim)
features.name = 'features'
feat_scale = Constant(0.00390625)
feats_scaled = Scale(feat_scale, features)
feat_scale = C.constant(0.00390625)
feats_scaled = C.element_times(features, feat_scale)
labels = Input(label_dim)
labels = C.input(label_dim)
labels.tag = 'label'
labels.name = 'labels'
f_reader = UCIFastReader(training_filename, 1, feat_dim)
l_reader = UCIFastReader(training_filename, 0, 1, label_dim,
os.path.join("Data", "labelsmap.txt"))
f_reader_t = UCIFastReader(test_filename, 1, feat_dim)
l_reader_t = UCIFastReader(test_filename, 0, 1, label_dim,
os.path.join("Data", "labelsmap.txt"))
training_reader = C.CNTKTextFormatReader(training_filename)
test_reader = C.CNTKTextFormatReader(test_filename)
h1 = add_dnn_sigmoid_layer(feat_dim, hidden_dim, feats_scaled, 1)
out = add_dnn_layer(hidden_dim, label_dim, h1, 1)
out.tag = 'output'
ec = CrossEntropyWithSoftmax(labels, out)
ec = C.cross_entropy_with_softmax(labels, out)
ec.name = criterion_name
ec.tag = 'criterion'
eval = C.ops.square_error(labels, out)
eval.name = eval_name
eval.tag = 'eval'
# Specify the training parameters (settings are scaled down)
my_sgd = SGDParams(epoch_size=600, minibatch_size=32,
learning_ratesPerMB=0.1, max_epochs=5, momentum_per_mb=0)
my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
learning_rates_per_mb=0.1, max_epochs=5, momentum_per_mb=0)
# Create a context or re-use if already there
with LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
with C.LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
# CNTK actions
ctx.train(ec, my_sgd, {features: f_reader, labels: l_reader})
ctx.write({features: f_reader_t, labels: l_reader_t})
print(ctx.test({features: f_reader_t, labels: l_reader_t}))
ctx.train(
root_nodes=[ec, eval],
training_params=my_sgd,
input_map=training_reader.map(labels, alias='labels', dim=label_dim).map(features, alias='features', dim=feat_dim))
result = ctx.test(
root_nodes=[ec, eval],
input_map=test_reader.map(labels, alias='labels', dim=label_dim).map(features, alias='features', dim=feat_dim))
return result
def _test_mnist_onelayer_from_file():
result = train_eval_mnist_onelayer_from_file('crit_node', 'eval_node')
TOLERANCE_ABSOLUTE = 1E-06
assert result['SamplesSeen'] == 10000
assert np.allclose(result['Perplexity'], 7.6323031, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['crit_node'], 2.0323896, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['eval_node'], 1.9882504, atol=TOLERANCE_ABSOLUTE)
if __name__ == "__main__":
print(train_eval_mnist_onelayer_from_file('crit_node', 'eval_node'))
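The largest change in this file is the switch from per-column UCIFastReader instances to a single CNTKTextFormatReader whose chained .map() calls bind each network input to a named stream in the text file. A minimal sketch of that mapping pattern, assuming the reader API used above (aliases and dimensions are the ones from this example):

    import cntk as C

    # the two input nodes of the network, as declared in the example above
    features = C.input(784)
    labels = C.input(10)

    reader = C.CNTKTextFormatReader('Train-28x28_text.txt')

    # one .map() call per input: 'alias' names the stream inside the text file,
    # 'dim' is the sample dimension of that stream; the calls chain into one map
    input_map = reader.map(labels, alias='labels', dim=10) \
                      .map(features, alias='features', dim=784)

    # the resulting map is then handed to the execution context, e.g.
    # ctx.train(root_nodes=[ec, eval], training_params=my_sgd, input_map=input_map)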

View file

@@ -665,70 +665,69 @@ def input(shape, dynamic_axis='', name=None):
return Input(shape, dynamicAxis=dynamic_axis, name=name)
def parameter(shape=None, value=0, learning_rate_multiplier=1.0, init='uniform',
init_value_scale=1, init_from_file_path='', init_from_literal=None,
random_seed=-1, name=None):
def parameter(shape=None, value=None, learning_rate_multiplier=1.0,
init_from_file_path=None, name=None):
"""
It creates a parameter tensor.
Args:
shape (tuple or int): the shape of the input tensor. If `init='fromLiteral'`, shape is not needed as it will be inferred from the literal.
value: a scalar initial value that would be replicated for every element in the tensor
learning_rate_multiplier (float):
init (str): 'uniform', 'fromFile' or 'fromLiteral'
init_value_scale (float): a scaling factor for the initial value
init_from_file_path (str): the file that contains the initial tensor value
init_from_literal (ndarray): the numpy array used to initialize the tensor parameter
random_seed (float): the seed used for initialization
shape (tuple or int, optional): the shape of the input tensor. If not provided, it will be inferred from ``value``.
value (scalar or NumPy array, optional): a scalar initial value that is replicated for every element in the tensor, or a NumPy array providing the full initial tensor. If ``None``, the tensor is initialized with uniform random values.
learning_rate_multiplier (float): set to control the learning rate on this particular node
init_from_file_path (str): the file that contains the initial tensor value. Used only if ``value=None``.
name (str, optional): the name of the node in the network
Returns:
:class:`cntk.graph.ComputationNode`
"""
from . import cntk1
# if the parameter is initialized from a literal value
if (init == 'fromLiteral'):
"""
To be as generic as possible, we
- flatten the data
- initialize a ParameterTensor operator with it
- ensure that the graph does not backprop to it.
- finally, we reshape it.
"""
if value is None:
if shape is None:
raise ValueError('you need to specify at least shape or value')
value = init_from_literal
if init_from_file_path:
return cntk1.ParameterTensor(shape, init='fromFile',
learningRateMultiplier=learning_rate_multiplier,
initFromFilePath=init_from_file_path, name=name)
else:
return cntk1.ParameterTensor(shape,
learningRateMultiplier=learning_rate_multiplier,
name=name)
"""
To be as generic as possible, we
- flatten the data
- initialize a ParameterTensor operator with it
- ensure that the graph does not backprop to it.
- finally, we reshape it.
"""
from .. import utils
if not (np.isscalar(value) or utils.is_tensor(value)):
raise ValueError('value type is not supported: %s' % type(value))
from .. import utils
if not (np.isscalar(value) or utils.is_tensor(value)):
raise ValueError('value type is not supported: %s' % type(value))
if isinstance(value, list) or np.isscalar(value):
value = np.asarray(value)
if isinstance(value, list) or np.isscalar(value):
value = np.asarray(value)
import scipy.sparse
if scipy.sparse.issparse(value):
raise ValueError('only dense data is supported')
import scipy.sparse
if scipy.sparse.issparse(value):
raise ValueError('only dense data is supported')
param_shape = value.shape if value.shape else (1,)
literal_shape = (param_shape[0], np.multiply.reduce(param_shape[1:]))
param_shape = value.shape if value.shape else (1,)
literal_shape = (param_shape[0], np.multiply.reduce(param_shape[1:]))
literal_array = np.reshape(value, literal_shape)
literal_array = np.reshape(value, literal_shape)
from io import BytesIO
s = BytesIO()
np.savetxt(s, literal_array, '%.4f')
from io import BytesIO
s = BytesIO()
np.savetxt(s, literal_array, '%.4f')
return cntk1.ParameterTensor(
dims=param_shape,
learningRateMultiplier=learning_rate_multiplier,
init='fromLiteral',
initFromLiteral=s.getvalue().decode())
else:
return cntk1.ParameterTensor(shape, learning_rate_multiplier, init,
init_value_scale, value, init_from_file_path,
randomSeed=random_seed, name=name)
return cntk1.ParameterTensor(
dims=param_shape,
learningRateMultiplier=learning_rate_multiplier,
init='fromLiteral',
initFromLiteral=s.getvalue().decode())
def constant(value, name=None):
@@ -742,8 +741,7 @@ def constant(value, name=None):
:class:`cntk.graph.ComputationNode`
"""
return parameter(name=name, init='fromLiteral', init_from_literal=value,
learning_rate_multiplier=0.0)
return parameter(value=value, learning_rate_multiplier=0.0, name=name)
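To make the new dispatch concrete, here is a hedged usage sketch of the ways `parameter()` (and `constant()`, which now delegates to it) can be called under the signature introduced above; the shapes and the file name are made up for illustration:

    import numpy as np
    import cntk as C

    # 1. shape only: no value given, so CNTK initializes the tensor itself
    W = C.parameter(shape=(256, 784))

    # 2. shape plus a file path: the initial tensor is read from disk
    #    ('weights.txt' is a hypothetical file name)
    E = C.parameter(shape=(256, 784), init_from_file_path='weights.txt',
                    learning_rate_multiplier=0.0)

    # 3. a literal value: a scalar or NumPy array is flattened and embedded
    #    into the graph via init='fromLiteral'
    b = C.parameter(value=np.zeros((256, 1)))

    # constant() is now just a frozen parameter built from a literal value
    half = C.constant(0.5)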
def dynamic_axis(name=None):

View file

@@ -1,5 +1,4 @@
# Copyright (c) Microsoft. All rights reserved.
#Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
@@ -27,8 +26,7 @@ class SGDParams:
* **momentum_per_sample**: momentum per sample. Useful when you want to keep the momentum per sample constant, i.e., automatically scales the effective momentum for the minibatch when the minibatch size is changed. Can use syntax such as 0.9996*10:0.998, which means using the per-sample momentum 0.9996 for 10 epochs and then 0.998 for the rest. momentumPerSample may be missing, for example, when momentumPerMB is provided.
* **momentum_as_time_constant**: number of samples after which the contribution is decayed to e^-1
* **auto_adjust parameters**: they represent information related to the automatic learning rate control.
* **auto_adjust_lr**: the automatic learning rate adjustment algorithm to use. Valid values are None (default, don’t auto adjust learning rate), AdjustAfterEpoch (check the training criterion after each epoch using the development set of the training set and decide whether to adjust the learning rate), and SearchBeforeEpoch (search the learning rate based on a small portion of the training set before each epoch starts).
* **auto_adjust_lr**: the automatic learning rate adjustment algorithm to use. Valid values are None (default, don't auto adjust learning rate), AdjustAfterEpoch (check the training criterion after each epoch using the development set of the training set and decide whether to adjust the learning rate), and SearchBeforeEpoch (search the learning rate based on a small portion of the training set before each epoch starts).
* When used in the AdjustAfterEpoch mode
* **reduce_learn_rate_if_improve_less_than**: reduce the learning rate if the improvement is less than this value. Default is 0.
* **learn_rate_decrease_factor**: the learning rate decrease factor. Default value is 0.618.
@@ -38,12 +36,12 @@ class SGDParams:
* **learn_rate_adjust_interval**: determine the frequency of applying the learning rate adjustment check. Default is 1 epoch. If this value is set to a value larger than 1 the learning rate adjustment will be based on the average criterion computed from the last learnRateAdjustInterval epochs.
* When used in the SearchBeforeEpoch mode.
* **numMiniBatch4LRSearch**: the number of minibatches used to search the learning rate. Default value is 500. It’s typically set to 10-20% of the total minibatches in an epoch.
* **numMiniBatch4LRSearch**: the number of minibatches used to search the learning rate. Default value is 500. It's typically set to 10-20% of the total minibatches in an epoch.
* **num_prev_learn_rate**: number of previous learning rates used as a hint to the search range. Default value is 5.
* **num_best_search_epoch**: number of epochs in which we use the best learning rate instead of the sufficient learning rate. Default value is 1.
* When used in the 'AdaptiveMinibatchSizing' mode
* **num_minibatch_for_lr_search**: the number of minibatches used to search the minibatch size when in adaptive minibatch size mode. Default value is 500. It’s typically set to 10-20% of the total minibatches in an epoch; this is shared with the search for learning rate in SearchBeforeEpoch mode.
* **num_minibatch_for_lr_search**: the number of minibatches used to search the minibatch size when in adaptive minibatch size mode. Default value is 500. It's typically set to 10-20% of the total minibatches in an epoch; this is shared with the search for learning rate in SearchBeforeEpoch mode.
* **auto_adjust_minibatch**: enable or disable adaptive adjustment of the minibatch size. Default value is false. Adaptive minibatch sizing begins after the epochs for which the user explicitly specified minibatch sizes are complete. For example, if the user specified minibatchSize=256:1024, then 256 and 1024 are used in the first 2 epochs and adaptive minibatch sizing is used afterwards.
* **minibatch_size_tuning_frequency**: The number of epochs to skip, on a periodic basis, before dynamically adjusting the minibatch size. Default value is 1.
* **minibatch_size_tuning_max**: The maximum size allowed for an adaptively adjusted minibatch size. Default value is 1048576.
@@ -85,9 +83,7 @@ class SGDParams:
* Gradient Check
* **gradient_check**: determines whether to use the gradient checker. The default value is false. When using the gradient checker you need to use a minibatch size that is larger than the sequence length for RNNs due to the truncated backpropagation through time (BPTT) algorithm used to train RNNs, and a smaller learning rate to prevent numerical issues caused by divergence. In addition, precision should be set to double.
"""
def __init__(self,
model_path=None,
train_criterion_node_name=None,
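As a usage illustration of the options documented above, the scaled-down configuration used by the MNIST example earlier in this commit is built like this (values are illustrative; see the docstring for the full option list):

    import cntk as C

    my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
                         learning_rates_per_mb=0.1, max_epochs=5,
                         momentum_per_mb=0)
    # learning-rate auto adjustment could be requested through the auto_adjust
    # options described above, e.g. auto_adjust_lr='AdjustAfterEpoch'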

View file

@@ -19,9 +19,10 @@ def eval(node):
# [array([[[-150., 200.], [5., 10.]]])]
Args:
node (cntk.graph.ComputationNode): the node to evaluate
node (:class:`cntk.graph.ComputationNode`): the node to evaluate
Returns:
numpy array containing the result
NumPy array containing the result
"""
from cntk.context import get_context
@@ -31,15 +32,15 @@ def eval(node):
ctx = get_context()
first = True
# the params are passed as arrays e.g. plus([1,2], [3,4]), we need to
# wrap them with input and parameter nodes
# The params are passed as arrays, e.g. plus([1,2], [3,4]), and we need to
# wrap them with input and parameter nodes.
if node.params:
for p in node.params:
if p in node.inputs:
val = getattr(node, p)
# one param needs to be an Input() node. This is being fixed in
#CNTK we will remove this workaround once we can evaluate a
#network with no inputs
# One param needs to be an Input() node. This will be fixed in
# CNTK soon, so that we can remove this workaround and evaluate a
# network with no inputs.
if first:
if not isinstance(val, list):
# inputs have the outermost dimension for sequences
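Finally, a hedged sketch of how the `eval()` helper above is typically used for one-off evaluation of a small expression, mirroring the docstring example at the top of this hunk (assuming `eval` is re-exported at package level alongside the other ops, as `C.eval`):

    import cntk as C

    # plus() receives plain Python lists; eval() wraps them in input and
    # parameter nodes behind the scenes, as the comments above describe
    result = C.eval(C.plus([1, 2], [3, 4]))
    print(result)   # roughly [array([[ 4.,  6.]])]; the exact shape depends on
                    # how the sequence/batch axes are reported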