Willi Richert 2016-11-10 17:01:06 +01:00
Parent 3a5403d6fc
Commit b326a9af94
5 changed files with 155 additions and 128 deletions

View file

@ -119,7 +119,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
the next minibatch. Must be > 0.
minibatch_size_in_sequences (`int`, defaults to `None`): number of
sequences to retrieve for the next minibatch. Must be > 0.
input_map (`dict`): mapping of :class:`cntk.ops.variables.Variable`
input_map (`dict`): mapping of :class:`~cntk.ops.variables.Variable`
to :class:`StreamInformation` which will be used to convert the
returned data.
device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
@ -127,7 +127,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
Returns:
A mapping of :class:`StreamInformation` to :class:`MinibatchData` if
``input_map`` was not specified. Otherwise, the returned value will
be a mapping of :class:`cntk.ops.variables.Variable` to :class:`MinibatchData`.
be a mapping of :class:`~cntk.ops.variables.Variable` to :class:`MinibatchData`.
'''
if device is None:
device = use_default_device()
@ -161,7 +161,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
Gets the checkpoint state of the MinibatchSource.
Returns:
:class:`cntk_py.Dictionary`
:class:`~cntk_py.Dictionary`
'''
return super(MinibatchSource, self).get_checkpoint_state()
@ -170,7 +170,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
Restores the MinibatchSource state from the specified checkpoint.
Args:
checkpoint (:class:`cntk_py.Dictionary`): checkpoint to restore from
checkpoint (:class:`~cntk_py.Dictionary`): checkpoint to restore from
'''
super(MinibatchSource, self).restore_from_checkpoint(checkpoint)
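A minimal sketch of how the two checkpoint calls above fit together; `mb_source` is assumed to be an existing MinibatchSource (for example, one built with text_format_minibatch_source):

# mb_source is an assumed, already-constructed MinibatchSource
state = mb_source.get_checkpoint_state()      # snapshot the reader position
# ... train for a while, or restart the process ...
mb_source.restore_from_checkpoint(state)      # resume reading from the snapshot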
@ -181,7 +181,7 @@ def _py_dict_to_cntk_dict(py_dict):
Args:
py_dict (`dict`): a dictionary to be converted.
Returns:
:class:`cntk_py.Dictionary`
:class:`~cntk_py.Dictionary`
'''
res = cntk_py.Dictionary()
for k, v in py_dict.items():
@ -244,10 +244,10 @@ class ReaderConfig(dict):
'''
Creates an instance of :class:`MinibatchSource` from this
instance, which can be used to feed data into the `eval()` methods of
the graph nodes or the `train_minibatch()` of :class:`cntk.trainer.Trainer`.
the graph nodes or the `train_minibatch()` of :class:`~cntk.trainer.Trainer`.
Args:
distributed_communicator (:class:`cntk.distributed.communicator`): distributed communicator
distributed_communicator (:class:`~cntk.distributed.communicator`): distributed communicator
Returns:
instance of :class:`MinibatchSource`
@ -481,7 +481,7 @@ def text_format_minibatch_source(path, stream_configs, epoch_size=INFINITELY_REP
epoch_size (`int`, optional): size of an epoch. In case of 0, the size
of the training set will be used. The default is the maximum 64-bit value.
randomize (`bool`, optional): whether to randomize the contents of data file.
distributed_communicator (:class:`cntk.distributed.communicator`): optional distributed communicator
distributed_communicator (:class:`~cntk.distributed.communicator`): optional distributed communicator
Returns:
:class:`MinibatchSource`
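A hedged sketch of the usage pattern this function supports; StreamConfiguration, the stream names, and the dimensions `input_dim` and `num_output_classes` are assumptions for illustration:

from cntk.io import text_format_minibatch_source, StreamConfiguration

mb_source = text_format_minibatch_source('train.ctf', [
    StreamConfiguration('features', input_dim),       # assumed stream name and dim
    StreamConfiguration('labels', num_output_classes)])
features_si = mb_source['features']   # StreamInformation for each stream
labels_si = mb_source['labels']
mb = mb_source.next_minibatch(64)     # dict of StreamInformation -> MinibatchData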

View file

@ -63,7 +63,7 @@ class Learner(cntk_py.Learner):
Update the parameters associated with this learner.
Args:
gradient_values (`dict`): maps :class:`cntk.variables.Parameter` to
gradient_values (`dict`): maps :class:`~cntk.variables.Parameter` to
a NumPy array containing the first order gradient values for the
Parameter w.r.t. the training objective.
training_sample_count (`int`): training sample count
@ -102,9 +102,9 @@ class Learner(cntk_py.Learner):
The learning rate.
Args:
minibatch_size (`int`): minibatch size used to re-scale
minibatch_size (``int``): minibatch size used to re-scale
the learning rate to the per-sample value (in case the schedule
was built with unit=UnitType.minibatch).
was built with ``unit=UnitType.minibatch``).
'''
return super(Learner, self).learning_rate(minibatch_size)
@ -132,16 +132,16 @@ def training_parameter_schedule(schedule, epoch_size=1, unit=UnitType.sample):
(0.1, 0.1, 0.01, 0.01, 0.001, 0.001)
Args:
schedule (`float` or `list`): if a `float`, it is the parameter value to be used
schedule (``float`` or ``list``): if a ``float``, it is the parameter value to be used
for all samples. In case of a list, the elements are used as the
values for ``epoch_size`` samples each. If the list contains a pair, the second element is
used as the value for (``epoch_size`` x first element) samples
epoch_size (`int`): number of samples as a scheduling unit. Parameters in
the schedule change their values every 'epoch_size' samples.
unit (:class:`cntk.ops.functions.UnitType`): one of two
the schedule change their values every ``epoch_size`` samples.
unit (:class:`UnitType`): one of two
* 'sample': the returned schedule contains per-sample values (default)
* 'minibatch': the returned schedule contains per-minibatch values.
* ``sample``: the returned schedule contains per-sample values (default)
* ``minibatch``: the returned schedule contains per-minibatch values.
Returns:
training parameter schedule
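A short sketch of the list semantics described above; the first call reproduces the doctest values (0.1, 0.1, 0.01, 0.01, 0.001, 0.001), the pair form follows the description of (count, value) pairs:

s = training_parameter_schedule([0.1, 0.01, 0.001], epoch_size=2)
# 0.1 for the first 2 samples, 0.01 for the next 2, then 0.001 from there on
s = training_parameter_schedule([0.1, (2, 0.01)], epoch_size=100)
# 0.1 for 100 samples, then 0.01 for the next 2 x 100 samples (and onwards)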
@ -176,11 +176,11 @@ def learning_rate_schedule(lr, epoch_size=1, unit=UnitType.sample):
:func:`training_parameter_schedule`).
Args:
lr (`float` or `list`): see parameter ``schedule`` in
lr (``float`` or ``list``): see parameter ``schedule`` in
:func:`training_parameter_schedule`.
epoch_size (`int`): see parameter ``epoch_size`` in
epoch_size (``int``): see parameter ``epoch_size`` in
:func:`training_parameter_schedule`.
unit (:class:`cntk.ops.functions.UnitType`): see parameter
unit (:class:`UnitType`): see parameter
``unit`` in :func:`training_parameter_schedule`.
Returns:
@ -195,11 +195,11 @@ def momentum_schedule(momentum, epoch_size=1, unit=UnitType.sample):
:func:`training_parameter_schedule`).
Args:
momentum (`float` or `list`): see parameter ``schedule`` in
momentum (``float`` or ``list``): see parameter ``schedule`` in
:func:`training_parameter_schedule`.
epoch_size (`int`): see parameter ``epoch_size`` in
epoch_size (``int``): see parameter ``epoch_size`` in
:func:`training_parameter_schedule`.
unit (:class:`cntk.ops.functions.UnitType`): see parameter
unit (:class:`UnitType`): see parameter
``unit`` in :func:`training_parameter_schedule`.
If you want to provide momentum values in a sample/minibatch
@ -223,11 +223,11 @@ def momentum_schedule(momentum, epoch_size=1, unit=UnitType.sample):
(0.99, 0.99, 0.88, 0.88, 0.77)
Args:
momentum (`float` or `list`): see parameter ``schedule`` in
momentum (``float`` or ``list``): see parameter ``schedule`` in
:func:`training_parameter_schedule`.
epoch_size (`int`): see parameter ``epoch_size`` in
epoch_size (``int``): see parameter ``epoch_size`` in
:func:`training_parameter_schedule`.
unit (:class:`cntk.ops.functions.UnitType`): see parameter
unit (:class:`UnitType`): see parameter
``unit`` in :func:`training_parameter_schedule`.
Returns:
@ -242,11 +242,11 @@ def momentum_as_time_constant_schedule(momentum, epoch_size=1):
semantics as :func:`training_parameter_schedule`).
Args:
momentum (`float` or `list`): see parameter ``schedule`` in
momentum (``float`` or ``list``): see parameter ``schedule`` in
:func:`training_parameter_schedule`.
epoch_size (`int`): see parameter ``epoch_size`` in
epoch_size (``int``): see parameter ``epoch_size`` in
:func:`training_parameter_schedule`.
unit (:class:`cntk.ops.functions.UnitType`): see parameter
unit (:class:`UnitType`): see parameter
``unit`` in :func:`training_parameter_schedule`.
CNTK specifies momentum in a minibatch-size agnostic way as the time
@ -267,9 +267,9 @@ def momentum_as_time_constant_schedule(momentum, epoch_size=1):
>>> m = momentum_as_time_constant_schedule([1100, 1500], 1000)
Args:
momentum (`float` or `list`): see parameter ``schedule`` in
momentum (``float`` or ``list``): see parameter ``schedule`` in
:func:`training_parameter_schedule`.
epoch_size (`int`): see parameter ``epoch_size`` in
epoch_size (``int``): see parameter ``epoch_size`` in
:func:`training_parameter_schedule`.
Returns:
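For orientation, under the usual convention that a contribution decays to 1/e after the stated number of samples, a time constant t corresponds to a per-sample momentum of exp(-1/t); this relation is an interpretation, not stated verbatim above:

import math
t = 1100
per_sample_momentum = math.exp(-1.0 / t)    # ~0.99909, assuming the 1/e convention
m = momentum_as_time_constant_schedule(t)   # the same value expressed as a schedule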
@ -295,27 +295,33 @@ def sgd(parameters, lr,
gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
gradient_clipping_with_truncation=True):
'''
Creates an SGD learner instance to learn the parameters.
Creates an SGD learner instance to learn the parameters. See [1] for more
information on how to set the parameters.
Args:
parameters (`list` of parameters): list of network parameters to tune.
These can be obtained by the '.parameters()' method of the root
operator.
lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
lr (``float``, ``list`` or output of :func:`learning_rate_schedule`): learning rate
schedule. When the argument value is a `float` or a `list`, lr is
converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
defaults to 0.0
l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
defaults to 0.0
gaussian_noise_injection_std_dev ('float', optional): the standard deviation
gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
of the Gaussian noise added to parameters post update, defaults to 0.0
gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
per sample, defaults to infinity
gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
gradient_clipping_with_truncation (``bool``, default ``True``): gradient clipping
Returns:
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
See also:
[1] L. Bottou. `Stochastic Gradient Descent Tricks
<http://research.microsoft.com/pubs/192769/tricks-2012.pdf>`_. Neural
Networks: Tricks of the Trade: Springer, 2012.
'''
lr = learning_rate_schedule(lr)
gaussian_noise_injection_std_dev = training_parameter_schedule(gaussian_noise_injection_std_dev)
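A minimal sketch of passing an explicit learning-rate schedule to sgd(); `z` is an assumed network root node, as in the MNIST sample further below:

lr_schedule = learning_rate_schedule([0.01, 0.001], epoch_size=10000)  # drop lr after 10000 samples
learner = sgd(z.parameters, lr=lr_schedule)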
@ -340,26 +346,25 @@ def momentum_sgd(parameters, lr, momentum,
Args:
parameters (list of parameters): list of network parameters to tune.
These can be obtained by the root operator's ``parameters``.
lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
lr (``float``, ``list`` or output of :func:`learning_rate_schedule`): learning rate
schedule. When the argument value is a `float` or a `list`, lr is
converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
momentum (`float`, `list` or output of `:func:momentum_schedule` or
`:func:momentum_as_time_constant_schedule`): momentum schedule. When the argument
value is a `float` or a `list`, momentum is converted to a per-sample schedule by
invoking `:func:momentum_schedule`. Refer to
https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits
l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
momentum (``float``, ``list`` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): momentum schedule. When the argument
value is a ``float`` or a ``list``, momentum is converted to a per-sample schedule by
invoking :func:`momentum_schedule`. Refer to the `wiki
<https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits>`_.
l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
defaults to 0.0
l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
defaults to 0.0
gaussian_noise_injection_std_dev ('float', optional): the standard deviation
gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
of the Gaussian noise added to parameters post update, defaults to 0.0
gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
per sample, defaults to infinity
gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
gradient_clipping_with_truncation (``bool``, default ``True``): gradient clipping
Returns:
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
'''
lr = learning_rate_schedule(lr)
momentum = momentum_schedule(momentum)
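A sketch combining a per-sample learning rate with a time-constant momentum schedule; `z` is again an assumed network root node:

learner = momentum_sgd(z.parameters,
                       lr=learning_rate_schedule(0.01),
                       momentum=momentum_as_time_constant_schedule(1100))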
@ -381,31 +386,42 @@ def nesterov(parameters, lr, momentum,
gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
gradient_clipping_with_truncation=True):
'''
Creates a Nesterov SGD learner instance to learn the parameters.
Creates a Nesterov SGD learner instance to learn the parameters. This was
originally proposed by Nesterov [1] in 1983 and then proved to work well in
a deep learning context by Sutskever, et al. [2].
Args:
parameters (list of parameters): list of network parameters to tune.
These can be obtained by the root operator's ``parameters``.
lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
schedule. When the argument value is a `float` or a `list`, lr is
converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
momentum (`float`, `list` or output of `:func:momentum_schedule` or
`:func:momentum_as_time_constant_schedule`): momentum schedule. When the argument
value is a `float` or a `list`, momentum is converted to a per-sample schedule by
invoking `:func:momentum_schedule`. Refer to
https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits
l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
lr (``float``, ``list`` or output of :func:`learning_rate_schedule`): learning rate
schedule. When the argument value is a ``float`` or a ``list``, lr is
converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
momentum (``float``, ``list`` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): momentum schedule. When the argument
value is a ``float`` or a ``list``, momentum is converted to a per-sample schedule by
invoking :func:`momentum_schedule`. Refer to the `wiki
<https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits>`_.
l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
defaults to 0.0
l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
defaults to 0.0
gaussian_noise_injection_std_dev ('float', optional): the standard deviation
gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
of the Gaussian noise added to parameters post update, defaults to 0.0
gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
per sample, defaults to infinity
gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
gradient_clipping_with_truncation (``bool``, default ``True``): gradient clipping
Returns:
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
Instance of a :class:`~cntk.learner.Learner` that can be passed to the
:class:`~cntk.trainer.Trainer`.
See also:
[1] Y. Nesterov. A Method of Solving a Convex Programming Problem with Convergence Rate O(1/ sqrt(k)). Soviet Mathematics Doklady, 1983.
[2] I. Sutskever, J. Martens, G. Dahl, and G. Hinton. `On the
Importance of Initialization and Momentum in Deep Learning
<http://www.cs.toronto.edu/~fritz/absps/momentum.pdf>`_. Proceedings
of the 30th International Conference on Machine Learning, 2013.
'''
lr = learning_rate_schedule(lr)
momentum = momentum_schedule(momentum)
@ -427,27 +443,34 @@ def adagrad(parameters, lr, need_ave_multiplier=True,
gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
gradient_clipping_with_truncation=True):
'''
Creates an AdaGrad learner instance to learn the parameters.
Creates an AdaGrad learner instance to learn the parameters. See [1] for
more information.
Args:
parameters (list of parameters): list of network parameters to tune.
These can be obtained by the root operator's ``parameters``.
lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
lr (``float``, `list` or output of :func:`learning_rate_schedule`): learning rate
schedule. When the argument value is a `float` or a `list`, lr is
converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
need_ave_multiplier ('bool', default):
l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
need_ave_multiplier (``bool``, default ``True``):
l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
defaults to 0.0
l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
defaults to 0.0
gaussian_noise_injection_std_dev ('float', optional): the standard deviation
gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
of the Gaussian noise added to parameters post update, defaults to 0.0
gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
per sample, defaults to infinity
gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
gradient_clipping_with_truncation (``bool``, default `True`): gradient clipping
Returns:
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
See also:
[1] J. Duchi, E. Hazan, and Y. Singer. `Adaptive Subgradient Methods
for Online Learning and Stochastic Optimization
<http://www.magicbroom.info/Papers/DuchiHaSi10.pdf>`_. The Journal of
Machine Learning Research, 2011.
'''
lr = learning_rate_schedule(lr)
gaussian_noise_injection_std_dev = training_parameter_schedule(gaussian_noise_injection_std_dev)
@ -471,35 +494,39 @@ def adam_sgd(parameters, lr, momentum,
gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
gradient_clipping_with_truncation=True):
'''
Creates an Adam learner instance to learn the parameters.
Creates an Adam learner instance to learn the parameters. See [1] for more
information.
Args:
parameters (list of parameters): list of network parameters to tune.
These can be obtained by the root operator's ``parameters``.
lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
lr (``float``, `list` or output of :func:`learning_rate_schedule`): learning rate
schedule. When the argument value is a `float` or a `list`, lr is
converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
momentum (`float`, `list` or output of `:func:momentum_schedule` or
`:func:momentum_as_time_constant_schedule`): momentum schedule. When the argument
converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
momentum (`float`, `list` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): momentum schedule. When the argument
value is a `float` or a `list`, momentum is converted to a per-sample schedule by
invoking `:func:momentum_schedule`. Refer to
https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits
variance_momentum (`float`, `list` or output of `:func:momentum_schedule` or
`:func:momentum_as_time_constant_schedule`): variance momentum schedule. When the argument
invoking :func:`momentum_schedule`. Refer to the `wiki
<https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits>`_.
variance_momentum (`float`, `list` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): variance momentum schedule. When the argument
value is a `float` or a `list`, variance momentum is converted to a per-sample schedule by
invoking `:func:momentum_schedule`. Defaults to momentum_as_time_constant_schedule(720000).
l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
invoking :func:`momentum_schedule`. Defaults to momentum_as_time_constant_schedule(720000).
l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
defaults to 0.0
l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
defaults to 0.0
gaussian_noise_injection_std_dev ('float', optional): the standard deviation
gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
of the Gaussian noise added to parameters post update, defaults to 0.0
gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
per sample, defaults to infinity
gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
gradient_clipping_with_truncation (``bool``, default `True`): gradient clipping
Returns:
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
See also:
[1] D. Kingma, J. Ba. `Adam: A Method for Stochastic Optimization
<http://arxiv.org/abs/1412.6980>`_. International Conference for
Learning Representations, 2015.
'''
if not low_memory:
raise NotImplementedError('adam: low_memory=True currently required')
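A sketch of adam_sgd with an explicit first-moment momentum, leaving variance_momentum at its documented default of momentum_as_time_constant_schedule(720000); `z` is an assumed network root node:

learner = adam_sgd(z.parameters,
                   lr=learning_rate_schedule(0.001),
                   momentum=momentum_schedule(0.9))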
@ -532,27 +559,27 @@ def rmsprop(parameters, lr,
Args:
parameters (list of parameters): list of network parameters to tune.
These can be obtained by the root operator's ``parameters``.
lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
lr (``float``, `list` or output of :func:`learning_rate_schedule`): learning rate
schedule. When the argument value is a `float` or a `list`, lr is
converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
gamma ('float'):
inc ('float'):
dec ('float'):
max ('float'):
min ('float'):
need_ave_multiplier ('bool', default):
l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
gamma (``float``):
inc (``float``):
dec (``float``):
max (``float``):
min (``float``):
need_ave_multiplier (``bool``, default):
l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
defaults to 0.0
l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
defaults to 0.0
gaussian_noise_injection_std_dev ('float', optional): the standard deviation
gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
of the Gaussian noise added to parameters post update, defaults to 0.0
gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
per sample, defaults to infinity
gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
gradient_clipping_with_truncation (``bool``, default `True`): gradient clipping
Returns:
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
'''
lr = learning_rate_schedule(lr)
gaussian_noise_injection_std_dev = training_parameter_schedule(gaussian_noise_injection_std_dev)

View file

@ -5,7 +5,7 @@ from enum import Enum, unique
@unique
class CloneMethod(Enum):
'''
Describes the different ways in which :class:`cntk.ops.functions.Function.forward`
Describes the different ways in which :class:`~cntk.ops.functions.Function.forward`
works.
'''
@ -123,7 +123,7 @@ class Function(cntk_py.Function):
substitutions requested are applied in the cloned Function instance.
Args:
method (:class:`cntk.ops.functions.CloneMethod`): one of
method (:class:`CloneMethod`): one of
* 'clone': the returned function gets its own copy of parameters (default)
* 'share': the returned function shares its parameters with this function
@ -133,7 +133,7 @@ class Function(cntk_py.Function):
function to variables in the cloned function
Returns:
:class:`Function`: the cloned Function
:class:`~cntk.ops.functions.Function`: the cloned Function
'''
if not isinstance(method, CloneMethod):
raise ValueError('clone method "%s" is not supported' %
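A sketch of cloning with shared parameters while substituting a placeholder; `model`, `ph` and `new_input` are assumed names, and the keyword name `substitutions` is an assumption based on the docstring above:

shared = model.clone(CloneMethod.share, substitutions={ph: new_input})  # parameters shared
copied = model.clone(CloneMethod.clone)                                 # independent parameter copies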
@ -149,7 +149,7 @@ class Function(cntk_py.Function):
@typemap
def constants(self):
'''
List of all `Constant` variables of this :class:`Function`
List of all `Constant` variables of this :class:`~cntk.ops.functions.Function`
'''
return super(Function, self).constants()
@ -171,8 +171,8 @@ class Function(cntk_py.Function):
be used as a list of bools, denoting whether a sequence is a new
one (`True`) or a continuation of the previous one (`False`).
Data should be either NumPy arrays or a
:class:`cntk.io.MinibatchData` instance.
device (:class:`cntk.device.DeviceDescriptor`): the device descriptor that
:class:`~cntk.io.MinibatchData` instance.
device (:class:`~cntk.device.DeviceDescriptor`): the device descriptor that
contains the type and id of the device on which the computation is
to be performed.
@ -222,14 +222,14 @@ class Function(cntk_py.Function):
be used as a list of bools, denoting whether a sequence is a new
one (`True`) or a continuation of the previous one (`False`).
Data should be either NumPy arrays or a
:class:`cntk.io.MinibatchData` instance.
:class:`~cntk.io.MinibatchData` instance.
outputs (iterable): outputs to fetch values for.
keep_for_backward (`set`, default `None`): the subset of the
Function's output variables for which gradients shall be calculated
in a subsequent backward call. If `None`, the returned state will
be `None` and a subsequent call to :func:`backward` will not be
possible.
device (:class:`cntk.device.DeviceDescriptor`, default `None`): the device
device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the device
descriptor that contains the type and id of the device on which the
computation is. If `None`, the default device is used.
@ -371,7 +371,7 @@ class Function(cntk_py.Function):
specified substitution.
Args:
substitution (:class:`cntk.ops.variables.Variable`): the variable
substitution (:class:`~cntk.ops.variables.Variable`): the variable
that will replace the placeholder
Returns:

View file

@ -23,11 +23,11 @@ class Trainer(cntk_py.Trainer):
using computed gradients.
Args:
model (:class:`cntk.ops.functions.Function`): root node of the function to train
loss_function (:class:`cntk.ops.functions.Function`): loss function
eval_function (:class:`cntk.ops.functions.Function`): evaluation function
model (:class:`~cntk.ops.functions.Function`): root node of the function to train
loss_function (:class:`~cntk.ops.functions.Function`): loss function
eval_function (:class:`~cntk.ops.functions.Function`): evaluation function
parameter_learners (`list`): list of learners from :mod:`cntk.learner`
distributed_trainer (:class:`cntk.distributed.distributed_trainer`): distributed trainer
distributed_trainer (:class:`~cntk.distributed.distributed_trainer`): distributed trainer
'''
def __init__(self, model, loss_function, eval_function, parameter_learners, distributed_trainer=None):
# TODO sanitizing should be removed once Swig's typemaps are in place
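A compact sketch of constructing a Trainer and driving it with reader data; `z`, `ce`, `pe`, `mb_source`, the stream infos and the `input`/`label` variables are assumed, mirroring the MNIST sample below:

trainer = Trainer(z, ce, pe, [sgd(z.parameters, lr=0.003125)])
for _ in range(1000):                          # assumed number of minibatches
    mb = mb_source.next_minibatch(64)
    trainer.train_minibatch({input: mb[features_si], label: mb[labels_si]})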
@ -62,9 +62,9 @@ class Trainer(cntk_py.Trainer):
be used as a list of bools, denoting whether a sequence is a new
one (`True`) or a continuation of the previous one (`False`).
Data should be either NumPy arrays or a
:class:`cntk.io.MinibatchData` instance.
:class:`~cntk.io.MinibatchData` instance.
outputs (iterable): outputs to fetch values for.
device (:class:`cntk.device.DeviceDescriptor`): the device descriptor that
device (:class:`~cntk.device.DeviceDescriptor`): the device descriptor that
contains the type and id of the device on which the computation is
to be performed.
@ -113,8 +113,8 @@ class Trainer(cntk_py.Trainer):
be used as a list of bools, denoting whether a sequence is a new
one (`True`) or a continuation of the previous one (`False`).
Data should be either NumPy arrays or a
:class:`cntk.io.MinibatchData` instance.
device (:class:`cntk.device.DeviceDescriptor`): the device descriptor that
:class:`~cntk.io.MinibatchData` instance.
device (:class:`~cntk.device.DeviceDescriptor`): the device descriptor that
contains the type and id of the device on which the computation is
to be performed.
Returns:

View file

@ -45,11 +45,11 @@ def simple_mnist(debug_output=False):
# Instantiate the feedforward classification model
scaled_input = element_times(constant(0.00390625), input)
netout = fully_connected_classifier_net(
z = fully_connected_classifier_net(
scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)
ce = cross_entropy_with_softmax(netout, label)
pe = classification_error(netout, label)
ce = cross_entropy_with_softmax(z, label)
pe = classification_error(z, label)
try:
rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
@ -67,7 +67,7 @@ def simple_mnist(debug_output=False):
}
# Instantiate the trainer object to drive the model training
trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=0.003125))
trainer = Trainer(z, ce, pe, sgd(z.parameters, lr=1./320))
# Get minibatches of images to train with and perform model training
minibatch_size = 64