Beautifying doc/code changes
Parent: 3a5403d6fc
Commit: b326a9af94
@@ -119,7 +119,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
 the next minibatch. Must be > 0.
 minibatch_size_in_sequences (`int`, defaults to `None`): number of
 samples to retrieve for the next minibatch. Must be > 0.
-input_map (`dict`): mapping of :class:`cntk.ops.variabls.Variable`
+input_map (`dict`): mapping of :class:`~cntk.ops.variabls.Variable`
 to :class:`StreamInformation` which will be used to convert the
 returned data.
 device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
@@ -127,7 +127,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
 Returns:
 A mapping of :class:`StramInformation` to :class:`MinibatchData` if
 ``input_map`` was not specified. Otherwise, the returned value will
-be a mapping of :class:`cntk.ops.variabls.Variable` to class:`MinibatchData`.
+be a mapping of :class:`~cntk.ops.variabls.Variable` to class:`MinibatchData`.
 '''
 if device is None:
     device = use_default_device()
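To make the ``input_map`` argument above concrete, here is a minimal usage sketch. It assumes the beta-era API shown in this diff; the variable names, shapes and stream names are illustrative, and ``mb_source`` is assumed to be a MinibatchSource created elsewhere (see the text_format_minibatch_source sketch further down).

    from cntk.ops import input_variable

    # Illustrative network inputs; the shapes are assumptions.
    features = input_variable(784)
    labels = input_variable(10)

    # Map each Variable to the reader stream that should feed it.
    input_map = {
        features: mb_source.stream_info('features'),  # Variable -> StreamInformation
        labels: mb_source.stream_info('labels'),
    }

    # With input_map given, the result maps each Variable to its MinibatchData.
    mb = mb_source.next_minibatch(minibatch_size_in_samples=64, input_map=input_map)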
@@ -161,7 +161,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
 Gets the checkpoint state of the MinibatchSource.

 Returns:
-:class:`cntk_py.Dictionary`
+:class:`~cntk_py.Dictionary`
 '''
 return super(MinibatchSource, self).get_checkpoint_state()
@@ -170,7 +170,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
 Restores the MinibatchSource state from the specified checkpoint.

 Args:
-checkpoint (:class:`cntk_py.Dictionary`): checkpoint to restore from
+checkpoint (:class:`~cntk_py.Dictionary`): checkpoint to restore from
 '''
 super(MinibatchSource, self).restore_from_checkpoint(checkpoint)
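The two checkpoint methods above pair up as a simple round trip; a hedged sketch (``mb_source`` is any MinibatchSource):

    # Snapshot the reader position as a cntk_py.Dictionary ...
    state = mb_source.get_checkpoint_state()

    # ... train for a while, possibly persist `state` alongside the model ...

    # ... and later resume reading from exactly that position.
    mb_source.restore_from_checkpoint(state)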
@@ -181,7 +181,7 @@ def _py_dict_to_cntk_dict(py_dict):
 Args:
 py_dict (`dict`): a dictionary to be converted.
 Returns:
-:class:`cntk_py.Dictionary`
+:class:`~cntk_py.Dictionary`
 '''
 res = cntk_py.Dictionary()
 for k, v in py_dict.items():
@@ -244,10 +244,10 @@ class ReaderConfig(dict):
 '''
 Creates an instance of :class:`MinibatchSource` from this
 instance, which can be used to feed data into the `eval()` methods of
-the graph nodes or the `train_minibatch()` of :class:`cntk.trainer.Trainer`.
+the graph nodes or the `train_minibatch()` of :class:`~cntk.trainer.Trainer`.

 Args:
-distributed_communicator (:class:`cntk.distributed.communicator`): distributed communicator
+distributed_communicator (:class:`~cntk.distributed.communicator`): distributed communicator

 Returns:
 instance of :class:`MinibatchSource`
@@ -481,7 +481,7 @@ def text_format_minibatch_source(path, stream_configs, epoch_size=INFINITELY_REP
 epoch_size (`int`, optional): size of an epoch. In case of 0 the size
 of the training set will be taken. Default is max of 64bit.
 randomize (`bool`, optional): whether to randomize the contents of data file.
-distributed_communicator (:class:`cntk.distributed.communicator`): optional distributed communicator
+distributed_communicator (:class:`~cntk.distributed.communicator`): optional distributed communicator

 Returns:
 :class:`MinibatchSource`
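As a usage sketch for the helper documented above (beta-era API; the file name, stream names and dimensions are assumptions):

    from cntk.io import text_format_minibatch_source, StreamConfiguration

    # Stream names must match the aliases used inside the CNTK text-format file.
    mb_source = text_format_minibatch_source(
        'Train-28x28_cntk_text.txt',             # assumed CTF file name
        [StreamConfiguration('features', 784),   # dense pixel input
         StreamConfiguration('labels', 10)])     # one-hot class labels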
@@ -63,7 +63,7 @@ class Learner(cntk_py.Learner):
 Update the parameters associated with this learner.

 Args:
-gradient_values (`dict`): maps :class:`cntk.variables.Parameter` to
+gradient_values (`dict`): maps :class:`~cntk.variables.Parameter` to
 a NumPy array containing the first order gradient values for the
 Parameter w.r.t. the training objective.
 training_sample_count (`int`): training sample count
@@ -102,9 +102,9 @@ class Learner(cntk_py.Learner):
 The learning rate.

 Args:
-minibatch_size (`int`): minibatch size to re-scaled
+minibatch_size (``int``): minibatch size to re-scaled
 the learning rate to the per-sample value (in case when the schedule
-was build with unit=UnitType.minibatch).
+was build with ``unit=UnitType.minibatch``).
 '''
 return super(Learner, self).learning_rate(minibatch_size)
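A short sketch of the re-scaling described above; ``learner`` is assumed to be any Learner whose schedule was built with ``unit=UnitType.minibatch``, and the returned value is the per-sample equivalent:

    # Ask for the rate re-scaled to a per-sample value for minibatches of 32 samples.
    lr_per_sample = learner.learning_rate(32)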
@@ -132,16 +132,16 @@ def training_parameter_schedule(schedule, epoch_size=1, unit=UnitType.sample):
 (0.1, 0.1, 0.01, 0.01, 0.001, 0.001)

 Args:
-schedule (`float` or `list`): if `float`, is the parameter schedule to be used
+schedule (``float`` or ``list``): if ``float``, is the parameter schedule to be used
 for all samples. In case of list, the elements are used as the
 values for ``epoch_size`` samples. If list contains pair, the second element is
 used as a value for (``epoch_size`` x first element) samples
 epoch_size (`int`): number of samples as a scheduling unit. Parameters in
-the schedule change their values every 'epoch_size' samples.
-unit (:class:`cntk.ops.functions.UnitType`): one of two
+the schedule change their values every ``epoch_size`` samples.
+unit (:class:`UnitType`): one of two

-* 'sample': the returned schedule contains per-sample values (default)
-* 'minibatch': the returned schedule contains per-minibatch values.
+* ``sample``: the returned schedule contains per-sample values (default)
+* ``minibatch``: the returned schedule contains per-minibatch values.

 Returns:
 training parameter schedule
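A minimal sketch of the list form, matching the example values shown above (per-sample unit, which is the default):

    from cntk.learner import training_parameter_schedule

    # Each list element covers epoch_size samples; a (multiplier, value) pair
    # would cover multiplier * epoch_size samples.
    s = training_parameter_schedule([0.1, 0.01, 0.001], epoch_size=2)
    # Read back sample by sample this yields 0.1, 0.1, 0.01, 0.01, 0.001, 0.001, ...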
@@ -176,11 +176,11 @@ def learning_rate_schedule(lr, epoch_size=1, unit=UnitType.sample):
 :func:`training_parameter_schedule`).

 Args:
-lr (`float` or `list`): see parameter ``schedule`` in
+lr (``float`` or ``list``): see parameter ``schedule`` in
 :func:`training_parameter_schedule`.
-epoch_size (`int`): see parameter ``epoch_size`` in
+epoch_size (``int``): see parameter ``epoch_size`` in
 :func:`training_parameter_schedule`.
-unit (:class:`cntk.ops.functions.UnitType`): see parameter
+unit (:class:`UnitType`): see parameter
 ``unit`` in :func:`training_parameter_schedule`.

 Returns:
@@ -195,11 +195,11 @@ def momentum_schedule(momentum, epoch_size=1, unit=UnitType.sample):
 :func:`training_parameter_schedule`).

 Args:
-momentum (`float` or `list`): see parameter ``schedule`` in
+momentum (``float`` or ``list``): see parameter ``schedule`` in
 :func:`training_parameter_schedule`.
-epoch_size (`int`): see parameter ``epoch_size`` in
+epoch_size (``int``): see parameter ``epoch_size`` in
 :func:`training_parameter_schedule`.
-unit (:class:`cntk.ops.functions.UnitType`): see parameter
+unit (:class:`UnitType`): see parameter
 ``unit`` in :func:`training_parameter_schedule`.

 If you want to provide momentum values in a sample/minibatch
@@ -223,11 +223,11 @@ def momentum_schedule(momentum, epoch_size=1, unit=UnitType.sample):
 (0.99, 0.99, 0.88, 0.88, 0.77)

 Args:
-momentum (`float` or `list`): see parameter ``schedule`` in
+momentum (``float`` or ``list``): see parameter ``schedule`` in
 :func:`training_parameter_schedule`.
-epoch_size (`int`): see parameter ``epoch_size`` in
+epoch_size (``int``): see parameter ``epoch_size`` in
 :func:`training_parameter_schedule`.
-unit (:class:`cntk.ops.functions.UnitType`): see parameter
+unit (:class:`UnitType`): see parameter
 ``unit`` in :func:`training_parameter_schedule`.

 Returns:
@@ -242,11 +242,11 @@ def momentum_as_time_constant_schedule(momentum, epoch_size=1):
 semantics as :func:`training_parameter_schedule`).

 Args:
-momentum (`float` or `list`): see parameter ``schedule`` in
+momentum (``float`` or ``list``): see parameter ``schedule`` in
 :func:`training_parameter_schedule`.
-epoch_size (`int`): see parameter ``epoch_size`` in
+epoch_size (``int``): see parameter ``epoch_size`` in
 :func:`training_parameter_schedule`.
-unit (:class:`cntk.ops.functions.UnitType`): see parameter
+unit (:class:`UnitType`): see parameter
 ``unit`` in :func:`training_parameter_schedule`.

 CNTK specifies momentum in a minibatch-size agnostic way as the time
@@ -267,9 +267,9 @@ def momentum_as_time_constant_schedule(momentum, epoch_size=1):
 >>> m = momentum_as_time_constant_schedule([1100, 1500], 1000)

 Args:
-momentum (`float` or `list`): see parameter ``schedule`` in
+momentum (``float`` or ``list``): see parameter ``schedule`` in
 :func:`training_parameter_schedule`.
-epoch_size (`int`): see parameter ``epoch_size`` in
+epoch_size (``int``): see parameter ``epoch_size`` in
 :func:`training_parameter_schedule`.

 Returns:
@@ -295,27 +295,33 @@ def sgd(parameters, lr,
 gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
 gradient_clipping_with_truncation=True):
 '''
-Creates an SGD learner instance to learn the parameters.
+Creates an SGD learner instance to learn the parameters. See [1] for more
+information on how to set the parameters.

 Args:
 parameters (`list` of parameters): list of network parameters to tune.
 These can be obtained by the '.parameters()' method of the root
 operator.
-lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
+lr (``float``, ``list`` or output of :func:`learning_rate_schedule`): learning rate
 schedule. When the argument value is a `float` or a `list`, lr is
-converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
-l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
+converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
+l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
 defaults to 0.0
-l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
+l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
 defaults to 0.0
-gaussian_noise_injection_std_dev ('float', optional): the standard deviation
+gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
 of the Gaussian noise added to parameters post update, defaults to 0.0
-gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
+gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
 per sample, defaults to infinity
-gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
+gradient_clipping_with_truncation (``bool``, default ``True``): gradient clipping

 Returns:
-Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
+Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
+
+See also:
+[1] L. Bottou. `Stochastic Gradient Descent Tricks
+<http://research.microsoft.com/pubs/192769/tricks-2012.pdf>`_. Neural
+Networks: Tricks of the Trade: Springer, 2012.
 '''
 lr = learning_rate_schedule(lr)
 gaussian_noise_injection_std_dev = training_parameter_schedule(gaussian_noise_injection_std_dev)
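A hedged end-to-end sketch of building an SGD learner and handing it to a Trainer; ``z``, ``ce`` and ``pe`` are assumed to be the model, loss and error Functions (compare the simple_mnist hunks at the end of this commit):

    from cntk.learner import sgd
    from cntk.trainer import Trainer

    learner = sgd(z.parameters, lr=1./320)   # a float lr is turned into a per-sample schedule
    trainer = Trainer(z, ce, pe, learner)    # the learner drives all parameters of z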
@@ -340,26 +346,25 @@ def momentum_sgd(parameters, lr, momentum,
 Args:
 parameters (list of parameters): list of network parameters to tune.
 These can be obtained by the root operator's ``parameters``.
-lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
+lr (``float``, ``list`` or output of :func:`learning_rate_schedule`): learning rate
 schedule. When the argument value is a `float` or a `list`, lr is
-converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
-momentum (`float`, `list` or output of `:func:momentum_schedule` or
-`:func:momentum_as_time_constant_schedule`): momentum schedule. When the argument
-value is a `float` or a `list`, momentum is converted to a per-sample schedule by
-invoking `:func:momentum_schedule`. Refer to
-https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits
-l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
+converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
+momentum (``float``, ``list`` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): momentum schedule. When the argument
+value is a ``float`` or a ``list``, momentum is converted to a per-sample schedule by
+invoking :func:`momentum_schedule`. Refer to the `wiki
+<https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits>`_.
+l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
 defaults to 0.0
-l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
+l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
 defaults to 0.0
-gaussian_noise_injection_std_dev ('float', optional): the standard deviation
+gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
 of the Gaussian noise added to parameters post update, defaults to 0.0
-gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
+gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
 per sample, defaults to infinity
-gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
+gradient_clipping_with_truncation (``bool``, default ``True``): gradient clipping

 Returns:
-Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
+Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
 '''
 lr = learning_rate_schedule(lr)
 momentum = momentum_schedule(momentum)
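To illustrate the momentum argument, a sketch using the schedule helpers documented earlier in this file (values are illustrative; ``z`` is the root Function of the model):

    from cntk.learner import (momentum_sgd, learning_rate_schedule,
                              momentum_as_time_constant_schedule)

    lr = learning_rate_schedule(0.01)              # per-sample learning rate
    mm = momentum_as_time_constant_schedule(1100)  # minibatch-size agnostic momentum
    learner = momentum_sgd(z.parameters, lr, mm)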
@@ -381,31 +386,42 @@ def nesterov(parameters, lr, momentum,
 gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
 gradient_clipping_with_truncation=True):
 '''
-Creates a Nesterov SGD learner instance to learn the parameters.
+Creates a Nesterov SGD learner instance to learn the parameters. This was
+originally proposed by Nesterov [1] in 1983 and then proved to work well in
+a deep learning context by Sutskever, et al. [2].

 Args:
 parameters (list of parameters): list of network parameters to tune.
 These can be obtained by the root operator's ``parameters``.
-lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
-schedule. When the argument value is a `float` or a `list`, lr is
-converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
-momentum (`float`, `list` or output of `:func:momentum_schedule` or
-`:func:momentum_as_time_constant_schedule`): momentum schedule. When the argument
-value is a `float` or a `list`, momentum is converted to a per-sample schedule by
-invoking `:func:momentum_schedule`. Refer to
-https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits
-l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
+lr (``float``, ``list`` or output of :func:`learning_rate_schedule`): learning rate
+schedule. When the argument value is a ``float`` or a ``list``, lr is
+converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
+momentum (``float``, ``list`` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): momentum schedule. When the argument
+value is a ``float`` or a ``list``, momentum is converted to a per-sample schedule by
+invoking :func:`momentum_schedule`. Refer to the `wiki
+<https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits>`_.
+l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
 defaults to 0.0
-l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
+l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
 defaults to 0.0
-gaussian_noise_injection_std_dev ('float', optional): the standard deviation
+gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
 of the Gaussian noise added to parameters post update, defaults to 0.0
-gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
+gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
 per sample, defaults to infinity
-gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
+gradient_clipping_with_truncation (``bool``, default ``True``): gradient clipping

 Returns:
-Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
+Instance of a :class:`~cntk.learner.Learner` that can be passed to the
+:class:`~cntk.trainer.Trainer`.
+
+See also:
+[1] Y. Nesterov. A Method of Solving a Convex Programming Problem with Convergence Rate O(1/ sqrt(k)). Soviet Mathematics Doklady, 1983.
+
+[2] I. Sutskever, J. Martens, G. Dahl, and G. Hinton. `On the
+Importance of Initialization and Momentum in Deep Learning
+<http://www.cs.toronto.edu/~fritz/absps/momentum.pdf>`_. Proceedings
+of the 30th International Conference on Machine Learning, 2013.
+
 '''
 lr = learning_rate_schedule(lr)
 momentum = momentum_schedule(momentum)
@@ -427,27 +443,34 @@ def adagrad(parameters, lr, need_ave_multiplier=True,
 gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
 gradient_clipping_with_truncation=True):
 '''
-Creates an AdaGrad learner instance to learn the parameters.
+Creates an AdaGrad learner instance to learn the parameters. See [1] for
+more information.

 Args:
 parameters (list of parameters): list of network parameters to tune.
 These can be obtained by the root operator's ``parameters``.
-lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
+lr (``float``, `list` or output of :func:`learning_rate_schedule`): learning rate
 schedule. When the argument value is a `float` or a `list`, lr is
-converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
-need_ave_multiplier ('bool', default):
-l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
+converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
+need_ave_multiplier (``bool``, default):
+l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
 defaults to 0.0
-l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
+l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
 defaults to 0.0
-gaussian_noise_injection_std_dev ('float', optional): the standard deviation
+gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
 of the Gaussian noise added to parameters post update, defaults to 0.0
-gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
+gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
 per sample, defaults to infinity
-gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
+gradient_clipping_with_truncation (``bool``, default `True`): gradient clipping

 Returns:
-Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
+Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
+
+See also:
+[1] J. Duchi, E. Hazan, and Y. Singer. `Adaptive Subgradient Methods
+for Online Learning and Stochastic Optimization
+<http://www.magicbroom.info/Papers/DuchiHaSi10.pdf>`_. The Journal of
+Machine Learning Research, 2011.
 '''
 lr = learning_rate_schedule(lr)
 gaussian_noise_injection_std_dev = training_parameter_schedule(gaussian_noise_injection_std_dev)
@@ -471,35 +494,39 @@ def adam_sgd(parameters, lr, momentum,
 gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=1E10,
 gradient_clipping_with_truncation=True):
 '''
-Creates an Adam learner instance to learn the parameters.
+Creates an Adam learner instance to learn the parameters. See [1] for more
+information.

 Args:
 parameters (list of parameters): list of network parameters to tune.
 These can be obtained by the root operator's ``parameters``.
-lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
+lr (``float``, `list` or output of :func:`learning_rate_schedule`): learning rate
 schedule. When the argument value is a `float` or a `list`, lr is
-converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
-momentum (`float`, `list` or output of `:func:momentum_schedule` or
-`:func:momentum_as_time_constant_schedule`): momentum schedule. When the argument
+converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
+momentum (`float`, `list` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): momentum schedule. When the argument
 value is a `float` or a `list`, momentum is converted to a per-sample schedule by
-invoking `:func:momentum_schedule`. Refer to
-https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits
-variance_momentum (`float`, `list` or output of `:func:momentum_schedule` or
-`:func:momentum_as_time_constant_schedule`): variance momentum schedule. When the argument
+invoking :func:`momentum_schedule`. Refer to the `wiki
+<https://github.com/Microsoft/CNTK/wiki/SGD-block#converting-learning-rate-and-momentum-parameters-from-other-toolkits>`_.
+variance_momentum (`float`, `list` or output of :func:`momentum_schedule` or :func:`momentum_as_time_constant_schedule`): variance momentum schedule. When the argument
 value is a `float` or a `list`, variance momentum is converted to a per-sample schedule by
-invoking `:func:momentum_schedule`. Defaults to momentum_as_time_constant_schedule(720000).
-l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
+invoking :func:`momentum_schedule`. Defaults to momentum_as_time_constant_schedule(720000).
+l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
 defaults to 0.0
-l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
+l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
 defaults to 0.0
-gaussian_noise_injection_std_dev ('float', optional): the standard deviation
+gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
 of the Gaussian noise added to parameters post update, defaults to 0.0
-gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
+gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
 per sample, defaults to infinity
-gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
+gradient_clipping_with_truncation (``bool``, default `True`): gradient clipping

 Returns:
-Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
+Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
+
+See also:
+[1] D. Kingma, J. Ba. `Adam: A Method for Stochastic Optimization
+<http://arxiv.org/abs/1412.6980>`_. International Conference for
+Learning Representations, 2015.
 '''
 if not low_memory:
     raise NotImplementedError('adam: low_memory=True currently required')
@@ -532,27 +559,27 @@ def rmsprop(parameters, lr,
 Args:
 parameters (list of parameters): list of network parameters to tune.
 These can be obtained by the root operator's ``parameters``.
-lr ('float', `list` or output of `:func:learning_rate_schedule`): learning rate
+lr (``float``, `list` or output of :func:`learning_rate_schedule`): learning rate
 schedule. When the argument value is a `float` or a `list`, lr is
-converted to a per-sample schedule by invoking `:func:learning_rate_schedule`.
-gamma ('float'):
-inc ('float'):
-dec ('float'):
-max ('float'):
-min ('float'):
-need_ave_multiplier ('bool', default):
-l1_regularization_weight ('float', optional): the L1 regularization weight per sample,
+converted to a per-sample schedule by invoking :func:`learning_rate_schedule`.
+gamma (``float``):
+inc (``float``):
+dec (``float``):
+max (``float``):
+min (``float``):
+need_ave_multiplier (``bool``, default):
+l1_regularization_weight (``float``, optional): the L1 regularization weight per sample,
 defaults to 0.0
-l2_regularization_weight ('float', optional): the L2 regularization weight per sample,
+l2_regularization_weight (``float``, optional): the L2 regularization weight per sample,
 defaults to 0.0
-gaussian_noise_injection_std_dev ('float', optional): the standard deviation
+gaussian_noise_injection_std_dev (``float``, optional): the standard deviation
 of the Gaussian noise added to parameters post update, defaults to 0.0
-gradient_clipping_threshold_per_sample ('float', optional): clipping threshold
+gradient_clipping_threshold_per_sample (``float``, optional): clipping threshold
 per sample, defaults to infinity
-gradient_clipping_with_truncation ('bool', default `True`): gradient clipping
+gradient_clipping_with_truncation (``bool``, default `True`): gradient clipping

 Returns:
-Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
+Instance of a :class:`~cntk.learner.Learner` that can be passed to the :class:`~cntk.trainer.Trainer`
 '''
 lr = learning_rate_schedule(lr)
 gaussian_noise_injection_std_dev = training_parameter_schedule(gaussian_noise_injection_std_dev)
@@ -5,7 +5,7 @@ from enum import Enum, unique
 @unique
 class CloneMethod(Enum):
 '''
-Describes different ways how :class:`cntk.ops.functions.Function.forward`
+Describes different ways how :class:`~cntk.ops.functions.Function.forward`
 works.
 '''
@@ -123,7 +123,7 @@ class Function(cntk_py.Function):
 substitutions requested are applied in the cloned Function instance.

 Args:
-method (:class:`cntk.ops.functions.CloneMethod`): one of
+method (:class:`CloneMethod`): one of

 * 'clone': the returned function gets its own copy of parameters (default)
 * 'share': the returned function shares its parameters with this function
@@ -133,7 +133,7 @@ class Function(cntk_py.Function):
 function to variables in the cloned function

 Returns:
-:class:`Function`: the cloned Function
+:class:`~cntk.ops.functions.Function`: the cloned Function
 '''
 if not isinstance(method, CloneMethod):
     raise ValueError('clone method "%s" is not supported' %
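A hedged sketch of clone() with one of the methods listed above; ``model`` is an existing Function, ``features`` is the Variable currently feeding it, and ``new_input`` is the Variable that should feed the clone (all assumed to exist):

    from cntk.ops.functions import CloneMethod

    # 'share' keeps the parameters shared with the original function;
    # 'clone' would give the copy its own parameters.
    shared_copy = model.clone(CloneMethod.share, {features: new_input})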
@@ -149,7 +149,7 @@ class Function(cntk_py.Function):
 @typemap
 def constants(self):
 '''
-List of all `Constant` variables of this :class:`Function`
+List of all `Constant` variables of this :class:`~cntk.ops.functions.Function`
 '''
 return super(Function, self).constants()
@@ -171,8 +171,8 @@ class Function(cntk_py.Function):
 be used as a list of bools, denoting whether a sequence is a new
 one (`True`) or a continuation of the previous one (`False`).
 Data should be either NumPy arrays or a
-:class:`cntk.io.MinibatchData` instance.
-device (:class:`cntk.device.DeviceDescriptor`): the device descriptor that
+:class:`~cntk.io.MinibatchData` instance.
+device (:class:`~cntk.device.DeviceDescriptor`): the device descriptor that
 contains the type and id of the device on which the computation is
 to be performed.
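A self-contained sketch of evaluating a tiny graph, to show how NumPy data is bound to input Variables (beta-era ops module assumed; the computation runs on the default device unless a DeviceDescriptor is passed):

    import numpy as np
    from cntk.ops import input_variable, plus

    x = input_variable(2)
    y = input_variable(2)
    z = plus(x, y)

    # One list entry per sequence; arrays use the Variables' float32 dtype.
    result = z.eval({x: [np.array([1., 2.], dtype=np.float32)],
                     y: [np.array([10., 20.], dtype=np.float32)]})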
@@ -222,14 +222,14 @@ class Function(cntk_py.Function):
 be used as a list of bools, denoting whether a sequence is a new
 one (`True`) or a continuation of the previous one (`False`).
 Data should be either NumPy arrays or a
-:class:`cntk.io.MinibatchData` instance.
+:class:`~cntk.io.MinibatchData` instance.
 outputs (iterable): outputs to fetch values for.
 keep_for_backward (`set`, default `None`): the subset of the
 Function's output variables for which gradients shall be calculated
 in a subsequent backward call. If `None`, the returned state will
 be `None` and a subsequent call to :func:`backward` will not be
 possible.
-device (:class:`cntk.device.DeviceDescriptor`, default `None`): the device
+device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the device
 descriptor that contains the type and id of the device on which the
 computation is. If `None`, the default device is used.
@@ -371,7 +371,7 @@ class Function(cntk_py.Function):
 specified substitution.

 Args:
-substitution (:class:`cntk.ops.variables.Variable`): the variable
+substitution (:class:`~cntk.ops.variables.Variable`): the variable
 that will replace the placeholder

 Returns:
@@ -23,11 +23,11 @@ class Trainer(cntk_py.Trainer):
 using computed gradients.

 Args:
-model (:class:`cntk.ops.functions.Function`): root node of the function to train
-loss_function (:class:`cntk.ops.functions.Function`): loss function
-eval_function (:class:`cntk.ops.functions.Function`): evaluation function
+model (:class:`~cntk.ops.functions.Function`): root node of the function to train
+loss_function (:class:`~cntk.ops.functions.Function`): loss function
+eval_function (:class:`~cntk.ops.functions.Function`): evaluation function
 parameter_learners (`list`): list of learners from :mod:`cntk.learner`
-distributed_trainer (:class:`cntk.distributed.distributed_trainer`): distributed trainer
+distributed_trainer (:class:`~cntk.distributed.distributed_trainer`): distributed trainer
 '''
 def __init__(self, model, loss_function, eval_function, parameter_learners, distributed_trainer=None):
     # TODO sanitizing should be removed once Swig's typemaps are in place
@@ -62,9 +62,9 @@ class Trainer(cntk_py.Trainer):
 be used as a list of bools, denoting whether a sequence is a new
 one (`True`) or a continuation of the previous one (`False`).
 Data should be either NumPy arrays or a
-:class:`cntk.io.MinibatchData` instance.
+:class:`~cntk.io.MinibatchData` instance.
 outputs (iterable): outputs to fetch values for.
-device (:class:`cntk.device.DeviceDescriptor`): the device descriptor that
+device (:class:`~cntk.device.DeviceDescriptor`): the device descriptor that
 contains the type and id of the device on which the computation is
 to be performed.
@@ -113,8 +113,8 @@ class Trainer(cntk_py.Trainer):
 be used as a list of bools, denoting whether a sequence is a new
 one (`True`) or a continuation of the previous one (`False`).
 Data should be either NumPy arrays or a
-:class:`cntk.io.MinibatchData` instance.
-device (:class:`cntk.device.DeviceDescriptor`): the device descriptor that
+:class:`~cntk.io.MinibatchData` instance.
+device (:class:`~cntk.device.DeviceDescriptor`): the device descriptor that
 contains the type and id of the device on which the computation is
 to be performed.
 Returns:
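Finally, a hypothetical training loop tying the pieces together; ``mb_source``, ``input_map`` and ``trainer`` are assumed to be set up as in the earlier sketches, and the loss property name is an assumption about this beta:

    for i in range(1000):
        mb = mb_source.next_minibatch(64, input_map=input_map)
        trainer.train_minibatch(mb)   # mb already maps Variables to MinibatchData
        if i % 100 == 0:
            print(trainer.previous_minibatch_loss_average)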
@@ -45,11 +45,11 @@ def simple_mnist(debug_output=False):

 # Instantiate the feedforward classification model
 scaled_input = element_times(constant(0.00390625), input)
-netout = fully_connected_classifier_net(
+z = fully_connected_classifier_net(
     scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)

-ce = cross_entropy_with_softmax(netout, label)
-pe = classification_error(netout, label)
+ce = cross_entropy_with_softmax(z, label)
+pe = classification_error(z, label)

 try:
     rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
@@ -67,7 +67,7 @@ def simple_mnist(debug_output=False):
 }

 # Instantiate the trainer object to drive the model training
-trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=0.003125))
+trainer = Trainer(z, ce, pe, sgd(z.parameters, lr=1./320))

 # Get minibatches of images to train with and perform model training
 minibatch_size = 64