Integrate yuqtang/StagingFixNewLearner2 into master
Commit 0f3033dbf3
@@ -4460,13 +4460,13 @@ namespace CNTK
 ///
 /// A special value that can be used for the minibatchSize to indicate that the reference minibatch size is not specified.
 ///
-static const size_t UnspecifiedMinibatchSize = 0;
+static const size_t IgnoredMinibatchSize = 0;
 ///
 /// Create a schedule with a constant parameter value.
 /// @param value a single value to populate the schedule
 /// @param minibatchSize the minibatch size that the @e value is specified for.
 ///
-CNTK_API TrainingParameterSchedule(T value, size_t minibatchSize = UnspecifiedMinibatchSize);
+CNTK_API TrainingParameterSchedule(T value, size_t minibatchSize = IgnoredMinibatchSize);

 #ifndef SWIG
 ///
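A minimal Python sketch of the constant-value schedule above, assuming the learning_parameter_schedule signature this change introduces (the numeric values are hypothetical):

    import cntk as C

    # A constant learning rate of 0.1, declared against a reference minibatch
    # size of 32 samples; CNTK can rescale it for other actual minibatch sizes.
    lr = C.learners.learning_parameter_schedule(0.1, minibatch_size=32)

    # Omitting minibatch_size leaves the schedule at the renamed sentinel
    # (IgnoredMinibatchSize == 0): the value is applied as-is, with no rescaling.
    lr_as_is = C.learners.learning_parameter_schedule(0.1)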
@@ -4475,7 +4475,7 @@ namespace CNTK
 /// and so on. The last value is then used repeatedly until the end of training.
 /// @e minibatchSize is the minibatch size that each schedule[i] is specified for.
 ///
-CNTK_API TrainingParameterSchedule(const std::vector<T>& schedule, size_t epochSize = FullDataSweep, size_t minibatchSize = UnspecifiedMinibatchSize);
+CNTK_API TrainingParameterSchedule(const std::vector<T>& schedule, size_t epochSize = FullDataSweep, size_t minibatchSize = IgnoredMinibatchSize);
 #endif

 ///
@@ -4488,7 +4488,7 @@ namespace CNTK
 /// after which the value is switched to '0.005'.
 /// @e minibatchSize is the minibatch size that each schedule[i] is specified for.
 ///
-CNTK_API TrainingParameterSchedule(const std::vector<std::pair<size_t, T>>& schedule, size_t epochSize = FullDataSweep, size_t minibatchSize = UnspecifiedMinibatchSize);
+CNTK_API TrainingParameterSchedule(const std::vector<std::pair<size_t, T>>& schedule, size_t epochSize = FullDataSweep, size_t minibatchSize = IgnoredMinibatchSize);


 ///
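Illustratively, the pair form of the schedule documented above ('0.01' for the first 1000 samples, then '0.005') maps to Python as the following sketch, assuming the list-of-pairs input accepted by learning_parameter_schedule:

    import cntk as C

    # Each (count, value) pair covers count * epoch_size samples:
    # 0.01 for samples 0..999, then 0.005 until the end of training.
    lr = C.learners.learning_parameter_schedule([(1, 0.01), (1, 0.005)],
                                                epoch_size=1000)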
@@ -4635,7 +4635,7 @@ namespace CNTK
 ///
 /// A special value that can be used for the minibatchSize to indicate that the reference minibatch size is not specified.
 ///
-CNTK_API static const size_t UnspecifiedMinibatchSize;
+CNTK_API static const size_t IgnoredMinibatchSize;

 public:
 //
@@ -4717,7 +4717,7 @@ namespace CNTK
 ///setting and be specialized to its own reference minibatch size. However, this is only suggested for advanced
 ///users.
 CNTK_API void SetMinibatchSize(std::size_t minibatchSize) { GetOptions().Add(MinibatchSizeKey, minibatchSize); }
-CNTK_API std::size_t GetMinibatchSize() const { return GetOptions().GetOrElse(MinibatchSizeKey, UnspecifiedMinibatchSize); }
+CNTK_API std::size_t GetMinibatchSize() const { return GetOptions().GetOrElse(MinibatchSizeKey, IgnoredMinibatchSize); }

 CNTK_API void SetLearningRateSchedule(const LearningRateSchedule& learningRateSchedule) { m_learningRateSchedule = learningRateSchedule; }
 CNTK_API const LearningRateSchedule& GetLearningRateSchedule() const { return m_learningRateSchedule; }
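On the Python side this setting surfaces as the learner's minibatch_size property. A sketch, under the assumption that the property simply wraps GetMinibatchSize and therefore falls back to the sentinel when no option was ever set:

    import cntk as C

    w = C.parameter((2,))
    learner = C.learners.sgd([w], lr=0.1)  # no reference minibatch size given
    # GetOrElse(MinibatchSizeKey, IgnoredMinibatchSize) yields the sentinel.
    assert learner.minibatch_size == C.learners.IGNORE  # i.e. 0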
@@ -4726,7 +4726,7 @@ namespace CNTK
 template<typename T>
 static bool IsCompatibleMode(const TrainingParameterSchedule<T>& schedule)
 {
-    return schedule.GetMinibatchSize() == UnspecifiedMinibatchSize;
+    return schedule.GetMinibatchSize() == IgnoredMinibatchSize;
 }

 ///
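Compatible mode, as tested above, is reachable from Python; the sketch below mirrors the unit test added further down in this commit:

    import cntk as C
    from cntk.learners import sgd, learning_rate_schedule, UnitType

    w = C.parameter((2,))
    # The deprecated use_mean_gradient flag leaves the reference minibatch
    # size at the ignored sentinel, i.e. the learner is in compatible mode.
    learner = sgd([w], lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode()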
@@ -4737,7 +4737,7 @@ namespace CNTK
 {
     if (GetOptions().Contains(MinibatchSizeKey))
     {
-        return GetMinibatchSize() == UnspecifiedMinibatchSize;
+        return GetMinibatchSize() == IgnoredMinibatchSize;
     }
     else
         //if the learner minibatch size is not set, by default it is not in compatible mode.
@@ -36,7 +36,7 @@ namespace CNTK
 ///
 /// A special value that can be used for the minibatchSize to indicate that the reference minibatch size is not specified.
 ///
-CNTK_API const size_t Learner::UnspecifiedMinibatchSize = TrainingParameterSchedule<double>::UnspecifiedMinibatchSize;
+CNTK_API const size_t Learner::IgnoredMinibatchSize = TrainingParameterSchedule<double>::IgnoredMinibatchSize;


 // This method completely replaces the current schedule with the new schedule. However, since
@@ -162,13 +162,13 @@ void TestTrainingParametersSchedule()
 assert(schedule3[100] == 0.3);

 LearningRateSchedule schedule4(vector<double>{ 0.5 }, 10 ); // without vector<> gcc complains that conversion here is ambiguous
-assert(schedule4.GetMinibatchSize() == LearningRateSchedule::UnspecifiedMinibatchSize);
+assert(schedule4.GetMinibatchSize() == LearningRateSchedule::IgnoredMinibatchSize);
 assert(schedule4[0] == 0.5);
 assert(schedule4[10] == 0.5);
 assert(schedule4[100] == 0.5);

 LearningRateSchedule schedule5{ std::vector<double>{ 0.5, 0.3, 0.2 }, 10 };
-assert(schedule5.GetMinibatchSize() == LearningRateSchedule::UnspecifiedMinibatchSize); //unspecified reference minibatch size is 0
+assert(schedule5.GetMinibatchSize() == LearningRateSchedule::IgnoredMinibatchSize); //unspecified reference minibatch size is 0
 assert(schedule5[0] == 0.5);
 assert(schedule5[9] == 0.5);
 assert(schedule5[10] == 0.3);
@@ -177,20 +177,20 @@ void TestTrainingParametersSchedule()
 assert(schedule5[100] == 0.2);

 MomentumSchedule schedule6{ { make_pair(1, 0.5) } }; // without make_pair this is interpreted as a vector of doubles
-assert(schedule6.GetMinibatchSize() == MomentumSchedule::UnspecifiedMinibatchSize);
+assert(schedule6.GetMinibatchSize() == MomentumSchedule::IgnoredMinibatchSize);
 assert(schedule6[0] == 0.5);
 assert(schedule6[10] == 0.5);
 assert(schedule6[100] == 0.5);

 LearningRateSchedule schedule7{ std::vector<std::pair<size_t, double>>{ { 1, 0.5 }, { 1, 0.3 }, { 1, 0.2 } } };
-assert(schedule7.GetMinibatchSize() == LearningRateSchedule::UnspecifiedMinibatchSize);
+assert(schedule7.GetMinibatchSize() == LearningRateSchedule::IgnoredMinibatchSize);
 assert(schedule7[0] == 0.5);
 assert(schedule7[1] == 0.3);
 assert(schedule7[2] == 0.2);
 assert(schedule7[100] == 0.2);

 MomentumSchedule schedule8{ std::vector<std::pair<size_t, double>>{ { 1, 0.5 }, { 1, 0.3 }, { 1, 0.2 } }, 10 };
-assert(schedule8.GetMinibatchSize() == MomentumSchedule::UnspecifiedMinibatchSize);
+assert(schedule8.GetMinibatchSize() == MomentumSchedule::IgnoredMinibatchSize);
 assert(schedule8[0] == 0.5);
 assert(schedule8[9] == 0.5);
 assert(schedule8[10] == 0.3);
@@ -208,7 +208,7 @@ void TestTrainingParametersSchedule()
 assert(schedule9[100] == 0.2);

 MomentumSchedule schedule10 = { std::vector<std::pair<size_t, double>>{ { 3, 0.5 }, { 2, 0.3 }, { 1, 0.2 } }, 10 };
-assert(schedule10.GetMinibatchSize() == MomentumSchedule::UnspecifiedMinibatchSize);
+assert(schedule10.GetMinibatchSize() == MomentumSchedule::IgnoredMinibatchSize);
 assert(schedule10[0] == 0.5);
 assert(schedule10[29] == 0.5);
 assert(schedule10[30] == 0.3);
@@ -117,10 +117,10 @@

 %rename(l1_regularization_weight) CNTK::AdditionalLearningOptions::l1RegularizationWeight;
 %rename(l2_regularization_weight) CNTK::AdditionalLearningOptions::l2RegularizationWeight;
-%rename(unspecified_minibatch_size) CNTK::TrainingParameterSchedule<double>::UnspecifiedMinibatchSize;
-%rename(unspecified_minibatch_size) CNTK::TrainingParameterSchedule<std::size_t>::UnspecifiedMinibatchSize;
+%rename(ignored_minibatch_size) CNTK::TrainingParameterSchedule<double>::IgnoredMinibatchSize;
+%rename(ignored_minibatch_size) CNTK::TrainingParameterSchedule<std::size_t>::IgnoredMinibatchSize;
 %rename(_MINIBATCH_SIZE) CNTK::Learner::MinibatchSizeKey; // L"MinibatchSize"
-%rename(unspecified_minibatch_size) CNTK::Learner::UnspecifiedMinibatchSize;
+%rename(ignored_minibatch_size) CNTK::Learner::IgnoredMinibatchSize;
 %rename(_options) CNTK::Learner::GetOptions;

 %rename(ndcg_at_1) CNTK::NDCGAt1;
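The net effect of these SWIG renames on the generated Python bindings is sketched below; the constant's value (0) follows from the C++ definition earlier in this diff:

    from cntk import cntk_py

    # Snake_case names generated by the %rename directives above.
    assert cntk_py.Learner.ignored_minibatch_size == 0
    assert cntk_py.training_double_parameter_schedule.ignored_minibatch_size == 0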
@@ -107,7 +107,7 @@ class QLearning(AgentBaseClass):
     self._q.parameters,
     C.learners.learning_rate_schedule(
         self._parameters.initial_eta, C.learners.UnitType.sample),
-    minibatch_size=minibatch_size,
+    use_mean_gradient=True,
     momentum=C.learners.momentum_schedule(self._parameters.momentum),
     variance_momentum=C.learners.momentum_schedule(0.999),
     gradient_clipping_threshold_per_sample=
@@ -154,7 +154,7 @@ class Learner(cntk_py.Learner):
     _verify_learning_rate_type(learning_rate)
     if not learning_rate.is_minibatch_size_explicitly_specified:
         #If the schedule minibatch size is not explicitly specified, the learner's specification will take over
-        if self.minibatch_size is not None and self.minibatch_size != self.unspecified_minibatch_size:
+        if self.minibatch_size is not None and self.minibatch_size != self.ignored_minibatch_size:
             learning_rate.minibatch_size = self.minibatch_size
     return super(Learner, self).reset_learning_rate(learning_rate)

@@ -164,7 +164,7 @@ class Learner(cntk_py.Learner):
     '''
     return super(Learner, self).learning_rate()

-IGNORE = Learner.unspecified_minibatch_size
+IGNORE = Learner.ignored_minibatch_size
 '''
 Indicates that the minibatch size is ignored in the learner's hyper-parameter schedule.
 '''
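A usage sketch of the IGNORE sentinel defined here (model and values hypothetical):

    import cntk as C

    w = C.parameter((2,))
    # Passing IGNORE disables CNTK's hyper-parameter rescaling: the learning
    # rate is applied to the minibatch gradient as-is, whatever its size.
    learner = C.learners.sgd([w], lr=0.1, minibatch_size=C.learners.IGNORE)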
@@ -288,7 +288,7 @@ def training_parameter_schedule(schedule, unit=UnitType.minibatch, epoch_size=No
     if unit == UnitType.sample:
         ref_minibatch_size = 1
     else: # unit == UnitType.minibatch
-        ref_minibatch_size = cntk_py.training_double_parameter_schedule.unspecified_minibatch_size
+        ref_minibatch_size = cntk_py.training_double_parameter_schedule.ignored_minibatch_size

     if isinstance(schedule, cntk_py.training_double_parameter_schedule):
         schedule.is_minibatch_size_explicitly_specified = True #legacy learning parameters always have the specification
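Under the legacy unit-based API shown above, UnitType.sample maps to a reference minibatch size of 1 and UnitType.minibatch to the ignored sentinel; a sketch:

    from cntk.learners import training_parameter_schedule, UnitType

    # Per-sample rate: reference minibatch size 1.
    lr_per_sample = training_parameter_schedule(0.1, UnitType.sample)
    # Per-minibatch rate: reference minibatch size is the ignored sentinel.
    lr_per_minibatch = training_parameter_schedule(0.1, UnitType.minibatch)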
@@ -325,9 +325,9 @@ def learning_parameter_schedule(schedule, minibatch_size=None, epoch_size=None)
 pair, i.e. [(num_epoch_1, p_1), (num_epoch_2, p_2), ..., (num_epoch_n, p_n)], the i-th parameter is used as a
 value from the (``epoch_size`` * (num_epoch_0 + ... + num_epoch_(i-1)) + 1)-th sample to the
 (``epoch_size`` * (num_epoch_0 + ... + num_epoch_i))-th sample (taking num_epoch_0 = 0 as a special initialization).
-minibatch_size (int): an integer to specify the reference minibatch size that schedule are designed for;
+minibatch_size (int): an integer to specify the minibatch size that the schedule is designed for.
 CNTK will scale the schedule internally so as to simulate the behavior of the schedule as much as possible
-to match the designed effect. If it is not specified, CNTK will set to the special value cntk.learners.unspecified_minibatch_size.
+to match the designed effect. If it is not specified, CNTK will set it to the special value :attr:`IGNORE`.
 epoch_size (optional, int): number of samples as a scheduling unit.
 Parameters in the schedule change their values every ``epoch_size``
 samples. If no ``epoch_size`` is provided, this parameter is substituted
@@ -514,17 +514,17 @@ def _infer_ref_minibatch_size_from_legacy_use_mean_gradient(ref_minibatch_size,
         #if ref_minibatch_size and the legacy use_mean_gradient are neither specified
         return None
     if ref_minibatch_size is not None:
-        if use_mean_gradient == True and ref_minibatch_size != cntk_py.Learner.unspecified_minibatch_size:
+        if use_mean_gradient == True and ref_minibatch_size != cntk_py.Learner.ignored_minibatch_size:
             Warning(
                 'Learner reference minibatch size is specified while use_mean_gradient (deprecated option) is specified to True. Learner reference minibatch size will override the mean gradient behavior')
         #if the ref_minibatch_size is specified, it overrides the legacy use_mean_gradient specification
         return ref_minibatch_size
     elif use_mean_gradient is not None:
         #if the ref_minibatch_size is NOT specified, the legacy use_mean_gradient specification takes effect
-        return cntk_py.Learner.unspecified_minibatch_size if use_mean_gradient is True else None
+        return cntk_py.Learner.ignored_minibatch_size if use_mean_gradient is True else None
     return None

-def _infer_learning_parameter_schedule(number_or_schedule, ref_minibatch_size, epoch_size):
+def _infer_learning_parameter_schedule(number_or_schedule, ref_minibatch_size, epoch_size, use_mean_gradient=None):
     #the input is a number, create a new training parameter
     if isinstance(number_or_schedule, (int, float)) or \
         (isinstance(number_or_schedule, list) and all(isinstance(r, (int, float, tuple)) for r in number_or_schedule)):
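The precedence implemented above is observable on the resulting learner. A sketch in the spirit of the unit tests in this commit; the value 32 is hypothetical, and the second call assumes sgd forwards both minibatch_size and use_mean_gradient to this helper:

    import cntk as C

    w = C.parameter((2,))
    # Deprecated flag alone: equivalent to the ignored sentinel.
    learner = C.learners.sgd([w], lr=0.1, use_mean_gradient=True)
    assert learner.minibatch_size == C.learners.IGNORE

    # An explicit reference minibatch size overrides the deprecated flag
    # (and is intended to emit a warning).
    learner = C.learners.sgd([w], lr=0.1, minibatch_size=32,
                             use_mean_gradient=True)
    assert learner.minibatch_size == 32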
@@ -538,6 +538,13 @@ def _infer_learning_parameter_schedule(number_or_schedule, ref_minibatch_size, e
     if not number_or_schedule.is_minibatch_size_explicitly_specified and ref_minibatch_size is not None:
         #If the schedule minibatch size is not explicitly specified, the learner's specification will take over
         number_or_schedule.minibatch_size = ref_minibatch_size
+    #for backward compatibility: use_mean_gradient = True and lr.unit = UnitType.sample
+    #this combination was there to avoid the double-scaling of gradients when the gradients are already mean gradients
+    if use_mean_gradient and number_or_schedule.minibatch_size == 1:
+        #override the learning rate's minibatch_size to IGNORE
+        number_or_schedule.minibatch_size = IGNORE
+        Warning('use_mean_gradient=True and learning_rate_schedule.unit=UnitType.sample is a deprecated combination. '
+                'Please use the new learner APIs: see https://www.cntk.ai/pythondocs/cntk.learners.html for details.')
     return number_or_schedule
 else:
     raise ValueError('training parameter schedule type (%s) not supported. '
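The backward-compatibility branch above is exactly what the new unit test at the end of this commit exercises; a condensed sketch:

    import cntk as C
    from cntk.learners import sgd, learning_rate_schedule, UnitType

    w = C.parameter((2,))
    lr = learning_rate_schedule(0.1, UnitType.sample)  # minibatch_size == 1
    learner = sgd([w], lr, use_mean_gradient=True)
    # The schedule's reference minibatch size is overridden to IGNORE so a
    # mean gradient is not scaled down a second time.
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE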
@@ -549,7 +556,7 @@ def _infer_learning_rate_schedule_and_ref_minibatch_size(use_mean_gradient, ref_
     #a non-None reference_minibatch_size takes precedence; otherwise use_mean_gradient applies if it is True
     ref_minibatch_size = _infer_ref_minibatch_size_from_legacy_use_mean_gradient(ref_minibatch_size, use_mean_gradient)
     #if minibatch_size is not None, any schedules with an unspecified reference minibatch size will be overridden
-    schedule = _infer_learning_parameter_schedule(schedule, ref_minibatch_size, epoch_size)
+    schedule = _infer_learning_parameter_schedule(schedule, ref_minibatch_size, epoch_size, use_mean_gradient)
     _verify_learning_rate_type(schedule)
     return schedule, ref_minibatch_size

@@ -588,8 +595,8 @@ def sgd(parameters, lr,
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling. See also: :func:`learning_parameter_schedule`
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate. See also: :func:`learning_parameter_schedule`

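An end-to-end sketch of the sgd behavior documented above (model and values hypothetical):

    import cntk as C

    x = C.input_variable(4)
    z = C.layers.Dense(2)(x)
    # The rate 0.1 is declared for 32-sample minibatches; CNTK rescales the
    # update whenever training feeds minibatches of a different size.
    learner = C.learners.sgd(z.parameters, lr=0.1, minibatch_size=32)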
@@ -658,8 +665,8 @@ def momentum_sgd(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate and momentum. See also: :func:`learning_parameter_schedule`

 Returns:
@@ -727,8 +734,8 @@ def nesterov(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate and momentum. See also: :func:`learning_parameter_schedule`

 Returns:
@@ -801,9 +808,8 @@ def adadelta(parameters, lr=learning_rate_schedule(1, UnitType.sample), rho=0.95
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling. If the learner's learning rate
-schedule ``lr`` has its own specification of reference minibatch size, the learning rate schedule's specification takes precedence.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate. See also: :func:`learning_parameter_schedule`

 Returns:
@@ -870,8 +876,8 @@ def adagrad(parameters, lr, need_ave_multiplier=True,
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate. See also: :func:`learning_parameter_schedule`

 Returns:
@@ -945,8 +951,8 @@ def fsadagrad(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate, momentum and variance_momentum. See also: :func:`learning_parameter_schedule`

 Returns:
@@ -1025,8 +1031,8 @@ def adam(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate, momentum and variance_momentum. See also: :func:`learning_parameter_schedule`

 Returns:
@@ -1104,8 +1110,8 @@ def rmsprop(parameters, lr,
 size is usually set to the same as the minibatch data source's size. CNTK will perform automatic scaling of the parameters
 to enable efficient model parameter update implementation while approximating the behavior of pre-designed and pre-tuned parameters.
 In case that minibatch_size is not specified, CNTK will inherit the minibatch size from the learning rate schedule;
-if the learning rate schedule does not specify the minibatch_size, CNTK will set it to 1. Setting minibatch_size to 0
-will have the parameters apply as it is preventing CNTK performing any parameter scaling.
+if the learning rate schedule does not specify the minibatch_size, CNTK will set it to :attr:`IGNORE`. Setting minibatch_size to :attr:`IGNORE`
+will have the learner apply the values as they are, preventing CNTK from performing any hyper-parameter scaling. See also: :func:`learning_parameter_schedule`
 epoch_size (optional, int): number of samples as a scheduling unit for learning rate. See also: :func:`learning_parameter_schedule`

 Returns:
|
@ -138,6 +138,15 @@ def test_learner_init_legacy():
|
|||
assert learner.learning_rate() == 0.1
|
||||
assert learner.minibatch_size == C.learners.IGNORE # the learner's reference minibatch size is still 0
|
||||
|
||||
# this will be deprecated in future version: This is logical invalid combination but it was the only way to use mean gradient and set learning rate in the past.
|
||||
learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), use_mean_gradient=True)
|
||||
assert learner.is_compatible_mode() == True
|
||||
assert learner.learning_rate() == 0.1
|
||||
#test the override in the new version
|
||||
assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
|
||||
assert learner.minibatch_size == C.learners.IGNORE # the learner's reference minibatch size is still 0
|
||||
|
||||
|
||||
# for backcompatibility test
|
||||
# this will be deprecated in future version
|
||||
# The UnitType will provide per minibatch instruction for the learner
|
||||
|
|