Updated examples and tests with new Learning APIs.
Parent: 35255ed03b
Commit: f1d6fc9ed6
@@ -54,7 +54,7 @@ def criterion(data, label_one_hot):

 # Learner object. The learner implements the update algorithm, in this case plain SGD.
 learning_rate = 0.1
-learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch))
+learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))

 # Trainer configuration parameters.
 progress_writer = cntk.logging.ProgressPrinter(50) # helper for logging progress; log every 50 minibatches
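The change above is the pattern that repeats throughout this commit: learning-rate schedules no longer take a UnitType argument; the unit is instead carried by the function name. A minimal sketch of the mapping, assuming CNTK 2.2+ where both spellings are available (the rate value 0.1 is illustrative only):

import cntk

lr = 0.1
# old API: the unit is passed explicitly
per_minibatch_old = cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch)
per_sample_old = cntk.learning_rate_schedule(lr, cntk.UnitType.sample)
# new API: the unit is part of the function name
per_minibatch_new = cntk.learning_parameter_schedule(lr)
per_sample_new = cntk.learning_parameter_schedule_per_sample(lr)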
@@ -52,7 +52,7 @@ criterion = cntk.combine([loss, metric]) # criterion is a tuple-valued function

 # Learner object. The learner implements the update algorithm, in this case plain SGD.
 learning_rate = 0.1
-learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch))
+learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))

 # Trainer.
 minibatch_size = 32

@@ -81,14 +81,14 @@ def criterion(data, label_one_hot):

 # Learner object. The learner implements the update algorithm, in this case momentum SGD.
 # Because this script supports data-parallel training, the learning rate is specified
-# "per sample" (UnitType.sample), the value is already pre-divided by the minibatch size.
+# "per sample", the value is already pre-divided by the minibatch size.
 # This allows data-parallel training to slice the data into subsets and also to increase
 # the minibatch size where possible, while maintaining the same contribution per sample gradient.
 epoch_size = len(X_train)
 lr_per_sample = 0.001
-lr_schedule = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample)
-mm_time_constant = [0]*5 + [1024] # 5 epochs without momentum, then switch it on
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample)
+mm_per_sample = [0]*5 + [0.9990239141819757] # 5 epochs without momentum, then switch it on
+mm_schedule = C.learners.momentum_schedule_per_sample(mm_per_sample, epoch_size=epoch_size)

 # Instantiate the trainer object to drive the model training.
 learner = C.learners.momentum_sgd(model.parameters, lr_schedule, mm_schedule)
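The new per-sample momentum constants are not arbitrary: 0.9990239141819757 is the old time constant of 1024 samples expressed as a per-sample decay, momentum = exp(-1/time_constant). A small self-contained sketch (the helper name is ours, not part of the commit) that reproduces the values used in these diffs:

import math

def momentum_from_time_constant(time_constant):
    # per-sample momentum equivalent to an exponential-average time constant given in samples;
    # a time constant of 0 means no momentum
    return math.exp(-1.0 / time_constant) if time_constant > 0 else 0.0

print(momentum_from_time_constant(1024))  # ~0.9990239141819757
print(momentum_from_time_constant(600))   # ~0.9983347214509387
print(momentum_from_time_constant(1200))  # ~0.9991670137924583
print(momentum_from_time_constant(1100))  # ~0.9990913221888589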
@@ -114,7 +114,7 @@ prev_metric = 1 # metric from previous call to the callback. At very beginning,
 def adjust_lr_callback(index, average_error, cv_num_samples, cv_num_minibatches):
     global prev_metric
     if (prev_metric - average_error) / prev_metric < 0.05: # relative gain must reduce metric by at least 5% rel
-        learner.reset_learning_rate(C.learning_rate_schedule(learner.learning_rate() / 2, C.learners.UnitType.sample))
+        learner.reset_learning_rate(C.learning_parameter_per_sample(learner.learning_rate() / 2))
         if learner.learning_rate() < lr_per_sample / (2**7-0.1): # we are done after the 6-th LR cut
             print("Learning rate {} too small. Training complete.".format(learner.learning_rate()))
             return False # means we are done

@@ -137,7 +137,7 @@ learner = C.train.distributed.data_parallel_distributed_learner(learner)
 # For distributed training, we must maximize the minibatch size, as to minimize
 # communication cost and GPU underutilization. Hence, we use a "schedule"
 # that increases the minibatch size after a few epochs. By specifying the learning rate
-# as UnitType.sample, the contribution per sample maintains the same scale without
+# as per sample, the contribution per sample maintains the same scale without
 # having to fix up the learning rate.
 # For this MNIST model, larger minibatch sizes make it faster, because the
 # model is too small to utilize a full GPU. Hence data-parallel training cannot

@@ -100,9 +100,9 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_

 # Set learning parameters
 lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
-mm_time_constant = [0]*20 + [600]*20 + [1200]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
 l2_reg_weight = 0.002

 # trainer object

@@ -16,7 +16,7 @@ from cntk.layers import Convolution2D, MaxPooling, AveragePooling, Dropout, Batc
 from cntk.layers.typing import *
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
 from cntk import Trainer, use_default_device
-from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule, learning_parameter_schedule
+from cntk.learners import momentum_sgd, momentum_schedule, momentum_schedule_per_sample, learning_parameter_schedule, learning_parameter_schedule_per_sample
 from cntk import cross_entropy_with_softmax, classification_error, relu
 from cntk.ops import Function
 from cntk.debugging import set_computation_network_trace_level

@@ -109,8 +109,8 @@ def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):

 # learning parameters
 learner = momentum_sgd(model.parameters,
-                       lr = learning_parameter_schedule([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], minibatch_size=1, epoch_size=epoch_size),
-                       momentum = momentum_as_time_constant_schedule([0]*20+[600]*20+[1200], epoch_size=epoch_size),
+                       lr = learning_parameter_schedule_per_sample([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], epoch_size=epoch_size),
+                       momentum = momentum_schedule_per_sample([0]*20+[0.9983347214509387]*20+[0.9991670137924583], epoch_size=epoch_size),
                        l2_regularization_weight = 0.002)

 # trainer object

@@ -147,8 +147,8 @@ def Evaluator(criterion):
 if metric:
     parameters |= set(metric.parameters)
 dummy_learner = momentum_sgd(tuple(parameters),
-                             lr = learning_rate_schedule(1, UnitType.minibatch),
-                             momentum = momentum_as_time_constant_schedule(0))
+                             lr = learning_parameter_schedule(1),
+                             momentum = momentum_schedule(0))
 return Trainer(None, (loss, metric), dummy_learner)

 def evaluate(reader, criterion, device=None, minibatch_size=16, max_samples=None):
@@ -86,9 +86,9 @@ def create_conv_network():
 def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
     # Set learning parameters
     lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
-    lr_schedule = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
-    mm_time_constant = [0]*20 + [600]*20 + [1200]
-    mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
+    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+    mms = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
+    mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
     l2_reg_weight = 0.002

     # Create learner

@@ -55,9 +55,9 @@ def convnet_mnist(debug_output=False, epoch_size=60000, minibatch_size=64, max_e

 # Set learning parameters
 lr_per_sample = [0.001]*10 + [0.0005]*10 + [0.0001]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample, epoch_size)
-mm_time_constant = [0]*5 + [1024]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*5 + [0.9990239141819757]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)

 # Instantiate the trainer object to drive the model training
 learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)

@@ -17,7 +17,7 @@ import _cntk_py
 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler, enable_profiler
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error

@@ -114,7 +114,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, progress_wri
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -16,7 +16,7 @@ import _cntk_py

 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.ops import input
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP

@@ -51,7 +51,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -17,7 +17,7 @@ import _cntk_py
 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler, enable_profiler
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error

@@ -115,7 +115,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size):
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe
@@ -16,7 +16,7 @@ import _cntk_py

 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.train.distributed import data_parallel_distributed_learner, Communicator
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP

@@ -51,7 +51,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor

-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -103,7 +103,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size):
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor

-lr_schedule = C.learners.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learners.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = C.learners.momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -39,7 +39,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor

-lr_schedule = C.learners.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learners.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = C.learners.momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -12,7 +12,7 @@ import cntk as C
 from cntk.train import Trainer, minibatch_size_schedule
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
 from cntk.device import cpu, try_set_default_device
-from cntk.learners import adadelta, learning_rate_schedule, UnitType
+from cntk.learners import adadelta, learning_parameter_schedule_per_sample
 from cntk.ops import relu, element_times, constant
 from cntk.layers import Dense, Sequential, For
 from cntk.losses import cross_entropy_with_softmax

@@ -85,7 +85,7 @@ def simple_mnist(tensorboard_logdir=None):
 progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

 # Instantiate the trainer object to drive the model training
-lr = learning_rate_schedule(1, UnitType.sample)
+lr = learning_parameter_schedule_per_sample(1)
 trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers)

 training_session(

@@ -13,7 +13,7 @@ import numpy as np
 from cntk import cross_entropy_with_softmax, classification_error, reduce_mean
 from cntk import Trainer, cntk_py
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
 from cntk.debugging import *
 from cntk.logging import *
 from resnet_models import *

@@ -80,13 +80,12 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_

 # shared training parameters
 minibatch_size = 128
-momentum_time_constant = -minibatch_size/np.log(0.9)
 l2_reg_weight = 0.0001

 # Set learning parameters
 lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
-lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mm_schedule = momentum_schedule(0.9, minibatch_size)

 # progress writers
 progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
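The deleted momentum_time_constant line and the new momentum_schedule(0.9, minibatch_size) say the same thing: the old code converted a per-minibatch momentum of 0.9 into a per-sample time constant, while the new call states the per-minibatch value directly together with its reference minibatch size (our reading of the new API; the arithmetic below only checks that the two formulations agree):

import numpy as np

minibatch_size = 128
# old formulation: momentum 0.9 per minibatch, rewritten as a per-sample time constant
momentum_time_constant = -minibatch_size / np.log(0.9)       # ~1214.9 samples
per_sample_momentum = np.exp(-1.0 / momentum_time_constant)
# applying the per-sample momentum over one minibatch recovers 0.9 exactly
print(per_sample_momentum ** minibatch_size)                  # 0.9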
@@ -10,9 +10,10 @@ import argparse
 import cntk as C
 import numpy as np

+import cntk as C
 from cntk import input, cross_entropy_with_softmax, classification_error, Trainer, cntk_py
 from cntk import data_parallel_distributed_learner, block_momentum_distributed_learner, Communicator
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule
 from cntk.device import try_set_default_device, gpu
 from cntk.train.training_session import *
 from cntk.debugging import *

@@ -71,15 +72,13 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
 else:
     return RuntimeError("Unknown model name!")

-momentum_time_constant = -minibatch_size/np.log(0.9)
 l2_reg_weight = 0.0001

 # Set learning parameters
 minibatch_size = 128
 lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
-lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule(lr_per_mb, minibatch_size = minibatch_size, epoch_size=epoch_size)
+mm_schedule = momentum_schedule(0.9, minibatch_size = minibatch_size)

 # learner object
 if block_size != None and num_quantization_bits != 32:
     raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")
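In the hunk above the learning rate is no longer pre-divided by the minibatch size; the per-minibatch values are passed with minibatch_size=minibatch_size instead. Assuming the keyword means "this value applies to that many samples" (which is how the removed per-sample code read), the two spellings should describe the same schedule; a minimal sketch with illustrative numbers only:

import cntk as C

minibatch_size = 128
epoch_size = 50000
lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]   # illustrative values, not from the commit

# new style: state the per-minibatch rates and their reference minibatch size
lr_a = C.learning_parameter_schedule(lr_per_mb, minibatch_size=minibatch_size, epoch_size=epoch_size)
# old style, as removed above: pre-divide and declare the result per sample
lr_b = C.learning_parameter_schedule_per_sample([lr / minibatch_size for lr in lr_per_mb], epoch_size=epoch_size)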
@@ -136,7 +136,7 @@ def create_vgg16():
 def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
     # Set learning parameters
     lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
-    lr_schedule = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+    lr_schedule = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
     mm_schedule = C.learners.momentum_schedule(0.9)
     l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -136,7 +136,7 @@ def create_vgg19():
 def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
     # Set learning parameters
     lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
-    lr_schedule = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+    lr_schedule = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
     mm_schedule = C.learners.momentum_schedule(0.9)
     l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

@@ -11,7 +11,7 @@ from cntk.initializer import glorot_uniform
 from cntk.io import MinibatchSource, ImageDeserializer, CTFDeserializer, StreamDefs, StreamDef
 from cntk.io.transforms import scale
 from cntk.layers import placeholder, Constant
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule_per_sample
 from cntk.logging import log_number_of_parameters, ProgressPrinter
 from cntk.logging.graph import find_by_name, plot
 import PARAMETERS

@@ -44,10 +44,10 @@ epoch_size = p.cntk_num_train_images
 num_test_images = p.cntk_num_test_images
 mb_size = p.cntk_mb_size
 max_epochs = p.cntk_max_epochs
-momentum_time_constant = p.cntk_momentum_time_constant
 distributed_flg = p.distributed_flg
 num_quantization_bits = p.num_quantization_bits
 warm_up = p.warm_up
+momentum_per_sample = p.cntk_momentum_per_sample

 # model specific variables (only AlexNet for now)
 base_model = "AlexNet"

@@ -154,8 +154,8 @@ def train_fast_rcnn(debug_output=False, model_path=model_file):
 # Set learning parameters
 l2_reg_weight = 0.0005
 lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
-lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
+mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

 # Instantiate the trainer object as default
 learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)

@@ -54,7 +54,7 @@ class Parameters():
 self.cntk_num_test_images = -1 # set per data set below
 self.cntk_mb_size = -1 # set per data set below
 self.cntk_max_epochs = -1 # set per data set below
-self.cntk_momentum_time_constant = -1 # set per data set below
+self.cntk_momentum_per_sample = -1 # set per data set below

 # for Distributed learner
 self.distributed_flg = False # In case of distributed learning, set 'True'

@@ -83,7 +83,7 @@ class GroceryParameters(Parameters):
 self.cntk_num_test_images = 5
 self.cntk_mb_size = 5
 self.cntk_max_epochs = 20
-self.cntk_momentum_time_constant = 10
+self.cntk_momentum_per_sample = 0.8187307530779818

 # postprocessing
 self.nmsThreshold = 0.01

@@ -117,7 +117,7 @@ class PascalParameters(Parameters):
 self.cntk_num_test_images = 4952
 self.cntk_mb_size = 2
 self.cntk_max_epochs = 17
-self.cntk_momentum_time_constant = 20
+self.cntk_momentum_per_sample = 0.951229424500714

 self.pascalDataDir = os.path.join(self.rootDir, "..", "..", "DataSets", "Pascal")
 self.imgDir = self.pascalDataDir
@@ -10,12 +10,12 @@ import os, sys
 import argparse
 import easydict # pip install easydict
 import cntk
-from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
+from cntk import Trainer, load_model, Axis, input_variable, parameter, times, combine, \
     softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
 from cntk.core import Value
 from cntk.initializer import normal
 from cntk.layers import placeholder, Constant, Sequential
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
 from cntk.logging import log_number_of_parameters, ProgressPrinter
 from cntk.logging.graph import find_by_name, plot
 from cntk.losses import cross_entropy_with_softmax

@@ -295,11 +295,11 @@ def train_fast_rcnn(cfg):
 biases = [p for p in params if '.b' in p.name or 'b' == p.name]
 others = [p for p in params if not p in biases]
 bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT
-lr_schedule = learning_rate_schedule(lr_per_sample_scaled, unit=UnitType.sample)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample_scaled)
 learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight, unit_gain=False, use_mean_gradient=True)

 bias_lr_per_sample = [v * bias_lr_mult for v in cfg["CNTK"].LR_PER_SAMPLE]
-bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
+bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
 bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight, unit_gain=False, use_mean_gradient=True)
 trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

@@ -10,13 +10,13 @@ import os, sys
 import argparse
 import easydict # pip install easydict
 import cntk
-from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
+from cntk import Trainer, load_model, Axis, input_variable, parameter, times, combine, \
     softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
 from cntk.core import Value
 from cntk.io import MinibatchData
 from cntk.initializer import normal
 from cntk.layers import placeholder, Constant, Sequential
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
 from cntk.logging import log_number_of_parameters, ProgressPrinter
 from cntk.logging.graph import find_by_name, plot
 from cntk.losses import cross_entropy_with_softmax

@@ -521,12 +521,12 @@ def train_model(image_input, roi_input, dims_input, loss, pred_error,
 print("bias_lr_mult: {}".format(bias_lr_mult))

 # Instantiate the learners and the trainer object
-lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
 learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                        unit_gain=False, use_mean_gradient=True)

 bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
-bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
+bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
 bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                             unit_gain=False, use_mean_gradient=True)
 trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

@@ -57,8 +57,8 @@ def deconv_mnist(max_epochs=3):
 minibatch_size = 64

 # Set learning parameters
-lr_schedule = C.learning_rate_schedule([0.00015], C.learners.UnitType.sample, epoch_size)
-mm_schedule = C.learners.momentum_as_time_constant_schedule([600], epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample([0.00015], epoch_size=epoch_size)
+mm_schedule = C.learners.momentum_schedule_per_sample([0.9983347214509387], epoch_size=epoch_size)

 # Instantiate the trainer object to drive the model training
 learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
@@ -11,12 +11,12 @@ import os
 from PIL import Image
 from cntk.device import try_set_default_device, gpu
 from cntk import load_model, placeholder, Constant
-from cntk import Trainer, UnitType
+from cntk import Trainer
 from cntk.logging.graph import find_by_name, get_node_outputs
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef
 import cntk.io.transforms as xforms
 from cntk.layers import Dense
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule
 from cntk.ops import combine, softmax
 from cntk.ops.functions import CloneMethod
 from cntk.losses import cross_entropy_with_softmax

@@ -114,7 +114,7 @@ def train_model(base_model_file, feature_node_name, last_hidden_node_name,
 pe = classification_error(tl_model, label_input)

 # Instantiate the trainer object
-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
+lr_schedule = learning_parameter_schedule(lr_per_mb)
 mm_schedule = momentum_schedule(momentum_per_mb)
 learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
 progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)

@@ -138,8 +138,8 @@ def train(reader, model, max_epochs):

 # SGD parameters
 learner = cntk.learners.fsadagrad(criterion.parameters,
-                                  lr = cntk.learners.learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], cntk.learners.UnitType.sample, epoch_size),
-                                  momentum = cntk.learners.momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
+                                  lr = cntk.learners.learning_parameter_schedule_per_sample([0.003]*2+[0.0015]*12+[0.0003], epoch_size=epoch_size),
+                                  momentum = cntk.learners.momentum_schedule(0.9, minibatch_size),
                                   gradient_clipping_threshold_per_sample = 15,
                                   gradient_clipping_with_truncation = True)

@@ -12,7 +12,7 @@ from cntk.core import Value
 from cntk.initializer import he_uniform
 from cntk.layers import Sequential, Convolution2D, Dense, default_options
 from cntk.layers.typing import Signature, Tensor
-from cntk.learners import adam, learning_rate_schedule, momentum_schedule, UnitType
+from cntk.learners import adam, learning_parameter_schedule, momentum_schedule
 from cntk.logging import TensorBoardProgressWriter
 from cntk.ops import abs, argmax, element_select, less, relu, reduce_max, reduce_sum, square
 from cntk.ops.functions import CloneMethod, Function

@@ -317,7 +317,7 @@ class DeepQAgent(object):
 return huber_loss(q_targets, q_acted, 1.0)

 # Adam based SGD
-lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
+lr_schedule = learning_parameter_schedule(learning_rate)
 m_schedule = momentum_schedule(momentum)
 vm_schedule = momentum_schedule(0.999)
 l_sgd = adam(self._action_value_net.parameters, lr_schedule,

@@ -53,7 +53,7 @@ def train_sequence_classifier():
 label : reader.streams.labels
 }

-lr_per_sample = C.learning_rate_schedule(0.1, C.UnitType.sample)
+lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)

 # Instantiate the trainer object to drive the model training
 progress_printer = C.logging.ProgressPrinter(0)

@@ -9,7 +9,8 @@ import numpy as np
 import os
 from cntk import Trainer, Axis
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
-from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
+from cntk.learners import momentum_sgd, fsadagrad, momentum_schedule_per_sample, \
+    learning_parameter_schedule, learning_parameter_schedule_per_sample
 from cntk import input, cross_entropy_with_softmax, classification_error, sequence, \
     element_select, alias, hardmax, placeholder, combine, parameter, times, plus
 from cntk.ops.functions import CloneMethod, load_model, Function
@@ -218,8 +219,8 @@ def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_si
 minibatch_size = 72
 lr = 0.001 if use_attention else 0.005 # TODO: can we use the same value for both?
 learner = fsadagrad(model_train.parameters,
-                    lr = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
-                    momentum = momentum_as_time_constant_schedule(1100),
+                    lr = learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size=epoch_size),
+                    momentum = momentum_schedule_per_sample(0.9990913221888589),
                     gradient_clipping_threshold_per_sample=2.3,
                     gradient_clipping_with_truncation=True)
 trainer = Trainer(None, criterion, learner)

@@ -314,7 +315,7 @@ def evaluate_decoding(reader, s2smodel, i2w):
 # TODO: replace by a proper such class once available
 def Evaluator(model, criterion):
     from cntk import Trainer
-    from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
+    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
     loss, metric = Trainer._get_loss_metric(criterion)
     parameters = set(loss.parameters)
     if model:

@@ -322,8 +323,8 @@ def Evaluator(model, criterion):
 if metric:
     parameters |= set(metric.parameters)
 dummy_learner = momentum_sgd(tuple(parameters),
-                             lr = learning_rate_schedule(1, UnitType.minibatch),
-                             momentum = momentum_as_time_constant_schedule(0))
+                             lr = learning_parameter_schedule(1),
+                             momentum = momentum_schedule_per_sample(0))
 return Trainer(model, (loss, metric), dummy_learner)

 # This computes the metric on the test set.

@@ -15,7 +15,7 @@ import cntk
 from cntk import Trainer
 from cntk.train.distributed import Communicator, data_parallel_distributed_learner, block_momentum_distributed_learner
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
-from cntk.learners import fsadagrad, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
+from cntk.learners import fsadagrad, learning_parameter_schedule_per_sample, momentum_schedule, momentum_schedule_per_sample
 from cntk.train.training_session import *
 from cntk.logging import *

@@ -50,8 +50,8 @@ def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantiza

 lr = 0.001 if use_attention else 0.005 # TODO: can we use the same value for both?
 local_learner = fsadagrad(model_train.parameters,
-                          lr = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
-                          momentum = momentum_as_time_constant_schedule(1100),
+                          lr = learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size=epoch_size),
+                          momentum = momentum_schedule_per_sample(0.9990913221888589),
                           gradient_clipping_threshold_per_sample=2.3,
                           gradient_clipping_with_truncation=True)

@@ -72,8 +72,8 @@ def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_
 lr = [0.001]

 local_learner = fsadagrad(network['output'].parameters,
-                          lr=learning_rate_schedule(lr, UnitType.sample, epoch_size),
-                          momentum=momentum_as_time_constant_schedule(1000),
+                          lr=learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
+                          momentum=momentum_schedule_per_sample(0.9990913221888589),
                           gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)

 if block_size != None:

@@ -9,7 +9,7 @@ import numpy as np
 import os
 import sys
 from cntk import Trainer, Axis
-from cntk.learners import momentum_sgd, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
+from cntk.learners import momentum_sgd, momentum_schedule_per_sample, learning_parameter_schedule_per_sample
 from cntk.ops import sequence
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error

@@ -157,11 +157,11 @@ def train_lm(training_file, epochs, max_num_minibatches):
 errs = classification_error(z, label_sequence)

 # Instantiate the trainer object to drive the model training
-lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
-momentum_time_constant = momentum_as_time_constant_schedule(1100)
+lr_per_sample = learning_parameter_schedule_per_sample(0.001)
+momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
 clipping_threshold_per_sample = 5.0
 gradient_clipping_with_truncation = True
-learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
+learner = momentum_sgd(z.parameters, lr_per_sample, momentum_schedule,
                        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
 progress_printer = ProgressPrinter(freq=100, tag='Training')
@ -190,12 +190,12 @@ def create_criterion(network):
|
||||||
# return: learners: [sgd, adam, adagrad]
|
# return: learners: [sgd, adam, adagrad]
|
||||||
def create_learner(model):
|
def create_learner(model):
|
||||||
'''Create the optimized method'''
|
'''Create the optimized method'''
|
||||||
lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
|
lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
|
||||||
momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
|
momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
|
||||||
if opt.optim == 'sgd':
|
if opt.optim == 'sgd':
|
||||||
return C.sgd(model.parameters, lr=lr_per_minibatch)
|
return C.sgd(model.parameters, lr=lr_per_minibatch)
|
||||||
elif opt.optim == 'adam':
|
elif opt.optim == 'adam':
|
||||||
return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
|
return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
|
||||||
elif opt.optim == 'adagrad':
|
elif opt.optim == 'adagrad':
|
||||||
return C.adagrad(model.parameters, lr=lr_per_minibatch)
|
return C.adagrad(model.parameters, lr=lr_per_minibatch)
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -10,7 +10,7 @@ import cntk as C
 import timeit
 from cntk import Axis
 from cntk.train import Trainer
-from cntk.learners import momentum_sgd, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
+from cntk.learners import momentum_sgd
 from cntk.ops import sequence
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error

@@ -34,7 +34,7 @@ sequence_length = 40
 sequences_per_batch = 10
 alpha = 0.75
 learning_rate = 0.002
-momentum_as_time_constant = 10000
+momentum_per_sample = 0.9999000049998333
 clipping_threshold_per_sample = 5.0
 token_to_id_path = './ptb/token2id.txt'
 validation_file_path = './ptb/valid.txt'
@@ -196,8 +196,8 @@ def train_lm(testing=False):
 num_trained_samples_since_last_report = 0

 # Instantiate the trainer object to drive the model training
-lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
+lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
-momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
+momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
 gradient_clipping_with_truncation = True
 learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
 gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,

@@ -192,9 +192,9 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):

 # Set learning parameters
 lr_per_sample = [0.01]*10+[0.001]*10+[0.0001]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=train_epoch_size)
-momentum_time_constant = 4096
+momentum_per_sample = 0.9997558891748972
-mm_schedule = C.momentum_as_time_constant_schedule([momentum_time_constant])
+mm_schedule = C.momentum_schedule_per_sample([momentum_per_sample])

 # Instantiate the trainer object to drive the model training
 learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
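As the conv3d hunk above shows, both the old and the new API accept a list of values together with an epoch_size: each entry stays in effect for epoch_size samples before the schedule advances, and the last entry is then used from that point on. A hedged sketch with a made-up epoch size (600 is not a value from this commit):

    import cntk as C

    train_epoch_size = 600  # hypothetical value, for illustration only

    # 0.01 for the first 10 "epochs" of samples, 0.001 for the next 10, then 0.0001.
    lr_per_sample = [0.01]*10 + [0.001]*10 + [0.0001]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample,
                                                           epoch_size=train_epoch_size)

    # Schedules can be indexed by sample count to inspect the value in effect.
    print(lr_schedule[0])                      # 0.01
    print(lr_schedule[10 * train_epoch_size])  # 0.001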
@@ -39,8 +39,8 @@ def test_htk_deserializers():
 errs = C.classification_error (z, labels)

 learner = C.fsadagrad(z.parameters,
-lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
+lr=C.learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
-momentum=C.momentum_as_time_constant_schedule(1000),
+momentum=C.momentum_schedule_per_sample(0.9990913221888589),
 gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
 progress_printer = C.logging.ProgressPrinter(freq=0)
 trainer = C.Trainer(z, (ce, errs), learner, progress_printer)
@@ -40,11 +40,11 @@ if __name__=='__main__':
 z = plus(n, p2, name='z')
 ce = squared_error(z, labels)

-momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
+momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
-lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
+lr_per_sample = C.learning_parameter_schedule_per_sample(0.007)
 dist_learners = [
-C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_time_constant, True)),
+C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_schedule, True)),
-C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_time_constant, True))
+C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_schedule, True))
 ]

 trainer = C.Trainer(z, ce, dist_learners)
@@ -75,7 +75,7 @@ def test_word_rnn(device_id):
 W.sequences_per_batch = 2
 W.alpha = 0.75
 W.learning_rate = 0.02
-W.momentum_as_time_constant = 5
+W.momentum_per_sample = 0.8187307530779818
 W.clipping_threshold_per_sample = 5.0
 W.segment_sepparator = '<eos>'
 W.num_samples_between_progress_report = 2
@@ -77,8 +77,8 @@ def LanguageUnderstanding_train(reader, model, max_epochs):
 minibatch_size = 70

 learner = fsadagrad(criterion.parameters,
-lr = learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], UnitType.sample, epoch_size),
+lr = learning_parameter_schedule_per_sample([0.003]*2+[0.0015]*12+[0.0003], epoch_size=epoch_size),
-momentum = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
+momentum = momentum_schedule(0.9, minibatch_size),
 gradient_clipping_threshold_per_sample = 15,
 gradient_clipping_with_truncation = True)
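The momentum change in this hunk is the same quantity written two ways: a time constant of minibatch_size / -math.log(0.9) corresponds to a per-sample momentum of 0.9 ** (1 / minibatch_size), i.e. a momentum of 0.9 applied per minibatch of that size, which is what momentum_schedule(0.9, minibatch_size) states directly. A quick standalone check of the arithmetic (not repository code):

    import math

    minibatch_size = 70
    time_constant = minibatch_size / -math.log(0.9)   # legacy parameterization
    per_sample = math.exp(-1.0 / time_constant)       # == 0.9 ** (1.0 / minibatch_size)

    print(per_sample ** minibatch_size)               # ~0.9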
@@ -235,9 +235,8 @@ class ActorCritic(AgentBaseClass):
 (combined_loss, None),
 C.learners.adam(
 combined_networks.parameters,
-C.learners.learning_rate_schedule(
-self._parameters.initial_eta,
-C.learners.UnitType.sample),
+C.learners.learning_parameter_schedule_per_sample(
+self._parameters.initial_eta),
 momentum=C.learners.momentum_schedule(self._parameters.momentum),
 variance_momentum=C.learners.momentum_schedule(0.999),
 minibatch_size=C.learners.IGNORE))
@@ -255,8 +254,7 @@ class ActorCritic(AgentBaseClass):
 (self._parameters.initial_eta - self._parameters.eta_minimum) *
 (1 - float(self.step_count)/self._parameters.eta_decay_step_count))
 self._trainer.parameter_learners[0].reset_learning_rate(
-C.learners.learning_rate_schedule(
-eta, C.learners.UnitType.sample))
+C.learners.learning_parameter_schedule_per_sample(eta))

 def _choose_action(self, state):
 """
@@ -105,8 +105,8 @@ class QLearning(AgentBaseClass):
 # TODO: allow user to specify learner through config file.
 opt = C.learners.adam(
 self._q.parameters,
-C.learners.learning_rate_schedule(
-self._parameters.initial_eta, C.learners.UnitType.sample),
+C.learners.learning_parameter_schedule_per_sample(
+self._parameters.initial_eta),
 use_mean_gradient=True,
 momentum=C.learners.momentum_schedule(self._parameters.momentum),
 variance_momentum=C.learners.momentum_schedule(0.999),
@@ -226,8 +226,8 @@ class QLearning(AgentBaseClass):
 (1 - float(self.step_count)/self._parameters.eta_decay_step_count))

 self._trainer.parameter_learners[0].reset_learning_rate(
-C.learners.learning_rate_schedule(
-eta, C.learners.UnitType.sample))
+C.learners.learning_parameter_schedule_per_sample(
+eta))

 def _adjust_exploration_rate(self):
 self._epsilon = self._parameters.epsilon_minimum + max(
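Both reinforcement-learning agents decay eta over time and push the new value back into the existing learner through reset_learning_rate; under the new API the argument is simply a fresh per-sample schedule. A hedged standalone sketch of that pattern (the toy model and the value of eta are made up for illustration, not taken from the agents above):

    import cntk as C

    x = C.input_variable(4)
    z = C.layers.Dense(2)(x)
    learner = C.sgd(z.parameters, C.learning_parameter_schedule_per_sample(0.1))

    # ... later, after computing a decayed rate eta ...
    eta = 0.05
    learner.reset_learning_rate(C.learning_parameter_schedule_per_sample(eta))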
@@ -5,7 +5,7 @@

 import numpy as np
 import cntk as C
-from cntk import sgd, Trainer, learning_rate_schedule, parameter, \
+from cntk import sgd, Trainer, learning_parameter_schedule, parameter, \
 times, cross_entropy_with_softmax, \
 classification_error, UnitType, combine
 from cntk.debugging.debug import debug_model, _DebugNode
@@ -32,7 +32,7 @@ def _train_backcompatible_test(z, loss, eval_error,

 input_dim = 2

-lr_schedule = learning_rate_schedule(0.5, UnitType.minibatch)
+lr_schedule = learning_parameter_schedule(0.5)

 learner = sgd(z.parameters, lr_schedule)
 trainer = Trainer(z, (loss, eval_error), [learner])
@@ -5,7 +5,7 @@ import numpy as np
 import cntk as C
 from cntk import Axis, NDArrayView
 from cntk.logging import ProgressPrinter
-from cntk.learners import UserLearner, sgd, learning_rate_schedule, UnitType
+from cntk.learners import UserLearner, sgd, learning_parameter_schedule
 from cntk.layers import Dense, Sequential
 import pytest

@@ -79,7 +79,7 @@ class MySgdFast(UserLearner):

 ADDITIONAL_ARGUMENTS = [
 #(additional learning rate arguments (args), additional learner arguments (kwargs))
-(C.learning_rate_schedule, [UnitType.minibatch], {'minibatch_size': 0}), #for backward compatible test
+(C.learning_rate_schedule, [C.learners.UnitType.minibatch], {'minibatch_size': 0}), #for backward compatible test
 (C.learning_parameter_schedule, [25], {'minibatch_size': 25}), # test new API; 25 is the actually minibatch size
 (C.learning_parameter_schedule, [], {'minibatch_size': 0}), # test new API
 ]
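In the new API the schedule itself can carry a reference minibatch_size, which is what the test matrix above varies: a schedule built with minibatch_size=25 states that the rate refers to 25-sample minibatches, while minibatch_size=0 applies the rate to the whole minibatch regardless of its actual size (my reading of the parameter; the legacy learning_rate_schedule entry is kept only for the backward-compatibility case). A hedged illustration:

    import cntk as C

    # Rate stated relative to a reference minibatch size of 25.
    lr_ref_25 = C.learning_parameter_schedule(0.5, minibatch_size=25)

    # Rate applied to the whole minibatch as-is, whatever its actual size.
    lr_whole_mb = C.learning_parameter_schedule(0.5, minibatch_size=0)

    w = C.parameter(shape=(3,))
    learner = C.sgd([w], lr=lr_ref_25)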
@@ -896,8 +896,8 @@ def test_usermbsource_training(tmpdir, with_checkpoint_impl):
 mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

 from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
-classification_error, learning_rate_schedule, sgd, Trainer, \
+classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
-training_session, times, UnitType
+training_session, times

 feature = sequence.input_variable(shape=(input_dim,))
 label = C.input_variable(shape=(num_output_classes,))

@@ -908,7 +908,7 @@ def test_usermbsource_training(tmpdir, with_checkpoint_impl):

 #having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed
 #note that training session can end earlier if there is no updates
-lr_per_sample = learning_rate_schedule(0.3, UnitType.sample)
+lr_per_sample = learning_parameter_schedule_per_sample(0.3)
 learner = sgd(z.parameters, lr_per_sample)
 trainer = Trainer(z, (ce, errs), [learner])
 input_map = {
@@ -800,7 +800,7 @@ def nesterov(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
 return opt

 @typemap
-def adadelta(parameters, lr=learning_rate_schedule(1, UnitType.sample), rho=0.95, epsilon=1e-8,
+def adadelta(parameters, lr=learning_parameter_schedule_per_sample(1), rho=0.95, epsilon=1e-8,
 l1_regularization_weight=0.0, l2_regularization_weight=0.0,
 gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=np.inf,
 gradient_clipping_with_truncation=True, use_mean_gradient=None,
@@ -51,7 +51,7 @@ class SimpleTrainer:
 self.trainer = C.Trainer(self.z, (self.z, None), learner, []) if learner else None

 def create_distributed_learner(self, mode, config):
-local_learner = C.sgd(self.z.parameters, C.learning_rate_schedule(0.01, unit=C.learners.UnitType.sample))
+local_learner = C.sgd(self.z.parameters, C.learning_parameter_schedule_per_sample(0.01))
 try:
 if mode == 'data_parallel':
 if config is None:
@@ -52,13 +52,13 @@ MOMENTUM_SCHEDULE_PARAMS = [

 LEARNER_LAMBDAS = [
 lambda params: C.adadelta(params),
-lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
+lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
-lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
+lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
-lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
+lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
-lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
+lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
-lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
+lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
-lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
+lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
-lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]
+lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

 @pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
 def test_learning_rate_schedule(params, expectation, minibatch_size):
@@ -466,7 +466,7 @@ def test_noise_injection_with_checkpointing():
 w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
 w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))

-lr=learning_rate_schedule(0.5, UnitType.sample)
+lr=C.learning_parameter_schedule_per_sample(0.5)
 m=C.momentum_schedule(0.99)

 learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
@@ -515,8 +515,8 @@ def test_learner_logging():
 lr_values = [0.3, 0.2, 0.1, 0]
 m_values = [0.6, 0.7, 0.8]
 learner = C.momentum_sgd(z.parameters,
-learning_rate_schedule(lr_values, UnitType.sample, 1),
+C.learning_parameter_schedule_per_sample(lr_values, epoch_size=1),
-C.momentum_schedule(m_values, 1))
+C.momentum_schedule(m_values, epoch_size=1))
 trainer = Trainer(z, (ce, errs), [learner], writer)

 for i in range(10):
@@ -572,7 +572,7 @@ def test_sweep_based_schedule(tmpdir, device_id):
 ce = cross_entropy_with_softmax(z, labels)
 errs = classification_error(z, labels)

-lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
+lr_per_sample = C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])
 learner = sgd(z.parameters, lr_per_sample)
 trainer = Trainer(z, (ce, errs), [learner])

@@ -617,7 +617,7 @@ def generate_random_data(sample_size, feature_dim, num_classes):


 def test_learner_empy_parameters_list():
-lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
+lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)
 with pytest.raises(ValueError):
 learner = C.sgd([], lr_per_sample)

@@ -673,14 +673,14 @@ def test_sgd_with_noise():
 # in some layers. This tests that cuRand library will not
 # complain about generating an odd number of random values
 np.random.seed(98052)
-learner = lambda params: sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch), gaussian_noise_injection_std_dev=0.01)
+learner = lambda params: sgd(params, lr=C.learning_parameter_schedule(0.125), gaussian_noise_injection_std_dev=0.01)
 ffnet(learner)
 # We just verify that we did not crash
 assert(True)

 def test_universal():
 np.random.seed(98052)
-builtin_sgd = lambda params: sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch))
+builtin_sgd = lambda params: sgd(params, lr=C.learning_parameter_schedule(0.125))
 builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd)
 np.random.seed(98052)
 my_sgd = lambda ps, gs: C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)])
@@ -735,10 +735,10 @@ def test_restore_from_checkpoint(tmpdir, learner):
 # this should be replaced with LEARNER_LAMBDAS
 SPARSE_AND_DENSE_LEARNER_LAMBDAS = [
 (lambda params: C.adadelta(params), False),
-(lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), True),
+(lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)), True),
-(lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), True),
+(lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)), True),
-(lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8), True),
+(lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8), True),
-(lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)), False)]
+(lambda params: C.sgd(params, lr=learning_parameter_schedule(1)), False)]

 @pytest.mark.parametrize("learner, gpu_only", SPARSE_AND_DENSE_LEARNER_LAMBDAS)
 @pytest.mark.parametrize("checkpoint", [True, False])
@@ -1358,7 +1358,7 @@ class Function(cntk_py.Function):
 ... def criterion(data, label_one_hot):
 ... z = model(data)  # apply model. Computes a non-normalized log probability for every output class.
 ... return cntk.cross_entropy_with_softmax(z, label_one_hot)
->>> learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch))
+>>> learner = cntk.sgd(model.parameters, 0.1)
 >>> progress = criterion.train((X, Y), minibatch_size=25, max_epochs=2, epoch_size=125, parameter_learners=[learner])
 >>> print("%.2f" % progress.epoch_summaries[-1].loss) # get the final epoch's loss value
 0.68
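As the updated doctest shows, the learner factories now also accept a plain number for lr, which reads as shorthand for a constant schedule. A hedged sketch with a toy model (the Dense layer is made up for illustration; the equivalence in the second line is my reading of what the scalar shorthand expands to):

    import cntk as C

    x = C.input_variable(2)
    model = C.layers.Dense(2)(x)

    learner_a = C.sgd(model.parameters, 0.1)
    learner_b = C.sgd(model.parameters, C.learning_parameter_schedule(0.1))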
@@ -88,7 +88,7 @@ def test_training_2d_sparse_sequence_operand(device_id):

 l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
 loss = cross_entropy_with_softmax(z, l, axis=-1)
-trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample)))
+trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_parameter_schedule_per_sample(0.7)))
 return (a, l, w, trainer)

 # Run with sparse inputs
@@ -146,7 +146,7 @@ def test_training_3d_sparse_sequence_with_recurrence(device_id):

 l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
 loss = cross_entropy_with_softmax(z, l, axis=-1)
-trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample)))
+trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_parameter_schedule_per_sample(0.7)))
 return (a, l, w_i, w_h, trainer)

 # Run with sparse inputs
@@ -129,7 +129,7 @@ def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
 np.random.seed(0)

 learning_rate = 0.5
-lr_schedule = C.learning_rate_schedule(learning_rate)
+lr_schedule = C.learning_parameter_schedule(learning_rate)

 hidden_layers_dim = 50

@@ -79,7 +79,7 @@ def test_clone_freeze():
 c_copies = [q.value for q in c_clone.constants]

 # update z
-trainer = C.Trainer(z, C.squared_error(z, label), C.sgd(z.parameters, C.learning_rate_schedule(1.0, C.UnitType.minibatch)))
+trainer = C.Trainer(z, C.squared_error(z, label), C.sgd(z.parameters, C.learning_parameter_schedule(1.0)))
 x = np.random.randn(16,3).astype('f')
 y = np.random.randn(16,5).astype('f')
 trainer.train_minibatch({features: x, label: y})
@@ -192,7 +192,7 @@ def test_restore_constants(tmpdir):

 x = C.input_variable(10)
 f = C.layers.BatchNormalization()(x)
-trainer = C.Trainer(f, C.reduce_sum(f), C.sgd(f.parameters, C.learning_rate_schedule(0.1, 'sample')))
+trainer = C.Trainer(f, C.reduce_sum(f), C.sgd(f.parameters, C.learning_parameter_schedule_per_sample(0.1)))

 model_filename = str(tmpdir / 'function.out')
 checkpoint_filename = str(tmpdir / 'checkpoint.out')
@@ -97,7 +97,7 @@ def mb_source(tmpdir, fileprefix, max_samples=FULL_DATA_SWEEP, ctf=ctf_data, str


 def create_sample_model(device, writer=None,
-lr_per_sample=C.learning_rate_schedule([0.3, 0.2, 0.1, 0.0], C.UnitType.sample)):
+lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
 in1 = sequence.input_variable(shape=(input_dim,))
 labels = sequence.input_variable(shape=(input_dim,))
 p = parameter(shape=(input_dim,), init=10, device=device)
@@ -428,7 +428,7 @@ def test_session_progress_print_on_sweep_unit(tmpdir, device_id):
 device = cntk_device(device_id)
 writer = MockProgressWriter()
 #set to a higher learning rate as we don't need to have converge but just to go through all the samples
-t, feature, label = create_sample_model(device, writer, lr_per_sample=C.learning_rate_schedule(0.3, C.UnitType.sample))
+t, feature, label = create_sample_model(device, writer, lr_per_sample=C.learning_parameter_schedule_per_sample(0.3))
 mbs = mb_source(tmpdir, "training",
 #max_samples=INFINITELY_REPEAT,
 max_sweeps = 4)
@@ -1,7 +1,7 @@
 from __future__ import print_function
 import numpy as np
 import cntk as C
-from cntk.learners import sgd, learning_rate_schedule, UnitType
+from cntk.learners import sgd
 from cntk.logging import ProgressPrinter
 from cntk.layers import Dense, Sequential

@@ -38,7 +38,7 @@ def ffnet():
 pe = C.classification_error(z, label)

 # Instantiate the trainer object to drive the model training
-lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
+lr_per_minibatch = C.learning_parameter_schedule(0.125)
 progress_printer = ProgressPrinter(0)
 trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])

@@ -3,7 +3,7 @@ import os
 from cntk import Trainer, Axis
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,\
 INFINITELY_REPEAT
-from cntk.learners import sgd, learning_rate_schedule, UnitType
+from cntk.learners import sgd, learning_parameter_schedule_per_sample
 from cntk import input_variable, cross_entropy_with_softmax, \
 classification_error, sequence
 from cntk.logging import ProgressPrinter
@@ -58,7 +58,7 @@ def train_sequence_classifier():
 label: reader.streams.labels
 }

-lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
+lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
 # Instantiate the trainer object to drive the model training
 progress_printer = ProgressPrinter(0)
 trainer = Trainer(classifier_output, (ce, pe),