Updated examples and tests with new Learning APIs.

Yuqing Tang 2017-11-13 15:30:59 -08:00
Parent 35255ed03b
Commit f1d6fc9ed6
54 changed files with 149 additions and 152 deletions

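Across all of the files below the change is the same mechanical API migration: learning_rate_schedule(value, UnitType.minibatch) becomes learning_parameter_schedule(value), learning_rate_schedule(value, UnitType.sample) becomes learning_parameter_schedule_per_sample(value), and momentum_as_time_constant_schedule(tc) becomes momentum_schedule_per_sample(m) with the per-sample momentum m = exp(-1/tc) (e.g. 1100 -> 0.9990913221888589, 1024 -> 0.9990239141819757, 600 -> 0.9983347214509387, as seen in the hunks below). A minimal sketch of the before/after pattern; the tiny model and the concrete numbers here are illustrative only and are not taken from any single changed file:

import math
import cntk as C

# Tiny stand-in model so the sketch is self-contained.
features = C.input_variable(4)
model = C.layers.Dense(2)(features)

# Old API (being removed in this commit): the unit lives in the schedule call.
#   lr_schedule = C.learning_rate_schedule(0.001, C.learners.UnitType.sample)
#   mm_schedule = C.learners.momentum_as_time_constant_schedule(1100)

# New API: the reference unit is part of the function name instead.
lr_schedule = C.learning_parameter_schedule_per_sample(0.001)

# A momentum time constant tc maps to a per-sample momentum of exp(-1/tc).
mm_schedule = C.learners.momentum_schedule_per_sample(math.exp(-1.0 / 1100))

learner = C.learners.momentum_sgd(model.parameters, lr_schedule, mm_schedule)
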
View file

@@ -54,7 +54,7 @@ def criterion(data, label_one_hot):
 # Learner object. The learner implements the update algorithm, in this case plain SGD.
 learning_rate = 0.1
-learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch))
+learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))
 # Trainer configuration parameters.
 progress_writer = cntk.logging.ProgressPrinter(50) # helper for logging progress; log every 50 minibatches

View file

@@ -52,7 +52,7 @@ criterion = cntk.combine([loss, metric]) # criterion is a tuple-valued function
 # Learner object. The learner implements the update algorithm, in this case plain SGD.
 learning_rate = 0.1
-learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch))
+learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))
 # Trainer.
 minibatch_size = 32

View file

@@ -81,14 +81,14 @@ def criterion(data, label_one_hot):
 # Learner object. The learner implements the update algorithm, in this case momentum SGD.
 # Because this script supports data-parallel training, the learning rate is specified
-# "per sample" (UnitType.sample), the value is already pre-divided by the minibatch size.
+# "per sample", the value is already pre-divided by the minibatch size.
 # This allows data-parallel training to slice the data into subsets and also to increase
 # the minibatch size where possible, while maintaining the same contribution per sample gradient.
 epoch_size = len(X_train)
 lr_per_sample = 0.001
-lr_schedule = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample)
-mm_time_constant = [0]*5 + [1024] # 5 epochs without momentum, then switch it on
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample)
+mm_per_sample = [0]*5 + [0.9990239141819757] # 5 epochs without momentum, then switch it on
+mm_schedule = C.learners.momentum_schedule_per_sample(mm_per_sample, epoch_size=epoch_size)
 # Instantiate the trainer object to drive the model training.
 learner = C.learners.momentum_sgd(model.parameters, lr_schedule, mm_schedule)
@@ -114,7 +114,7 @@ prev_metric = 1 # metric from previous call to the callback. At very beginning,
 def adjust_lr_callback(index, average_error, cv_num_samples, cv_num_minibatches):
 global prev_metric
 if (prev_metric - average_error) / prev_metric < 0.05: # relative gain must reduce metric by at least 5% rel
-learner.reset_learning_rate(C.learning_rate_schedule(learner.learning_rate() / 2, C.learners.UnitType.sample))
+learner.reset_learning_rate(C.learning_parameter_per_sample(learner.learning_rate() / 2))
 if learner.learning_rate() < lr_per_sample / (2**7-0.1): # we are done after the 6-th LR cut
 print("Learning rate {} too small. Training complete.".format(learner.learning_rate()))
 return False # means we are done
@@ -137,7 +137,7 @@ learner = C.train.distributed.data_parallel_distributed_learner(learner)
 # For distributed training, we must maximize the minibatch size, as to minimize
 # communication cost and GPU underutilization. Hence, we use a "schedule"
 # that increases the minibatch size after a few epochs. By specifying the learning rate
-# as UnitType.sample, the contribution per sample maintains the same scale without
+# as per sample, the contribution per sample maintains the same scale without
 # having to fix up the learning rate.
 # For this MNIST model, larger minibatch sizes make it faster, because the
 # model is too small to utilize a full GPU. Hence data-parallel training cannot

View file

@@ -100,9 +100,9 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
 # Set learning parameters
 lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
-mm_time_constant = [0]*20 + [600]*20 + [1200]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
 l2_reg_weight = 0.002
 # trainer object

View file

@@ -16,7 +16,7 @@ from cntk.layers import Convolution2D, MaxPooling, AveragePooling, Dropout, Batc
 from cntk.layers.typing import *
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
 from cntk import Trainer, use_default_device
-from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule, learning_parameter_schedule
+from cntk.learners import momentum_sgd, momentum_schedule, momentum_schedule_per_sample, learning_parameter_schedule, learning_parameter_schedule_per_sample
 from cntk import cross_entropy_with_softmax, classification_error, relu
 from cntk.ops import Function
 from cntk.debugging import set_computation_network_trace_level
@@ -109,8 +109,8 @@ def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
 # learning parameters
 learner = momentum_sgd(model.parameters,
-lr = learning_parameter_schedule([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], minibatch_size=1, epoch_size=epoch_size),
-momentum = momentum_as_time_constant_schedule([0]*20+[600]*20+[1200], epoch_size=epoch_size),
+lr = learning_parameter_schedule_per_sample([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], epoch_size=epoch_size),
+momentum = momentum_schedule_per_sample([0]*20+[0.9983347214509387]*20+[0.9991670137924583], epoch_size=epoch_size),
 l2_regularization_weight = 0.002)
 # trainer object
@@ -147,8 +147,8 @@ def Evaluator(criterion):
 if metric:
 parameters |= set(metric.parameters)
 dummy_learner = momentum_sgd(tuple(parameters),
-lr = learning_rate_schedule(1, UnitType.minibatch),
-momentum = momentum_as_time_constant_schedule(0))
+lr = learning_parameter_schedule(1),
+momentum = momentum_schedule(0))
 return Trainer(None, (loss, metric), dummy_learner)
 def evaluate(reader, criterion, device=None, minibatch_size=16, max_samples=None):

View file

@@ -86,9 +86,9 @@ def create_conv_network():
 def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
 # Set learning parameters
 lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
-mm_time_constant = [0]*20 + [600]*20 + [1200]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
 l2_reg_weight = 0.002
 # Create learner

View file

@@ -55,9 +55,9 @@ def convnet_mnist(debug_output=False, epoch_size=60000, minibatch_size=64, max_e
 # Set learning parameters
 lr_per_sample = [0.001]*10 + [0.0005]*10 + [0.0001]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample, epoch_size)
-mm_time_constant = [0]*5 + [1024]
-mm_schedule = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mms = [0]*5 + [0.9990239141819757]
+mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
 # Instantiate the trainer object to drive the model training
 learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)

View file

@@ -17,7 +17,7 @@ import _cntk_py
 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler, enable_profiler
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error
@@ -114,7 +114,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, progress_wri
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor
-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -16,7 +16,7 @@ import _cntk_py
 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.ops import input
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
@@ -51,7 +51,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor
-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -17,7 +17,7 @@ import _cntk_py
 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler, enable_profiler
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error
@@ -115,7 +115,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size):
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor
-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -16,7 +16,7 @@ import _cntk_py
 import cntk.io.transforms as xforms
 from cntk.debugging import start_profiler, stop_profiler
-from cntk.learners import learning_rate_schedule, momentum_schedule, momentum_sgd, UnitType
+from cntk.learners import learning_parameter_schedule, momentum_schedule, momentum_sgd
 from cntk.logging import ProgressPrinter, log_number_of_parameters
 from cntk.train.distributed import data_parallel_distributed_learner, Communicator
 from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
@@ -51,7 +51,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor
-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -103,7 +103,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size):
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor
-lr_schedule = C.learners.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learners.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = C.learners.momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -39,7 +39,7 @@ def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantiza
 lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
 learning_rate *= learn_rate_decrease_factor
-lr_schedule = C.learners.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learners.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = C.learners.momentum_schedule(0.9)
 l2_reg_weight = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -12,7 +12,7 @@ import cntk as C
 from cntk.train import Trainer, minibatch_size_schedule
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
 from cntk.device import cpu, try_set_default_device
-from cntk.learners import adadelta, learning_rate_schedule, UnitType
+from cntk.learners import adadelta, learning_parameter_schedule_per_sample
 from cntk.ops import relu, element_times, constant
 from cntk.layers import Dense, Sequential, For
 from cntk.losses import cross_entropy_with_softmax
@@ -85,7 +85,7 @@ def simple_mnist(tensorboard_logdir=None):
 progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))
 # Instantiate the trainer object to drive the model training
-lr = learning_rate_schedule(1, UnitType.sample)
+lr = learning_parameter_schedule_per_sample(1)
 trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers)
 training_session(

View file

@@ -13,7 +13,7 @@ import numpy as np
 from cntk import cross_entropy_with_softmax, classification_error, reduce_mean
 from cntk import Trainer, cntk_py
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
 from cntk.debugging import *
 from cntk.logging import *
 from resnet_models import *
@@ -80,13 +80,12 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
 # shared training parameters
 minibatch_size = 128
-momentum_time_constant = -minibatch_size/np.log(0.9)
 l2_reg_weight = 0.0001
 # Set learning parameters
 lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
-lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
+mm_schedule = momentum_schedule(0.9, minibatch_size)
 # progress writers
 progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]

View file

@@ -10,9 +10,10 @@ import argparse
 import cntk as C
 import numpy as np
+import cntk as C
 from cntk import input, cross_entropy_with_softmax, classification_error, Trainer, cntk_py
 from cntk import data_parallel_distributed_learner, block_momentum_distributed_learner, Communicator
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule
 from cntk.device import try_set_default_device, gpu
 from cntk.train.training_session import *
 from cntk.debugging import *
@@ -71,15 +72,13 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
 else:
 return RuntimeError("Unknown model name!")
-momentum_time_constant = -minibatch_size/np.log(0.9)
 l2_reg_weight = 0.0001
 # Set learning parameters
 minibatch_size = 128
 lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
-lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule(lr_per_mb, minibatch_size = minibatch_size, epoch_size=epoch_size)
+mm_schedule = momentum_schedule(0.9, minibatch_size = minibatch_size)
 # learner object
 if block_size != None and num_quantization_bits != 32:
 raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

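Note that in this file the learning rate is no longer pre-divided by the minibatch size: the per-minibatch values are handed to learning_parameter_schedule together with an explicit minibatch_size reference, which under the new API's reference-minibatch semantics should work out to the same per-sample step as the old pre-divided, per-sample schedule. A small illustration of the two equivalent spellings (all numbers here are made up, not taken from the script):

import cntk as C

minibatch_size = 128
epoch_size = 50000                          # illustrative value
lr_per_mb = [1.0]*10 + [0.1]                # illustrative per-minibatch rates

# Old spelling: divide by hand and declare the values per sample.
lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
old_style = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)

# New spelling in this file: per-minibatch values plus the reference minibatch size.
new_style = C.learning_parameter_schedule(lr_per_mb, minibatch_size=minibatch_size, epoch_size=epoch_size)
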
View file

@@ -136,7 +136,7 @@ def create_vgg16():
 def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
 # Set learning parameters
 lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
-lr_schedule = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = C.learners.momentum_schedule(0.9)
 l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -136,7 +136,7 @@ def create_vgg19():
 def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
 # Set learning parameters
 lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
-lr_schedule = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
+lr_schedule = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
 mm_schedule = C.learners.momentum_schedule(0.9)
 l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

View file

@@ -11,7 +11,7 @@ from cntk.initializer import glorot_uniform
 from cntk.io import MinibatchSource, ImageDeserializer, CTFDeserializer, StreamDefs, StreamDef
 from cntk.io.transforms import scale
 from cntk.layers import placeholder, Constant
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule_per_sample
 from cntk.logging import log_number_of_parameters, ProgressPrinter
 from cntk.logging.graph import find_by_name, plot
 import PARAMETERS
@@ -44,10 +44,10 @@ epoch_size = p.cntk_num_train_images
 num_test_images = p.cntk_num_test_images
 mb_size = p.cntk_mb_size
 max_epochs = p.cntk_max_epochs
-momentum_time_constant = p.cntk_momentum_time_constant
 distributed_flg = p.distributed_flg
 num_quantization_bits = p.num_quantization_bits
 warm_up = p.warm_up
+momentum_per_sample = p.cntk_momentum_per_sample
 # model specific variables (only AlexNet for now)
 base_model = "AlexNet"
@@ -154,8 +154,8 @@ def train_fast_rcnn(debug_output=False, model_path=model_file):
 # Set learning parameters
 l2_reg_weight = 0.0005
 lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
-lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
-mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
+mm_schedule = momentum_schedule_per_sample(momentum_per_sample)
 # Instantiate the trainer object as default
 learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)

View file

@@ -54,7 +54,7 @@ class Parameters():
 self.cntk_num_test_images = -1 # set per data set below
 self.cntk_mb_size = -1 # set per data set below
 self.cntk_max_epochs = -1 # set per data set below
-self.cntk_momentum_time_constant = -1 # set per data set below
+self.cntk_momentum_per_sample = -1 # set per data set below
 # for Distributed learner
 self.distributed_flg = False # In case of distributed learning, set 'True'
@@ -83,7 +83,7 @@ class GroceryParameters(Parameters):
 self.cntk_num_test_images = 5
 self.cntk_mb_size = 5
 self.cntk_max_epochs = 20
-self.cntk_momentum_time_constant = 10
+self.cntk_momentum_per_sample = 0.8187307530779818
 # postprocessing
 self.nmsThreshold = 0.01
@@ -117,7 +117,7 @@ class PascalParameters(Parameters):
 self.cntk_num_test_images = 4952
 self.cntk_mb_size = 2
 self.cntk_max_epochs = 17
-self.cntk_momentum_time_constant = 20
+self.cntk_momentum_per_sample = 0.951229424500714
 self.pascalDataDir = os.path.join(self.rootDir, "..", "..", "DataSets", "Pascal")
 self.imgDir = self.pascalDataDir

View file

@@ -10,12 +10,12 @@ import os, sys
 import argparse
 import easydict # pip install easydict
 import cntk
-from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
+from cntk import Trainer, load_model, Axis, input_variable, parameter, times, combine, \
 softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
 from cntk.core import Value
 from cntk.initializer import normal
 from cntk.layers import placeholder, Constant, Sequential
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
 from cntk.logging import log_number_of_parameters, ProgressPrinter
 from cntk.logging.graph import find_by_name, plot
 from cntk.losses import cross_entropy_with_softmax
@@ -295,11 +295,11 @@ def train_fast_rcnn(cfg):
 biases = [p for p in params if '.b' in p.name or 'b' == p.name]
 others = [p for p in params if not p in biases]
 bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT
-lr_schedule = learning_rate_schedule(lr_per_sample_scaled, unit=UnitType.sample)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample_scaled)
 learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight, unit_gain=False, use_mean_gradient=True)
 bias_lr_per_sample = [v * bias_lr_mult for v in cfg["CNTK"].LR_PER_SAMPLE]
-bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
+bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
 bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight, unit_gain=False, use_mean_gradient=True)
 trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

View file

@@ -10,13 +10,13 @@ import os, sys
 import argparse
 import easydict # pip install easydict
 import cntk
-from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
+from cntk import Trainer, load_model, Axis, input_variable, parameter, times, combine, \
 softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
 from cntk.core import Value
 from cntk.io import MinibatchData
 from cntk.initializer import normal
 from cntk.layers import placeholder, Constant, Sequential
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule_per_sample, momentum_schedule
 from cntk.logging import log_number_of_parameters, ProgressPrinter
 from cntk.logging.graph import find_by_name, plot
 from cntk.losses import cross_entropy_with_softmax
@@ -521,12 +521,12 @@ def train_model(image_input, roi_input, dims_input, loss, pred_error,
 print("bias_lr_mult: {}".format(bias_lr_mult))
 # Instantiate the learners and the trainer object
-lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
+lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
 learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
 unit_gain=False, use_mean_gradient=True)
 bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
-bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
+bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
 bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
 unit_gain=False, use_mean_gradient=True)
 trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

View file

@@ -57,8 +57,8 @@ def deconv_mnist(max_epochs=3):
 minibatch_size = 64
 # Set learning parameters
-lr_schedule = C.learning_rate_schedule([0.00015], C.learners.UnitType.sample, epoch_size)
-mm_schedule = C.learners.momentum_as_time_constant_schedule([600], epoch_size)
+lr_schedule = C.learning_parameter_schedule_per_sample([0.00015], epoch_size=epoch_size)
+mm_schedule = C.learners.momentum_schedule_per_sample([0.9983347214509387], epoch_size=epoch_size)
 # Instantiate the trainer object to drive the model training
 learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)

View file

@@ -11,12 +11,12 @@ import os
 from PIL import Image
 from cntk.device import try_set_default_device, gpu
 from cntk import load_model, placeholder, Constant
-from cntk import Trainer, UnitType
+from cntk import Trainer
 from cntk.logging.graph import find_by_name, get_node_outputs
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef
 import cntk.io.transforms as xforms
 from cntk.layers import Dense
-from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
+from cntk.learners import momentum_sgd, learning_parameter_schedule, momentum_schedule
 from cntk.ops import combine, softmax
 from cntk.ops.functions import CloneMethod
 from cntk.losses import cross_entropy_with_softmax
@@ -114,7 +114,7 @@ def train_model(base_model_file, feature_node_name, last_hidden_node_name,
 pe = classification_error(tl_model, label_input)
 # Instantiate the trainer object
-lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
+lr_schedule = learning_parameter_schedule(lr_per_mb)
 mm_schedule = momentum_schedule(momentum_per_mb)
 learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
 progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)

View file

@@ -138,8 +138,8 @@ def train(reader, model, max_epochs):
 # SGD parameters
 learner = cntk.learners.fsadagrad(criterion.parameters,
-lr = cntk.learners.learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], cntk.learners.UnitType.sample, epoch_size),
-momentum = cntk.learners.momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
+lr = cntk.learners.learning_parameter_schedule_per_sample([0.003]*2+[0.0015]*12+[0.0003], epoch_size=epoch_size),
+momentum = cntk.learners.momentum_schedule(0.9, minibatch_size),
 gradient_clipping_threshold_per_sample = 15,
 gradient_clipping_with_truncation = True)

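The momentum rewrite in this hunk is value-preserving rather than a plain rename: a time constant of minibatch_size / -math.log(0.9) is exactly the constant at which the momentum decays to 0.9 over one minibatch, which is what momentum_schedule(0.9, minibatch_size) now states directly. A quick, purely illustrative check of that identity (the minibatch size below is a made-up value, not the one the script uses):

import math

minibatch_size = 32  # illustrative; the script defines its own value
time_constant = minibatch_size / -math.log(0.9)

# Per-minibatch momentum implied by the old time-constant form:
per_minibatch = math.exp(-minibatch_size / time_constant)
assert abs(per_minibatch - 0.9) < 1e-12  # the value the new call specifies directly
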
View file

@@ -12,7 +12,7 @@ from cntk.core import Value
 from cntk.initializer import he_uniform
 from cntk.layers import Sequential, Convolution2D, Dense, default_options
 from cntk.layers.typing import Signature, Tensor
-from cntk.learners import adam, learning_rate_schedule, momentum_schedule, UnitType
+from cntk.learners import adam, learning_parameter_schedule, momentum_schedule
 from cntk.logging import TensorBoardProgressWriter
 from cntk.ops import abs, argmax, element_select, less, relu, reduce_max, reduce_sum, square
 from cntk.ops.functions import CloneMethod, Function
@@ -317,7 +317,7 @@ class DeepQAgent(object):
 return huber_loss(q_targets, q_acted, 1.0)
 # Adam based SGD
-lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
+lr_schedule = learning_parameter_schedule(learning_rate)
 m_schedule = momentum_schedule(momentum)
 vm_schedule = momentum_schedule(0.999)
 l_sgd = adam(self._action_value_net.parameters, lr_schedule,

View file

@@ -53,7 +53,7 @@ def train_sequence_classifier():
 label : reader.streams.labels
 }
-lr_per_sample = C.learning_rate_schedule(0.1, C.UnitType.sample)
+lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)
 # Instantiate the trainer object to drive the model training
 progress_printer = C.logging.ProgressPrinter(0)

View file

@@ -9,7 +9,8 @@ import numpy as np
 import os
 from cntk import Trainer, Axis
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
-from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
+from cntk.learners import momentum_sgd, fsadagrad, momentum_schedule_per_sample, \
+learning_parameter_schedule, learning_parameter_schedule_per_sample
 from cntk import input, cross_entropy_with_softmax, classification_error, sequence, \
 element_select, alias, hardmax, placeholder, combine, parameter, times, plus
 from cntk.ops.functions import CloneMethod, load_model, Function
@@ -218,8 +219,8 @@ def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_si
 minibatch_size = 72
 lr = 0.001 if use_attention else 0.005 # TODO: can we use the same value for both?
 learner = fsadagrad(model_train.parameters,
-lr = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
-momentum = momentum_as_time_constant_schedule(1100),
+lr = learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size=epoch_size),
+momentum = momentum_schedule_per_sample(0.9990913221888589),
 gradient_clipping_threshold_per_sample=2.3,
 gradient_clipping_with_truncation=True)
 trainer = Trainer(None, criterion, learner)
@@ -314,7 +315,7 @@ def evaluate_decoding(reader, s2smodel, i2w):
 # TODO: replace by a proper such class once available
 def Evaluator(model, criterion):
 from cntk import Trainer
-from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
+from cntk.learners import momentum_sgd, momentum_schedule_per_sample
 loss, metric = Trainer._get_loss_metric(criterion)
 parameters = set(loss.parameters)
 if model:
@@ -322,8 +323,8 @@ def Evaluator(model, criterion):
 if metric:
 parameters |= set(metric.parameters)
 dummy_learner = momentum_sgd(tuple(parameters),
-lr = learning_rate_schedule(1, UnitType.minibatch),
-momentum = momentum_as_time_constant_schedule(0))
+lr = learning_parameter_schedule(1),
+momentum = momentum_schedule_per_sample(0))
 return Trainer(model, (loss, metric), dummy_learner)
 # This computes the metric on the test set.

View file

@@ -15,7 +15,7 @@ import cntk
 from cntk import Trainer
 from cntk.train.distributed import Communicator, data_parallel_distributed_learner, block_momentum_distributed_learner
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
-from cntk.learners import fsadagrad, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
+from cntk.learners import fsadagrad, learning_parameter_schedule_per_sample, momentum_schedule, momentum_schedule_per_sample
 from cntk.train.training_session import *
 from cntk.logging import *
@@ -50,8 +50,8 @@ def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantiza
 lr = 0.001 if use_attention else 0.005 # TODO: can we use the same value for both?
 local_learner = fsadagrad(model_train.parameters,
-lr = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
-momentum = momentum_as_time_constant_schedule(1100),
+lr = learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size=epoch_size),
+momentum = momentum_schedule_per_sample(0.9990913221888589),
 gradient_clipping_threshold_per_sample=2.3,
 gradient_clipping_with_truncation=True)

View file

@@ -72,8 +72,8 @@ def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_
 lr = [0.001]
 local_learner = fsadagrad(network['output'].parameters,
-lr=learning_rate_schedule(lr, UnitType.sample, epoch_size),
-momentum=momentum_as_time_constant_schedule(1000),
+lr=learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
+momentum=momentum_schedule_per_sample(0.9990913221888589),
 gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
 if block_size != None:

View file

@@ -9,7 +9,7 @@ import numpy as np
 import os
 import sys
 from cntk import Trainer, Axis
-from cntk.learners import momentum_sgd, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
+from cntk.learners import momentum_sgd, momentum_schedule_per_sample, learning_parameter_schedule_per_sample
 from cntk.ops import sequence
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error
@@ -157,11 +157,11 @@ def train_lm(training_file, epochs, max_num_minibatches):
 errs = classification_error(z, label_sequence)
 # Instantiate the trainer object to drive the model training
-lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
-momentum_time_constant = momentum_as_time_constant_schedule(1100)
+lr_per_sample = learning_parameter_schedule_per_sample(0.001)
+momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
 clipping_threshold_per_sample = 5.0
 gradient_clipping_with_truncation = True
-learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
+learner = momentum_sgd(z.parameters, lr_per_sample, momentum_schedule,
 gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
 gradient_clipping_with_truncation=gradient_clipping_with_truncation)
 progress_printer = ProgressPrinter(freq=100, tag='Training')

View file

@@ -190,12 +190,12 @@ def create_criterion(network):
 # return: learners: [sgd, adam, adagrad]
 def create_learner(model):
 '''Create the optimized method'''
-lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
-momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
+lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
+momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
 if opt.optim == 'sgd':
 return C.sgd(model.parameters, lr=lr_per_minibatch)
 elif opt.optim == 'adam':
-return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
+return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
 elif opt.optim == 'adagrad':
 return C.adagrad(model.parameters, lr=lr_per_minibatch)
 else:

View file

@@ -10,7 +10,7 @@ import cntk as C
 import timeit
 from cntk import Axis
 from cntk.train import Trainer
-from cntk.learners import momentum_sgd, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
+from cntk.learners import momentum_sgd
 from cntk.ops import sequence
 from cntk.losses import cross_entropy_with_softmax
 from cntk.metrics import classification_error
@@ -34,7 +34,7 @@ sequence_length = 40
 sequences_per_batch = 10
 alpha = 0.75
 learning_rate = 0.002
-momentum_as_time_constant = 10000
+momentum_per_sample = 0.9999000049998333
 clipping_threshold_per_sample = 5.0
 token_to_id_path = './ptb/token2id.txt'
 validation_file_path = './ptb/valid.txt'
@@ -196,8 +196,8 @@ def train_lm(testing=False):
 num_trained_samples_since_last_report = 0
 # Instantiate the trainer object to drive the model training
-lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
-momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
+lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
+momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
 gradient_clipping_with_truncation = True
 learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
 gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,

View file

@@ -192,9 +192,9 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
 # Set learning parameters
 lr_per_sample = [0.01]*10+[0.001]*10+[0.0001]
-lr_schedule = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
-momentum_time_constant = 4096
-mm_schedule = C.momentum_as_time_constant_schedule([momentum_time_constant])
+lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=train_epoch_size)
+momentum_per_sample = 0.9997558891748972
+mm_schedule = C.momentum_schedule_per_sample([momentum_per_sample])
 # Instantiate the trainer object to drive the model training
 learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)

View file

@@ -39,8 +39,8 @@ def test_htk_deserializers():
 errs = C.classification_error (z, labels)
 learner = C.fsadagrad(z.parameters,
-lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
-momentum=C.momentum_as_time_constant_schedule(1000),
+lr=C.learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
+momentum=C.momentum_schedule_per_sample(0.9990913221888589),
 gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
 progress_printer = C.logging.ProgressPrinter(freq=0)
 trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

View file

@@ -40,11 +40,11 @@ if __name__=='__main__':
 z = plus(n, p2, name='z')
 ce = squared_error(z, labels)
-momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
-lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
+momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
+lr_per_sample = C.learning_parameter_schedule_per_sample(0.007)
 dist_learners = [
-C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_time_constant, True)),
-C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_time_constant, True))
+C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_schedule, True)),
+C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_schedule, True))
 ]
 trainer = C.Trainer(z, ce, dist_learners)

View file

@@ -75,7 +75,7 @@ def test_word_rnn(device_id):
 W.sequences_per_batch = 2
 W.alpha = 0.75
 W.learning_rate = 0.02
-W.momentum_as_time_constant = 5
+W.momentum_per_sample = 0.8187307530779818
 W.clipping_threshold_per_sample = 5.0
 W.segment_sepparator = '<eos>'
 W.num_samples_between_progress_report = 2

View file

@@ -77,8 +77,8 @@ def LanguageUnderstanding_train(reader, model, max_epochs):
 minibatch_size = 70
 learner = fsadagrad(criterion.parameters,
-lr = learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], UnitType.sample, epoch_size),
-momentum = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
+lr = learning_parameter_schedule_per_sample([0.003]*2+[0.0015]*12+[0.0003], epoch_size=epoch_size),
+momentum = momentum_schedule(0.9, minibatch_size),
 gradient_clipping_threshold_per_sample = 15,
 gradient_clipping_with_truncation = True)


@@ -235,9 +235,8 @@ class ActorCritic(AgentBaseClass):
(combined_loss, None), (combined_loss, None),
C.learners.adam( C.learners.adam(
combined_networks.parameters, combined_networks.parameters,
C.learners.learning_rate_schedule( C.learners.learning_parameter_schedule_per_sample(
self._parameters.initial_eta, self._parameters.initial_eta),
C.learners.UnitType.sample),
momentum=C.learners.momentum_schedule(self._parameters.momentum), momentum=C.learners.momentum_schedule(self._parameters.momentum),
variance_momentum=C.learners.momentum_schedule(0.999), variance_momentum=C.learners.momentum_schedule(0.999),
minibatch_size=C.learners.IGNORE)) minibatch_size=C.learners.IGNORE))
@@ -255,8 +254,7 @@ class ActorCritic(AgentBaseClass):
(self._parameters.initial_eta - self._parameters.eta_minimum) * (self._parameters.initial_eta - self._parameters.eta_minimum) *
(1 - float(self.step_count)/self._parameters.eta_decay_step_count)) (1 - float(self.step_count)/self._parameters.eta_decay_step_count))
self._trainer.parameter_learners[0].reset_learning_rate( self._trainer.parameter_learners[0].reset_learning_rate(
C.learners.learning_rate_schedule( C.learners.learning_parameter_schedule_per_sample(eta))
eta, C.learners.UnitType.sample))
def _choose_action(self, state): def _choose_action(self, state):
""" """


@@ -105,8 +105,8 @@ class QLearning(AgentBaseClass):
# TODO: allow user to specify learner through config file. # TODO: allow user to specify learner through config file.
opt = C.learners.adam( opt = C.learners.adam(
self._q.parameters, self._q.parameters,
C.learners.learning_rate_schedule( C.learners.learning_parameter_schedule_per_sample(
self._parameters.initial_eta, C.learners.UnitType.sample), self._parameters.initial_eta),
use_mean_gradient=True, use_mean_gradient=True,
momentum=C.learners.momentum_schedule(self._parameters.momentum), momentum=C.learners.momentum_schedule(self._parameters.momentum),
variance_momentum=C.learners.momentum_schedule(0.999), variance_momentum=C.learners.momentum_schedule(0.999),
@@ -226,8 +226,8 @@ class QLearning(AgentBaseClass):
(1 - float(self.step_count)/self._parameters.eta_decay_step_count)) (1 - float(self.step_count)/self._parameters.eta_decay_step_count))
self._trainer.parameter_learners[0].reset_learning_rate( self._trainer.parameter_learners[0].reset_learning_rate(
C.learners.learning_rate_schedule( C.learners.learning_parameter_schedule_per_sample(
eta, C.learners.UnitType.sample)) eta))
def _adjust_exploration_rate(self): def _adjust_exploration_rate(self):
self._epsilon = self._parameters.epsilon_minimum + max( self._epsilon = self._parameters.epsilon_minimum + max(


@@ -5,7 +5,7 @@
import numpy as np import numpy as np
import cntk as C import cntk as C
from cntk import sgd, Trainer, learning_rate_schedule, parameter, \ from cntk import sgd, Trainer, learning_parameter_schedule, parameter, \
times, cross_entropy_with_softmax, \ times, cross_entropy_with_softmax, \
classification_error, UnitType, combine classification_error, UnitType, combine
from cntk.debugging.debug import debug_model, _DebugNode from cntk.debugging.debug import debug_model, _DebugNode
@@ -32,7 +32,7 @@ def _train_backcompatible_test(z, loss, eval_error,
input_dim = 2 input_dim = 2
lr_schedule = learning_rate_schedule(0.5, UnitType.minibatch) lr_schedule = learning_parameter_schedule(0.5)
learner = sgd(z.parameters, lr_schedule) learner = sgd(z.parameters, lr_schedule)
trainer = Trainer(z, (loss, eval_error), [learner]) trainer = Trainer(z, (loss, eval_error), [learner])


@@ -5,7 +5,7 @@ import numpy as np
import cntk as C import cntk as C
from cntk import Axis, NDArrayView from cntk import Axis, NDArrayView
from cntk.logging import ProgressPrinter from cntk.logging import ProgressPrinter
from cntk.learners import UserLearner, sgd, learning_rate_schedule, UnitType from cntk.learners import UserLearner, sgd, learning_parameter_schedule
from cntk.layers import Dense, Sequential from cntk.layers import Dense, Sequential
import pytest import pytest
@@ -79,7 +79,7 @@ class MySgdFast(UserLearner):
ADDITIONAL_ARGUMENTS = [ ADDITIONAL_ARGUMENTS = [
#(additional learning rate arguments (args), additional learner arguments (kwargs)) #(additional learning rate arguments (args), additional learner arguments (kwargs))
(C.learning_rate_schedule, [UnitType.minibatch], {'minibatch_size': 0}), #for backward compatible test (C.learning_rate_schedule, [C.learners.UnitType.minibatch], {'minibatch_size': 0}), #for backward compatible test
(C.learning_parameter_schedule, [25], {'minibatch_size': 25}), # test new API; 25 is the actually minibatch size (C.learning_parameter_schedule, [25], {'minibatch_size': 25}), # test new API; 25 is the actually minibatch size
(C.learning_parameter_schedule, [], {'minibatch_size': 0}), # test new API (C.learning_parameter_schedule, [], {'minibatch_size': 0}), # test new API
] ]
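The three entries above exercise the two ways of anchoring a rate in the new API: minibatch_size=0 reproduces the old UnitType.minibatch behaviour (the rate is applied to the whole minibatch as-is), while minibatch_size=25 declares the rate was tuned for a reference minibatch of 25 samples so it can be rescaled when the actual minibatch size differs. A small sketch of the distinction (our reading of the new API; 0.5 is an arbitrary rate):

import cntk as C

rate = 0.5
# Old spelling: the rate applies to the whole minibatch, whatever its size.
legacy = C.learning_rate_schedule(rate, C.learners.UnitType.minibatch)
# New spelling of the same thing: minibatch_size=0 means "do not rescale".
per_minibatch = C.learning_parameter_schedule(rate, minibatch_size=0)
# New capability: the rate is declared for a reference minibatch of 25 samples,
# so it can be rescaled when training uses a different minibatch size.
per_ref_25 = C.learning_parameter_schedule(rate, minibatch_size=25)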


@@ -896,8 +896,8 @@ def test_usermbsource_training(tmpdir, with_checkpoint_impl):
mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes) mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)
from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \ from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
classification_error, learning_rate_schedule, sgd, Trainer, \ classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
training_session, times, UnitType training_session, times
feature = sequence.input_variable(shape=(input_dim,)) feature = sequence.input_variable(shape=(input_dim,))
label = C.input_variable(shape=(num_output_classes,)) label = C.input_variable(shape=(num_output_classes,))
@@ -908,7 +908,7 @@ def test_usermbsource_training(tmpdir, with_checkpoint_impl):
#having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed #having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed
#note that training session can end earlier if there is no updates #note that training session can end earlier if there is no updates
lr_per_sample = learning_rate_schedule(0.3, UnitType.sample) lr_per_sample = learning_parameter_schedule_per_sample(0.3)
learner = sgd(z.parameters, lr_per_sample) learner = sgd(z.parameters, lr_per_sample)
trainer = Trainer(z, (ce, errs), [learner]) trainer = Trainer(z, (ce, errs), [learner])
input_map = { input_map = {


@@ -800,7 +800,7 @@ def nesterov(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
return opt return opt
@typemap @typemap
def adadelta(parameters, lr=learning_rate_schedule(1, UnitType.sample), rho=0.95, epsilon=1e-8, def adadelta(parameters, lr=learning_parameter_schedule_per_sample(1), rho=0.95, epsilon=1e-8,
l1_regularization_weight=0.0, l2_regularization_weight=0.0, l1_regularization_weight=0.0, l2_regularization_weight=0.0,
gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=np.inf, gaussian_noise_injection_std_dev=0.0, gradient_clipping_threshold_per_sample=np.inf,
gradient_clipping_with_truncation=True, use_mean_gradient=None, gradient_clipping_with_truncation=True, use_mean_gradient=None,


@@ -51,7 +51,7 @@ class SimpleTrainer:
self.trainer = C.Trainer(self.z, (self.z, None), learner, []) if learner else None self.trainer = C.Trainer(self.z, (self.z, None), learner, []) if learner else None
def create_distributed_learner(self, mode, config): def create_distributed_learner(self, mode, config):
local_learner = C.sgd(self.z.parameters, C.learning_rate_schedule(0.01, unit=C.learners.UnitType.sample)) local_learner = C.sgd(self.z.parameters, C.learning_parameter_schedule_per_sample(0.01))
try: try:
if mode == 'data_parallel': if mode == 'data_parallel':
if config is None: if config is None:


@@ -52,13 +52,13 @@ MOMENTUM_SCHEDULE_PARAMS = [
LEARNER_LAMBDAS = [ LEARNER_LAMBDAS = [
lambda params: C.adadelta(params), lambda params: C.adadelta(params),
lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)), lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8), lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)), lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))] lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]
@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY) @pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size): def test_learning_rate_schedule(params, expectation, minibatch_size):
@@ -466,7 +466,7 @@ def test_noise_injection_with_checkpointing():
w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123)) w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123)) w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
lr=learning_rate_schedule(0.5, UnitType.sample) lr=C.learning_parameter_schedule_per_sample(0.5)
m=C.momentum_schedule(0.99) m=C.momentum_schedule(0.99)
learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5) learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
@@ -515,8 +515,8 @@ def test_learner_logging():
lr_values = [0.3, 0.2, 0.1, 0] lr_values = [0.3, 0.2, 0.1, 0]
m_values = [0.6, 0.7, 0.8] m_values = [0.6, 0.7, 0.8]
learner = C.momentum_sgd(z.parameters, learner = C.momentum_sgd(z.parameters,
learning_rate_schedule(lr_values, UnitType.sample, 1), C.learning_parameter_schedule_per_sample(lr_values, epoch_size=1),
C.momentum_schedule(m_values, 1)) C.momentum_schedule(m_values, epoch_size=1))
trainer = Trainer(z, (ce, errs), [learner], writer) trainer = Trainer(z, (ce, errs), [learner], writer)
for i in range(10): for i in range(10):
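List-valued schedules like the ones above advance through their entries every epoch_size samples, and indexing a schedule with a sample count returns the value in effect at that point. A small sketch of the semantics (epoch_size=100 is an arbitrary example, not the value used in the test):

import cntk as C

lr = C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0], epoch_size=100)
print(lr[0], lr[99])     # 0.3 0.3 -> the first entry covers the first 100 samples
print(lr[100], lr[350])  # 0.2 0.0 -> later entries take over; the last one persists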
@@ -572,7 +572,7 @@ def test_sweep_based_schedule(tmpdir, device_id):
ce = cross_entropy_with_softmax(z, labels) ce = cross_entropy_with_softmax(z, labels)
errs = classification_error(z, labels) errs = classification_error(z, labels)
lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample) lr_per_sample = C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])
learner = sgd(z.parameters, lr_per_sample) learner = sgd(z.parameters, lr_per_sample)
trainer = Trainer(z, (ce, errs), [learner]) trainer = Trainer(z, (ce, errs), [learner])
@@ -617,7 +617,7 @@ def generate_random_data(sample_size, feature_dim, num_classes):
def test_learner_empy_parameters_list(): def test_learner_empy_parameters_list():
lr_per_sample = learning_rate_schedule(0.1, UnitType.sample) lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)
with pytest.raises(ValueError): with pytest.raises(ValueError):
learner = C.sgd([], lr_per_sample) learner = C.sgd([], lr_per_sample)
@@ -673,14 +673,14 @@ def test_sgd_with_noise():
# in some layers. This tests that cuRand library will not # in some layers. This tests that cuRand library will not
# complain about generating an odd number of random values # complain about generating an odd number of random values
np.random.seed(98052) np.random.seed(98052)
learner = lambda params: sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch), gaussian_noise_injection_std_dev=0.01) learner = lambda params: sgd(params, lr=C.learning_parameter_schedule(0.125), gaussian_noise_injection_std_dev=0.01)
ffnet(learner) ffnet(learner)
# We just verify that we did not crash # We just verify that we did not crash
assert(True) assert(True)
def test_universal(): def test_universal():
np.random.seed(98052) np.random.seed(98052)
builtin_sgd = lambda params: sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch)) builtin_sgd = lambda params: sgd(params, lr=C.learning_parameter_schedule(0.125))
builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd) builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd)
np.random.seed(98052) np.random.seed(98052)
my_sgd = lambda ps, gs: C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)]) my_sgd = lambda ps, gs: C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)])
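The 0.125/25 factor in the hand-written update is the builtin rate translated to a per-sample step: 0.125 is given per minibatch, and 25 is the minibatch size these tests use (per the ADDITIONAL_ARGUMENTS comment further up), so the equivalent per-sample step is 0.005. A tiny check, with the minibatch size treated as an assumption:

per_minibatch_lr = 0.125
minibatch_size = 25                       # assumed from the test's ADDITIONAL_ARGUMENTS comment
print(per_minibatch_lr / minibatch_size)  # 0.005, the step applied per sample in my_sgd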
@@ -735,10 +735,10 @@ def test_restore_from_checkpoint(tmpdir, learner):
# this should be replaced with LEARNER_LAMBDAS # this should be replaced with LEARNER_LAMBDAS
SPARSE_AND_DENSE_LEARNER_LAMBDAS = [ SPARSE_AND_DENSE_LEARNER_LAMBDAS = [
(lambda params: C.adadelta(params), False), (lambda params: C.adadelta(params), False),
(lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), True), (lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)), True),
(lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), True), (lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)), True),
(lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8), True), (lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8), True),
(lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)), False)] (lambda params: C.sgd(params, lr=learning_parameter_schedule(1)), False)]
@pytest.mark.parametrize("learner, gpu_only", SPARSE_AND_DENSE_LEARNER_LAMBDAS) @pytest.mark.parametrize("learner, gpu_only", SPARSE_AND_DENSE_LEARNER_LAMBDAS)
@pytest.mark.parametrize("checkpoint", [True, False]) @pytest.mark.parametrize("checkpoint", [True, False])


@@ -1358,7 +1358,7 @@ class Function(cntk_py.Function):
... def criterion(data, label_one_hot): ... def criterion(data, label_one_hot):
... z = model(data) # apply model. Computes a non-normalized log probability for every output class. ... z = model(data) # apply model. Computes a non-normalized log probability for every output class.
... return cntk.cross_entropy_with_softmax(z, label_one_hot) ... return cntk.cross_entropy_with_softmax(z, label_one_hot)
>>> learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)) >>> learner = cntk.sgd(model.parameters, 0.1)
>>> progress = criterion.train((X, Y), minibatch_size=25, max_epochs=2, epoch_size=125, parameter_learners=[learner]) >>> progress = criterion.train((X, Y), minibatch_size=25, max_epochs=2, epoch_size=125, parameter_learners=[learner])
>>> print("%.2f" % progress.epoch_summaries[-1].loss) # get the final epoch's loss value >>> print("%.2f" % progress.epoch_summaries[-1].loss) # get the final epoch's loss value
0.68 0.68
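The updated doctest also shows that a learner factory now accepts a plain number for lr; since this commit uses it in place of the old per-minibatch schedule, the plain number appears to be shorthand for learning_parameter_schedule(0.1). A standalone sketch of that reading (the Dense model below is a stand-in for the doctest's model):

import cntk

model = cntk.layers.Dense(3)(cntk.input_variable(5))
learner_a = cntk.sgd(model.parameters, 0.1)                                    # plain number, as in the updated doctest
learner_b = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(0.1))  # the explicit spelling used elsewhere in this commit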


@@ -88,7 +88,7 @@ def test_training_2d_sparse_sequence_operand(device_id):
l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label') l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
loss = cross_entropy_with_softmax(z, l, axis=-1) loss = cross_entropy_with_softmax(z, l, axis=-1)
trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample))) trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_parameter_schedule_per_sample(0.7)))
return (a, l, w, trainer) return (a, l, w, trainer)
# Run with sparse inputs # Run with sparse inputs
@@ -146,7 +146,7 @@ def test_training_3d_sparse_sequence_with_recurrence(device_id):
l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label') l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
loss = cross_entropy_with_softmax(z, l, axis=-1) loss = cross_entropy_with_softmax(z, l, axis=-1)
trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample))) trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_parameter_schedule_per_sample(0.7)))
return (a, l, w_i, w_h, trainer) return (a, l, w_i, w_h, trainer)
# Run with sparse inputs # Run with sparse inputs


@@ -129,7 +129,7 @@ def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
np.random.seed(0) np.random.seed(0)
learning_rate = 0.5 learning_rate = 0.5
lr_schedule = C.learning_rate_schedule(learning_rate) lr_schedule = C.learning_parameter_schedule(learning_rate)
hidden_layers_dim = 50 hidden_layers_dim = 50


@@ -79,7 +79,7 @@ def test_clone_freeze():
c_copies = [q.value for q in c_clone.constants] c_copies = [q.value for q in c_clone.constants]
# update z # update z
trainer = C.Trainer(z, C.squared_error(z, label), C.sgd(z.parameters, C.learning_rate_schedule(1.0, C.UnitType.minibatch))) trainer = C.Trainer(z, C.squared_error(z, label), C.sgd(z.parameters, C.learning_parameter_schedule(1.0)))
x = np.random.randn(16,3).astype('f') x = np.random.randn(16,3).astype('f')
y = np.random.randn(16,5).astype('f') y = np.random.randn(16,5).astype('f')
trainer.train_minibatch({features: x, label: y}) trainer.train_minibatch({features: x, label: y})


@@ -192,7 +192,7 @@ def test_restore_constants(tmpdir):
x = C.input_variable(10) x = C.input_variable(10)
f = C.layers.BatchNormalization()(x) f = C.layers.BatchNormalization()(x)
trainer = C.Trainer(f, C.reduce_sum(f), C.sgd(f.parameters, C.learning_rate_schedule(0.1, 'sample'))) trainer = C.Trainer(f, C.reduce_sum(f), C.sgd(f.parameters, C.learning_parameter_schedule_per_sample(0.1)))
model_filename = str(tmpdir / 'function.out') model_filename = str(tmpdir / 'function.out')
checkpoint_filename = str(tmpdir / 'checkpoint.out') checkpoint_filename = str(tmpdir / 'checkpoint.out')


@@ -97,7 +97,7 @@ def mb_source(tmpdir, fileprefix, max_samples=FULL_DATA_SWEEP, ctf=ctf_data, str
def create_sample_model(device, writer=None, def create_sample_model(device, writer=None,
lr_per_sample=C.learning_rate_schedule([0.3, 0.2, 0.1, 0.0], C.UnitType.sample)): lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
in1 = sequence.input_variable(shape=(input_dim,)) in1 = sequence.input_variable(shape=(input_dim,))
labels = sequence.input_variable(shape=(input_dim,)) labels = sequence.input_variable(shape=(input_dim,))
p = parameter(shape=(input_dim,), init=10, device=device) p = parameter(shape=(input_dim,), init=10, device=device)
@@ -428,7 +428,7 @@ def test_session_progress_print_on_sweep_unit(tmpdir, device_id):
device = cntk_device(device_id) device = cntk_device(device_id)
writer = MockProgressWriter() writer = MockProgressWriter()
#set to a higher learning rate as we don't need to have converge but just to go through all the samples #set to a higher learning rate as we don't need to have converge but just to go through all the samples
t, feature, label = create_sample_model(device, writer, lr_per_sample=C.learning_rate_schedule(0.3, C.UnitType.sample)) t, feature, label = create_sample_model(device, writer, lr_per_sample=C.learning_parameter_schedule_per_sample(0.3))
mbs = mb_source(tmpdir, "training", mbs = mb_source(tmpdir, "training",
#max_samples=INFINITELY_REPEAT, #max_samples=INFINITELY_REPEAT,
max_sweeps = 4) max_sweeps = 4)


@@ -1,7 +1,7 @@
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import cntk as C import cntk as C
from cntk.learners import sgd, learning_rate_schedule, UnitType from cntk.learners import sgd
from cntk.logging import ProgressPrinter from cntk.logging import ProgressPrinter
from cntk.layers import Dense, Sequential from cntk.layers import Dense, Sequential
@@ -38,7 +38,7 @@ def ffnet():
pe = C.classification_error(z, label) pe = C.classification_error(z, label)
# Instantiate the trainer object to drive the model training # Instantiate the trainer object to drive the model training
lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch) lr_per_minibatch = C.learning_parameter_schedule(0.125)
progress_printer = ProgressPrinter(0) progress_printer = ProgressPrinter(0)
trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer]) trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])


@@ -3,7 +3,7 @@ import os
from cntk import Trainer, Axis from cntk import Trainer, Axis
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,\ from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,\
INFINITELY_REPEAT INFINITELY_REPEAT
from cntk.learners import sgd, learning_rate_schedule, UnitType from cntk.learners import sgd, learning_parameter_schedule_per_sample
from cntk import input_variable, cross_entropy_with_softmax, \ from cntk import input_variable, cross_entropy_with_softmax, \
classification_error, sequence classification_error, sequence
from cntk.logging import ProgressPrinter from cntk.logging import ProgressPrinter
@@ -58,7 +58,7 @@ def train_sequence_classifier():
label: reader.streams.labels label: reader.streams.labels
} }
lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample) lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
# Instantiate the trainer object to drive the model training # Instantiate the trainer object to drive the model training
progress_printer = ProgressPrinter(0) progress_printer = ProgressPrinter(0)
trainer = Trainer(classifier_output, (ce, pe), trainer = Trainer(classifier_output, (ce, pe),