Updated examples to use the latest progress writer API
This commit is contained in:
Parent: 52ebff65e0
Commit: 6256c6e355
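The hunks below repeat the same mechanical change across the CNTK example scripts: instead of constructing a ProgressPrinter separately and driving it by hand with update_with_trainer() and epoch_summary(), the scripts now hand the progress writers to the Trainer and only call trainer.summarize_training_progress() at the end of each epoch. The following is a minimal sketch of the new pattern, not code from the commit itself; it assumes the CNTK 2.0-era API used in these examples, and names such as z, ce, pe, the schedules, reader_train, input_map and the training-config variables stand in for the definitions in the scripts below:

import cntk

# Progress writers are created up front; a TensorBoard writer is optional.
progress_writers = [cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)]
if tensorboard_logdir is not None:
    progress_writers.append(cntk.utils.TensorBoardProgressWriter(
        freq=10, log_dir=tensorboard_logdir, model=z))

learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
# New style: the writers are passed straight to the Trainer ...
trainer = cntk.Trainer(z, (ce, pe), learner, progress_writers)

for epoch in range(max_epochs):
    sample_count = 0
    while sample_count < epoch_size:
        data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                           input_map=input_map)
        trainer.train_minibatch(data)  # the trainer feeds the writers itself
        sample_count += trainer.previous_minibatch_sample_count
    # ... so the per-minibatch update_with_trainer() and explicit epoch_summary()
    # calls of the old API are replaced by a single call per epoch:
    trainer.summarize_training_progress()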
@@ -109,7 +109,8 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
unit_gain = True,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

# define mapping from reader streams to network inputs
input_map = {

@@ -118,7 +119,6 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
}

cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)

# perform model training
for epoch in range(max_epochs): # loop over epochs

@@ -127,9 +127,8 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

### Evaluation action
@@ -64,6 +64,7 @@ def convnet_cifar10(debug_output=False):
# training config
epoch_size = 50000 # for now we manually specify epoch size
minibatch_size = 64
max_epochs = 30

# Set learning parameters
lr_per_sample = [0.0015625]*10 + [0.00046875]*10 + [0.00015625]

@@ -75,7 +76,8 @@ def convnet_cifar10(debug_output=False):
# Instantiate the trainer object to drive the model training
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

# define mapping from reader streams to network inputs
input_map = {

@@ -84,8 +86,6 @@ def convnet_cifar10(debug_output=False):
}

cntk.utils.log_number_of_parameters(z) ; print()
max_epochs = 30
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)

# Get minibatches of images to train with and perform model training
for epoch in range(max_epochs): # loop over epochs

@@ -94,9 +94,8 @@ def convnet_cifar10(debug_output=False):
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch)))

# Load test data
@@ -87,7 +87,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, epoch_size = 50000, max_e
# trainer object
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

# define mapping from reader streams to network inputs
input_map = {

@@ -96,7 +97,6 @@ def convnet_cifar10_dataaug(reader_train, reader_test, epoch_size = 50000, max_e
}

cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)

# perform model training
for epoch in range(max_epochs): # loop over epochs

@@ -105,9 +105,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, epoch_size = 50000, max_e
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

### Evaluation action
@@ -150,25 +150,25 @@ def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64,

network = create_conv_network()

progress_printer = cntk.utils.ProgressPrinter(
progress_writers = [cntk.utils.ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
rank=cntk.distributed.Communicator.rank(),
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
num_epochs=max_epochs)]

tensorboard_writer = cntk.utils.TensorBoardProgressWriter(
freq=num_mbs_per_log,
log_dir=tensorboard_logdir if tensorboard_logdir is not None else 'log',
rank=cntk.distributed.Communicator.rank(),
model=network['output'])
if tensorboard_logdir is not None:
progress_writers.append(cntk.utils.TensorBoardProgressWriter(
freq=num_mbs_per_log,
log_dir=tensorboard_logdir,
rank=cntk.distributed.Communicator.rank(),
model=network['output']))

trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, [progress_printer, tensorboard_writer])
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, minibatch_size,
epoch_size, restore, profiling)
train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore, profiling)


if __name__=='__main__':

@@ -179,7 +179,7 @@ if __name__=='__main__':
parser.add_argument('-datadir', '--datadir', help='Data directory where the CIFAR dataset is located', required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir', help='Directory where to tensorboard logs should be written', required=False, default='log')
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir', help='Directory where TensorBoard logs should be created', required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, required=False, default='160')
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, required=False, default='64')
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, required=False, default='50000')
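The distributed ConvNet hunks above also show how the new optional -tensorboard_logdir command-line flag feeds the writer list. A rough standalone sketch of that wiring follows; it is not code from the commit, and the surrounding names (max_epochs, num_mbs_per_log, network['output']) are placeholders taken from the hunks above:

import argparse
import cntk

parser = argparse.ArgumentParser()
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir',
                    help='Directory where TensorBoard logs should be created',
                    required=False, default=None)
args = vars(parser.parse_args())

# Console logging is always on; TensorBoard logging only when a directory is given.
progress_writers = [cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)]
if args['tensorboard_logdir'] is not None:
    progress_writers.append(cntk.utils.TensorBoardProgressWriter(
        freq=num_mbs_per_log,
        log_dir=args['tensorboard_logdir'],
        rank=cntk.distributed.Communicator.rank(),
        model=network['output']))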
@@ -56,6 +56,7 @@ def convnet_mnist(debug_output=False):
# training config
epoch_size = 60000 # for now we manually specify epoch size
minibatch_size = 128
max_epochs = 40

# Set learning parameters
lr_per_sample = [0.001]*10 + [0.0005]*10 + [0.0001]

@@ -65,7 +66,8 @@ def convnet_mnist(debug_output=False):

# Instantiate the trainer object to drive the model training
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

# define mapping from reader streams to network inputs
input_map = {

@@ -74,8 +76,6 @@ def convnet_mnist(debug_output=False):
}

cntk.utils.log_number_of_parameters(z) ; print()
max_epochs = 40
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)

# Get minibatches of images to train with and perform model training
for epoch in range(max_epochs): # loop over epochs

@@ -84,9 +84,8 @@ def convnet_mnist(debug_output=False):
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += data[label_var].num_samples # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch)))

# Load test data
@ -4,16 +4,16 @@
|
|||
# for full license information.
|
||||
# ==============================================================================
|
||||
|
||||
import argparse
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
from cntk import Trainer, minibatch_size_schedule
|
||||
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
|
||||
from cntk.device import cpu, set_default_device
|
||||
from cntk.learner import sgd, learning_rate_schedule, UnitType
|
||||
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant
|
||||
from cntk.utils import ProgressPrinter
|
||||
from cntk.training_session import *
|
||||
from cntk.utils import ProgressPrinter, TensorBoardProgressWriter
|
||||
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "common"))
|
||||
|
@ -35,7 +35,7 @@ def create_reader(path, is_training, input_dim, label_dim):
|
|||
|
||||
# Creates and trains a feedforward classification model for MNIST images
|
||||
|
||||
def simple_mnist():
|
||||
def simple_mnist(tensorboard_logdir=None):
|
||||
input_dim = 784
|
||||
num_output_classes = 10
|
||||
num_hidden_layers = 1
|
||||
|
@ -65,24 +65,27 @@ def simple_mnist():
|
|||
label : reader_train.streams.labels
|
||||
}
|
||||
|
||||
lr_per_minibatch=learning_rate_schedule(0.2, UnitType.minibatch)
|
||||
|
||||
# Get minibatches of images to train with and perform model training
|
||||
# Training config
|
||||
minibatch_size = 64
|
||||
num_samples_per_sweep = 60000
|
||||
num_sweeps_to_train_with = 10
|
||||
#training_progress_output_freq = 100
|
||||
|
||||
progress_printer = ProgressPrinter(
|
||||
# Instantiate progress writers.
|
||||
#training_progress_output_freq = 100
|
||||
progress_writers = [ProgressPrinter(
|
||||
#freq=training_progress_output_freq,
|
||||
tag='Training',
|
||||
num_epochs=num_sweeps_to_train_with)
|
||||
num_epochs=num_sweeps_to_train_with)]
|
||||
|
||||
if tensorboard_logdir is not None:
|
||||
progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))
|
||||
|
||||
# Instantiate the trainer object to drive the model training
|
||||
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_printer)
|
||||
lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
|
||||
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_writers)
|
||||
|
||||
training_session(
|
||||
trainer=trainer,
|
||||
trainer=trainer,
|
||||
mb_source = reader_train,
|
||||
mb_size = minibatch_size,
|
||||
var_to_stream = input_map,
|
||||
|
@ -120,5 +123,10 @@ if __name__=='__main__':
|
|||
# use the best available one, e.g.
|
||||
# set_default_device(cpu())
|
||||
|
||||
error = simple_mnist()
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir',
|
||||
help='Directory where TensorBoard logs should be created', required=False, default=None)
|
||||
args = vars(parser.parse_args())
|
||||
|
||||
error = simple_mnist(args['tensorboard_logdir'])
|
||||
print("Error: %f" % error)
|
||||
|
|
|
@ -7,11 +7,9 @@
|
|||
from __future__ import print_function
|
||||
import os
|
||||
import argparse
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
from cntk.utils import *
|
||||
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error
|
||||
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, reduce_mean
|
||||
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
|
||||
import cntk.io.transforms as xforms
|
||||
from cntk import Trainer, cntk_py
|
||||
|
@ -53,7 +51,8 @@ def create_reader(map_file, mean_file, train):
|
|||
|
||||
|
||||
# Train and evaluate the network.
|
||||
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None, model_dir=None):
|
||||
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
|
||||
model_dir=None, tensorboard_logdir=None):
|
||||
|
||||
set_computation_network_trace_level(0)
|
||||
|
||||
|
@ -85,10 +84,17 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
|
|||
lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
|
||||
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
|
||||
|
||||
# progress writers
|
||||
progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
|
||||
tensorboard_writer = None
|
||||
if tensorboard_logdir is not None:
|
||||
tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
|
||||
progress_writers.append(tensorboard_writer)
|
||||
|
||||
# trainer object
|
||||
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
|
||||
l2_regularization_weight = l2_reg_weight)
|
||||
trainer = Trainer(z, (ce, pe), learner)
|
||||
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
|
||||
l2_regularization_weight = l2_reg_weight)
|
||||
trainer = Trainer(z, (ce, pe), learner, progress_writers)
|
||||
|
||||
# define mapping from reader streams to network inputs
|
||||
input_map = {
|
||||
|
@ -97,7 +103,6 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
|
|||
}
|
||||
|
||||
log_number_of_parameters(z) ; print()
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
|
||||
|
||||
# perform model training
|
||||
if profiler_dir:
|
||||
|
@ -109,8 +114,14 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
|
|||
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
progress_printer.epoch_summary(with_metric=True)
|
||||
|
||||
trainer.summarize_training_progress()
|
||||
|
||||
# Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
|
||||
if tensorboard_writer:
|
||||
for parameter in z.parameters:
|
||||
tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)
|
||||
|
||||
if model_dir:
|
||||
z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
|
||||
enable_profiler() # begin to collect profiler data after first epoch
|
||||
|
@ -126,7 +137,6 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
|
|||
metric_numer = 0
|
||||
metric_denom = 0
|
||||
sample_count = 0
|
||||
minibatch_index = 0
|
||||
|
||||
while sample_count < test_epoch_size:
|
||||
current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
|
||||
|
@ -137,10 +147,9 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
|
|||
metric_denom += current_minibatch
|
||||
# Keep track of the number of samples processed so far.
|
||||
sample_count += data[label_var].num_samples
|
||||
minibatch_index += 1
|
||||
|
||||
print("")
|
||||
print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
|
||||
trainer.summarize_test_progress()
|
||||
print("")
|
||||
|
||||
return metric_numer/metric_denom
|
||||
|
@ -151,6 +160,7 @@ if __name__=='__main__':
|
|||
parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='160')
|
||||
parser.add_argument('-p', '--profiler_dir', help='directory for saving profiler output', required=False, default=None)
|
||||
parser.add_argument('-m', '--model_dir', help='directory for saving model', required=False, default=None)
|
||||
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir', help='Directory where TensorBoard logs should be created', required=False, default=None)
|
||||
|
||||
args = vars(parser.parse_args())
|
||||
epochs = int(args['epochs'])
|
||||
|
@ -164,4 +174,5 @@ if __name__=='__main__':
|
|||
reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
|
||||
|
||||
epoch_size = 50000
|
||||
train_and_evaluate(reader_train, reader_test, network_name, epoch_size, epochs, args['profiler_dir'], model_dir)
|
||||
train_and_evaluate(reader_train, reader_test, network_name, epoch_size, epochs, args['profiler_dir'], model_dir,
|
||||
args['tensorboard_logdir'])
|
||||
|
|
|
@ -94,7 +94,7 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
|
|||
learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
|
||||
else:
|
||||
learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
|
||||
|
||||
|
||||
return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
|
||||
|
||||
# Train and test
|
||||
|
|
|
@ -159,21 +159,20 @@ def train_fast_rcnn(debug_output=False):
|
|||
|
||||
# Instantiate the trainer object
|
||||
learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
|
||||
trainer = Trainer(frcn_output, (ce, pe), learner)
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
|
||||
trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)
|
||||
|
||||
# Get minibatches of images and perform model training
|
||||
print("Training Fast R-CNN model for %s epochs." % max_epochs)
|
||||
log_number_of_parameters(frcn_output)
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
|
||||
for epoch in range(max_epochs): # loop over epochs
|
||||
sample_count = 0
|
||||
while sample_count < epoch_size: # loop over minibatches in the epoch
|
||||
data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
|
||||
progress_printer.epoch_summary(with_metric=True)
|
||||
trainer.summarize_training_progress()
|
||||
if debug_output:
|
||||
frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))
|
||||
|
||||
|
|
|
@ -62,7 +62,8 @@ def deconv_mnist(max_epochs=3):
|
|||
|
||||
# Instantiate the trainer object to drive the model training
|
||||
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
|
||||
trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner)
|
||||
progress_printer = cntk.utils.ProgressPrinter(tag='Training')
|
||||
trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner, progress_printer)
|
||||
|
||||
# define mapping from reader streams to network inputs
|
||||
input_map = {
|
||||
|
@ -70,7 +71,6 @@ def deconv_mnist(max_epochs=3):
|
|||
}
|
||||
|
||||
cntk.utils.log_number_of_parameters(z) ; print()
|
||||
progress_printer = cntk.utils.ProgressPrinter(tag='Training')
|
||||
|
||||
# Get minibatches of images to train with and perform model training
|
||||
for epoch in range(max_epochs): # loop over epochs
|
||||
|
@ -79,9 +79,8 @@ def deconv_mnist(max_epochs=3):
|
|||
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
sample_count += data[input_var].num_samples # count samples processed so far
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
|
||||
progress_printer.epoch_summary(with_metric=True)
|
||||
trainer.summarize_training_progress()
|
||||
z.save(os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(epoch)))
|
||||
|
||||
# rename final model
|
||||
|
|
|
@ -114,23 +114,22 @@ def train_model(base_model_file, feature_node_name, last_hidden_node_name,
|
|||
lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
|
||||
mm_schedule = momentum_schedule(momentum_per_mb)
|
||||
learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
|
||||
trainer = Trainer(tl_model, (ce, pe), learner)
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
|
||||
trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)
|
||||
|
||||
# Get minibatches of images and perform model training
|
||||
print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
|
||||
log_number_of_parameters(tl_model)
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
|
||||
for epoch in range(num_epochs): # loop over epochs
|
||||
sample_count = 0
|
||||
while sample_count < epoch_size: # loop over minibatches in the epoch
|
||||
data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
if sample_count % (100 * mb_size) == 0:
|
||||
print ("Processed {0} samples".format(sample_count))
|
||||
|
||||
progress_printer.epoch_summary(with_metric=True)
|
||||
trainer.summarize_training_progress()
|
||||
|
||||
return tl_model
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ def create_model():
|
|||
# train action #
|
||||
########################
|
||||
|
||||
def train(reader, model, max_epochs, model_dir=None):
|
||||
def train(reader, model, max_epochs, model_dir=None, tensorboard_logdir=None):
|
||||
# Input variables denoting the features and label data
|
||||
query = cntk.blocks.Input(input_dim, is_sparse=False)
|
||||
slot_labels = cntk.blocks.Input(num_labels, is_sparse=True) # TODO: make sparse once it works
|
||||
|
@ -73,16 +73,21 @@ def train(reader, model, max_epochs, model_dir=None):
|
|||
momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))
|
||||
|
||||
# LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
|
||||
lr_schedule = [0.003]*2+[0.0015]*12+[0.0003]
|
||||
|
||||
# trainer object
|
||||
lr_schedule = [0.003]*2+[0.0015]*12+[0.0003]
|
||||
lr_per_sample = cntk.learner.learning_rate_schedule(lr_schedule, cntk.learner.UnitType.sample, epoch_size)
|
||||
learner = cntk.learner.adam_sgd(z.parameters,
|
||||
lr=lr_per_sample, momentum=momentum_time_constant,
|
||||
low_memory=True,
|
||||
gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
|
||||
|
||||
trainer = cntk.Trainer(z, (ce, pe), [learner])
|
||||
# Progress writers
|
||||
progress_writers = [cntk.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)] # more detailed logging
|
||||
#progress_writers = [cntk.ProgressPrinter(tag='Training', num_epochs=max_epochs)]
|
||||
if tensorboard_logdir is not None:
|
||||
progress_writers.append(cntk.TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))
|
||||
|
||||
# trainer object
|
||||
trainer = cntk.Trainer(z, (ce, pe), [learner], progress_writers)
|
||||
|
||||
# define mapping from reader streams to network inputs
|
||||
input_map = {
|
||||
|
@ -92,22 +97,30 @@ def train(reader, model, max_epochs, model_dir=None):
|
|||
|
||||
# process minibatches and perform model training
|
||||
cntk.utils.log_number_of_parameters(z) ; print()
|
||||
progress_printer = cntk.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) # more detailed logging
|
||||
#progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
|
||||
|
||||
t = 0
|
||||
aggregate_loss = 0
|
||||
aggregate_error = 0
|
||||
total_samples = 0
|
||||
|
||||
# loop over epochs
|
||||
for epoch in range(max_epochs):
|
||||
epoch_end = (epoch+1) * epoch_size
|
||||
|
||||
aggregate_loss = 0
|
||||
aggregate_error = 0
|
||||
total_samples = 0
|
||||
|
||||
# loop over minibatches on the epoch
|
||||
while t < epoch_end:
|
||||
# BUGBUG? The change of minibatch_size parameter vv has no effect.
|
||||
data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
t += trainer.previous_minibatch_sample_count # count samples processed so far
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
samples = trainer.previous_minibatch_sample_count
|
||||
t += samples
|
||||
total_samples += samples
|
||||
aggregate_loss += trainer.previous_minibatch_loss_average * samples
|
||||
aggregate_error += trainer.previous_minibatch_evaluation_average * samples
|
||||
|
||||
#def trace_node(name):
|
||||
# nl = [n for n in z.parameters if n.name() == name]
|
||||
|
@ -117,9 +130,9 @@ def train(reader, model, max_epochs, model_dir=None):
|
|||
#trace_node('stabilizer_param')
|
||||
if model_dir:
|
||||
z.save(os.path.join(model_dir, "atis" + "_{}.dnn".format(epoch)))
|
||||
loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
|
||||
trainer.summarize_training_progress()
|
||||
|
||||
return loss, metric
|
||||
return aggregate_loss / total_samples, aggregate_error / total_samples
|
||||
|
||||
|
||||
#############################
|
||||
|
@ -129,6 +142,8 @@ def train(reader, model, max_epochs, model_dir=None):
|
|||
if __name__=='__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='8')
|
||||
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir',
|
||||
help='Directory where TensorBoard logs should be created', required=False, default=None)
|
||||
|
||||
args = vars(parser.parse_args())
|
||||
max_epochs = int(args['epochs'])
|
||||
|
@ -145,7 +160,7 @@ if __name__=='__main__':
|
|||
|
||||
model_path = os.path.join(abs_path, "Models")
|
||||
# train
|
||||
train(reader, model, max_epochs, model_path)
|
||||
train(reader, model, max_epochs, model_path, args['tensorboard_logdir'])
|
||||
|
||||
# test (TODO)
|
||||
reader = create_reader(data_dir + "/atis.test.ctf")
|
||||
|
|
|
@ -1,139 +0,0 @@
|
|||
# ==============================================================================
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license. See LICENSE.md file in the project root
|
||||
# for full license information.
|
||||
# ==============================================================================
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import math
|
||||
from cntk.layers import * # Layers library
|
||||
from cntk.utils import *
|
||||
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
|
||||
from cntk import Trainer
|
||||
from cntk.learner import adam_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
|
||||
from cntk.ops import cross_entropy_with_softmax, classification_error
|
||||
|
||||
########################
|
||||
# variables and stuff #
|
||||
########################
|
||||
|
||||
cntk_dir = os.path.dirname(os.path.abspath(__file__)) + "/../.." # data resides in the CNTK folder
|
||||
data_dir = cntk_dir + "/Examples/LanguageUnderstanding/ATIS/Data" # under Examples/LanguageUnderstanding/ATIS
|
||||
vocab_size = 943 ; num_labels = 129 ; num_intents = 26 # number of words in vocab, slot labels, and intent labels
|
||||
|
||||
model_dir = "./Models"
|
||||
|
||||
# model dimensions
|
||||
input_dim = vocab_size
|
||||
label_dim = num_labels
|
||||
emb_dim = 150
|
||||
hidden_dim = 300
|
||||
|
||||
########################
|
||||
# define the reader #
|
||||
########################
|
||||
|
||||
def create_reader(path):
|
||||
return MinibatchSource(CTFDeserializer(path, StreamDefs(
|
||||
query = StreamDef(field='S0', shape=input_dim, is_sparse=True),
|
||||
intent_unused = StreamDef(field='S1', shape=num_intents, is_sparse=True), # BUGBUG: unused, and should infer dim
|
||||
slot_labels = StreamDef(field='S2', shape=label_dim, is_sparse=True)
|
||||
)))
|
||||
|
||||
########################
|
||||
# define the model #
|
||||
########################
|
||||
|
||||
def create_model():
|
||||
with default_options(initial_state=0.1): # inject an option to mimic the BS version identically; remove some day
|
||||
return Sequential([
|
||||
Embedding(emb_dim),
|
||||
Recurrence(LSTM(hidden_dim), go_backwards=False),
|
||||
Dense(label_dim)
|
||||
])
|
||||
|
||||
########################
|
||||
# train action #
|
||||
########################
|
||||
|
||||
def train(reader, model, max_epochs):
|
||||
# Input variables denoting the features and label data
|
||||
query = Input(input_dim, is_sparse=False)
|
||||
slot_labels = Input(num_labels, is_sparse=True) # TODO: make sparse once it works
|
||||
|
||||
# apply model to input
|
||||
z = model(query)
|
||||
|
||||
# loss and metric
|
||||
ce = cross_entropy_with_softmax(z, slot_labels)
|
||||
pe = classification_error (z, slot_labels)
|
||||
|
||||
# training config
|
||||
epoch_size = 36000
|
||||
minibatch_size = 70
|
||||
num_mbs_to_show_result = 100
|
||||
momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)) # TODO: Change to round number. This is 664.39. 700?
|
||||
|
||||
lr_schedule = [0.003]*2+[0.0015]*12+[0.0003] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
|
||||
|
||||
# trainer object
|
||||
lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size)
|
||||
learner = adam_sgd(z.parameters,
|
||||
lr=lr_per_sample, momentum=momentum_time_constant,
|
||||
unit_gain=True,
|
||||
low_memory=True,
|
||||
gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
|
||||
|
||||
# more detailed logging
|
||||
progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')
|
||||
#progress_printer = ProgressPrinter(tag='Training')
|
||||
tensorboard_writer = TensorBoardProgressWriter(freq=100, log_dir='atis_log', model=z)
|
||||
|
||||
trainer = Trainer(z, (ce, pe), [learner], [progress_printer, tensorboard_writer])
|
||||
|
||||
# define mapping from reader streams to network inputs
|
||||
input_map = {
|
||||
query : reader.streams.query,
|
||||
slot_labels : reader.streams.slot_labels
|
||||
}
|
||||
|
||||
# process minibatches and perform model training
|
||||
log_number_of_parameters(z) ; print()
|
||||
|
||||
t = 0
|
||||
for epoch in range(max_epochs): # loop over epochs
|
||||
epoch_end = (epoch+1) * epoch_size
|
||||
while t < epoch_end: # loop over minibatches on the epoch
|
||||
# BUGBUG? The change of minibatch_size parameter vv has no effect.
|
||||
data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
t += trainer.previous_minibatch_sample_count # count samples processed so far
|
||||
#def trace_node(name):
|
||||
# nl = [n for n in z.parameters if n.name() == name]
|
||||
# if len(nl) > 0:
|
||||
# print (name, np.asarray(nl[0].value))
|
||||
#trace_node('W')
|
||||
#trace_node('stabilizer_param')
|
||||
trainer.summarize_training_progress()
|
||||
|
||||
tensorboard_writer.close()
|
||||
|
||||
#############################
|
||||
# main function boilerplate #
|
||||
#############################
|
||||
|
||||
if __name__=='__main__':
|
||||
# TODO: leave these in for now as debugging aids; remove for beta
|
||||
from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
|
||||
#set_computation_network_trace_level(1) # TODO: remove debugging facilities once this all works
|
||||
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
|
||||
force_deterministic_algorithms()
|
||||
|
||||
reader = create_reader(data_dir + "/atis.train.ctf")
|
||||
model = create_model()
|
||||
# train
|
||||
train(reader, model, max_epochs=8)
|
||||
# test (TODO)
|
||||
reader = create_reader(data_dir + "/atis.test.ctf")
|
||||
#test(reader, model_dir + "/slu.cmf") # TODO: what is the correct pattern here?
|
|
@ -1,134 +0,0 @@
|
|||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license. See LICENSE.md file in the project root
|
||||
# for full license information.
|
||||
# ==============================================================================
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
from cntk import Trainer
|
||||
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
|
||||
from cntk.learner import sgd, learning_rate_schedule, UnitType
|
||||
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant, \
|
||||
reduce_max, reduce_mean, reduce_min
|
||||
from cntk.utils import *
|
||||
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(os.path.join(abs_path, "..", ".."))
|
||||
from Examples.common.nn import fully_connected_classifier_net
|
||||
|
||||
|
||||
def check_path(path):
|
||||
if not os.path.exists(path):
|
||||
readme_file = os.path.normpath(os.path.join(
|
||||
os.path.dirname(path), "..", "README.md"))
|
||||
raise RuntimeError(
|
||||
"File '%s' does not exist. Please follow the instructions at %s to download and prepare it." %
|
||||
(path, readme_file))
|
||||
|
||||
|
||||
def create_reader(path, is_training, input_dim, label_dim):
|
||||
return MinibatchSource(CTFDeserializer(path, StreamDefs(
|
||||
features=StreamDef(field='features', shape=input_dim, is_sparse=False),
|
||||
labels=StreamDef(field='labels', shape=label_dim, is_sparse=False)
|
||||
)), randomize=is_training, epoch_size=INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
|
||||
|
||||
|
||||
# Creates and trains a feedforward classification model for MNIST images
|
||||
def simple_mnist():
|
||||
input_dim = 784
|
||||
num_output_classes = 10
|
||||
num_hidden_layers = 1
|
||||
hidden_layers_dim = 200
|
||||
|
||||
# Input variables denoting the features and label data
|
||||
features = input_variable(input_dim, np.float32)
|
||||
label = input_variable(num_output_classes, np.float32)
|
||||
|
||||
# Instantiate the feedforward classification model
|
||||
scaled_input = element_times(constant(0.00390625), features)
|
||||
netout = fully_connected_classifier_net(
|
||||
scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)
|
||||
|
||||
ce = cross_entropy_with_softmax(netout, label)
|
||||
pe = classification_error(netout, label)
|
||||
|
||||
try:
|
||||
rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
|
||||
*"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
|
||||
except KeyError:
|
||||
rel_path = os.path.join(*"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/"))
|
||||
path = os.path.normpath(os.path.join(abs_path, rel_path))
|
||||
check_path(path)
|
||||
|
||||
reader_train = create_reader(path, True, input_dim, num_output_classes)
|
||||
|
||||
input_map = {
|
||||
features: reader_train.streams.features,
|
||||
label: reader_train.streams.labels
|
||||
}
|
||||
|
||||
# Instantiate progress writers.
|
||||
logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log")
|
||||
tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout)
|
||||
progress_printer = ProgressPrinter(freq=10, tag='Training')
|
||||
|
||||
# Instantiate the trainer object to drive the model training
|
||||
lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
|
||||
learner = sgd(netout.parameters, lr=lr_per_minibatch)
|
||||
trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer])
|
||||
|
||||
# Get minibatches of images to train with and perform model training
|
||||
minibatch_size = 64
|
||||
num_samples_per_sweep = 6000
|
||||
num_sweeps_to_train_with = 2
|
||||
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
|
||||
|
||||
for minibatch_idx in range(0, int(num_minibatches_to_train)):
|
||||
trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map))
|
||||
|
||||
# Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed.
|
||||
# Don't want to do that very often though, otherwise will spend too much time computing min/max/mean.
|
||||
if minibatch_idx % 10 == 9:
|
||||
for p in netout.parameters:
|
||||
tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx)
|
||||
tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx)
|
||||
tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx)
|
||||
|
||||
trainer.summarize_training_progress()
|
||||
|
||||
# Load test data
|
||||
try:
|
||||
rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
|
||||
*"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
|
||||
except KeyError:
|
||||
rel_path = os.path.join(*"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/"))
|
||||
path = os.path.normpath(os.path.join(abs_path, rel_path))
|
||||
check_path(path)
|
||||
|
||||
reader_test = create_reader(path, False, input_dim, num_output_classes)
|
||||
|
||||
input_map = {
|
||||
features: reader_test.streams.features,
|
||||
label: reader_test.streams.labels
|
||||
}
|
||||
|
||||
# Test data for trained model
|
||||
test_minibatch_size = 1024
|
||||
num_samples = 10000
|
||||
num_minibatches_to_test = num_samples / test_minibatch_size
|
||||
test_result = 0.0
|
||||
for i in range(0, int(num_minibatches_to_test)):
|
||||
mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
|
||||
test_result += trainer.test_minibatch(mb)
|
||||
|
||||
# Average of evaluation errors of all test minibatches
|
||||
trainer.summarize_test_progress()
|
||||
return test_result / num_minibatches_to_test
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Specify the target device to be used for computing, if you do not want to
|
||||
# use the best available one, e.g.
|
||||
# set_default_device(cpu())
|
||||
error = simple_mnist()
|
||||
print("Error: %f" % error)
|
|
@ -165,7 +165,8 @@ def train_lm(training_file, epochs, max_num_minibatches):
|
|||
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
|
||||
gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
|
||||
gradient_clipping_with_truncation=gradient_clipping_with_truncation)
|
||||
trainer = Trainer(z, (ce, errs), learner)
|
||||
progress_printer = ProgressPrinter(freq=100, tag='Training')
|
||||
trainer = Trainer(z, (ce, errs), learner, progress_printer)
|
||||
|
||||
sample_freq = 1000
|
||||
minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)
|
||||
|
@ -174,8 +175,6 @@ def train_lm(training_file, epochs, max_num_minibatches):
|
|||
log_number_of_parameters(z)
|
||||
print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
|
||||
print()
|
||||
|
||||
progress_printer = ProgressPrinter(freq=100, tag='Training')
|
||||
|
||||
for e in range(0, epochs):
|
||||
# Specify the mapping of input variables in the model to actual minibatch data to be trained with
|
||||
|
@ -188,7 +187,6 @@ def train_lm(training_file, epochs, max_num_minibatches):
|
|||
mask = [False]
|
||||
trainer.train_minibatch(arguments)
|
||||
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
global_minibatch = e*minibatches_per_epoch + b
|
||||
if global_minibatch % sample_freq == 0:
|
||||
print(sample(z, ix_to_char, vocab_dim, char_to_ix))
|
||||
|
|
|
@ -15,7 +15,7 @@ from cntk.ops.functions import load_model
|
|||
from cntk.blocks import LSTM, Stabilizer
|
||||
from cntk.layers import Recurrence, Dense
|
||||
from cntk.models import For, Sequential
|
||||
from cntk.utils import log_number_of_parameters, ProgressPrinter
|
||||
from cntk.utils import log_number_of_parameters
|
||||
from data_reader import DataReader
|
||||
from math import log, exp
|
||||
from cntk.device import set_default_device, cpu, gpu
|
||||
|
|
|
@ -197,11 +197,11 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
|
|||
mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant], epoch_size=epoch_size)
|
||||
|
||||
# Instantiate the trainer object to drive the model training
|
||||
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
|
||||
trainer = Trainer(z, (ce, pe), learner)
|
||||
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
|
||||
trainer = Trainer(z, (ce, pe), learner, progress_printer)
|
||||
|
||||
log_number_of_parameters(z) ; print()
|
||||
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
|
||||
|
||||
# Get minibatches of images to train with and perform model training
|
||||
for epoch in range(max_epochs): # loop over epochs
|
||||
|
@ -211,8 +211,7 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
|
|||
videos, labels, current_minibatch = train_reader.next_minibatch(minibatch_size)
|
||||
trainer.train_minibatch({input_var : videos, label_var : labels})
|
||||
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
progress_printer.epoch_summary(with_metric=True)
|
||||
trainer.summarize_training_progress()
|
||||
|
||||
# Test data for trained model
|
||||
epoch_size = 332
|
||||
|
|
|
@ -8,6 +8,7 @@ import numpy as np
|
|||
import os
|
||||
import sys
|
||||
import signal
|
||||
import shutil
|
||||
import subprocess
|
||||
import re
|
||||
import pytest
|
||||
|
@ -53,15 +54,29 @@ def mpiexec_test(device_id, script, params, expected_test_error, match_exactly=T
|
|||
assert np.allclose(float(results[0])/100, expected_test_error, atol=error_tolerance)
|
||||
|
||||
def test_cifar_convnet_distributed(device_id):
|
||||
# Create a path to TensorBoard log directory and make sure it does not exist.
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
tb_logdir = os.path.join(abs_path, 'ConvNet_CIFAR10_DataAug_Distributed_test_log')
|
||||
if os.path.exists(tb_logdir):
|
||||
shutil.rmtree(tb_logdir)
|
||||
|
||||
params = [ "-n", "2",
|
||||
"-m", "64",
|
||||
"-e", "3200",
|
||||
"-datadir", prepare_CIFAR10_data(),
|
||||
"-tensorboard_logdir", tb_logdir,
|
||||
"-q", "32",
|
||||
"-r",
|
||||
"-device", "0" ]
|
||||
mpiexec_test(device_id, script_under_test, params, 0.75, True)
|
||||
|
||||
# Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
|
||||
tb_files = 0
|
||||
for tb_file in os.listdir(tb_logdir):
|
||||
assert tb_file.startswith("events.out.tfevents")
|
||||
tb_files += 1
|
||||
assert tb_files == 1
|
||||
|
||||
def test_cifar_convnet_distributed_1bitsgd(device_id):
|
||||
params = [ "-n", "2",
|
||||
"-m", "64",
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import numpy as np
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from cntk.ops.tests.ops_test_utils import cntk_device
|
||||
from cntk.cntk_py import DeviceKind_GPU
|
||||
|
@ -38,7 +39,14 @@ def test_cifar_resnet_error(device_id):
|
|||
reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
|
||||
reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
|
||||
|
||||
test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1)
|
||||
# Create a path to TensorBoard log directory and make sure it does not exist.
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
|
||||
if os.path.exists(tb_logdir):
|
||||
shutil.rmtree(tb_logdir)
|
||||
|
||||
test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1,
|
||||
tensorboard_logdir=tb_logdir)
|
||||
|
||||
# We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
|
||||
# tolerance back once convolution operator is determinsitic.
|
||||
|
@ -47,3 +55,9 @@ def test_cifar_resnet_error(device_id):
|
|||
|
||||
# assert np.allclose(test_error, expected_test_error,
|
||||
# atol=TOLERANCE_ABSOLUTE)
|
||||
|
||||
files = 0
|
||||
for file in os.listdir(tb_logdir):
|
||||
assert file.startswith("events.out.tfevents")
|
||||
files += 1
|
||||
assert files == 1
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
import cntk as C
|
||||
import numpy as np
|
||||
from cntk.io import MinibatchSource, HTKFeatureDeserializer, HTKMLFDeserializer, StreamDef, StreamDefs
|
||||
from cntk.blocks import LSTM, Placeholder, Input
|
||||
from cntk.layers import Recurrence, Dense, BatchNormalization
|
||||
from cntk.blocks import LSTM
|
||||
from cntk.layers import Recurrence, Dense
|
||||
from cntk.models import Sequential, For
|
||||
|
||||
import os, sys
|
||||
import os
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
data_path = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Speech", "AN4", "Data")
|
||||
|
||||
|
||||
def test_htk_deserializers():
|
||||
mbsize = 640
|
||||
epoch_size = 1000 * mbsize
|
||||
|
@ -46,16 +46,15 @@ def test_htk_deserializers():
|
|||
momentum=C.momentum_as_time_constant_schedule(1000),
|
||||
low_memory=True,
|
||||
gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
|
||||
trainer = C.Trainer(z, (ce, errs), learner)
|
||||
progress_printer = C.ProgressPrinter(freq=0)
|
||||
trainer = C.Trainer(z, (ce, errs), learner, progress_printer)
|
||||
|
||||
input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }
|
||||
|
||||
pp = C.ProgressPrinter(freq=0)
|
||||
# just run and verify it doesn't crash
|
||||
for i in range(3):
|
||||
mb_data = reader.next_minibatch(mbsize, input_map=input_map)
|
||||
trainer.train_minibatch(mb_data)
|
||||
pp.update_with_trainer(trainer, with_metric=True)
|
||||
assert True
|
||||
os.chdir(abs_path)
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
from __future__ import print_function
|
||||
import os, sys
|
||||
import numpy as np
|
||||
import shutil
|
||||
from cntk import DeviceDescriptor
|
||||
|
||||
TOLERANCE_ABSOLUTE = 1E-1 # TODO: Once set_fixed_random_seed(1) is honored, this must be tightened a lot.
|
||||
|
@ -244,12 +245,26 @@ def test_language_understanding(device_id):
|
|||
|
||||
# test of a config like in the example but with additions to test many code paths
|
||||
if device_id >= 0: # BatchNormalization currently does not run on CPU
|
||||
# Create a path to TensorBoard log directory and make sure it does not exist.
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
|
||||
if os.path.exists(tb_logdir):
|
||||
shutil.rmtree(tb_logdir)
|
||||
|
||||
reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
|
||||
model = create_test_model()
|
||||
loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
|
||||
loss_avg, evaluation_avg = train(reader, model, max_epochs=1, tensorboard_logdir=tb_logdir)
|
||||
log_number_of_parameters(model, trace_level=1) ; print()
|
||||
expected_avg = [0.084, 0.407364]
|
||||
assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
|
||||
|
||||
# Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
|
||||
tb_files = 0
|
||||
for tb_file in os.listdir(tb_logdir):
|
||||
assert tb_file.startswith("events.out.tfevents")
|
||||
tb_files += 1
|
||||
assert tb_files == 1
|
||||
|
||||
# example also saves and loads; we skip it here, so that we get a test case of no save/load
|
||||
# (we save/load in all cases above)
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import os, sys
|
||||
import numpy as np
|
||||
import shutil
|
||||
from cntk.device import set_default_device
|
||||
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
|
@ -15,11 +16,24 @@ from SimpleMNIST import simple_mnist
|
|||
TOLERANCE_ABSOLUTE = 1E-1
|
||||
|
||||
def test_simple_mnist_error(device_id):
|
||||
# Create a path to TensorBoard log directory and make sure it does not exist.
|
||||
abs_path = os.path.dirname(os.path.abspath(__file__))
|
||||
tb_logdir = os.path.join(abs_path, 'simple_mnist_test_log')
|
||||
if os.path.exists(tb_logdir):
|
||||
shutil.rmtree(tb_logdir)
|
||||
|
||||
from cntk.ops.tests.ops_test_utils import cntk_device
|
||||
set_default_device(cntk_device(device_id))
|
||||
|
||||
test_error = simple_mnist()
|
||||
test_error = simple_mnist(tb_logdir)
|
||||
expected_test_error = 0.09
|
||||
|
||||
assert np.allclose(test_error, expected_test_error,
|
||||
atol=TOLERANCE_ABSOLUTE)
|
||||
|
||||
# Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
|
||||
tb_files = 0
|
||||
for tb_file in os.listdir(tb_logdir):
|
||||
assert tb_file.startswith("events.out.tfevents")
|
||||
tb_files += 1
|
||||
assert tb_files == 1
|
||||
|
|
|
@ -24,7 +24,7 @@ def test_cntk_202_language_understanding_trainerror(nb):
|
|||
for cell in nb.cells:
|
||||
try:
|
||||
if cell.cell_type == 'code':
|
||||
m = re.search('\[Evaluation\].* metric = (?P<metric>\d+\.\d+)%', cell.outputs[0]['text'])
|
||||
m = re.search('Finished Evaluation.* metric = (?P<metric>\d+\.\d+)%', cell.outputs[0]['text'])
|
||||
if m:
|
||||
metrics.append(float(m.group('metric')))
|
||||
except IndexError:
|
||||
|
|
|
@ -350,7 +350,8 @@
|
|||
" lr=lr_schedule, momentum=momentum_as_time_constant) \n",
|
||||
" \n",
|
||||
" # Instantiate the trainer\n",
|
||||
" trainer = Trainer(model, (loss, label_error), learner)\n",
|
||||
" progress_printer = ProgressPrinter(0)\n",
|
||||
" trainer = Trainer(model, (loss, label_error), learner, progress_printer)\n",
|
||||
" \n",
|
||||
" # Map the data streams to the input and labels.\n",
|
||||
" # Note: for autoencoders input == label\n",
|
||||
|
@ -359,17 +360,18 @@
|
|||
" label : reader_train.streams.features\n",
|
||||
" } \n",
|
||||
" \n",
|
||||
" pp = ProgressPrinter(0)\n",
|
||||
" aggregate_metric = 0\n",
|
||||
" for i in range(num_minibatches_to_train):\n",
|
||||
" # Read a mini batch from the training data file\n",
|
||||
" data = reader_train.next_minibatch(minibatch_size, input_map = input_map)\n",
|
||||
" \n",
|
||||
" # Run the trainer on and perform model training\n",
|
||||
" trainer.train_minibatch(data) \n",
|
||||
" pp.update_with_trainer(trainer, with_metric=True)\n",
|
||||
" trainer.train_minibatch(data)\n",
|
||||
" samples = trainer.previous_minibatch_sample_count\n",
|
||||
" aggregate_metric += trainer.previous_minibatch_evaluation_average * samples\n",
|
||||
" \n",
|
||||
" train_error = pp.avg_metric_since_start()*100\n",
|
||||
" print(\"Average training error: {0:0.2f}%\".format(pp.avg_metric_since_start()*100))\n",
|
||||
" train_error = (aggregate_metric*100.0) / (trainer.total_number_of_samples_seen)\n",
|
||||
" print(\"Average training error: {0:0.2f}%\".format(train_error))\n",
|
||||
" \n",
|
||||
" #############################################################################\n",
|
||||
" # Testing the model\n",
|
||||
|
|
|
@ -472,10 +472,11 @@
|
|||
" l2_reg_weight = 0.001\n",
|
||||
" \n",
|
||||
" # trainer object\n",
|
||||
" learner = momentum_sgd(z.parameters, \n",
|
||||
" lr = lr_per_minibatch, momentum = momentum_time_constant, \n",
|
||||
" l2_regularization_weight=l2_reg_weight)\n",
|
||||
" trainer = Trainer(z, (ce, pe), [learner])\n",
|
||||
" learner = momentum_sgd(z.parameters, \n",
|
||||
" lr = lr_per_minibatch, momentum = momentum_time_constant, \n",
|
||||
" l2_regularization_weight=l2_reg_weight)\n",
|
||||
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
|
||||
" trainer = Trainer(z, (ce, pe), [learner], [progress_printer])\n",
|
||||
"\n",
|
||||
" # define mapping from reader streams to network inputs\n",
|
||||
" input_map = {\n",
|
||||
|
@ -484,7 +485,6 @@
|
|||
" }\n",
|
||||
"\n",
|
||||
" log_number_of_parameters(z) ; print()\n",
|
||||
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
|
||||
"\n",
|
||||
" # perform model training\n",
|
||||
" batch_index = 0\n",
|
||||
|
@ -502,9 +502,8 @@
|
|||
" plot_data['loss'].append(trainer.previous_minibatch_loss_average)\n",
|
||||
" plot_data['error'].append(trainer.previous_minibatch_evaluation_average)\n",
|
||||
" \n",
|
||||
" progress_printer.update_with_trainer(trainer, with_metric=True) # log progress\n",
|
||||
" batch_index += 1\n",
|
||||
" progress_printer.epoch_summary(with_metric=True)\n",
|
||||
" trainer.summarize_training_progress()\n",
|
||||
" \n",
|
||||
" #\n",
|
||||
" # Evaluation action\n",
|
||||
|
|
|
@@ -433,12 +433,12 @@
" gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)\n",
"\n",
" # trainer\n",
" trainer = Trainer(model, criterion, learner)\n",
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
" #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) # more detailed logging\n",
" trainer = Trainer(model, criterion, learner, progress_printer)\n",
"\n",
" # process minibatches and perform model training\n",
" log_number_of_parameters(model)\n",
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
" #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) # more detailed logging\n",
"\n",
" t = 0\n",
" for epoch in range(max_epochs): # loop over epochs\n",
@@ -450,10 +450,7 @@
" })\n",
" trainer.train_minibatch(data) # update model with it\n",
" t += data[criterion.arguments[1]].num_samples # samples so far\n",
" progress_printer.update_with_trainer(trainer, with_metric=True) # log progress\n",
" loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)\n",
"\n",
" return loss, metric"
" trainer.summarize_training_progress()\n"
]
},
{
@@ -559,8 +556,8 @@
" momentum_as_time_constant = momentum_as_time_constant_schedule(0)\n",
" dummy_learner = adam_sgd(criterion.parameters, \n",
" lr=lr_schedule, momentum=momentum_as_time_constant, low_memory=True)\n",
" evaluator = Trainer(model, criterion, dummy_learner)\n",
" progress_printer = ProgressPrinter(tag='Evaluation', num_epochs=0)\n",
" evaluator = Trainer(model, criterion, dummy_learner, progress_printer)\n",
"\n",
" while True:\n",
" minibatch_size = 500\n",
@@ -570,11 +567,8 @@
" })\n",
" if not data: # until we hit the end\n",
" break\n",
" metric = evaluator.test_minibatch(data)\n",
" progress_printer.update(0, data[criterion.arguments[1]].num_samples, metric) # log progress\n",
" loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)\n",
"\n",
" return loss, metric"
" evaluator.test_minibatch(data)\n",
" evaluator.summarize_test_progress()\n"
]
},
{
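
The evaluation hunks apply the same idea on the test side: the writer is attached when the evaluator Trainer is constructed, and summarize_test_progress prints the aggregate metric instead of progress_printer.epoch_summary. A sketch under the same assumptions, with model, criterion, dummy_learner, and next_eval_minibatch() standing in for the notebook's own objects:

    from cntk import Trainer
    from cntk.utils import ProgressPrinter

    progress_printer = ProgressPrinter(tag='Evaluation', num_epochs=0)
    evaluator = Trainer(model, criterion, dummy_learner, progress_printer)

    while True:
        data = next_eval_minibatch()       # hypothetical helper; returns None at end of data
        if not data:
            break
        evaluator.test_minibatch(data)     # the attached writer accumulates the metric
    evaluator.summarize_test_progress()    # prints the aggregate test result
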
@@ -387,7 +387,8 @@
},
"outputs": [],
"source": [
"def build_graph(noise_shape, image_shape):\n",
"def build_graph(noise_shape, image_shape,\n",
" G_progress_printer, D_progress_printer):\n",
" input_dynamic_axes = [C.Axis.default_batch_axis()]\n",
" Z = Input(noise_shape, dynamic_axes=input_dynamic_axes)\n",
" X_real = Input(image_shape, dynamic_axes=input_dynamic_axes)\n",
@@ -420,12 +421,14 @@
" G_trainer = Trainer(\n",
" X_fake,\n",
" (G_loss, None),\n",
" G_learner\n",
" G_learner,\n",
" G_progress_printer\n",
" )\n",
" D_trainer = Trainer(\n",
" D_real,\n",
" (D_loss, None),\n",
" D_learner\n",
" D_learner,\n",
" D_progress_printer\n",
" )\n",
"\n",
" return X_real, X_fake, Z, G_trainer, D_trainer"
@@ -447,16 +450,15 @@
"outputs": [],
"source": [
"def train(reader_train):\n",
" X_real, X_fake, Z, G_trainer, D_trainer = \\\n",
" build_graph(g_input_dim, d_input_dim)\n",
" k = 2\n",
" \n",
" # print out loss for each model for upto 50 times\n",
" print_frequency_mbsize = num_minibatches // 50\n",
"\n",
" pp_G = ProgressPrinter(print_frequency_mbsize)\n",
" pp_D = ProgressPrinter(print_frequency_mbsize)\n",
" \n",
" k = 2\n",
" pp_D = ProgressPrinter(print_frequency_mbsize * k)\n",
"\n",
" X_real, X_fake, Z, G_trainer, D_trainer = \\\n",
" build_graph(g_input_dim, d_input_dim, pp_G, pp_D)\n",
" \n",
" input_map = {X_real: reader_train.streams.features}\n",
" for train_step in range(num_minibatches):\n",
@@ -473,10 +475,7 @@
" Z_data = noise_sample(minibatch_size)\n",
" batch_inputs = {Z: Z_data}\n",
" G_trainer.train_minibatch(batch_inputs)\n",
" \n",
" pp_G.update_with_trainer(G_trainer)\n",
" pp_D.update_with_trainer(D_trainer)\n",
" \n",
"\n",
" G_trainer_loss = C.utils.get_train_loss(G_trainer)\n",
"\n",
" return Z, X_fake, G_trainer_loss"
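
In the GAN notebook each sub-model gets its own writer, so generator and discriminator progress are reported separately and the manual update_with_trainer calls disappear. A sketch of the updated setup, assuming num_minibatches, g_input_dim, and d_input_dim are defined as in the notebook:

    from cntk.utils import ProgressPrinter

    k = 2                                            # discriminator updates per generator update
    print_frequency_mbsize = num_minibatches // 50   # report roughly 50 times over the run

    pp_G = ProgressPrinter(print_frequency_mbsize)
    pp_D = ProgressPrinter(print_frequency_mbsize * k)

    X_real, X_fake, Z, G_trainer, D_trainer = \
        build_graph(g_input_dim, d_input_dim, pp_G, pp_D)
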
@@ -325,16 +325,16 @@
" learning_rate_schedule = C.learning_rate_schedule(Param.learning_rate, C.UnitType.sample)\n",
" momentum_schedule = C.momentum_as_time_constant_schedule(Param.momentum_time_constant)\n",
" learner = C.momentum_sgd(z.parameters, learning_rate_schedule, momentum_schedule, True)\n",
" trainer = C.Trainer(z, (cross_entropy, errs), learner)\n",
" progress_writers = None\n",
" if do_print_progress:\n",
" progress_writers = [ProgressPrinter(freq=Param.reporting_interval, tag='Training')]\n",
" trainer = C.Trainer(z, (cross_entropy, errs), learner, progress_writers)\n",
"\n",
" minbatch = 0\n",
" average_cross_entropy = compute_average_cross_entropy(z)\n",
" minbatch_data = [0] # store minibatch values\n",
" cross_entropy_data = [average_cross_entropy] # store cross_entropy values\n",
"\n",
" if do_print_progress:\n",
" progress_printer = ProgressPrinter(freq=Param.reporting_interval, tag='Training') \n",
"\n",
" # Run training\n",
" t_total= 0\n",
"\n",
@@ -344,6 +344,8 @@
" label_data, indices = get_random_one_hot_data(Param.minibatch_size)\n",
" arguments = ({labels : label_data})\n",
"\n",
" # If do_print_progress is True, this will automatically print the progress using ProgressPrinter\n",
" # The printed loss numbers are computed using the sampled softmax criterion\n",
" t_start = timeit.default_timer()\n",
" trainer.train_minibatch(arguments)\n",
" t_end = timeit.default_timer()\n",
@@ -355,11 +357,6 @@
" if minbatch > 2:\n",
" t_total += t_delta\n",
"\n",
" # Print the progress using progress printer\n",
" # The prints numbers computed the the sampled softmax criterion\n",
" if do_print_progress:\n",
" progress_printer.update_with_trainer(trainer, with_metric=True)\n",
" \n",
" # For comparison also print result using the full criterion\n",
" if minbatch % Param.reporting_interval == int(Param.reporting_interval/2):\n",
" # memorize the progress data for plotting\n",
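
Here the writer list is built conditionally, so logging only happens when do_print_progress is set and the explicit update_with_trainer call in the loop can be removed. A sketch with the notebook's names (Param, z, cross_entropy, errs, learner, and arguments are assumed to be defined as above):

    import cntk as C
    from cntk.utils import ProgressPrinter

    progress_writers = None
    if do_print_progress:
        progress_writers = [ProgressPrinter(freq=Param.reporting_interval, tag='Training')]

    trainer = C.Trainer(z, (cross_entropy, errs), learner, progress_writers)
    trainer.train_minibatch(arguments)   # prints automatically when a writer is attached
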
@@ -54,20 +54,21 @@ def ffnet():

lr_per_minibatch=learning_rate_schedule(0.5, UnitType.minibatch)
# Instantiate the trainer object to drive the model training
trainer = Trainer(netout, (ce, pe), sgd(netout.parameters, lr=lr_per_minibatch))
learner = sgd(netout.parameters, lr=lr_per_minibatch)
progress_printer = ProgressPrinter(128)
trainer = Trainer(netout, (ce, pe), learner, progress_printer)

# Get minibatches of training data and perform model training
minibatch_size = 25

pp = ProgressPrinter(128)
for i in range(1024):
features, labels = generate_random_data(
minibatch_size, input_dim, num_output_classes)
# Specify the mapping of input variables in the model to actual
# minibatch data to be trained with
trainer.train_minibatch({input: features, label: labels})
pp.update_with_trainer(trainer)
pp.epoch_summary()

trainer.summarize_training_progress()
test_features, test_labels = generate_random_data(
minibatch_size, input_dim, num_output_classes)
avg_error = trainer.test_minibatch(
@@ -50,20 +50,17 @@ def ffnet(optimizer):

# Instantiate the trainer object to drive the model training
lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
trainer = C.Trainer(z, (ce, pe), [optimizer(z.parameters, lr_per_minibatch)])
progress_printer = ProgressPrinter(0)
trainer = C.Trainer(z, (ce, pe), [optimizer(z.parameters, lr_per_minibatch)], progress_printer)

# Get minibatches of training data and perform model training
minibatch_size = 25
num_minibatches_to_train = 63

pp = ProgressPrinter(0)
for i in range(num_minibatches_to_train):
train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
# Specify the mapping of input variables in the model to actual minibatch data to be trained with
trainer.train_minibatch({features : train_features, label : labels})
pp.update_with_trainer(trainer)

last_avg_error = pp.avg_loss_since_start()

test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
@@ -4,12 +4,12 @@
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import sys
import time

from cntk import cntk_py


def _warn_deprecated(message):
from warnings import warn
warn('DEPRECATED: ' + message, DeprecationWarning, stacklevel=2)
@@ -26,32 +26,33 @@ def _avg(numerator, denominator):
# TODO: Let's switch to import logging in the future instead of print. [ebarsoum]
class ProgressPrinter(cntk_py.ProgressWriter):
'''
Allows printing various training time statistics (e.g. loss and metric) and printing them as training progresses.
Allows printing various statistics (e.g. loss and metric) as training/evaluation progresses.

Args:
freq (`int` or `None`, default `None`): determines how often printing of training progress will occur.
A value of 0 means a geometric schedule (1,2,4,...).
A value > 0 means an arithmetic schedule (print for minibatch number: ``freq``,
print for minibatch number: 2*``freq``, print for minibatch number: 3*``freq``,...).
A value of None means no per-minibatch log.
first (`int`, default 0): Only start printing after the training minibatch number is greater or equal to
``first``.
tag (`string`, default EmptyString): prepend minibatch log lines with your own string
log_to_file (`string` or `None`, default `None`): if None, output log data to stdout.
If a string is passed, the string is path to a file for log data.
rank (`int` or `None`, default `None`): set this to distributed.rank if you are using distributed
parallelism -- each rank's log will go to separate file.
gen_heartbeat (`bool`, default `False`): If True output a progress message every 10 seconds or so to stdout.
num_epochs (`int`, default 300): The total number of epochs to be trained. Used for some metadata.
This parameter is optional.
test_freq (`int` or `None`, default `None`): similar to ``freq``, but applies to printing intermediate
test results.
test_first (`int`, default 0): similar to ``first``, but applies to printing intermediate test results.
'''

def __init__(self, freq=None, first=0, tag='', log_to_file=None, rank=None, gen_heartbeat=False, num_epochs=300,
test_freq=None, test_first=0):
'''
Constructor.

Args:
freq (`int` or `None`, default `None`): determines how often
printing will occur. The value of 0 means an geometric
schedule (1,2,4,...). A value > 0 means a arithmetic schedule
(a log print for minibatch number: ``freq``, a log print for minibatch number: 2*``freq``,
a log print for minibatch number: 3*``freq``,...), and a value of None means no per-minibatch log.
first (`int`, default 0): Only start logging after the minibatch number is greater or equal to ``first``.
tag (`string`, default EmptyString): prepend minibatch log lines with your own string
log_to_file (`string` or `None`, default `None`): if None, output log data to stdout.
If a string is passed, the string is path to a file for log data.
rank (`int` or `None`, default `None`): set this to distributed.rank if you are using distributed
parallelism -- each rank's log will go to separate file.
gen_heartbeat (`bool`, default `False`): If True output a progress message every 10 seconds or so to stdout.
num_epochs (`int`, default 300): The total number of epochs to be trained. Used for some metadata.
This parameter is optional.
test_freq (`int` or `None`, default `None`): similar to ``freq``, but applies to printing intermediate
test results.
test_first (`int`, default 0): similar to ``first``, but applies to printing intermediate test results.
'''
if freq is None:
freq = sys.maxsize
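
A short usage sketch for the constructor arguments documented above; the frequency and file name are illustrative values, not taken from the source:

    from cntk.utils import ProgressPrinter

    # freq=0: geometric schedule, i.e. log at minibatches 1, 2, 4, 8, ...
    pp_geometric = ProgressPrinter(freq=0, tag='Training')

    # freq=100, first=10: arithmetic schedule, log every 100 minibatches once
    # minibatch 10 has been reached, writing to a file instead of stdout.
    pp_arithmetic = ProgressPrinter(freq=100, first=10, tag='Training',
                                    log_to_file='train_log.txt', num_epochs=30)
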
@@ -289,7 +290,7 @@ class ProgressPrinter(cntk_py.ProgressWriter):

def update_with_trainer(self, trainer, with_metric=False):
'''
DEPRECATED. Use :func:`cntk.utils.ProgressPrinter.update_training` instead.
DEPRECATED.

Update the current loss, the minibatch size and optionally the metric using the information from the
``trainer``.
@@ -300,7 +301,8 @@ class ProgressPrinter(cntk_py.ProgressWriter):
'''
if self.total_updates == 0:
# Only warn once to avoid flooding with warnings.
_warn_deprecated('Use ProgressPrinter.update_progress() instead.')
_warn_deprecated('Inefficient. '
'Please pass an instance of ProgressPrinter to Trainer upon construction.')

if trainer is not None and trainer.previous_minibatch_sample_count != 0:
self.update(
@@ -389,25 +391,25 @@ class ProgressPrinter(cntk_py.ProgressWriter):

class TensorBoardProgressWriter(cntk_py.ProgressWriter):
'''
Allows tracking various training time statistics (e.g. loss and metric) and write them as TensorBoard event files.
Allows writing various statistics (e.g. loss and metric) to TensorBoard event files during training/evaluation.
The generated files can be opened in TensorBoard to visualize the progress.

Args:
freq (`int` or `None`, default `None`): frequency at which training progress is written.
For example, the value of 2 will cause the progress to be logged every second time when
`:func:cntk.util.TensorBoardFileWriter.update_with_trainer` is invoked.
None indicates that progress is logged only when
`:func:cntk.util.TensorBoardFileWriter.summarize_progress` is invoked.
Must be a positive integer otherwise.
log_dir (`string`, default '.'): directory where to create a TensorBoard event file.
rank (`int` or `None`, default `None`): rank of a worker when using distributed training, or `None` if
training locally. If not `None`, event files will be created only by rank 0.
model (:class:`cntk.ops.Function` or `None`, default `None`): model graph to plot.
'''

def __init__(self, freq=None, log_dir='.', rank=None, model=None):
'''
Constructor.

Args:
freq (`int` or `None`, default `None`): frequency at which progress is logged.
For example, the value of 2 will cause the progress to be logged every second time when
`:func:cntk.util.TensorBoardFileWriter.update_with_trainer` is invoked.
None indicates that progress is logged only when
`:func:cntk.util.TensorBoardFileWriter.summarize_progress` is invoked.
Must be a positive integer otherwise.
log_dir (`string`, default '.'): directory where to create a TensorBoard event file.
rank (`int` or `None`, default `None`): rank of a worker when using distributed training, or `None` if
training locally. If not `None`, event files will be created in log_dir/rank[rank] rather than log_dir.
model (:class:`cntk.ops.Function` or `None`, default `None`): model graph to plot.
'''
if freq is None:
freq = sys.maxsize
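
Like ProgressPrinter, a TensorBoardProgressWriter is attached to the Trainer at construction time, and several writers can be passed together. A sketch, assuming the class is exported from cntk.utils alongside ProgressPrinter and that z, ce, pe, learner, and max_epochs are defined as in the earlier examples (the freq and log_dir values are illustrative):

    from cntk import Trainer
    from cntk.utils import ProgressPrinter, TensorBoardProgressWriter

    tb_writer = TensorBoardProgressWriter(freq=10, log_dir='tensorboard_logs', model=z)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # Both writers receive the same training updates.
    trainer = Trainer(z, (ce, pe), learner, [progress_printer, tb_writer])
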
@@ -39,20 +39,22 @@ def ffnet():

# Instantiate the trainer object to drive the model training
lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)])
progress_printer = ProgressPrinter(0)
trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])

# Get minibatches of training data and perform model training
minibatch_size = 25
num_minibatches_to_train = 1024

pp = ProgressPrinter(0)
aggregate_loss = 0.0
for i in range(num_minibatches_to_train):
train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
# Specify the mapping of input variables in the model to actual minibatch data to be trained with
trainer.train_minibatch({features : train_features, label : labels})
pp.update_with_trainer(trainer)
sample_count = trainer.previous_minibatch_sample_count
aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

last_avg_error = pp.avg_loss_since_start()
last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
@@ -65,17 +65,17 @@ def train_sequence_classifier(debug_output=False):

lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
# Instantiate the trainer object to drive the model training
progress_printer = ProgressPrinter(0)
trainer = Trainer(classifier_output, (ce, pe),
sgd(classifier_output.parameters, lr=lr_per_sample))
sgd(classifier_output.parameters, lr=lr_per_sample),
progress_printer)

# Get minibatches of sequences to train with and perform model training
minibatch_size = 200

pp = ProgressPrinter(0)
for i in range(255):
mb = reader.next_minibatch(minibatch_size, input_map=input_map)
trainer.train_minibatch(mb)
pp.update_with_trainer(trainer, True)

evaluation_average = float(trainer.previous_minibatch_evaluation_average)
loss_average = float(trainer.previous_minibatch_loss_average)