Updated examples to use the latest progress writer API

NORTHAMERICA\vistepan 2017-02-22 16:37:37 -08:00
Parent 52ebff65e0
Commit 6256c6e355
33 changed files with 275 additions and 474 deletions
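
For reference, every file below follows the same migration: progress writers (ProgressPrinter, and optionally TensorBoardProgressWriter) are now passed to the Trainer constructor, train_minibatch lets the trainer log progress by itself, and the per-epoch summary comes from trainer.summarize_training_progress() instead of ProgressPrinter.update_with_trainer() / epoch_summary(). Below is a minimal self-contained sketch of the new style; it assumes the CNTK 2.0 beta package layout used by these examples (cntk.learner, cntk.layers, cntk.ops, cntk.utils), and the tiny random-data classifier is illustrative only, not taken from any file in this commit.

import numpy as np
from cntk import Trainer
from cntk.learner import sgd, learning_rate_schedule, UnitType
from cntk.layers import Dense
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error
from cntk.utils import ProgressPrinter

input_dim, num_classes = 4, 2
features = input_variable(input_dim, np.float32)
label = input_variable(num_classes, np.float32)
z = Dense(num_classes)(features)                        # toy one-layer classifier
ce = cross_entropy_with_softmax(z, label)
pe = classification_error(z, label)

# New style: the writer is handed to the Trainer instead of being driven by hand.
progress_printer = ProgressPrinter(freq=16, tag='Training')
lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_printer)

for i in range(64):
    x = np.random.rand(25, input_dim).astype(np.float32)
    y = np.eye(num_classes, dtype=np.float32)[np.random.randint(0, num_classes, size=25)]
    trainer.train_minibatch({features: x, label: y})    # the writer logs automatically

trainer.summarize_training_progress()                   # replaces progress_printer.epoch_summary()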

View file

@ -109,7 +109,8 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
unit_gain = True,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)
# define mapping from reader streams to network inputs
input_map = {
@ -118,7 +119,6 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
}
cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
# perform model training
for epoch in range(max_epochs): # loop over epochs
@ -127,9 +127,8 @@ def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))
### Evaluation action

View file

@ -64,6 +64,7 @@ def convnet_cifar10(debug_output=False):
# training config
epoch_size = 50000 # for now we manually specify epoch size
minibatch_size = 64
max_epochs = 30
# Set learning parameters
lr_per_sample = [0.0015625]*10 + [0.00046875]*10 + [0.00015625]
@ -75,7 +76,8 @@ def convnet_cifar10(debug_output=False):
# Instantiate the trainer object to drive the model training
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)
# define mapping from reader streams to network inputs
input_map = {
@ -84,8 +86,6 @@ def convnet_cifar10(debug_output=False):
}
cntk.utils.log_number_of_parameters(z) ; print()
max_epochs = 30
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
# Get minibatches of images to train with and perform model training
for epoch in range(max_epochs): # loop over epochs
@ -94,9 +94,8 @@ def convnet_cifar10(debug_output=False):
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch)))
# Load test data

View file

@ -87,7 +87,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, epoch_size = 50000, max_e
# trainer object
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)
# define mapping from reader streams to network inputs
input_map = {
@ -96,7 +97,6 @@ def convnet_cifar10_dataaug(reader_train, reader_test, epoch_size = 50000, max_e
}
cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
# perform model training
for epoch in range(max_epochs): # loop over epochs
@ -105,9 +105,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, epoch_size = 50000, max_e
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))
### Evaluation action

View file

@ -150,25 +150,25 @@ def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64,
network = create_conv_network()
progress_printer = cntk.utils.ProgressPrinter(
progress_writers = [cntk.utils.ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
rank=cntk.distributed.Communicator.rank(),
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
num_epochs=max_epochs)]
tensorboard_writer = cntk.utils.TensorBoardProgressWriter(
freq=num_mbs_per_log,
log_dir=tensorboard_logdir if tensorboard_logdir is not None else 'log',
rank=cntk.distributed.Communicator.rank(),
model=network['output'])
if tensorboard_logdir is not None:
progress_writers.append(cntk.utils.TensorBoardProgressWriter(
freq=num_mbs_per_log,
log_dir=tensorboard_logdir,
rank=cntk.distributed.Communicator.rank(),
model=network['output']))
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, [progress_printer, tensorboard_writer])
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, minibatch_size,
epoch_size, restore, profiling)
train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore, profiling)
if __name__=='__main__':
@ -179,7 +179,7 @@ if __name__=='__main__':
parser.add_argument('-datadir', '--datadir', help='Data directory where the CIFAR dataset is located', required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir', help='Directory where to tensorboard logs should be written', required=False, default='log')
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir', help='Directory where TensorBoard logs should be created', required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, required=False, default='160')
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, required=False, default='64')
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, required=False, default='50000')
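
Restating the writer setup from the distributed ConvNet example above as an isolated, hedged fragment (network, num_mbs_per_log, log_to_file, gen_heartbeat, max_epochs, tensorboard_logdir and the create_trainer arguments are the names from that file, not new definitions): the ProgressPrinter receives the worker's rank so each distributed worker logs to its own file, and the TensorBoard writer is only appended when a log directory was requested on the command line.

progress_writers = [cntk.utils.ProgressPrinter(
    freq=num_mbs_per_log,
    tag='Training',
    log_to_file=log_to_file,
    rank=cntk.distributed.Communicator.rank(),    # each rank logs to its own file
    gen_heartbeat=gen_heartbeat,
    num_epochs=max_epochs)]
if tensorboard_logdir is not None:
    progress_writers.append(cntk.utils.TensorBoardProgressWriter(
        freq=num_mbs_per_log,
        log_dir=tensorboard_logdir,
        rank=cntk.distributed.Communicator.rank(),    # rank passed so workers do not write conflicting event files
        model=network['output']))
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers)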

View file

@ -56,6 +56,7 @@ def convnet_mnist(debug_output=False):
# training config
epoch_size = 60000 # for now we manually specify epoch size
minibatch_size = 128
max_epochs = 40
# Set learning parameters
lr_per_sample = [0.001]*10 + [0.0005]*10 + [0.0001]
@ -65,7 +66,8 @@ def convnet_mnist(debug_output=False):
# Instantiate the trainer object to drive the model training
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
trainer = cntk.Trainer(z, (ce, pe), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)
# define mapping from reader streams to network inputs
input_map = {
@ -74,8 +76,6 @@ def convnet_mnist(debug_output=False):
}
cntk.utils.log_number_of_parameters(z) ; print()
max_epochs = 40
progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
# Get minibatches of images to train with and perform model training
for epoch in range(max_epochs): # loop over epochs
@ -84,9 +84,8 @@ def convnet_mnist(debug_output=False):
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += data[label_var].num_samples # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch)))
# Load test data

View file

@ -4,16 +4,16 @@
# for full license information.
# ==============================================================================
import argparse
import numpy as np
import sys
import os
from cntk import Trainer, minibatch_size_schedule
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.device import cpu, set_default_device
from cntk.learner import sgd, learning_rate_schedule, UnitType
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant
from cntk.utils import ProgressPrinter
from cntk.training_session import *
from cntk.utils import ProgressPrinter, TensorBoardProgressWriter
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "common"))
@ -35,7 +35,7 @@ def create_reader(path, is_training, input_dim, label_dim):
# Creates and trains a feedforward classification model for MNIST images
def simple_mnist():
def simple_mnist(tensorboard_logdir=None):
input_dim = 784
num_output_classes = 10
num_hidden_layers = 1
@ -65,24 +65,27 @@ def simple_mnist():
label : reader_train.streams.labels
}
lr_per_minibatch=learning_rate_schedule(0.2, UnitType.minibatch)
# Get minibatches of images to train with and perform model training
# Training config
minibatch_size = 64
num_samples_per_sweep = 60000
num_sweeps_to_train_with = 10
#training_progress_output_freq = 100
progress_printer = ProgressPrinter(
# Instantiate progress writers.
#training_progress_output_freq = 100
progress_writers = [ProgressPrinter(
#freq=training_progress_output_freq,
tag='Training',
num_epochs=num_sweeps_to_train_with)
num_epochs=num_sweeps_to_train_with)]
if tensorboard_logdir is not None:
progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))
# Instantiate the trainer object to drive the model training
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_printer)
lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_writers)
training_session(
trainer=trainer,
trainer=trainer,
mb_source = reader_train,
mb_size = minibatch_size,
var_to_stream = input_map,
@ -120,5 +123,10 @@ if __name__=='__main__':
# use the best available one, e.g.
# set_default_device(cpu())
error = simple_mnist()
parser = argparse.ArgumentParser()
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir',
help='Directory where TensorBoard logs should be created', required=False, default=None)
args = vars(parser.parse_args())
error = simple_mnist(args['tensorboard_logdir'])
print("Error: %f" % error)

View file

@ -7,11 +7,9 @@
from __future__ import print_function
import os
import argparse
import math
import numpy as np
from cntk.utils import *
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, reduce_mean
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
import cntk.io.transforms as xforms
from cntk import Trainer, cntk_py
@ -53,7 +51,8 @@ def create_reader(map_file, mean_file, train):
# Train and evaluate the network.
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None, model_dir=None):
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
model_dir=None, tensorboard_logdir=None):
set_computation_network_trace_level(0)
@ -85,10 +84,17 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
# progress writers
progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
tensorboard_writer = None
if tensorboard_logdir is not None:
tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
progress_writers.append(tensorboard_writer)
# trainer object
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = Trainer(z, (ce, pe), learner)
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = Trainer(z, (ce, pe), learner, progress_writers)
# define mapping from reader streams to network inputs
input_map = {
@ -97,7 +103,6 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
}
log_number_of_parameters(z) ; print()
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
# perform model training
if profiler_dir:
@ -109,8 +114,14 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
# Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
if tensorboard_writer:
for parameter in z.parameters:
tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)
if model_dir:
z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
enable_profiler() # begin to collect profiler data after first epoch
@ -126,7 +137,6 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
metric_numer = 0
metric_denom = 0
sample_count = 0
minibatch_index = 0
while sample_count < test_epoch_size:
current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
@ -137,10 +147,9 @@ def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_
metric_denom += current_minibatch
# Keep track of the number of samples processed so far.
sample_count += data[label_var].num_samples
minibatch_index += 1
print("")
print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
trainer.summarize_test_progress()
print("")
return metric_numer/metric_denom
@ -151,6 +160,7 @@ if __name__=='__main__':
parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='160')
parser.add_argument('-p', '--profiler_dir', help='directory for saving profiler output', required=False, default=None)
parser.add_argument('-m', '--model_dir', help='directory for saving model', required=False, default=None)
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir', help='Directory where TensorBoard logs should be created', required=False, default=None)
args = vars(parser.parse_args())
epochs = int(args['epochs'])
@ -164,4 +174,5 @@ if __name__=='__main__':
reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
epoch_size = 50000
train_and_evaluate(reader_train, reader_test, network_name, epoch_size, epochs, args['profiler_dir'], model_dir)
train_and_evaluate(reader_train, reader_test, network_name, epoch_size, epochs, args['profiler_dir'], model_dir,
args['tensorboard_logdir'])
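
The ResNet example above also shows how to log custom scalars: TensorBoardProgressWriter.write_value(name, value, step) records an arbitrary value under a name at a given step. Restated as an isolated fragment (z, epoch and tensorboard_writer are the names from that file), it logs the mean of every parameter tensor once per epoch so TensorBoard can confirm the parameters are actually being updated.

from cntk.ops import reduce_mean

if tensorboard_writer:
    for parameter in z.parameters:
        # one scalar per parameter tensor, tagged by the parameter uid, indexed by epoch
        tensorboard_writer.write_value(parameter.uid + "/mean",
                                       reduce_mean(parameter).eval(),
                                       epoch)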

View file

@ -94,7 +94,7 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
else:
learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
# Train and test

View file

@ -159,21 +159,20 @@ def train_fast_rcnn(debug_output=False):
# Instantiate the trainer object
learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
trainer = Trainer(frcn_output, (ce, pe), learner)
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)
# Get minibatches of images and perform model training
print("Training Fast R-CNN model for %s epochs." % max_epochs)
log_number_of_parameters(frcn_output)
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
for epoch in range(max_epochs): # loop over epochs
sample_count = 0
while sample_count < epoch_size: # loop over minibatches in the epoch
data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
if debug_output:
frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

View file

@ -62,7 +62,8 @@ def deconv_mnist(max_epochs=3):
# Instantiate the trainer object to drive the model training
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner)
progress_printer = cntk.utils.ProgressPrinter(tag='Training')
trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner, progress_printer)
# define mapping from reader streams to network inputs
input_map = {
@ -70,7 +71,6 @@ def deconv_mnist(max_epochs=3):
}
cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(tag='Training')
# Get minibatches of images to train with and perform model training
for epoch in range(max_epochs): # loop over epochs
@ -79,9 +79,8 @@ def deconv_mnist(max_epochs=3):
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += data[input_var].num_samples # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
z.save(os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(epoch)))
# rename final model

View file

@ -114,23 +114,22 @@ def train_model(base_model_file, feature_node_name, last_hidden_node_name,
lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
mm_schedule = momentum_schedule(momentum_per_mb)
learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
trainer = Trainer(tl_model, (ce, pe), learner)
progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)
# Get minibatches of images and perform model training
print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
log_number_of_parameters(tl_model)
progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
for epoch in range(num_epochs): # loop over epochs
sample_count = 0
while sample_count < epoch_size: # loop over minibatches in the epoch
data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
if sample_count % (100 * mb_size) == 0:
print ("Processed {0} samples".format(sample_count))
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
return tl_model

View file

@ -52,7 +52,7 @@ def create_model():
# train action #
########################
def train(reader, model, max_epochs, model_dir=None):
def train(reader, model, max_epochs, model_dir=None, tensorboard_logdir=None):
# Input variables denoting the features and label data
query = cntk.blocks.Input(input_dim, is_sparse=False)
slot_labels = cntk.blocks.Input(num_labels, is_sparse=True) # TODO: make sparse once it works
@ -73,16 +73,21 @@ def train(reader, model, max_epochs, model_dir=None):
momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))
# LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
lr_schedule = [0.003]*2+[0.0015]*12+[0.0003]
# trainer object
lr_schedule = [0.003]*2+[0.0015]*12+[0.0003]
lr_per_sample = cntk.learner.learning_rate_schedule(lr_schedule, cntk.learner.UnitType.sample, epoch_size)
learner = cntk.learner.adam_sgd(z.parameters,
lr=lr_per_sample, momentum=momentum_time_constant,
low_memory=True,
gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
trainer = cntk.Trainer(z, (ce, pe), [learner])
# Progress writers
progress_writers = [cntk.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)] # more detailed logging
#progress_writers = [cntk.ProgressPrinter(tag='Training', num_epochs=max_epochs)]
if tensorboard_logdir is not None:
progress_writers.append(cntk.TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))
# trainer object
trainer = cntk.Trainer(z, (ce, pe), [learner], progress_writers)
# define mapping from reader streams to network inputs
input_map = {
@ -92,22 +97,30 @@ def train(reader, model, max_epochs, model_dir=None):
# process minibatches and perform model training
cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) # more detailed logging
#progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
t = 0
aggregate_loss = 0
aggregate_error = 0
total_samples = 0
# loop over epochs
for epoch in range(max_epochs):
epoch_end = (epoch+1) * epoch_size
aggregate_loss = 0
aggregate_error = 0
total_samples = 0
# loop over minibatches on the epoch
while t < epoch_end:
# BUGBUG? The change of minibatch_size parameter vv has no effect.
data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
trainer.train_minibatch(data) # update model with it
t += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
samples = trainer.previous_minibatch_sample_count
t += samples
total_samples += samples
aggregate_loss += trainer.previous_minibatch_loss_average * samples
aggregate_error += trainer.previous_minibatch_evaluation_average * samples
#def trace_node(name):
# nl = [n for n in z.parameters if n.name() == name]
@ -117,9 +130,9 @@ def train(reader, model, max_epochs, model_dir=None):
#trace_node('stabilizer_param')
if model_dir:
z.save(os.path.join(model_dir, "atis" + "_{}.dnn".format(epoch)))
loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
return loss, metric
return aggregate_loss / total_samples, aggregate_error / total_samples
#############################
@ -129,6 +142,8 @@ def train(reader, model, max_epochs, model_dir=None):
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='8')
parser.add_argument('-tensorboard_logdir', '--tensorboard_logdir',
help='Directory where TensorBoard logs should be created', required=False, default=None)
args = vars(parser.parse_args())
max_epochs = int(args['epochs'])
@ -145,7 +160,7 @@ if __name__=='__main__':
model_path = os.path.join(abs_path, "Models")
# train
train(reader, model, max_epochs, model_path)
train(reader, model, max_epochs, model_path, args['tensorboard_logdir'])
# test (TODO)
reader = create_reader(data_dir + "/atis.test.ctf")
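
In the ATIS training script above, the old return value loss, metric, actual_samples = progress_printer.epoch_summary(...) goes away, since trainer.summarize_training_progress() only prints. The script therefore accumulates the averages itself; a hedged fragment of that inner loop (trainer, reader, input_map, minibatch_size, epoch_end and t are the names from the train() function):

aggregate_loss = 0
aggregate_error = 0
total_samples = 0
while t < epoch_end:
    data = reader.next_minibatch(min(minibatch_size, epoch_end - t), input_map=input_map)
    trainer.train_minibatch(data)
    samples = trainer.previous_minibatch_sample_count
    t += samples
    total_samples += samples
    # sample-weighted running sums of the per-minibatch averages
    aggregate_loss += trainer.previous_minibatch_loss_average * samples
    aggregate_error += trainer.previous_minibatch_evaluation_average * samples

trainer.summarize_training_progress()
avg_loss = aggregate_loss / total_samples     # what train() now returns
avg_error = aggregate_error / total_samples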

View file

@ -1,139 +0,0 @@
# ==============================================================================
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import math
from cntk.layers import * # Layers library
from cntk.utils import *
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
from cntk import Trainer
from cntk.learner import adam_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
from cntk.ops import cross_entropy_with_softmax, classification_error
########################
# variables and stuff #
########################
cntk_dir = os.path.dirname(os.path.abspath(__file__)) + "/../.." # data resides in the CNTK folder
data_dir = cntk_dir + "/Examples/LanguageUnderstanding/ATIS/Data" # under Examples/LanguageUnderstanding/ATIS
vocab_size = 943 ; num_labels = 129 ; num_intents = 26 # number of words in vocab, slot labels, and intent labels
model_dir = "./Models"
# model dimensions
input_dim = vocab_size
label_dim = num_labels
emb_dim = 150
hidden_dim = 300
########################
# define the reader #
########################
def create_reader(path):
return MinibatchSource(CTFDeserializer(path, StreamDefs(
query = StreamDef(field='S0', shape=input_dim, is_sparse=True),
intent_unused = StreamDef(field='S1', shape=num_intents, is_sparse=True), # BUGBUG: unused, and should infer dim
slot_labels = StreamDef(field='S2', shape=label_dim, is_sparse=True)
)))
########################
# define the model #
########################
def create_model():
with default_options(initial_state=0.1): # inject an option to mimic the BS version identically; remove some day
return Sequential([
Embedding(emb_dim),
Recurrence(LSTM(hidden_dim), go_backwards=False),
Dense(label_dim)
])
########################
# train action #
########################
def train(reader, model, max_epochs):
# Input variables denoting the features and label data
query = Input(input_dim, is_sparse=False)
slot_labels = Input(num_labels, is_sparse=True) # TODO: make sparse once it works
# apply model to input
z = model(query)
# loss and metric
ce = cross_entropy_with_softmax(z, slot_labels)
pe = classification_error (z, slot_labels)
# training config
epoch_size = 36000
minibatch_size = 70
num_mbs_to_show_result = 100
momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)) # TODO: Change to round number. This is 664.39. 700?
lr_schedule = [0.003]*2+[0.0015]*12+[0.0003] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
# trainer object
lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size)
learner = adam_sgd(z.parameters,
lr=lr_per_sample, momentum=momentum_time_constant,
unit_gain=True,
low_memory=True,
gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
# more detailed logging
progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')
#progress_printer = ProgressPrinter(tag='Training')
tensorboard_writer = TensorBoardProgressWriter(freq=100, log_dir='atis_log', model=z)
trainer = Trainer(z, (ce, pe), [learner], [progress_printer, tensorboard_writer])
# define mapping from reader streams to network inputs
input_map = {
query : reader.streams.query,
slot_labels : reader.streams.slot_labels
}
# process minibatches and perform model training
log_number_of_parameters(z) ; print()
t = 0
for epoch in range(max_epochs): # loop over epochs
epoch_end = (epoch+1) * epoch_size
while t < epoch_end: # loop over minibatches on the epoch
# BUGBUG? The change of minibatch_size parameter vv has no effect.
data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
trainer.train_minibatch(data) # update model with it
t += trainer.previous_minibatch_sample_count # count samples processed so far
#def trace_node(name):
# nl = [n for n in z.parameters if n.name() == name]
# if len(nl) > 0:
# print (name, np.asarray(nl[0].value))
#trace_node('W')
#trace_node('stabilizer_param')
trainer.summarize_training_progress()
tensorboard_writer.close()
#############################
# main function boilerplate #
#############################
if __name__=='__main__':
# TODO: leave these in for now as debugging aids; remove for beta
from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
#set_computation_network_trace_level(1) # TODO: remove debugging facilities once this all works
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
force_deterministic_algorithms()
reader = create_reader(data_dir + "/atis.train.ctf")
model = create_model()
# train
train(reader, model, max_epochs=8)
# test (TODO)
reader = create_reader(data_dir + "/atis.test.ctf")
#test(reader, model_dir + "/slu.cmf") # TODO: what is the correct pattern here?

View file

@ -1,134 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import numpy as np
import sys
import os
from cntk import Trainer
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.learner import sgd, learning_rate_schedule, UnitType
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant, \
reduce_max, reduce_mean, reduce_min
from cntk.utils import *
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", ".."))
from Examples.common.nn import fully_connected_classifier_net
def check_path(path):
if not os.path.exists(path):
readme_file = os.path.normpath(os.path.join(
os.path.dirname(path), "..", "README.md"))
raise RuntimeError(
"File '%s' does not exist. Please follow the instructions at %s to download and prepare it." %
(path, readme_file))
def create_reader(path, is_training, input_dim, label_dim):
return MinibatchSource(CTFDeserializer(path, StreamDefs(
features=StreamDef(field='features', shape=input_dim, is_sparse=False),
labels=StreamDef(field='labels', shape=label_dim, is_sparse=False)
)), randomize=is_training, epoch_size=INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
# Creates and trains a feedforward classification model for MNIST images
def simple_mnist():
input_dim = 784
num_output_classes = 10
num_hidden_layers = 1
hidden_layers_dim = 200
# Input variables denoting the features and label data
features = input_variable(input_dim, np.float32)
label = input_variable(num_output_classes, np.float32)
# Instantiate the feedforward classification model
scaled_input = element_times(constant(0.00390625), features)
netout = fully_connected_classifier_net(
scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)
ce = cross_entropy_with_softmax(netout, label)
pe = classification_error(netout, label)
try:
rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
*"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
except KeyError:
rel_path = os.path.join(*"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/"))
path = os.path.normpath(os.path.join(abs_path, rel_path))
check_path(path)
reader_train = create_reader(path, True, input_dim, num_output_classes)
input_map = {
features: reader_train.streams.features,
label: reader_train.streams.labels
}
# Instantiate progress writers.
logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log")
tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout)
progress_printer = ProgressPrinter(freq=10, tag='Training')
# Instantiate the trainer object to drive the model training
lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
learner = sgd(netout.parameters, lr=lr_per_minibatch)
trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer])
# Get minibatches of images to train with and perform model training
minibatch_size = 64
num_samples_per_sweep = 6000
num_sweeps_to_train_with = 2
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
for minibatch_idx in range(0, int(num_minibatches_to_train)):
trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map))
# Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed.
# Don't want to do that very often though, otherwise will spend too much time computing min/max/mean.
if minibatch_idx % 10 == 9:
for p in netout.parameters:
tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx)
tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx)
tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx)
trainer.summarize_training_progress()
# Load test data
try:
rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
*"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
except KeyError:
rel_path = os.path.join(*"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/"))
path = os.path.normpath(os.path.join(abs_path, rel_path))
check_path(path)
reader_test = create_reader(path, False, input_dim, num_output_classes)
input_map = {
features: reader_test.streams.features,
label: reader_test.streams.labels
}
# Test data for trained model
test_minibatch_size = 1024
num_samples = 10000
num_minibatches_to_test = num_samples / test_minibatch_size
test_result = 0.0
for i in range(0, int(num_minibatches_to_test)):
mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
test_result += trainer.test_minibatch(mb)
# Average of evaluation errors of all test minibatches
trainer.summarize_test_progress()
return test_result / num_minibatches_to_test
if __name__ == '__main__':
# Specify the target device to be used for computing, if you do not want to
# use the best available one, e.g.
# set_default_device(cpu())
error = simple_mnist()
print("Error: %f" % error)

View file

@ -165,7 +165,8 @@ def train_lm(training_file, epochs, max_num_minibatches):
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
gradient_clipping_with_truncation=gradient_clipping_with_truncation)
trainer = Trainer(z, (ce, errs), learner)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)
sample_freq = 1000
minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)
@ -174,8 +175,6 @@ def train_lm(training_file, epochs, max_num_minibatches):
log_number_of_parameters(z)
print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
print()
progress_printer = ProgressPrinter(freq=100, tag='Training')
for e in range(0, epochs):
# Specify the mapping of input variables in the model to actual minibatch data to be trained with
@ -188,7 +187,6 @@ def train_lm(training_file, epochs, max_num_minibatches):
mask = [False]
trainer.train_minibatch(arguments)
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
global_minibatch = e*minibatches_per_epoch + b
if global_minibatch % sample_freq == 0:
print(sample(z, ix_to_char, vocab_dim, char_to_ix))

View file

@ -15,7 +15,7 @@ from cntk.ops.functions import load_model
from cntk.blocks import LSTM, Stabilizer
from cntk.layers import Recurrence, Dense
from cntk.models import For, Sequential
from cntk.utils import log_number_of_parameters, ProgressPrinter
from cntk.utils import log_number_of_parameters
from data_reader import DataReader
from math import log, exp
from cntk.device import set_default_device, cpu, gpu

View file

@ -197,11 +197,11 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant], epoch_size=epoch_size)
# Instantiate the trainer object to drive the model training
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
trainer = Trainer(z, (ce, pe), learner)
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
trainer = Trainer(z, (ce, pe), learner, progress_printer)
log_number_of_parameters(z) ; print()
progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
# Get minibatches of images to train with and perform model training
for epoch in range(max_epochs): # loop over epochs
@ -211,8 +211,7 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
videos, labels, current_minibatch = train_reader.next_minibatch(minibatch_size)
trainer.train_minibatch({input_var : videos, label_var : labels})
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
trainer.summarize_training_progress()
# Test data for trained model
epoch_size = 332

View file

@ -8,6 +8,7 @@ import numpy as np
import os
import sys
import signal
import shutil
import subprocess
import re
import pytest
@ -53,15 +54,29 @@ def mpiexec_test(device_id, script, params, expected_test_error, match_exactly=T
assert np.allclose(float(results[0])/100, expected_test_error, atol=error_tolerance)
def test_cifar_convnet_distributed(device_id):
# Create a path to TensorBoard log directory and make sure it does not exist.
abs_path = os.path.dirname(os.path.abspath(__file__))
tb_logdir = os.path.join(abs_path, 'ConvNet_CIFAR10_DataAug_Distributed_test_log')
if os.path.exists(tb_logdir):
shutil.rmtree(tb_logdir)
params = [ "-n", "2",
"-m", "64",
"-e", "3200",
"-datadir", prepare_CIFAR10_data(),
"-tensorboard_logdir", tb_logdir,
"-q", "32",
"-r",
"-device", "0" ]
mpiexec_test(device_id, script_under_test, params, 0.75, True)
# Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
tb_files = 0
for tb_file in os.listdir(tb_logdir):
assert tb_file.startswith("events.out.tfevents")
tb_files += 1
assert tb_files == 1
def test_cifar_convnet_distributed_1bitsgd(device_id):
params = [ "-n", "2",
"-m", "64",

View file

@ -6,6 +6,7 @@
import numpy as np
import os
import shutil
import sys
from cntk.ops.tests.ops_test_utils import cntk_device
from cntk.cntk_py import DeviceKind_GPU
@ -38,7 +39,14 @@ def test_cifar_resnet_error(device_id):
reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1)
# Create a path to TensorBoard log directory and make sure it does not exist.
abs_path = os.path.dirname(os.path.abspath(__file__))
tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
if os.path.exists(tb_logdir):
shutil.rmtree(tb_logdir)
test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1,
tensorboard_logdir=tb_logdir)
# We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
# tolerance back once convolution operator is determinsitic.
@ -47,3 +55,9 @@ def test_cifar_resnet_error(device_id):
# assert np.allclose(test_error, expected_test_error,
# atol=TOLERANCE_ABSOLUTE)
files = 0
for file in os.listdir(tb_logdir):
assert file.startswith("events.out.tfevents")
files += 1
assert files == 1
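
The updated tests validate TensorBoard output the same way in each case; as an isolated, hedged sketch (the directory name is illustrative): the log directory is wiped before the run, and afterwards it must contain exactly one events.out.tfevents file.

import os
import shutil

tb_logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'TrainResNet_CIFAR10_test_log')
if os.path.exists(tb_logdir):
    shutil.rmtree(tb_logdir)          # start from a clean log directory

# ... run training with tensorboard_logdir=tb_logdir ...

tb_files = 0
for tb_file in os.listdir(tb_logdir):
    assert tb_file.startswith("events.out.tfevents")
    tb_files += 1
assert tb_files == 1                  # exactly one event file was written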

View file

@ -1,14 +1,14 @@
import cntk as C
import numpy as np
from cntk.io import MinibatchSource, HTKFeatureDeserializer, HTKMLFDeserializer, StreamDef, StreamDefs
from cntk.blocks import LSTM, Placeholder, Input
from cntk.layers import Recurrence, Dense, BatchNormalization
from cntk.blocks import LSTM
from cntk.layers import Recurrence, Dense
from cntk.models import Sequential, For
import os, sys
import os
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Speech", "AN4", "Data")
def test_htk_deserializers():
mbsize = 640
epoch_size = 1000 * mbsize
@ -46,16 +46,15 @@ def test_htk_deserializers():
momentum=C.momentum_as_time_constant_schedule(1000),
low_memory=True,
gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
trainer = C.Trainer(z, (ce, errs), learner)
progress_printer = C.ProgressPrinter(freq=0)
trainer = C.Trainer(z, (ce, errs), learner, progress_printer)
input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }
pp = C.ProgressPrinter(freq=0)
# just run and verify it doesn't crash
for i in range(3):
mb_data = reader.next_minibatch(mbsize, input_map=input_map)
trainer.train_minibatch(mb_data)
pp.update_with_trainer(trainer, with_metric=True)
assert True
os.chdir(abs_path)

View file

@ -7,6 +7,7 @@
from __future__ import print_function
import os, sys
import numpy as np
import shutil
from cntk import DeviceDescriptor
TOLERANCE_ABSOLUTE = 1E-1 # TODO: Once set_fixed_random_seed(1) is honored, this must be tightened a lot.
@ -244,12 +245,26 @@ def test_language_understanding(device_id):
# test of a config like in the example but with additions to test many code paths
if device_id >= 0: # BatchNormalization currently does not run on CPU
# Create a path to TensorBoard log directory and make sure it does not exist.
abs_path = os.path.dirname(os.path.abspath(__file__))
tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
if os.path.exists(tb_logdir):
shutil.rmtree(tb_logdir)
reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
model = create_test_model()
loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
loss_avg, evaluation_avg = train(reader, model, max_epochs=1, tensorboard_logdir=tb_logdir)
log_number_of_parameters(model, trace_level=1) ; print()
expected_avg = [0.084, 0.407364]
assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
# Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
tb_files = 0
for tb_file in os.listdir(tb_logdir):
assert tb_file.startswith("events.out.tfevents")
tb_files += 1
assert tb_files == 1
# example also saves and loads; we skip it here, so that we get a test case of no save/load
# (we save/load in all cases above)

View file

@ -6,6 +6,7 @@
import os, sys
import numpy as np
import shutil
from cntk.device import set_default_device
abs_path = os.path.dirname(os.path.abspath(__file__))
@ -15,11 +16,24 @@ from SimpleMNIST import simple_mnist
TOLERANCE_ABSOLUTE = 1E-1
def test_simple_mnist_error(device_id):
# Create a path to TensorBoard log directory and make sure it does not exist.
abs_path = os.path.dirname(os.path.abspath(__file__))
tb_logdir = os.path.join(abs_path, 'simple_mnist_test_log')
if os.path.exists(tb_logdir):
shutil.rmtree(tb_logdir)
from cntk.ops.tests.ops_test_utils import cntk_device
set_default_device(cntk_device(device_id))
test_error = simple_mnist()
test_error = simple_mnist(tb_logdir)
expected_test_error = 0.09
assert np.allclose(test_error, expected_test_error,
atol=TOLERANCE_ABSOLUTE)
# Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
tb_files = 0
for tb_file in os.listdir(tb_logdir):
assert tb_file.startswith("events.out.tfevents")
tb_files += 1
assert tb_files == 1

View file

@ -24,7 +24,7 @@ def test_cntk_202_language_understanding_trainerror(nb):
for cell in nb.cells:
try:
if cell.cell_type == 'code':
m = re.search('\[Evaluation\].* metric = (?P<metric>\d+\.\d+)%', cell.outputs[0]['text'])
m = re.search('Finished Evaluation.* metric = (?P<metric>\d+\.\d+)%', cell.outputs[0]['text'])
if m:
metrics.append(float(m.group('metric')))
except IndexError:

View file

@ -350,7 +350,8 @@
" lr=lr_schedule, momentum=momentum_as_time_constant) \n",
" \n",
" # Instantiate the trainer\n",
" trainer = Trainer(model, (loss, label_error), learner)\n",
" progress_printer = ProgressPrinter(0)\n",
" trainer = Trainer(model, (loss, label_error), learner, progress_printer)\n",
" \n",
" # Map the data streams to the input and labels.\n",
" # Note: for autoencoders input == label\n",
@ -359,17 +360,18 @@
" label : reader_train.streams.features\n",
" } \n",
" \n",
" pp = ProgressPrinter(0)\n",
" aggregate_metric = 0\n",
" for i in range(num_minibatches_to_train):\n",
" # Read a mini batch from the training data file\n",
" data = reader_train.next_minibatch(minibatch_size, input_map = input_map)\n",
" \n",
" # Run the trainer on and perform model training\n",
" trainer.train_minibatch(data) \n",
" pp.update_with_trainer(trainer, with_metric=True)\n",
" trainer.train_minibatch(data)\n",
" samples = trainer.previous_minibatch_sample_count\n",
" aggregate_metric += trainer.previous_minibatch_evaluation_average * samples\n",
" \n",
" train_error = pp.avg_metric_since_start()*100\n",
" print(\"Average training error: {0:0.2f}%\".format(pp.avg_metric_since_start()*100))\n",
" train_error = (aggregate_metric*100.0) / (trainer.total_number_of_samples_seen)\n",
" print(\"Average training error: {0:0.2f}%\".format(train_error))\n",
" \n",
" #############################################################################\n",
" # Testing the model\n",

View file

@ -472,10 +472,11 @@
" l2_reg_weight = 0.001\n",
" \n",
" # trainer object\n",
" learner = momentum_sgd(z.parameters, \n",
" lr = lr_per_minibatch, momentum = momentum_time_constant, \n",
" l2_regularization_weight=l2_reg_weight)\n",
" trainer = Trainer(z, (ce, pe), [learner])\n",
" learner = momentum_sgd(z.parameters, \n",
" lr = lr_per_minibatch, momentum = momentum_time_constant, \n",
" l2_regularization_weight=l2_reg_weight)\n",
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
" trainer = Trainer(z, (ce, pe), [learner], [progress_printer])\n",
"\n",
" # define mapping from reader streams to network inputs\n",
" input_map = {\n",
@ -484,7 +485,6 @@
" }\n",
"\n",
" log_number_of_parameters(z) ; print()\n",
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
"\n",
" # perform model training\n",
" batch_index = 0\n",
@ -502,9 +502,8 @@
" plot_data['loss'].append(trainer.previous_minibatch_loss_average)\n",
" plot_data['error'].append(trainer.previous_minibatch_evaluation_average)\n",
" \n",
" progress_printer.update_with_trainer(trainer, with_metric=True) # log progress\n",
" batch_index += 1\n",
" progress_printer.epoch_summary(with_metric=True)\n",
" trainer.summarize_training_progress()\n",
" \n",
" #\n",
" # Evaluation action\n",

View file

@ -433,12 +433,12 @@
" gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)\n",
"\n",
" # trainer\n",
" trainer = Trainer(model, criterion, learner)\n",
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
" #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) # more detailed logging\n",
" trainer = Trainer(model, criterion, learner, progress_printer)\n",
"\n",
" # process minibatches and perform model training\n",
" log_number_of_parameters(model)\n",
" progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)\n",
" #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) # more detailed logging\n",
"\n",
" t = 0\n",
" for epoch in range(max_epochs): # loop over epochs\n",
@ -450,10 +450,7 @@
" })\n",
" trainer.train_minibatch(data) # update model with it\n",
" t += data[criterion.arguments[1]].num_samples # samples so far\n",
" progress_printer.update_with_trainer(trainer, with_metric=True) # log progress\n",
" loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)\n",
"\n",
" return loss, metric"
" trainer.summarize_training_progress()\n"
]
},
{
@ -559,8 +556,8 @@
" momentum_as_time_constant = momentum_as_time_constant_schedule(0)\n",
" dummy_learner = adam_sgd(criterion.parameters, \n",
" lr=lr_schedule, momentum=momentum_as_time_constant, low_memory=True)\n",
" evaluator = Trainer(model, criterion, dummy_learner)\n",
" progress_printer = ProgressPrinter(tag='Evaluation', num_epochs=0)\n",
" evaluator = Trainer(model, criterion, dummy_learner, progress_printer)\n",
"\n",
" while True:\n",
" minibatch_size = 500\n",
@ -570,11 +567,8 @@
" })\n",
" if not data: # until we hit the end\n",
" break\n",
" metric = evaluator.test_minibatch(data)\n",
" progress_printer.update(0, data[criterion.arguments[1]].num_samples, metric) # log progress\n",
" loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)\n",
"\n",
" return loss, metric"
" evaluator.test_minibatch(data)\n",
" evaluator.summarize_test_progress()\n"
]
},
{

View file

@ -387,7 +387,8 @@
},
"outputs": [],
"source": [
"def build_graph(noise_shape, image_shape):\n",
"def build_graph(noise_shape, image_shape,\n",
" G_progress_printer, D_progress_printer):\n",
" input_dynamic_axes = [C.Axis.default_batch_axis()]\n",
" Z = Input(noise_shape, dynamic_axes=input_dynamic_axes)\n",
" X_real = Input(image_shape, dynamic_axes=input_dynamic_axes)\n",
@ -420,12 +421,14 @@
" G_trainer = Trainer(\n",
" X_fake,\n",
" (G_loss, None),\n",
" G_learner\n",
" G_learner,\n",
" G_progress_printer\n",
" )\n",
" D_trainer = Trainer(\n",
" D_real,\n",
" (D_loss, None),\n",
" D_learner\n",
" D_learner,\n",
" D_progress_printer\n",
" )\n",
"\n",
" return X_real, X_fake, Z, G_trainer, D_trainer"
@ -447,16 +450,15 @@
"outputs": [],
"source": [
"def train(reader_train):\n",
" X_real, X_fake, Z, G_trainer, D_trainer = \\\n",
" build_graph(g_input_dim, d_input_dim)\n",
" k = 2\n",
" \n",
" # print out loss for each model for upto 50 times\n",
" print_frequency_mbsize = num_minibatches // 50\n",
"\n",
" pp_G = ProgressPrinter(print_frequency_mbsize)\n",
" pp_D = ProgressPrinter(print_frequency_mbsize)\n",
" \n",
" k = 2\n",
" pp_D = ProgressPrinter(print_frequency_mbsize * k)\n",
"\n",
" X_real, X_fake, Z, G_trainer, D_trainer = \\\n",
" build_graph(g_input_dim, d_input_dim, pp_G, pp_D)\n",
" \n",
" input_map = {X_real: reader_train.streams.features}\n",
" for train_step in range(num_minibatches):\n",
@ -473,10 +475,7 @@
" Z_data = noise_sample(minibatch_size)\n",
" batch_inputs = {Z: Z_data}\n",
" G_trainer.train_minibatch(batch_inputs)\n",
" \n",
" pp_G.update_with_trainer(G_trainer)\n",
" pp_D.update_with_trainer(D_trainer)\n",
" \n",
"\n",
" G_trainer_loss = C.utils.get_train_loss(G_trainer)\n",
"\n",
" return Z, X_fake, G_trainer_loss"

View file

@ -325,16 +325,16 @@
" learning_rate_schedule = C.learning_rate_schedule(Param.learning_rate, C.UnitType.sample)\n",
" momentum_schedule = C.momentum_as_time_constant_schedule(Param.momentum_time_constant)\n",
" learner = C.momentum_sgd(z.parameters, learning_rate_schedule, momentum_schedule, True)\n",
" trainer = C.Trainer(z, (cross_entropy, errs), learner)\n",
" progress_writers = None\n",
" if do_print_progress:\n",
" progress_writers = [ProgressPrinter(freq=Param.reporting_interval, tag='Training')]\n",
" trainer = C.Trainer(z, (cross_entropy, errs), learner, progress_writers)\n",
"\n",
" minbatch = 0\n",
" average_cross_entropy = compute_average_cross_entropy(z)\n",
" minbatch_data = [0] # store minibatch values\n",
" cross_entropy_data = [average_cross_entropy] # store cross_entropy values\n",
"\n",
" if do_print_progress:\n",
" progress_printer = ProgressPrinter(freq=Param.reporting_interval, tag='Training') \n",
"\n",
" # Run training\n",
" t_total= 0\n",
"\n",
@ -344,6 +344,8 @@
" label_data, indices = get_random_one_hot_data(Param.minibatch_size)\n",
" arguments = ({labels : label_data})\n",
"\n",
" # If do_print_progress is True, this will automatically print the progress using ProgressPrinter\n",
" # The printed loss numbers are computed using the sampled softmax criterion\n",
" t_start = timeit.default_timer()\n",
" trainer.train_minibatch(arguments)\n",
" t_end = timeit.default_timer()\n",
@ -355,11 +357,6 @@
" if minbatch > 2:\n",
" t_total += t_delta\n",
"\n",
" # Print the progress using progress printer\n",
" # The prints numbers computed the the sampled softmax criterion\n",
" if do_print_progress:\n",
" progress_printer.update_with_trainer(trainer, with_metric=True)\n",
" \n",
" # For comparison also print result using the full criterion\n",
" if minbatch % Param.reporting_interval == int(Param.reporting_interval/2):\n",
" # memorize the progress data for plotting\n",

View file

@ -54,20 +54,21 @@ def ffnet():
lr_per_minibatch=learning_rate_schedule(0.5, UnitType.minibatch)
# Instantiate the trainer object to drive the model training
trainer = Trainer(netout, (ce, pe), sgd(netout.parameters, lr=lr_per_minibatch))
learner = sgd(netout.parameters, lr=lr_per_minibatch)
progress_printer = ProgressPrinter(128)
trainer = Trainer(netout, (ce, pe), learner, progress_printer)
# Get minibatches of training data and perform model training
minibatch_size = 25
pp = ProgressPrinter(128)
for i in range(1024):
features, labels = generate_random_data(
minibatch_size, input_dim, num_output_classes)
# Specify the mapping of input variables in the model to actual
# minibatch data to be trained with
trainer.train_minibatch({input: features, label: labels})
pp.update_with_trainer(trainer)
pp.epoch_summary()
trainer.summarize_training_progress()
test_features, test_labels = generate_random_data(
minibatch_size, input_dim, num_output_classes)
avg_error = trainer.test_minibatch(

View file

@ -50,20 +50,17 @@ def ffnet(optimizer):
# Instantiate the trainer object to drive the model training
lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
trainer = C.Trainer(z, (ce, pe), [optimizer(z.parameters, lr_per_minibatch)])
progress_printer = ProgressPrinter(0)
trainer = C.Trainer(z, (ce, pe), [optimizer(z.parameters, lr_per_minibatch)], progress_printer)
# Get minibatches of training data and perform model training
minibatch_size = 25
num_minibatches_to_train = 63
pp = ProgressPrinter(0)
for i in range(num_minibatches_to_train):
train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
# Specify the mapping of input variables in the model to actual minibatch data to be trained with
trainer.train_minibatch({features : train_features, label : labels})
pp.update_with_trainer(trainer)
last_avg_error = pp.avg_loss_since_start()
test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
avg_error = trainer.test_minibatch({features : test_features, label : test_labels})

View file

@ -4,12 +4,12 @@
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import sys
import time
from cntk import cntk_py
def _warn_deprecated(message):
from warnings import warn
warn('DEPRECATED: ' + message, DeprecationWarning, stacklevel=2)
@ -26,32 +26,33 @@ def _avg(numerator, denominator):
# TODO: Let's switch to import logging in the future instead of print. [ebarsoum]
class ProgressPrinter(cntk_py.ProgressWriter):
'''
Allows printing various training time statistics (e.g. loss and metric) and printing them as training progresses.
Allows printing various statistics (e.g. loss and metric) as training/evaluation progresses.
Args:
freq (`int` or `None`, default `None`): determines how often printing of training progress will occur.
A value of 0 means a geometric schedule (1,2,4,...).
A value > 0 means an arithmetic schedule (print for minibatch number: ``freq``,
print for minibatch number: 2*``freq``, print for minibatch number: 3*``freq``,...).
A value of None means no per-minibatch log.
first (`int`, default 0): Only start printing after the training minibatch number is greater than or equal to
``first``.
tag (`string`, default EmptyString): prepend minibatch log lines with your own string
log_to_file (`string` or `None`, default `None`): if None, output log data to stdout.
If a string is passed, the string is the path to a file for log data.
rank (`int` or `None`, default `None`): set this to distributed.rank if you are using distributed
parallelism -- each rank's log will go to a separate file.
gen_heartbeat (`bool`, default `False`): If True, output a progress message every 10 seconds or so to stdout.
num_epochs (`int`, default 300): The total number of epochs to be trained. Used for some metadata.
This parameter is optional.
test_freq (`int` or `None`, default `None`): similar to ``freq``, but applies to printing intermediate
test results.
test_first (`int`, default 0): similar to ``first``, but applies to printing intermediate test results.
'''
def __init__(self, freq=None, first=0, tag='', log_to_file=None, rank=None, gen_heartbeat=False, num_epochs=300,
test_freq=None, test_first=0):
'''
Constructor.
Args:
freq (`int` or `None`, default `None`): determines how often
printing will occur. The value of 0 means an geometric
schedule (1,2,4,...). A value > 0 means a arithmetic schedule
(a log print for minibatch number: ``freq``, a log print for minibatch number: 2*``freq``,
a log print for minibatch number: 3*``freq``,...), and a value of None means no per-minibatch log.
first (`int`, default 0): Only start logging after the minibatch number is greater or equal to ``first``.
tag (`string`, default EmptyString): prepend minibatch log lines with your own string
log_to_file (`string` or `None`, default `None`): if None, output log data to stdout.
If a string is passed, the string is path to a file for log data.
rank (`int` or `None`, default `None`): set this to distributed.rank if you are using distributed
parallelism -- each rank's log will go to separate file.
gen_heartbeat (`bool`, default `False`): If True output a progress message every 10 seconds or so to stdout.
num_epochs (`int`, default 300): The total number of epochs to be trained. Used for some metadata.
This parameter is optional.
test_freq (`int` or `None`, default `None`): similar to ``freq``, but applies to printing intermediate
test results.
test_first (`int`, default 0): similar to ``first``, but applies to printing intermediate test results.
'''
if freq is None:
freq = sys.maxsize
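As an illustration of the arguments documented above, a ProgressPrinter that logs every 100 minibatches to a file might be constructed as follows (a sketch only; the values are arbitrary and not taken from this commit):

from cntk.utils import ProgressPrinter

# Sketch only: the values below are arbitrary examples of the documented arguments.
progress_printer = ProgressPrinter(freq=100,                 # arithmetic schedule: log at minibatch 100, 200, 300, ...
                                   first=10,                  # suppress per-minibatch logging before minibatch 10
                                   tag='Training',            # prefix for every log line
                                   log_to_file='train.log',   # write to this file instead of stdout
                                   num_epochs=30)             # optional metadata about the planned run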
@@ -289,7 +290,7 @@ class ProgressPrinter(cntk_py.ProgressWriter):
def update_with_trainer(self, trainer, with_metric=False):
'''
DEPRECATED. Use :func:`cntk.utils.ProgressPrinter.update_training` instead.
DEPRECATED.
Update the current loss, the minibatch size and optionally the metric using the information from the
``trainer``.
@@ -300,7 +301,8 @@ class ProgressPrinter(cntk_py.ProgressWriter):
'''
if self.total_updates == 0:
# Only warn once to avoid flooding with warnings.
_warn_deprecated('Use ProgressPrinter.update_progress() instead.')
_warn_deprecated('Inefficient. '
'Please pass an instance of ProgressPrinter to Trainer upon construction.')
if trainer is not None and trainer.previous_minibatch_sample_count != 0:
self.update(
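The migration the new warning asks for, sketched with placeholder names:

from cntk import Trainer
from cntk.utils import ProgressPrinter

# Deprecated pattern: push updates into the printer by hand after every minibatch.
#   progress_printer.update_with_trainer(trainer, with_metric=True)

# Recommended pattern (sketch; z, ce, pe, learner, and data are placeholders for a
# model Function, criterion pair, learner, and minibatch dictionary):
progress_printer = ProgressPrinter(tag='Training')
trainer = Trainer(z, (ce, pe), learner, progress_printer)
trainer.train_minibatch(data)                # the attached printer is updated automatically
trainer.summarize_training_progress()        # prints the per-epoch summary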
@@ -389,25 +391,25 @@ class ProgressPrinter(cntk_py.ProgressWriter):
class TensorBoardProgressWriter(cntk_py.ProgressWriter):
'''
Allows tracking various training time statistics (e.g. loss and metric) and write them as TensorBoard event files.
Allows writing various statistics (e.g. loss and metric) to TensorBoard event files during training/evaluation.
The generated files can be opened in TensorBoard to visualize the progress.
Args:
freq (`int` or `None`, default `None`): frequency at which training progress is written.
For example, the value of 2 will cause the progress to be logged every second time when
`:func:cntk.util.TensorBoardFileWriter.update_with_trainer` is invoked.
None indicates that progress is logged only when
`:func:cntk.util.TensorBoardFileWriter.summarize_progress` is invoked.
Must be a positive integer otherwise.
log_dir (`string`, default '.'): directory in which to create a TensorBoard event file.
rank (`int` or `None`, default `None`): rank of a worker when using distributed training, or `None` if
training locally. If not `None`, event files will be created only by rank 0.
model (:class:`cntk.ops.Function` or `None`, default `None`): model graph to plot.
'''
def __init__(self, freq=None, log_dir='.', rank=None, model=None):
'''
Constructor.
Args:
freq (`int` or `None`, default `None`): frequency at which progress is logged.
For example, the value of 2 will cause the progress to be logged every second time when
`:func:cntk.util.TensorBoardFileWriter.update_with_trainer` is invoked.
None indicates that progress is logged only when
`:func:cntk.util.TensorBoardFileWriter.summarize_progress` is invoked.
Must be a positive integer otherwise.
log_dir (`string`, default '.'): directory where to create a TensorBoard event file.
rank (`int` or `None`, default `None`): rank of a worker when using distributed training, or `None` if
training locally. If not `None`, event files will be created in log_dir/rank[rank] rather than log_dir.
model (:class:`cntk.ops.Function` or `None`, default `None`): model graph to plot.
'''
if freq is None:
freq = sys.maxsize
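A hedged sketch combining this writer with a ProgressPrinter, mirroring how the examples in this commit attach the printer to the Trainer (the log directory and the model/criterion/learner names are placeholders):

from cntk import Trainer
from cntk.utils import ProgressPrinter, TensorBoardProgressWriter

# Sketch only: 'log' is an arbitrary directory; z, ce, pe, learner are
# placeholders for a model Function, criterion pair, and learner.
tensorboard_writer = TensorBoardProgressWriter(freq=10,        # write every 10th update
                                               log_dir='log',  # event files are created here
                                               model=z)        # also record the model graph
writers = [ProgressPrinter(freq=10, tag='Training'), tensorboard_writer]
trainer = Trainer(z, (ce, pe), learner, writers)               # both writers receive progress updates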

View file

@@ -39,20 +39,22 @@ def ffnet():
# Instantiate the trainer object to drive the model training
lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)])
progress_printer = ProgressPrinter(0)
trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])
# Get minibatches of training data and perform model training
minibatch_size = 25
num_minibatches_to_train = 1024
pp = ProgressPrinter(0)
aggregate_loss = 0.0
for i in range(num_minibatches_to_train):
train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
# Specify the mapping of input variables in the model to actual minibatch data to be trained with
trainer.train_minibatch({features : train_features, label : labels})
pp.update_with_trainer(trainer)
sample_count = trainer.previous_minibatch_sample_count
aggregate_loss += trainer.previous_minibatch_loss_average * sample_count
last_avg_error = pp.avg_loss_since_start()
last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen
test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
avg_error = trainer.test_minibatch({features : test_features, label : test_labels})

View file

@@ -65,17 +65,17 @@ def train_sequence_classifier(debug_output=False):
lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
# Instantiate the trainer object to drive the model training
progress_printer = ProgressPrinter(0)
trainer = Trainer(classifier_output, (ce, pe),
sgd(classifier_output.parameters, lr=lr_per_sample))
sgd(classifier_output.parameters, lr=lr_per_sample),
progress_printer)
# Get minibatches of sequences to train with and perform model training
minibatch_size = 200
pp = ProgressPrinter(0)
for i in range(255):
mb = reader.next_minibatch(minibatch_size, input_map=input_map)
trainer.train_minibatch(mb)
pp.update_with_trainer(trainer, True)
evaluation_average = float(trainer.previous_minibatch_evaluation_average)
loss_average = float(trainer.previous_minibatch_loss_average)