Add -profile option to ResNet and ConvNet example for Philly

This commit is contained in:
KeDengMS 2017-02-09 17:15:53 -08:00
Parent 74fa7f440f
Commit b15978106d
2 changed files with 23 additions and 14 deletions

View file

@@ -114,7 +114,7 @@ def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_
return cntk.Trainer(network['output'], network['ce'], network['pe'], parameter_learner)
# Train and test
-def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
+def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore, profiling=False):
# define mapping from input streams to network inputs
input_map = {
@@ -128,7 +128,7 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
-# checkpoint_frequency = epoch_size,
+checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, "ConvNet_CIFAR10_DataAug"),
# save_all_checkpoints = False,
progress_frequency=epoch_size,
@@ -137,13 +137,19 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
# cv_frequency = epoch_size,
restore=restore)
-# Train all minibatches
+# Train all minibatches
+if profiling:
+    cntk.start_profiler(sync_gpu=True)
training_session.train()
+if profiling:
+    cntk.stop_profiler()
# Train and evaluate the network.
def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64, epoch_size=50000, num_quantization_bits=32,
block_size=3200, warm_up=0, max_epochs=2, restore=False, log_to_file=None,
-num_mbs_per_log=None, gen_heartbeat=False):
+num_mbs_per_log=None, gen_heartbeat=False, profiling=False):
_cntk_py.set_computation_network_trace_level(0)
progress_printer = cntk.utils.ProgressPrinter(
@@ -158,7 +164,7 @@ def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64,
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
-train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
+train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore, profiling)
if __name__=='__main__':
@@ -177,6 +183,7 @@ if __name__=='__main__':
parser.add_argument('-b', '--block_samples', type=int, help="Number of samples per block for block momentum (BM) distributed learner (if 0 BM learner is not used)", required=False, default=None)
parser.add_argument('-r', '--restart', help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)', action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)
+parser.add_argument('-profile', '--profile', help="Turn on profiling", action='store_true', default=False)
args = vars(parser.parse_args())
@@ -204,7 +211,8 @@ if __name__=='__main__':
restore=not args['restart'],
log_to_file=args['logdir'],
num_mbs_per_log=100,
-gen_heartbeat=False)
+gen_heartbeat=False,
+profiling=args['profile'])
finally:
cntk.distributed.Communicator.finalize()
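Both scripts end up with the same pattern: when the new flag is set, training_session.train() is bracketed by the CNTK profiler calls shown in the hunks above. A minimal standalone sketch of that pattern, assuming a hypothetical run_training wrapper (the wrapper name and its arguments are illustrative, not part of this commit):

import cntk

def run_training(training_session, profiling=False):
    # Start the profiler only when requested; sync_gpu=True asks the profiler
    # to synchronize with the GPU when taking timings.
    if profiling:
        cntk.start_profiler(sync_gpu=True)

    training_session.train()

    # Turn the profiler off again once training has finished.
    if profiling:
        cntk.stop_profiler()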

View file

@@ -97,7 +97,7 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
return Trainer(network['output'], network['ce'], network['pe'], learner)
# Train and test
-def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, profiler_dir=None):
+def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, profiling=False):
# define mapping from input streams to network inputs
input_map = {
@@ -111,22 +111,23 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
model_inputs_to_mb_source_mapping = input_map,
checkpoint_frequency = epoch_size,
checkpoint_filename="ResNet_CIFAR10_DataAug",
progress_frequency=epoch_size,
cv_source=test_source,
cv_mb_size_schedule=cntk.minibatch_size_schedule(16),
restore=False)
-if profiler_dir:
-    start_profiler(profiler_dir, True)
+if profiling:
+    start_profiler(sync_gpu=True)
training_session.train()
-if profiler_dir:
+if profiling:
    stop_profiler()
# Train and evaluate the network.
-def resnet_cifar10(train_data, test_data, mean_data, network_name, epoch_size, num_quantization_bits=32, block_size=3200, warm_up=0, max_epochs=5, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False, scale_up=False, profiler_dir=None):
+def resnet_cifar10(train_data, test_data, mean_data, network_name, epoch_size, num_quantization_bits=32, block_size=3200, warm_up=0, max_epochs=5, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False, scale_up=False, profiling=False):
set_computation_network_trace_level(0)
@@ -148,7 +149,7 @@ def resnet_cifar10(train_data, test_data, mean_data, network_name, epoch_size, n
trainer = create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
-train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, profiler_dir)
+train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, profiling)
if __name__=='__main__':
@@ -167,7 +168,7 @@ if __name__=='__main__':
parser.add_argument('-b', '--block_samples', type=int, help="Number of samples per block for block momentum (BM) distributed learner (if 0 BM learner is not used)", required=False, default=None)
parser.add_argument('-a', '--distributed_after', help='Number of samples to train with before running distributed', type=int, required=False, default='0')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)
-parser.add_argument('-p', '--profiler_dir', help='directory for saving profiler output', required=False, default=None)
+parser.add_argument('-profile', '--profile', help="Turn on profiling", action='store_true', default=False)
args = vars(parser.parse_args())
@@ -204,7 +205,7 @@
max_epochs=epochs,
scale_up=scale_up,
log_to_file=args['logdir'],
-profiler_dir=args['profiler_dir'])
+profiling=args['profile'])
finally:
# Must call MPI finalize when process exit
Communicator.finalize()
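In the ResNet script this also retires the old -p/--profiler_dir option, which passed an output directory via start_profiler(profiler_dir, True), in favor of the same boolean -profile switch used in the ConvNet example, so both scripts now expose the same profiling control. A small sketch of how the flag travels from the command line into the training code; train_example is a stand-in for the examples' entry points (convnet_cifar10_dataaug / resnet_cifar10) and is illustrative only:

import argparse

def train_example(profiling=False):
    # Placeholder for the real entry points, which forward `profiling`
    # down to train_and_test().
    print("profiling enabled:", profiling)

parser = argparse.ArgumentParser()
# Boolean switch added by this commit; with store_true, profiling stays off by default.
parser.add_argument('-profile', '--profile', help="Turn on profiling", action='store_true', default=False)
args = vars(parser.parse_args())

# The parsed flag becomes the profiling keyword argument of the entry point.
train_example(profiling=args['profile'])

With this in place, profiling is enabled for either example simply by adding -profile to its command line.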