Merge remote-tracking branch 'origin/master' into thilow/SimpleExampleSSM

Thilo Will 2017-02-09 12:01:11 +01:00
Parents: 50e8e10406 236fa8ac9f
Commit: 98c2fbdcd1
70 changed files with 1979 additions and 44555 deletions

.gitignore (vendored)
View file

@@ -269,3 +269,4 @@ Tutorials/slots.wl
/packages
/CNTK.VC.db
/CNTK.VC.VC.opendb
.cache

View file

@@ -6,8 +6,6 @@ DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros="$ConfigDir$/Macros.ndl"
precision = "float"
deviceId = "Auto"
@@ -88,7 +86,7 @@ Train = {
SGD = {
epochSize = 0
minibatchSize = 256
# CNTK weights new gradient by (1-momentum) for unit gain, thus we multiply Caffe's learning rate by (1-momentum)
# CNTK weights new gradient by (1-momentum) for unit gain, thus we divide Caffe's learning rate by (1-momentum)
learningRatesPerMB = 0.1*25:0.01*25:0.001*25:0.0001*25:0.00001
momentumPerMB = 0.9
maxEpochs = 112
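To make the corrected comment concrete, a quick arithmetic sketch (Caffe's AlexNet base learning rate of 0.01 is an assumption taken from the reference solver, not stated in this config):

# Minimal sketch, assuming Caffe's base learning rate is 0.01.
momentum = 0.9                              # momentumPerMB above
caffe_lr = 0.01                             # assumed Caffe base learning rate
cntk_lr_per_mb = caffe_lr / (1 - momentum)
print(cntk_lr_per_mb)                       # 0.1, the first value in learningRatesPerMB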

View file

@@ -17,7 +17,7 @@ from cntk.ops import *
from cntk.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
from cntk.blocks import Placeholder, Block
from cntk.layers import Convolution, Activation, MaxPooling, Dense, Dropout, default_options
from cntk.layers import Convolution2D, Activation, MaxPooling, Dense, Dropout, default_options
from cntk.models import Sequential
from cntk.initializer import normal
@@ -32,6 +32,7 @@ image_height = 227
image_width = 227
num_channels = 3 # RGB
num_classes = 1000
model_name = "AlexNet.model"
# Create a minibatch source.
def create_image_mb_source(map_file, is_training, total_number_of_samples):
@@ -58,6 +59,7 @@ def create_image_mb_source(map_file, is_training, total_number_of_samples):
ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize = is_training,
epoch_size=total_number_of_samples,
multithreaded_deserializer = True)
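The new `randomize = is_training` argument ties shuffling to the training phase; a usage sketch mirroring how this script calls it further down (map-file paths are placeholders):

# Sketch: the training source shuffles and sweeps a fixed sample budget;
# the test source reads sequentially for exactly one pass over the data.
train_source = create_image_mb_source('train_map.txt', is_training=True,
                                      total_number_of_samples=112 * 1281167)
test_source  = create_image_mb_source('val_map.txt', is_training=False,
                                      total_number_of_samples=FULL_DATA_SWEEP)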
@@ -95,21 +97,21 @@ def create_alexnet():
with default_options(activation=None, pad=True, bias=True):
z = Sequential([
# we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
Convolution((11,11), 96, init=normal(0.01), pad=False, strides=(4,4), name='conv1'),
Convolution2D((11,11), 96, init=normal(0.01), pad=False, strides=(4,4), name='conv1'),
Activation(activation=relu, name='relu1'),
LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
MaxPooling((3,3), (2,2), name='pool1'),
Convolution((5,5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
Convolution2D((5,5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
Activation(activation=relu, name='relu2'),
LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
MaxPooling((3,3), (2,2), name='pool2'),
Convolution((3,3), 384, init=normal(0.01), name='conv3'),
Convolution2D((3,3), 384, init=normal(0.01), name='conv3'),
Activation(activation=relu, name='relu3'),
Convolution((3,3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
Convolution2D((3,3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
Activation(activation=relu, name='relu4'),
Convolution((3,3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
Convolution2D((3,3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
Activation(activation=relu, name='relu5'),
MaxPooling((3,3), (2,2), name='pool5'),
@@ -123,8 +125,9 @@ def create_alexnet():
])(input)
# loss and metric
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)
pe5 = classification_error(z, label_var, topN=5)
log_number_of_parameters(z) ; print()
@@ -133,6 +136,7 @@ def create_alexnet():
'label': label_var,
'ce' : ce,
'pe' : pe,
'pe5': pe5,
'output': z
}
@@ -145,9 +149,10 @@ def create_trainer(network, epoch_size, num_quantization_bits):
l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
# Create learner
local_learner = cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight)
# Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency
parameter_learner = data_parallel_distributed_learner(
cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight),
local_learner,
num_quantization_bits=num_quantization_bits,
distributed_after=0)
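`num_quantization_bits` selects the gradient precision used by the data-parallel learner (1 enables 1-bit SGD, 32 keeps full precision); a usage sketch, assuming `network = create_alexnet()` as above:

# Sketch: both variants wrap the same local momentum_sgd learner;
# only the aggregation precision differs.
trainer_full = create_trainer(network, epoch_size=1281167, num_quantization_bits=32)
trainer_1bit = create_trainer(network, epoch_size=1281167, num_quantization_bits=1)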
@@ -155,7 +160,7 @@ def create_trainer(network, epoch_size, num_quantization_bits):
return cntk.Trainer(network['output'], network['ce'], network['pe'], parameter_learner)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size):
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
# define mapping from input streams to network inputs
input_map = {
@@ -163,36 +168,27 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(train_source, trainer,
cntk.minibatch_size_schedule(minibatch_size), progress_printer, input_map, os.path.join(model_path, "AlexNet_"), epoch_size)
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, model_name),
# save_all_checkpoints = True,
progress_frequency = epoch_size,
cv_source = test_source,
cv_mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore = restore)
# Train all minibatches
training_session.train()
# process minibatches and evaluate the model
metric_numer = 0
metric_denom = 0
minibatch_index = 0
while True:
data = test_source.next_minibatch(minibatch_size, input_map=input_map)
if not data: break
local_mb_samples=data[network['label']].num_samples
metric_numer += trainer.test_minibatch(data) * local_mb_samples
metric_denom += local_mb_samples
minibatch_index += 1
fin_msg = "Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)
progress_printer.end_progress_print(fin_msg)
print("")
print(fin_msg)
print("")
return metric_numer/metric_denom
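The removed loop averages per-minibatch error rates weighted by sample count, because the last minibatch of a sweep can be smaller than the rest; a small numeric sketch of the same aggregation (values hypothetical):

# Sketch: weighting per-minibatch error rates by their sample counts.
errs  = [0.50, 0.25]                        # hypothetical test_minibatch results
sizes = [256, 64]                           # hypothetical minibatch sample counts
metric = sum(e * n for e, n in zip(errs, sizes)) / sum(sizes)
print(metric)                               # 0.45, not the unweighted mean 0.375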
# Train and evaluate the network.
def alexnet_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=256, epoch_size = 1281167, max_epochs=112,
log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
restore=True, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=True):
_cntk_py.set_computation_network_trace_level(0)
progress_printer = ProgressPrinter(
@@ -207,35 +203,46 @@ def alexnet_train_and_eval(train_data, test_data, num_quantization_bits=32, mini
trainer = create_trainer(network, epoch_size, num_quantization_bits)
train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-datadir', help='specify the location of your data');
parser.add_argument('-logdir', help='specify where the training log will be saved');
parser.add_argument('-outputdir', help='specify where the output model/checkpoint files shall be saved');
parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located', required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, required=False, default='112')
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, required=False, default='256')
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, required=False, default='1281167')
parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int, required=False, default='32')
parser.add_argument('-r', '--restart', help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)', action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)
args = vars(parser.parse_args())
if args['datadir'] != None:
data_path = args['datadir']
if args['logdir'] != None:
log_dir = args['logdir']
if args['outputdir'] != None:
if args['outputdir'] is not None:
model_path = args['outputdir'] + "/models"
if args['datadir'] is not None:
data_path = args['datadir']
if args['logdir'] is not None:
log_dir = args['logdir']
if args['device'] is not None:
cntk.device.set_default_device(cntk.device.gpu(args['device']))
train_data=os.path.join(data_path, 'train_map.txt')
test_data=os.path.join(data_path, 'val_map.txt')
alexnet_train_and_eval(train_data, test_data,
num_quantization_bits=32,
max_epochs=112,
log_to_file=log_dir,
num_mbs_per_log=500,
gen_heartbeat=True)
Communicator.finalize()
try:
alexnet_train_and_eval(train_data, test_data,
minibatch_size=args['minibatch_size'],
epoch_size=args['epoch_size'],
num_quantization_bits=args['quantized_bits'],
max_epochs=args['num_epochs'],
restore=not args['restart'],
log_to_file=args['logdir'],
num_mbs_per_log=200,
gen_heartbeat=True)
finally:
cntk.distributed.Communicator.finalize()
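With the `finally` in place the MPI communicator is shut down even if training fails. Following the mpiexec pattern the other distributed examples document, a typical invocation would be `mpiexec -n 4 python AlexNet_ImageNet_Distributed.py -q 1 -r` (four workers, 1-bit gradient aggregation, restart from scratch); the script filename is assumed here, since this diff does not show it.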

View file

@@ -45,6 +45,7 @@ def create_image_mb_source(map_file, mean_file, train, total_number_of_samples):
cntk.io.ImageDeserializer(map_file, cntk.io.StreamDefs(
features = cntk.io.StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = cntk.io.StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize=train,
epoch_size=total_number_of_samples,
multithreaded_deserializer = True)
@@ -105,15 +106,15 @@ def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_
l2_regularization_weight=l2_reg_weight)
if block_size != None:
learner = cntk.distributed.block_momentum_distributed_learner(local_learner, block_size=block_size)
parameter_learner = cntk.distributed.block_momentum_distributed_learner(local_learner, block_size=block_size)
else:
learner = cntk.distributed.data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
parameter_learner = cntk.distributed.data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
# Create trainer
return cntk.Trainer(network['output'], network['ce'], network['pe'], learner)
return cntk.Trainer(network['output'], network['ce'], network['pe'], parameter_learner)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, epoch_size):
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
# define mapping from input streams to network inputs
input_map = {
@@ -125,20 +126,23 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(64),
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, "ConvNet_CIFAR10_DataAug"),
# save_all_checkpoints = False,
progress_frequency=epoch_size,
cv_source = test_source,
cv_mb_size_schedule=cntk.minibatch_size_schedule(16),
restore=False)
cv_mb_size_schedule=cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore=restore)
# Train all minibatches
training_session.train()
# Train and evaluate the network.
def convnet_cifar10_dataaug(train_data, test_data, mean_data, epoch_size=50000, num_quantization_bits=32,
block_size=3200, warm_up=0, max_epochs=2, log_to_file=None,
def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64, epoch_size=50000, num_quantization_bits=32,
block_size=3200, warm_up=0, max_epochs=2, restore=False, log_to_file=None,
num_mbs_per_log=None, gen_heartbeat=False):
_cntk_py.set_computation_network_trace_level(0)
@@ -154,7 +158,7 @@ def convnet_cifar10_dataaug(train_data, test_data, mean_data, epoch_size=50000,
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, epoch_size)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
if __name__=='__main__':
@@ -165,20 +169,25 @@ if __name__=='__main__':
parser.add_argument('-datadir', '--datadir', help='Data directory where the CIFAR dataset is located', required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
parser.add_argument('-e', '--epochs', help='Total number of epochs to train', type=int, required=False, default='160')
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, required=False, default='160')
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, required=False, default='64')
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, required=False, default='50000')
parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int, required=False, default='32')
parser.add_argument('-a', '--distributed_after', help='Number of samples to train with before running distributed', type=int, required=False, default='0')
parser.add_argument('-b', '--block_samples', type=int, help="Number of samples per block for block momentum (BM) distributed learner (if 0 BM learner is not used)", required=False, default=None)
parser.add_argument('-r', '--restart', help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)', action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)
args = vars(parser.parse_args())
if args['outputdir'] is not None:
model_path = args['outputdir'] + "/models"
if args['device'] is not None:
cntk.device.set_default_device(cntk.device.gpu(args['device']))
if args['datadir'] is not None:
data_path = args['datadir']
if args['logdir'] is not None:
log_dir = args['logdir']
if args['device'] is not None:
cntk.device.set_default_device(cntk.device.gpu(args['device']))
mean_data=os.path.join(data_path, 'CIFAR-10_mean.xml')
train_data=os.path.join(data_path, 'train_map.txt')
@@ -186,14 +195,16 @@ if __name__=='__main__':
try:
convnet_cifar10_dataaug(train_data, test_data, mean_data,
epoch_size=50000,
minibatch_size=args['minibatch_size'],
epoch_size=args['epoch_size'],
num_quantization_bits=args['quantized_bits'],
block_size=args['block_samples'],
warm_up=args['distributed_after'],
max_epochs=args['epochs'],
max_epochs=args['num_epochs'],
restore=not args['restart'],
log_to_file=args['logdir'],
num_mbs_per_log=10,
gen_heartbeat=True)
num_mbs_per_log=100,
gen_heartbeat=False)
finally:
cntk.distributed.Communicator.finalize()
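With these flags the learner choice is data-driven: passing `-b 3200` selects the block-momentum learner with 3200 samples per block, while omitting it (block_samples defaults to None) keeps quantized data-parallel SGD with `-q` bits. A typical run would be `mpiexec -n 4 python ConvNet_CIFAR10_DataAug_Distributed.py -b 3200`; the script filename is assumed, since this diff does not show it.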

View file

@@ -1,4 +1,4 @@
# Node: ResNet-50 with ImageNet -- 101 layers bottleneck ResNet for image classification
# ResNet-101 with ImageNet -- 101 layers bottleneck ResNet for image classification
# Reference: "Deep Residual Learning for Image Recognition" https://arxiv.org/abs/1512.03385
command = TrainNetwork:BNStatistics:Eval

View file

@@ -1,4 +1,4 @@
# ConvNet applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
# ResNet110 applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
command = TrainConvNet:Eval

View file

@@ -1,4 +1,4 @@
# Node: ResNet-50 with ImageNet -- 152 layers bottleneck ResNet for image classification
# ResNet-152 with ImageNet -- 152 layers bottleneck ResNet for image classification
# Reference: "Deep Residual Learning for Image Recognition" https://arxiv.org/abs/1512.03385
command = TrainNetwork:BNStatistics:Eval

View file

@@ -1,4 +1,4 @@
# ConvNet applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
# ResNet20 applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
command = TrainConvNet:Eval

View file

@@ -1,4 +1,4 @@
# Node: ResNet-50 with ImageNet -- 50 layers bottleneck ResNet for image classification
# ResNet-50 with ImageNet -- 50 layers bottleneck ResNet for image classification
# Reference: "Deep Residual Learning for Image Recognition" https://arxiv.org/abs/1512.03385
command = TrainNetwork:BNStatistics:Eval

View file

@@ -0,0 +1,19 @@
# CNTK Examples: Image/Classification/VGG
## BrainScript
### VGG16_ImageNet.cntk
This is the VGG model that contains 16 layers, which was referred to as `ConvNet configuration D` in the [original paper](https://arxiv.org/pdf/1409.1556v6.pdf).
Run the example from the current folder using:
`cntk configFile=VGG16_ImageNet.cntk`
### VGG19_ImageNet.cntk
This is the VGG model that contains 19 layers, which was referred to as `ConvNet configuration E` in the [original paper](https://arxiv.org/pdf/1409.1556v6.pdf).
Run the example from the current folder using:
`cntk configFile=VGG19_ImageNet.cntk`

View file

@@ -0,0 +1,167 @@
# VGG16 with ImageNet -- 16 layers ConvNet for image classification
# Reference: "Very Deep Convolutional Networks for Large-Scale Image Recognition" https://arxiv.org/abs/1409.1556
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
precision = "float"
deviceId = "Auto"
command = Train:Test
parallelTrain = "true"
traceLevel = 1
numMBsToShowResult = 500
modelPath = "$ModelDir$/VGG16"
stderr = "$OutputDir$/VGG16"
ImageH = 224
ImageW = 224
ImageC = 3
NumLabels = 1000
parallelTrain = true
hyperCompressMemory = true
################################
Train = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = $ImageH$:$ImageW$:$ImageC$
labelDim = $NumLabels$
model = Sequential (
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {128, (3:3), pad = true} : ReLU :
ConvolutionalLayer {128, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
DenseLayer {4096, activation=ReLU} : Dropout :
DenseLayer {4096, activation=ReLU} : Dropout :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
featNorm = features - Splice(Constant(104):Constant(117):Constant(124), axis=3)
labels = Input {labelDim}
# apply model to features
z = model (featNorm)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 128
# CNTK weights new gradient by (1-momentum) for unit gain, thus we divide Caffe's learning rate by (1-momentum)
learningRatesPerMB = 0.1*20:0.01*20:0.001*20:0.0001*10:0.00001
momentumPerMB = 0.9
maxEpochs = 80
gradUpdateType = None
L2RegWeight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
dropoutRate = 0.5
# TODO: try fewer bits?
ParallelTrain = {
parallelizationMethod = "DataParallelSGD"
distributedMBReading = "true"
parallelizationStartEpoch = 1
DataParallelSGD = {
gradientBits = 32
}
}
numMBsToShowResult = 250
}
# Reader
reader = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "randomSide" ; sideRatio = 0.4375:0.875 ; jitterType = "uniRatio" } : # [256, 512] jitter in scale
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
}
})
}
cvreader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "Center" ; sideRatio = 0.5833333 } : # 384 crop to 224
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
}
})
}
}
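Where those crop ratios come from, as a quick sketch (assuming the 224-pixel network input set by ImageH/ImageW above):

# Sketch: sideRatio = network input side / source image side.
target = 224
print(target / 512, target / 256)   # 0.4375 0.875 -> the randomSide jitter range
print(target / 384)                 # 0.5833... -> the Center crop ratio ("384 crop to 224")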
################################
Test = {
action=test
minibatchSize=128
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Reader
reader = {
verbosity = 0
randomize = false
deserializers = (
{
type = "ImageDeserializer" ; module = "ImageReader"
file="$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop"; cropType = "center"; sideRatio = 0.5833333 } : # 384 crop to 224
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = 1000}
}
})
}
}

View file

@@ -0,0 +1,170 @@
# VGG19 with ImageNet -- 19 layers ConvNet for image classification
# Reference: "Very Deep Convolutional Networks for Large-Scale Image Recognition" https://arxiv.org/abs/1409.1556
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
precision = "float"
deviceId = "Auto"
command = Train:Test
parallelTrain = "true"
traceLevel = 1
numMBsToShowResult = 500
modelPath = "$ModelDir$/VGG19"
stderr = "$OutputDir$/VGG19"
ImageH = 224
ImageW = 224
ImageC = 3
NumLabels = 1000
parallelTrain = true
hyperCompressMemory = true
################################
Train = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = $ImageH$:$ImageW$:$ImageC$
labelDim = $NumLabels$
model = Sequential (
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {128, (3:3), pad = true} : ReLU :
ConvolutionalLayer {128, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
ConvolutionalLayer {256, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
ConvolutionalLayer {512, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
DenseLayer {4096, activation=ReLU} : Dropout :
DenseLayer {4096, activation=ReLU} : Dropout :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
featNorm = features - Splice(Constant(104):Constant(117):Constant(124), axis=3)
labels = Input {labelDim}
# apply model to features
z = model (featNorm)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 128
# CNTK weights new gradient by (1-momentum) for unit gain, thus we divide Caffe's learning rate by (1-momentum)
learningRatesPerMB = 0.1*20:0.01*20:0.001*20:0.0001*10:0.00001
momentumPerMB = 0.9
maxEpochs = 80
gradUpdateType = None
L2RegWeight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
dropoutRate = 0.5
# TODO: try fewer bits?
ParallelTrain = {
parallelizationMethod = "DataParallelSGD"
distributedMBReading = "true"
parallelizationStartEpoch = 1
DataParallelSGD = {
gradientBits = 32
}
}
numMBsToShowResult = 250
}
# Reader
reader = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "randomSide" ; sideRatio = 0.4375:0.875 ; jitterType = "uniRatio" } : # [256, 512] jitter in scale
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
}
})
}
cvreader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "Center" ; sideRatio = 0.5833333 } : # 384 crop to 224
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
}
})
}
}
################################
Test = {
action=test
minibatchSize=128
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Reader
reader = {
verbosity = 0
randomize = false
deserializers = (
{
type = "ImageDeserializer" ; module = "ImageReader"
file="$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop"; cropType = "center"; sideRatio = 0.5833333 } : # 384 crop to 224
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = 1000}
}
})
}
}

View file

@@ -1,7 +0,0 @@
m1=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m1)
# Add top-5 error prediction node.
ErrTop5 = ClassificationError(labels, OutputNodes.z, Const(5), tag = "eval")
SaveModel(m1, $NewModel$, format=cntk)

File diff not shown because of its large size. Load diff

View file

@@ -1,55 +0,0 @@
# Fully-connected layer with ReLU activation.
DnnReLULayer(inDim, outDim, x, wScale, bValue)
[
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
b = Parameter(outDim, init = fixedValue, value = bValue)
t = Times(W, x)
z = Plus(t, b)
y = RectifiedLinear(z)
]
# Fully-connected layer with batch normalization and ReLU activation.
DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
[
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
b = Parameter(outDim, 1, init = fixedValue, value = bValue)
sc = Parameter(outDim, 1, init = Gaussian, initValueScale = 0.01)
m = Parameter(outDim, 1, init = fixedValue, value = 0, learningRateMultiplier = 0)
v = Parameter(outDim, 1, init = fixedValue, value = 0, learningRateMultiplier = 0)
t = Times(W, x)
bn = BatchNormalization(t, sc, b, m, v, spatial = false)
y = RectifiedLinear(bn)
]
# Fully-connected layer.
DnnLayer(inDim, outDim, x, wScale, bValue)
[
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
b = Parameter(outDim, init = fixedValue, value = bValue)
t = Times(W, x)
z = Plus(t, b)
]
# Convolutional layer with ReLU activation.
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
[
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
z = Plus(c, b);
y = RectifiedLinear(z);
]
# Convolutional layer with batch normalization and ReLU activation.
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
[
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
m = Parameter(outMap, 1, init = fixedValue, value = 0, learningRateMultiplier = 0)
v = Parameter(outMap, 1, init = fixedValue, value = 0, learningRateMultiplier = 0)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, v, spatial = true, imageLayout = "cudnn")
y = RectifiedLinear(bn);
]

View file

@@ -0,0 +1,27 @@
# CNTK Examples: Image/Classification/VGG
## Python
### VGG16_ImageNet_Distributed.py
This is the VGG model that contains 16 layers, which was referred to as `ConvNet configuration D` in the [original paper](https://arxiv.org/pdf/1409.1556v6.pdf).
Run the example from the current folder using:
`python VGG16_ImageNet_Distributed.py`
To run it in a distributed manner, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines#32-python). For example, the command for distributed training on the same machine (with multiple GPUs) on Windows is:
`mpiexec -n <#workers> python VGG16_ImageNet_Distributed.py`
### VGG19_ImageNet_Distributed.py
This is the VGG model that contains 19 layers, which was referred to as `ConvNet configuration E` in the [original paper](https://arxiv.org/pdf/1409.1556v6.pdf).
Run the example from the current folder using:
`python VGG19_ImageNet_Distributed.py`
To run it in a distributed manner, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines#32-python). For example, the command for distributed training on the same machine (with multiple GPUs) on Windows is:
`mpiexec -n <#workers> python VGG19_ImageNet_Distributed.py`

View file

@@ -0,0 +1,242 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import math
import argparse
import numpy as np
import cntk
import _cntk_py
from cntk.utils import *
from cntk.ops import *
from cntk.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
from cntk.blocks import Placeholder, Block
from cntk.layers import Convolution2D, Activation, MaxPooling, Dense, Dropout, default_options
from cntk.models import Sequential, LayerStack
from cntk.initializer import normal
# default Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "DataSets", "ImageNet")
model_path = os.path.join(abs_path, "Models")
log_dir = None
# model dimensions
image_height = 224
image_width = 224
num_channels = 3 # RGB
num_classes = 1000
model_name = "VGG16.model"
cntk.cntk_py.enable_hyper_memory_compress()
# Create a minibatch source.
def create_image_mb_source(map_file, is_training, total_number_of_samples):
if not os.path.exists(map_file):
raise RuntimeError("File '%s' does not exist." %map_file)
# transformation pipeline for the features has jitter/crop only when training
transforms = []
if is_training:
transforms += [
ImageDeserializer.crop(crop_type='randomside', side_ratio='0.4375:0.875', jitter_type='uniratio') # train uses jitter
]
else:
transforms += [
ImageDeserializer.crop(crop_type='center', side_ratio=0.5833333) # test has no jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
]
# deserializer
return MinibatchSource(
ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize = is_training,
epoch_size=total_number_of_samples,
multithreaded_deserializer = True)
# Create the network.
def create_vgg16():
# Input variables denoting the features and label data
feature_var = input_variable((num_channels, image_height, image_width))
label_var = input_variable((num_classes))
# apply model to input
# remove mean value
input = minus(feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input')
with default_options(activation=None, pad=True, bias=True):
z = Sequential([
# we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
LayerStack(2, lambda i: [
Convolution2D((3,3), 64, name='conv1_{}'.format(i)),
Activation(activation=relu, name='relu1_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool1'),
LayerStack(2, lambda i: [
Convolution2D((3,3), 128, name='conv2_{}'.format(i)),
Activation(activation=relu, name='relu2_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool2'),
LayerStack(3, lambda i: [
Convolution2D((3,3), 256, name='conv3_{}'.format(i)),
Activation(activation=relu, name='relu3_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool3'),
LayerStack(3, lambda i: [
Convolution2D((3,3), 512, name='conv4_{}'.format(i)),
Activation(activation=relu, name='relu4_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool4'),
LayerStack(3, lambda i: [
Convolution2D((3,3), 512, name='conv5_{}'.format(i)),
Activation(activation=relu, name='relu5_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool5'),
Dense(4096, name='fc6'),
Activation(activation=relu, name='relu6'),
Dropout(0.5, name='drop6'),
Dense(4096, name='fc7'),
Activation(activation=relu, name='relu7'),
Dropout(0.5, name='drop7'),
Dense(num_classes, name='fc8')
])(input)
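`LayerStack(n, lambda i: [...])` applies the constructor for i = 0..n-1 and chains the results, giving each repetition a distinct name via the format strings; a sketch of what the first 64-filter block above unrolls to:

# Sketch: LayerStack(2, lambda i: [...]) for the conv1 block is
# equivalent to listing the layers explicitly:
unrolled = [
    Convolution2D((3,3), 64, name='conv1_0'),
    Activation(activation=relu, name='relu1_0'),
    Convolution2D((3,3), 64, name='conv1_1'),
    Activation(activation=relu, name='relu1_1'),
]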
# loss and metric
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)
pe5 = classification_error(z, label_var, topN=5)
log_number_of_parameters(z) ; print()
return {
'feature': feature_var,
'label': label_var,
'ce' : ce,
'pe' : pe,
'pe5': pe5,
'output': z
}
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits):
# Set learning parameters
lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
lr_schedule = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
mm_schedule = cntk.learner.momentum_schedule(0.9)
l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
# Create learner
local_learner = cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight)
# Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency
parameter_learner = data_parallel_distributed_learner(
local_learner,
num_quantization_bits=num_quantization_bits,
distributed_after=0)
# Create trainer
return cntk.Trainer(network['output'], network['ce'], network['pe'], parameter_learner)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
# define mapping from input streams to network inputs
input_map = {
network['feature']: train_source.streams.features,
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, model_name),
# save_all_checkpoints = True,
progress_frequency = epoch_size,
cv_source = test_source,
cv_mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore = restore)
# Train all minibatches
training_session.train()
# Train and evaluate the network.
def vgg16_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128, epoch_size = 1281167, max_epochs=80,
restore=True, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
_cntk_py.set_computation_network_trace_level(0)
progress_printer = ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
rank=Communicator.rank(),
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
network = create_vgg16()
trainer = create_trainer(network, epoch_size, num_quantization_bits)
train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located', required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, required=False, default='80')
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, required=False, default='128')
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, required=False, default='1281167')
parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int, required=False, default='32')
parser.add_argument('-r', '--restart', help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)', action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)
args = vars(parser.parse_args())
if args['outputdir'] is not None:
model_path = args['outputdir'] + "/models"
if args['datadir'] is not None:
data_path = args['datadir']
if args['logdir'] is not None:
log_dir = args['logdir']
if args['device'] is not None:
cntk.device.set_default_device(cntk.device.gpu(args['device']))
train_data=os.path.join(data_path, 'train_map.txt')
test_data=os.path.join(data_path, 'val_map.txt')
try:
vgg16_train_and_eval(train_data, test_data,
minibatch_size=args['minibatch_size'],
epoch_size=args['epoch_size'],
num_quantization_bits=args['quantized_bits'],
max_epochs=args['num_epochs'],
restore=not args['restart'],
log_to_file=args['logdir'],
num_mbs_per_log=200,
gen_heartbeat=True)
finally:
cntk.distributed.Communicator.finalize()

View file

@@ -0,0 +1,242 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import math
import argparse
import numpy as np
import cntk
import _cntk_py
from cntk.utils import *
from cntk.ops import *
from cntk.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
from cntk.blocks import Placeholder, Block
from cntk.layers import Convolution2D, Activation, MaxPooling, Dense, Dropout, default_options
from cntk.models import Sequential, LayerStack
from cntk.initializer import normal
# default Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "DataSets", "ImageNet")
model_path = os.path.join(abs_path, "Models")
log_dir = None
# model dimensions
image_height = 224
image_width = 224
num_channels = 3 # RGB
num_classes = 1000
model_name = "VGG19.model"
cntk.cntk_py.enable_hyper_memory_compress()
# Create a minibatch source.
def create_image_mb_source(map_file, is_training, total_number_of_samples):
if not os.path.exists(map_file):
raise RuntimeError("File '%s' does not exist." %map_file)
# transformation pipeline for the features has jitter/crop only when training
transforms = []
if is_training:
transforms += [
ImageDeserializer.crop(crop_type='randomside', side_ratio='0.4375:0.875', jitter_type='uniratio') # train uses jitter
]
else:
transforms += [
ImageDeserializer.crop(crop_type='center', side_ratio=0.5833333) # test has no jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
]
# deserializer
return MinibatchSource(
ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize = is_training,
epoch_size=total_number_of_samples,
multithreaded_deserializer = True)
# Create the network.
def create_vgg19():
# Input variables denoting the features and label data
feature_var = input_variable((num_channels, image_height, image_width))
label_var = input_variable((num_classes))
# apply model to input
# remove mean value
input = minus(feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input')
with default_options(activation=None, pad=True, bias=True):
z = Sequential([
# we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
LayerStack(2, lambda i: [
Convolution2D((3,3), 64, name='conv1_{}'.format(i)),
Activation(activation=relu, name='relu1_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool1'),
LayerStack(2, lambda i: [
Convolution2D((3,3), 128, name='conv2_{}'.format(i)),
Activation(activation=relu, name='relu2_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool2'),
LayerStack(4, lambda i: [
Convolution2D((3,3), 256, name='conv3_{}'.format(i)),
Activation(activation=relu, name='relu3_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool3'),
LayerStack(4, lambda i: [
Convolution2D((3,3), 512, name='conv4_{}'.format(i)),
Activation(activation=relu, name='relu4_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool4'),
LayerStack(4, lambda i: [
Convolution2D((3,3), 512, name='conv5_{}'.format(i)),
Activation(activation=relu, name='relu5_{}'.format(i)),
]),
MaxPooling((2,2), (2,2), name='pool5'),
Dense(4096, name='fc6'),
Activation(activation=relu, name='relu6'),
Dropout(0.5, name='drop6'),
Dense(4096, name='fc7'),
Activation(activation=relu, name='relu7'),
Dropout(0.5, name='drop7'),
Dense(num_classes, name='fc8')
])(input)
# loss and metric
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)
pe5 = classification_error(z, label_var, topN=5)
log_number_of_parameters(z) ; print()
return {
'feature': feature_var,
'label': label_var,
'ce' : ce,
'pe' : pe,
'pe5': pe5,
'output': z
}
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits):
# Set learning parameters
lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
lr_schedule = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
mm_schedule = cntk.learner.momentum_schedule(0.9)
l2_reg_weight = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
# Create learner
local_learner = cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight)
# Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency
parameter_learner = data_parallel_distributed_learner(
local_learner,
num_quantization_bits=num_quantization_bits,
distributed_after=0)
# Create trainer
return cntk.Trainer(network['output'], network['ce'], network['pe'], parameter_learner)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
# define mapping from input streams to network inputs
input_map = {
network['feature']: train_source.streams.features,
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, model_name),
# save_all_checkpoints = True,
progress_frequency = epoch_size,
cv_source = test_source,
cv_mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore = restore)
# Train all minibatches
training_session.train()
# Train and evaluate the network.
def vgg19_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128, epoch_size = 1281167, max_epochs=80,
restore=True, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
_cntk_py.set_computation_network_trace_level(0)
progress_printer = ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
rank=Communicator.rank(),
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
network = create_vgg19()
trainer = create_trainer(network, epoch_size, num_quantization_bits)
train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located', required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, required=False, default='80')
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, required=False, default='128')
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, required=False, default='1281167')
parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int, required=False, default='32')
parser.add_argument('-r', '--restart', help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)', action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)
args = vars(parser.parse_args())
if args['outputdir'] is not None:
model_path = args['outputdir'] + "/models"
if args['datadir'] is not None:
data_path = args['datadir']
if args['logdir'] is not None:
log_dir = args['logdir']
if args['device'] is not None:
cntk.device.set_default_device(cntk.device.gpu(args['device']))
train_data=os.path.join(data_path, 'train_map.txt')
test_data=os.path.join(data_path, 'val_map.txt')
try:
vgg19_train_and_eval(train_data, test_data,
minibatch_size=args['minibatch_size'],
epoch_size=args['epoch_size'],
num_quantization_bits=args['quantized_bits'],
max_epochs=args['num_epochs'],
restore=not args['restart'],
log_to_file=args['logdir'],
num_mbs_per_log=200,
gen_heartbeat=True)
finally:
cntk.distributed.Communicator.finalize()

View file

@@ -0,0 +1,25 @@
# CNTK Examples: Image/Classification/VGG
## Overview
|Data: |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
|:---------|:---
|Purpose |This folder contains examples that demonstrate how to use CNTK to define VGG network (https://arxiv.org/abs/1409.1556) for image classification.
|Network |VGG.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the ILSVRC2012 datasets to demonstrate how to train the VGG model, which was developed by the [Visual Geometry Group at the University of Oxford](http://www.robots.ox.ac.uk/~vgg/research/very_deep/). It won second place in the ILSVRC-2014 challenge. VGG has been a very popular model thanks to its simple architecture and high accuracy.
The ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain them through http://image-net.org.
## Details
We give examples for both Python and BrainScript.
### [Python](./Python)
### [BrainScript](./BrainScript)

View file

@@ -1,76 +0,0 @@
load=ndlMacros
run=DNN
ndlMacros = [
ImageW = 224
ImageH = 224
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
kW = 3
kH = 3
# Kernel stride.
hs = 1
vs = 1
# Pooling settings.
poolW = 2
poolH = 2
poolhs = 2
poolvs = 2
# Initial parameter values.
convWScale = 0.01 #7
convBValue = 0
fc1WScale = 0.01 #8
fc1BValue = 0
fc2WScale = 0.01 #3.2
fc2BValue = 0
fc3WScale = 0.01 #3.2
fc3BValue = 0
]
DNN=[
cMap1 = 64
conv1 = ConvReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap2 = 128
conv2 = ConvReLULayer(pool1, cMap2, 576, kW, kH, hs, vs, convWScale, convBValue)
pool2 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap3 = 256
conv3 = ConvReLULayer(pool2, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue)
conv4 = ConvReLULayer(conv3, cMap3, 2304, kW, kH, hs, vs, convWScale, convBValue)
pool3 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap5 = 512
conv5 = ConvReLULayer(pool3, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
conv6 = ConvReLULayer(conv5, cMap5, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool4 = MaxPooling(conv6, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap6 = 512
conv7 = ConvReLULayer(pool4, cMap6, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv8 = ConvReLULayer(conv7, cMap6, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool5 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DnnReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
h1_d = Dropout(h1)
h2 = DnnReLULayer(hiddenDim, hiddenDim, h1_d, fc2WScale, fc2BValue)
h2_d = Dropout(h2)
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]
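The inWCount arguments passed to ConvReLULayer above are simply kernel width * kernel height * input channels, and the first dense layer's input dimension is maps * spatial size; a quick sketch checking the values used in this NDL file:

# Sketch: verifying the inWCount values against 3x3 kernels.
kW = kH = 3
for in_ch, expected in [(3, 27), (64, 576), (128, 1152), (256, 2304), (512, 4608)]:
    assert kW * kH * in_ch == expected
assert 512 * 7 * 7 == 25088   # pool5 output: 512 maps at 7x7 (224 / 2**5 = 7)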

View file

@@ -1,109 +0,0 @@
# Note: This sample uses the deprecated NdlNetworkBuilder.
# An updated version using BrainScript is coming soon.
# Please find updated samples on Github, https://github.com/Microsoft/CNTK/tree/master/Examples/...
#
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros="$ConfigDir$/Macros.ndl"
precision="float"
deviceId="Auto"
command=Train:AddTop5Eval:Test
stderr="$OutputDir$/VGG_A"
traceLevel=1
numMBsToShowResult=500
Train=[
action="train"
modelPath="$ModelDir$/VGG_A"
traceLevel=1
NDLNetworkBuilder=[
networkDescription="$ConfigDir$/VGG_A.ndl"
]
SGD=[
epochSize=0
minibatchSize=32
learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
momentumPerMB=0.9
maxEpochs=70
gradUpdateType="None"
L2RegWeight=0.0005
dropoutRate=0*5:0.5
numMBsToShowResult=10
]
reader=[
readerType="ImageReader"
# Map file which maps images to labels using the following format:
# <full path to image><tab><numerical label (0-based class id)>
# Example:
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
file="$ConfigDir$/train_map.txt"
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
randomize="Auto"
features=[
# Below are the required parameters.
width=224
height=224
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType="RandomSide"
# Horizontal random flip, will be enabled by default because cropType=RandomSide
#hflip="true"
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[
labelDim=1000
]
]
]
AddTop5Eval=[
action="edit"
CurModel="$ModelDir$/VGG_A"
NewModel="$ModelDir$/VGG_A.Top5"
editPath="$ConfigDir$/CreateEvalModel.mel"
]
Test=[
action="test"
modelPath="$ModelDir$/VGG_A.Top5"
# Set minibatch size for testing.
minibatchSize=32
reader=[
readerType="ImageReader"
file="$ConfigDir$/val_map.txt"
randomize="None"
features=[
width=224
height=224
channels=3
cropType="Center"
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[
labelDim=1000
]
]
]

View file

@@ -1,84 +0,0 @@
load=ndlMacros
run=DNN
ndlMacros = [
ImageW = 224
ImageH = 224
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
kW = 3
kH = 3
# Kernel stride.
hs = 1
vs = 1
# Pooling settings.
poolW = 2
poolH = 2
poolhs = 2
poolvs = 2
# Initial parameter values.
convWScale = 7.07
convBValue = 0
fc1WScale = 3.0
fc1BValue = 1
fc2WScale = 3.0
fc2BValue = 1
fc3WScale = 1.0
fc3BValue = 1
]
DNN=[
cMap1 = 64
conv1 = ConvReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue)
conv2 = ConvReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue)
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap3 = 128
conv3 = ConvReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue)
conv4 = ConvReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue)
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap5 = 256
conv5 = ConvReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue)
conv6 = ConvReLULayer(conv5, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
conv7 = ConvReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
conv8 = ConvReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap9 = 512
conv9 = ConvReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue)
conv10 = ConvReLULayer(conv9, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv11 = ConvReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv12 = ConvReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap13 = 512
conv13 = ConvReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv14 = ConvReLULayer(conv13, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv15 = ConvReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv16 = ConvReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DnnReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
h1_d = Dropout(h1)
h2 = DnnReLULayer(hiddenDim, hiddenDim, h1_d, fc2WScale, fc2BValue)
h2_d = Dropout(h2)
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View file

@@ -1,85 +0,0 @@
load=ndlMacros
run=DNN
ndlMacros = [
ImageW = 224
ImageH = 224
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
kW = 3
kH = 3
# Kernel stride.
hs = 1
vs = 1
# Pooling settings.
poolW = 2
poolH = 2
poolhs = 2
poolvs = 2
# Initial parameter values.
convWScale = 7.07
convBValue = 0
scValue = 0.03
fc1WScale = 3.0
fc1BValue = 1
fc2WScale = 3.0
fc2BValue = 1
fc3WScale = 1.0
fc3BValue = 1
]
DNN=[
cMap1 = 64
conv1 = ConvBNReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv2 = ConvBNReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap3 = 128
conv3 = ConvBNReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv4 = ConvBNReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap5 = 256
conv5 = ConvBNReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv6 = ConvBNReLULayer(conv5, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv7 = ConvBNReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv8 = ConvBNReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap9 = 512
conv9 = ConvBNReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv10 = ConvBNReLULayer(conv9, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv11 = ConvBNReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv12 = ConvBNReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap13 = 512
conv13 = ConvBNReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv14 = ConvBNReLULayer(conv13, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv15 = ConvBNReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv16 = ConvBNReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DnnBNReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
h1_d = Dropout(h1)
h2 = DnnBNReLULayer(hiddenDim, hiddenDim, h1_d, fc2WScale, fc2BValue)
h2_d = Dropout(h2)
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View file

@@ -1,118 +0,0 @@
# Note: This sample uses the deprecated NdlNetworkBuilder.
# An updated version using BrainScript is coming soon.
# Please find updated samples on GitHub, https://github.com/Microsoft/CNTK/tree/master/Examples/...
#
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros="$ConfigDir$/Macros.ndl"
precision="float"
deviceId="Auto"
command=Train:AddTop5Eval:Test
parallelTrain="false"
stderr="$OutputDir$/VGG_E_BN"
traceLevel=1
Train=[
action="train"
modelPath="$ModelDir$/VGG_E_BN"
NDLNetworkBuilder=[
networkDescription="$ConfigDir$/VGG_E_BN.ndl"
]
SGD=[
epochSize=0
minibatchSize=16
learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
momentumPerMB=0.9
maxEpochs=70
gradUpdateType="None"
L2RegWeight=0.0005
dropoutRate=0*5:0.5
ParallelTrain=[
parallelizationMethod="DataParallelSGD"
distributedMBReading="true"
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=32
]
]
numMBsToShowResult=10
]
reader=[
readerType="ImageReader"
# Map file which maps images to labels using the following format:
# <full path to image><tab><numerical label (0-based class id)>
# Example:
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
file="$DataDir$/train_map.txt"
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
randomize="Auto"
features=[
# Below are the required parameters.
width=224
height=224
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType="RandomSide"
# Horizontal random flip; enabled because cropType=RandomSide
#hflip="true"
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[
labelDim=1000
]
]
]
AddTop5Eval=[
action="edit"
CurModel="$ModelDir$/VGG_E_BN"
NewModel="$ModelDir$/VGG_E_BN.Top5"
editPath="$ConfigDir$/CreateEvalModel.mel"
]
Test=[
action="test"
modelPath="$ModelDir$/VGG_E_BN.Top5"
# Set minibatch size for testing.
minibatchSize=16
reader=[
readerType="ImageReader"
file="$DataDir$/val_map.txt"
randomize="None"
features=[
width=224
height=224
channels=3
cropType="Center"
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[
labelDim=1000
]
]
]

View file

@@ -1,118 +0,0 @@
# Note: This sample uses the deprecated NdlNetworkBuilder.
# An updated version using BrainScript is coming soon.
# Please find updated samples on GitHub, https://github.com/Microsoft/CNTK/tree/master/Examples/...
#
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros="$ConfigDir$/Macros.ndl"
precision="float"
deviceId="Auto"
command=Train:AddTop5Eval:Test
parallelTrain="false"
stderr="$OutputDir$/VGG_E"
traceLevel=1
Train=[
action="train"
modelPath="$ModelDir$/VGG_E"
NDLNetworkBuilder=[
networkDescription="$ConfigDir$/VGG_E.ndl"
]
SGD=[
epochSize=0
minibatchSize=16
learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
momentumPerMB=0.9
maxEpochs=70
gradUpdateType="None"
L2RegWeight=0.0005
dropoutRate=0*5:0.5
ParallelTrain=[
parallelizationMethod="DataParallelSGD"
distributedMBReading="true"
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=32
]
]
numMBsToShowResult=10
]
reader=[
readerType="ImageReader"
# Map file which maps images to labels using the following format:
# <full path to image><tab><numerical label (0-based class id)>
# Example:
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
file="$DataDir$/train_map.txt"
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
randomize="Auto"
features=[
# Below are the required parameters.
width=224
height=224
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType="RandomSide"
# Horizontal random flip; enabled because cropType=RandomSide
#hflip="true"
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[
labelDim=1000
]
]
]
AddTop5Eval=[
action="edit"
CurModel="$ModelDir$/VGG_E"
NewModel="$ModelDir$/VGG_E.Top5"
editPath="$ConfigDir$/CreateEvalModel.mel"
]
Test=[
action="test"
modelPath="$ModelDir$/VGG_E.Top5"
# Set minibatch size for testing.
minibatchSize=16
reader=[
readerType="ImageReader"
file="$DataDir$/val_map.txt"
randomize="None"
features=[
width=224
height=224
channels=3
cropType="Center"
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[
labelDim=1000
]
]
]

4
Source/CNTKv2LibraryDll/API/CNTKLibrary.h Normal file → Executable file
View file

@@ -3880,7 +3880,9 @@ namespace CNTK
static MomentumSchedule DefaultVarianceMomentum = MomentumAsTimeConstantSchedule(2 * 3600 * 100);
///
/// Create an instance of the CNTK built-in Adam learner (only the low-memory variant is supported at the moment).
/// Create an instance of the Adam learner as described in the original paper.
/// For historical reasons, the legacy implementation of AdamLearner is FSAdaGrad. To keep the interface backward compatible, we
/// switch to the original Adam only when lowMemory = false, and keep the legacy logic when the flag is left at its default, i.e. true.
///
CNTK_API LearnerPtr AdamLearner(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,

53
Source/CNTKv2LibraryDll/Learner.cpp Normal file → Executable file
View file

@@ -543,6 +543,48 @@ namespace CNTK
s_targetAdagradAvDenom, momentum, varMomentum, UseUnitGainMomentum());
}
LearnerAdam::LearnerAdam(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
const MomentumSchedule& varianceMomentumSchedule,
AdditionalLearningOptions additionalOptions)
: LearnerMomentumSGD(parameters, learningRateSchedule, momentumSchedule,
unitGain, additionalOptions, /*allocateSmoothGradients*/ false),
m_varianceMomentumSchedule(varianceMomentumSchedule)
{
for (const auto& parameter : parameters)
{
const auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, { shape[0], 2 * shape[1] });
m_smoothedGradientValues.emplace(parameter, view);
m_smoothedCounts.emplace(parameter, 0.0);
}
}
/*virtual*/ void LearnerAdam::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerAdam::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
GET_WRITABLE_MATRICES;
const auto learningRate = LearningRate(trainingSampleCount);
const auto momentum = MomentumValueForMB(trainingSampleCount);
const auto varMomentum = VarianceMomentumValueForMB(trainingSampleCount);
double& smoothedCount = m_smoothedCounts.at(parameter);
smoothedGradientMatrix->AdamUpdate(*gradientMatrix, *parameterMatrix, smoothedCount, learningRate,
momentum, varMomentum, UseUnitGainMomentum());
}
LearnerRMSProp::LearnerRMSProp(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
double gamma, double inc, double dec, double max, double min,
@@ -623,16 +665,21 @@ namespace CNTK
LearnerPtr AdamLearner(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
bool unitGain, /*=true*/
const MomentumSchedule& varianceMomentumSchedule, /*= MomentumAsTimeConstantSchedulePerSample(2 * 3600 * 100)*/
bool lowMemory, /*= true*/
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
// TODO: For historical reasons, the legacy AdamLearner uses the FSAdaGrad implementation instead of the one from the original paper.
// To keep the interface backward compatible, the new Adam is enabled only when lowMemory is false.
if (!lowMemory)
{
LogicError("AdamLearner: only the low-memory variant is supported at the moment.");
return MakeSharedObject<LearnerAdam>(parameters, learningRateSchedule, momentumSchedule, unitGain, varianceMomentumSchedule, additionalOptions);
}
else
{
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRateSchedule, momentumSchedule, unitGain, varianceMomentumSchedule, additionalOptions);
}
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRateSchedule, momentumSchedule, unitGain, varianceMomentumSchedule, additionalOptions);
}
LearnerPtr AdaGradLearner(const vector<Parameter>& parameters,

30
Source/CNTKv2LibraryDll/Learner.h Normal file → Executable file
View file

@@ -234,6 +234,36 @@ namespace CNTK
MomentumSchedule m_varianceMomentumSchedule;
};
class LearnerAdam : public LearnerMomentumSGD
{
public:
LearnerAdam(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
const MomentumSchedule& varianceMomentumSchedule,
AdditionalLearningOptions additionalOptions);
protected:
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const override;
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
private:
// returns current per-minibatch variance momentum value.
double VarianceMomentumValueForMB(size_t minibatchSize) const
{
return MomentumValueForMB(m_varianceMomentumSchedule, minibatchSize);
}
mutable std::unordered_map<Parameter, double> m_smoothedCounts;
MomentumSchedule m_varianceMomentumSchedule;
};
class LearnerRMSProp : public LearnerBase
{
public:

View file

@@ -137,7 +137,6 @@ namespace CNTK
(op == PrimitiveOpType::CrossEntropyWithSoftmax) ||
(op == PrimitiveOpType::ClassificationError) ||
(op == PrimitiveOpType::Logistic) ||
(op == PrimitiveOpType::CosDistance) ||
(op == PrimitiveOpType::LambdaRank) ||
(op == PrimitiveOpType::NDCG))
{

View file

@@ -432,7 +432,20 @@ template <class ElemType>
InputRef(INDEXDATA).MaskMissingValueColumnsTo(FrameRange(InputRef(INDEXDATA).GetMBLayout()), -1); // indicates an invalid column to Gather/Scatter
let& index = InputRef(INDEXDATA) .Value(); // column indices to copy from
let& source = InputRef(SOURCEDATA).Value(); // source data to copy
auto& output = Value(); // output goes here
#ifdef _MSC_VER
auto& outputValuePtrRef = ValuePtrRef();
#else
auto& outputValuePtrRef = this->template ValuePtrRef();
#endif
if ((source.GetMatrixType() == SPARSE) && (outputValuePtrRef->GetMatrixType() != SPARSE))
outputValuePtrRef = std::make_shared<Matrix<ElemType>>(outputValuePtrRef->GetNumRows(),
outputValuePtrRef->GetNumCols(),
outputValuePtrRef->GetPreferredDeviceId(),
source.GetMatrixType(),
source.GetFormat());
auto& output = Value(); // output goes here
output.DoGatherColumnsOf(/*beta=*/0, index, source, /*alpha=*/1);
}
@@ -493,6 +506,19 @@ template <class ElemType>
InputRef(INDEXDATA).MaskMissingValueColumnsTo(FrameRange(InputRef(INDEXDATA).GetMBLayout()), -1); // indicates an invalid column to Gather/Scatter
let& index = InputRef(INDEXDATA) .Value(); // column indices to copy from
let& source = InputRef(SOURCEDATA).Value(); // source data to copy
#ifdef _MSC_VER
auto& outputValuePtrRef = ValuePtrRef();
#else
auto& outputValuePtrRef = this->template ValuePtrRef();
#endif
if ((source.GetMatrixType() == SPARSE) && (outputValuePtrRef->GetMatrixType() != SPARSE))
outputValuePtrRef = std::make_shared<Matrix<ElemType>>(outputValuePtrRef->GetNumRows(),
outputValuePtrRef->GetNumCols(),
outputValuePtrRef->GetPreferredDeviceId(),
source.GetMatrixType(),
source.GetFormat());
auto& output = Value(); // output goes here
output.DoScatterColumnsOf(/*beta=*/0, index, source, /*alpha=*/1);
}

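The two changes above only switch the output matrix to a sparse representation when the source is sparse; the gather/scatter semantics themselves are unchanged. As a rough NumPy sketch of the dense column semantics (gather_columns/scatter_columns are illustrative names, not CNTK APIs):

import numpy as np

# idx holds column indices; -1 marks a masked (invalid) column.
def gather_columns(idx, source):
    out = np.zeros((source.shape[0], len(idx)))
    for j, k in enumerate(idx):
        if k >= 0:
            out[:, j] = source[:, int(k)]   # output column j <- source column idx[j]
    return out

def scatter_columns(idx, source, num_out_cols):
    out = np.zeros((source.shape[0], num_out_cols))
    for j, k in enumerate(idx):
        if k >= 0:
            out[:, int(k)] = source[:, j]   # output column idx[j] <- source column j
    return out

src = np.arange(6.0).reshape(2, 3)
assert np.array_equal(gather_columns([2, 0], src), src[:, [2, 0]])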
View file

@@ -69,6 +69,8 @@ public:
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<ReshapeNode<ElemType>>(nodeP);
node->m_beginDimParameter = m_beginDimParameter;
node->m_endDimParameter = m_endDimParameter;
node->m_replacementSampleLayout = m_replacementSampleLayout;
}
}

35
Source/Math/CPUMatrix.cpp Normal file → Executable file
View file

@@ -1246,6 +1246,41 @@ void CPUMatrix<ElemType>::FSAdagrad(CPUMatrix<ElemType>& gradients,
}
}
template <class ElemType>
void CPUMatrix<ElemType>::Adam(CPUMatrix<ElemType>& gradients, CPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum)
{
size_t numColsNeeded = 2 * gradients.GetNumCols();
auto unitGainFactor = ElemType(unitGainMomentum ? (1.0 - momentum) : 1.0);
if (IsEmpty() || (GetNumCols() < numColsNeeded))
{
RequireSize(gradients.GetNumRows(), numColsNeeded);
SetValue(0.0);
}
assert((GetNumRows() == gradients.GetNumRows()) && (GetNumCols() == numColsNeeded));
size_t n = gradients.GetNumElements();
ElemType* grad = gradients.Data();
ElemType* smoothAda = Data();
ElemType* smoothMom = Data() + n;
ElemType* val = functionValues.Data();
#pragma omp parallel for
// TODO: Unroll 4 times for better performance, leveraging vectorization
for (long i = 0; i < n; i++)
{
ElemType g = grad[i];
ElemType adaSqr = adaWeight * smoothAda[i] + (1.0f - adaWeight) * g * g;
smoothAda[i] = adaSqr;
ElemType ada = sqrt(adaSqr);
ElemType w = adaMul * (ElemType)(1.0 / (ada + 1e-8));
g = momentum * smoothMom[i] + unitGainFactor * g;
smoothMom[i] = g;
val[i] -= g * w * learnRatePerSample;
}
}
template <class ElemType>
ElemType CPUMatrix<ElemType>::RmsProp(CPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,

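For readability, here is a NumPy sketch of the per-element update performed by the CPUMatrix Adam loop above; adaMul carries the bias-correction factor computed one level up in Matrix::AdamUpdate (adam_step is an illustrative name, not a CNTK API):

import numpy as np

def adam_step(grad, param, smooth_ada, smooth_mom,
              lr, momentum, ada_weight, ada_mul, unit_gain=True):
    unit_gain_factor = (1.0 - momentum) if unit_gain else 1.0
    # second-moment estimate (first state column)
    smooth_ada[:] = ada_weight * smooth_ada + (1.0 - ada_weight) * grad * grad
    w = ada_mul / (np.sqrt(smooth_ada) + 1e-8)
    # first-moment estimate (second state column)
    smooth_mom[:] = momentum * smooth_mom + unit_gain_factor * grad
    param -= smooth_mom * w * lr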
5
Source/Math/CPUMatrix.h Normal file → Executable file
View file

@@ -95,7 +95,10 @@ public:
void FSAdagrad(CPUMatrix<ElemType>& gradients, CPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum);
void Adam(CPUMatrix<ElemType>& gradients, CPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum);
ElemType RmsProp(CPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,

25
Source/Math/GPUMatrix.cu Normal file → Executable file
View file

@@ -1413,6 +1413,31 @@ void GPUMatrix<ElemType>::FSAdagrad(GPUMatrix<ElemType>& gradients,
learnRatePerSample, momentum, adaWeight, adaMul, unitGainMomentum);
}
template <class ElemType>
void GPUMatrix<ElemType>::Adam(GPUMatrix<ElemType>& gradients,
GPUMatrix<ElemType>& functionValues,
ElemType learnRatePerSample,
ElemType momentum,
ElemType adaWeight,
ElemType adaMul,
bool unitGainMomentum)
{
size_t numColsNeeded = 2 * gradients.GetNumCols();
if (IsEmpty() || (GetNumCols() < numColsNeeded))
{
RequireSize(gradients.GetNumRows(), numColsNeeded);
SetValue(0.0);
}
assert((GetNumRows() == gradients.GetNumRows()) && (GetNumCols() == numColsNeeded));
size_t n = gradients.GetNumElements();
int blocksPerGrid = (n + GridDim::maxThreadsPerBlock - 1) / GridDim::maxThreadsPerBlock;
_adam<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(n, gradients.Data(), Data(), Data() + n, functionValues.Data(),
learnRatePerSample, momentum, adaWeight, adaMul, unitGainMomentum);
}
template <class ElemType>
ElemType GPUMatrix<ElemType>::RmsProp(GPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,

7
Source/Math/GPUMatrix.h Normal file → Executable file
View file

@@ -224,10 +224,13 @@ public:
}
ElemType Adagrad(GPUMatrix<ElemType>& gradients, const bool needAveMultiplier);
void FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
void FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum);
void Adam(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum);
ElemType RmsProp(GPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,

60
Source/Math/GPUMatrixCUDAKernels.cuh Normal file → Executable file
View file

@@ -5132,6 +5132,66 @@ __global__ void _maskColumnsValue(ElemType* a, const char* columnsMask, CUDA_LON
a[IDX2C(rowIdx, colIdx, numRows)] = val;
}
}
template <class ElemType>
__global__ void _adam(CUDA_LONG size, ElemType* grad, ElemType* smoothAda, ElemType* smoothMom, ElemType* val,
ElemType lr, ElemType mom, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum)
{
const ElemType unitGainFactor = unitGainMomentum ? (1.0 - mom) : 1.0;
CUDA_LONG idx = blockIdx.x * blockDim.x + threadIdx.x;
CUDA_LONG stride = blockDim.x * gridDim.x;
for (; idx < size; idx += stride)
{
ElemType g = grad[idx];
ElemType adaSqr = adaWeight * smoothAda[idx] + (1.0f - adaWeight) * g * g;
smoothAda[idx] = adaSqr;
ElemType w;
if (sizeof(ElemType) == sizeof(double))
{
w = adaMul * rsqrt(adaSqr + 1e-8);
}
else
{
w = adaMul * rsqrtf(adaSqr + 1e-8);
}
g = mom * smoothMom[idx] + unitGainFactor * g;
smoothMom[idx] = g;
g = lr*g*w;
val[idx] -= g;
}
}
template <class ElemType>
__global__ void _adam4BlockSparseCol(CUDA_LONG size,
ElemType* grad_bsc, const GPUSPARSE_INDEX_TYPE* colOrRow2blockId, const size_t len,
ElemType* smoothAda, ElemType* smoothMom, ElemType* val,
ElemType lr, ElemType mom, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum)
{
const ElemType unitGainFactor = unitGainMomentum ? (1.0 - mom) : 1.0;
CUDA_LONG idx = blockIdx.x * blockDim.x + threadIdx.x;
CUDA_LONG stride = blockDim.x * gridDim.x;
for (; idx < size; idx += stride)
{
ElemType g = _getvalue4BlockSparseCol(grad_bsc, colOrRow2blockId, len, idx);
ElemType adaSqr = adaWeight * smoothAda[idx] + (1.0f - adaWeight) * g * g;
smoothAda[idx] = adaSqr;
ElemType w;
if (sizeof(ElemType) == sizeof(double))
{
w = adaMul * rsqrt(adaSqr + 1e-8);
}
else
{
w = adaMul * rsqrtf(adaSqr + 1e-8);
}
g = mom * smoothMom[idx] + unitGainFactor * g;
smoothMom[idx] = g;
g = lr*g*w;
val[idx] -= g;
}
}
}
}
}

34
Source/Math/GPUSparseMatrix.cu Normal file → Executable file
View file

@@ -1148,6 +1148,7 @@ void GPUSparseMatrix<ElemType>::ConvolveAndWeightedAdd(ElemType alpha, const GPU
{
RuntimeError("Only support c += alpha * a operation");
}
int blocksPerGrid = (int) ceil(1.0 * cRows / GridDim::maxThreadsPerBlock);
SyncGuard syncGuard;
for (int rowInB = 0; rowInB < l; rowInB++)
@@ -1546,6 +1547,39 @@ void GPUSparseMatrix<ElemType>::FSAdagrad(
learnRatePerSample, momentum, adaWeight, adaMul, unitGainMomentum);
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::Adam(
GPUMatrix<ElemType>& c,
GPUMatrix<ElemType>& functionValues,
ElemType learnRatePerSample,
ElemType momentum,
ElemType adaWeight,
ElemType adaMul,
bool unitGainMomentum)
{
if (GetFormat() != MatrixFormat::matrixFormatSparseBlockCol)
{
NOT_IMPLEMENTED;
}
size_t numColsNeeded = 2 * GetNumCols();
if (c.IsEmpty() || (c.GetNumCols() < numColsNeeded))
{
c.RequireSize(GetNumRows(), numColsNeeded);
c.SetValue(0.0);
}
assert((c.GetNumRows() == GetNumRows()) && (c.GetNumCols() == numColsNeeded));
size_t n = GetNumElements();
int blocksPerGrid = (n + GridDim::maxThreadsPerBlock - 1) / GridDim::maxThreadsPerBlock;
_adam4BlockSparseCol<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(
n, Data(), ColOrRow2BlockId(), GetNumRows(),
c.Data(), c.Data() + n, functionValues.Data(),
learnRatePerSample, momentum, adaWeight, adaMul, unitGainMomentum);
}
template <class ElemType>
ElemType GPUSparseMatrix<ElemType>::RmsProp(GPUMatrix<ElemType>& c,
ElemType RMS_GAMMA,

1
Source/Math/GPUSparseMatrix.h Normal file → Executable file
View file

@@ -412,6 +412,7 @@ public:
ElemType Adagrad(GPUMatrix<ElemType>& c, const bool needAveMultiplier);
void FSAdagrad(GPUMatrix<ElemType>& c, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum);
ElemType RmsProp(GPUMatrix<ElemType>& c, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier);
void Adam(GPUMatrix<ElemType>& c, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum);
static void Multiply(const GPUSparseMatrix<ElemType>& S, const GPUMatrix<ElemType>& D, GPUMatrix<ElemType>& C);
static void Multiply(const GPUMatrix<ElemType>& D, const GPUSparseMatrix<ElemType>& S, GPUMatrix<ElemType>& C);

58
Source/Math/Matrix.cpp Normal file → Executable file
View file

@@ -1089,6 +1089,9 @@ Matrix<ElemType>& Matrix<ElemType>::DoGatherColumnsOf(ElemType beta, const Matri
{
DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0
if (a.GetMatrixType() != this->GetMatrixType())
RuntimeError("Matrix::DoGatherColumnsOf: The source and target matrices must have same storage type (SPARSE/DENSE).");
DISPATCH_MATRIX_ON_FLAG(&a, this,
{ m_CPUMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
{ m_GPUMatrix->DoGatherColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
@@ -1101,8 +1104,7 @@ Matrix<ElemType>& Matrix<ElemType>::DoGatherColumnsOf(ElemType beta, const Matri
CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(),
m_GPUSparseMatrix->GetNumNZElements());
CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->GetNumNZElements());
m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempThis);
tempThis.DoGatherColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
@@ -1121,11 +1123,27 @@ Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matr
{
DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0
if (a.GetMatrixType() != this->GetMatrixType())
RuntimeError("Matrix::DoScatterColumnsOf: The source and target matrices must have same storage type (SPARSE/DENSE).");
DISPATCH_MATRIX_ON_FLAG(&a, this,
{ m_CPUMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
{ m_GPUMatrix->DoScatterColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
{ m_CPUSparseMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
{ NOT_IMPLEMENTED; });
{
// TODO: replace with a more performant version directly on the GPU that avoids the round-trip over the CPU.
Matrix<ElemType> tempIdx(CPUDEVICE); tempIdx.AssignValuesOf(idx);
CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
CPUSparseMatrix<ElemType> tempThis(m_GPUSparseMatrix->GetFormat(), m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->GetNumNZElements());
m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempThis);
tempThis.DoScatterColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
m_GPUSparseMatrix->SetValue(tempThis);
});
return *this;
}
@@ -1672,6 +1690,40 @@ void Matrix<ElemType>::FSAdagradUpdate(size_t mbSize,
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
///
// Implements the original Adam algorithm as described in the paper
// Ref: ADAM: A METHOD FOR STOCHASTIC OPTIMIZATION, https://arxiv.org/pdf/1412.6980.pdf
///
template <class ElemType>
void Matrix<ElemType>::AdamUpdate(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, double& smoothedCount,
const double learnRatePerSample, const double meanMomentum, const double varMomentum, bool unitGainMomentum)
{
smoothedCount++;
// Bias correction
let biasCorrection = (ElemType)(sqrt(1 - pow(varMomentum, smoothedCount)) / (1 - pow(meanMomentum, smoothedCount)));
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
{
m_CPUMatrix->Adam(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
biasCorrection, unitGainMomentum);
SetDataLocation(CPU);
},
{
m_GPUMatrix->Adam(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum, (ElemType)varMomentum,
biasCorrection, unitGainMomentum);
SetDataLocation(GPU);
},
{ NOT_IMPLEMENTED; },
{ gradients.m_GPUSparseMatrix->Adam(*m_GPUMatrix, *functionValues.m_GPUMatrix,
(ElemType)learnRatePerSample, (ElemType)meanMomentum,
(ElemType)varMomentum, biasCorrection, unitGainMomentum);
SetDataLocation(GPU); });
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
}
template <class ElemType>
ElemType Matrix<ElemType>::RmsProp(Matrix<ElemType>& gradients,
ElemType RMS_GAMMA,

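The biasCorrection factor above is the standard Adam step-size correction from the paper, alpha_t = alpha * sqrt(1 - beta2^t) / (1 - beta1^t), with varMomentum in the role of beta2 and meanMomentum in the role of beta1. A quick numeric check in Python:

from math import sqrt

def bias_correction(mean_momentum, var_momentum, t):
    return sqrt(1.0 - var_momentum ** t) / (1.0 - mean_momentum ** t)

# On the first step with beta1 = 0.9, beta2 = 0.999 the factor is ~0.316,
# compensating for moment estimates still biased toward their zero init.
print(bias_correction(0.9, 0.999, 1))   # ~0.3162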
4
Source/Math/Matrix.h Normal file → Executable file
View file

@@ -215,6 +215,10 @@ public:
Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, double& smoothedCount,
const double learnRatePerSample, const double targetAdagradAvDenom,
const double meanMomentum, const double varMomentum, bool unitGainMomentum = true);
void AdamUpdate(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, double& smoothedCount,
const double learnRatePerSample, const double meanMomentum, const double varMomentum, bool unitGainMomentum = true);
ElemType RmsProp(Matrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 10000, bool growOnly = true); // by default we only reallocate if need to grow

12
Source/Math/NoGPU.cpp Normal file → Executable file
View file

@@ -261,6 +261,11 @@ void GPUSparseMatrix<ElemType>::FSAdagrad(GPUMatrix<ElemType>&, GPUMatrix<ElemTy
{
}
template<class ElemType>
void GPUSparseMatrix<ElemType>::Adam(GPUMatrix<ElemType>& c, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum)
{
}
template<class ElemType>
ElemType GPUSparseMatrix<ElemType>::RmsProp(GPUMatrix<ElemType>&, ElemType, ElemType, ElemType, ElemType, ElemType, const bool)
{
@@ -1077,6 +1082,13 @@ void GPUMatrix<ElemType>::FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<El
{
}
template <class ElemType>
void GPUMatrix<ElemType>::Adam(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample,
ElemType momentum, ElemType adaWeight, ElemType adaMul, bool unitGainMomentum)
{
}
template <class ElemType>
ElemType GPUMatrix<ElemType>::RmsProp(GPUMatrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier)
{

View file

@@ -7,50 +7,28 @@
import numpy as np
import os
import sys
import signal
import subprocess
import re
import pytest
from cntk.ops.tests.ops_test_utils import cntk_device
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import set_default_device
from cntk.io import ReaderConfig, ImageDeserializer
from cntk import distributed
import pytest
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "AlexNet", "Python"))
example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "AlexNet", "Python")
sys.path.append(example_dir)
from prepare_test_data import prepare_ImageNet_data
from AlexNet_ImageNet_Distributed import alexnet_train_and_eval
from ConvNet_CIFAR10_DataAug_Distributed_test import mpiexec_test
script_under_test = os.path.join(example_dir, "AlexNet_ImageNet_Distributed.py")
#TOLERANCE_ABSOLUTE = 2E-1
def test_alexnet_error(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU')
set_default_device(cntk_device(device_id))
base_path = prepare_ImageNet_data()
# change dir to locate data.zip correctly
os.chdir(base_path)
from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
set_computation_network_trace_level(1)
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
#force_deterministic_algorithms()
# TODO: do the above; they lead to slightly different results, so not doing it for now
# for test purpose we train and test on same data
train_data=os.path.join(base_path, 'val1024_map.txt')
test_data=os.path.join(base_path, 'val1024_map.txt')
test_error = alexnet_train_and_eval(train_data, test_data,
num_quantization_bits=32,
minibatch_size=16,
epoch_size=64,
max_epochs=2)
distributed.Communicator.finalize()
# expected_test_error = 0.0
# We are removing the error tolerance because running with a small epoch size yields huge variance in accuracy. We will add
# the tolerance back once the convolution operator is deterministic.
# assert np.allclose(test_error, expected_test_error,
# atol=TOLERANCE_ABSOLUTE)
def test_alexnet_imagenet_distributed(device_id):
params = [ "-n", "2",
"-m", "8",
"-e", "16",
"-datadir", prepare_ImageNet_data(),
"-q", "32",
"-r",
"-device", "0" ]
mpiexec_test(device_id, script_under_test, params, 0.99, True)

View file

@@ -16,28 +16,15 @@ from cntk.cntk_py import DeviceKind_GPU
from cntk.device import set_default_device
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python")
script_under_test = os.path.join(example_dir, "ConvNet_CIFAR10_DataAug_Distributed.py")
sys.path.append(example_dir)
from prepare_test_data import prepare_CIFAR10_data
script_under_test = os.path.join(example_dir, "ConvNet_CIFAR10_DataAug_Distributed.py")
TOLERANCE_ABSOLUTE = 2E-1
TIMEOUT_SECONDS = 300
def data_set_directory():
try:
base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
*"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
# N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
# and CIFAR-10_mean.xml in the base_path.
except KeyError:
base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
*"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
base_path = os.path.normpath(base_path)
os.chdir(os.path.join(base_path, '..'))
return base_path
def mpiexec_test(device_id, script, params, expected_test_error, match_exactly=True, per_minibatch_tolerance=TOLERANCE_ABSOLUTE, error_tolerance=TOLERANCE_ABSOLUTE):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU')
@@ -56,6 +43,7 @@ def mpiexec_test(device_id, script, params, expected_test_error, match_exactly=T
results = re.findall("Cross Validation \[.+?\]: Minibatch\[.+?\]: errs = (.+?)%", str_out)
assert len(results) == 2
print(results)
if match_exactly:
assert results[0] == results[1]
@@ -65,23 +53,32 @@ def mpiexec_test(device_id, script, params, expected_test_error, match_exactly=T
assert np.allclose(float(results[0])/100, expected_test_error, atol=error_tolerance)
def test_cifar_convnet_distributed(device_id):
params = [ "-e", "2",
"-datadir", data_set_directory(),
params = [ "-n", "2",
"-m", "64",
"-e", "3200",
"-datadir", prepare_CIFAR10_data(),
"-q", "32",
"-r",
"-device", "0" ]
mpiexec_test(device_id, script_under_test, params, 0.617)
mpiexec_test(device_id, script_under_test, params, 0.75, True)
def test_cifar_convnet_distributed_1bitsgd(device_id):
params = [ "-e", "2",
"-datadir", data_set_directory(),
params = [ "-n", "2",
"-m", "64",
"-e", "3200",
"-datadir", prepare_CIFAR10_data(),
"-q", "1",
"-r",
"-device", "0" ]
mpiexec_test(device_id, script_under_test, params, 0.617)
mpiexec_test(device_id, script_under_test, params, 0.75, True)
def test_cifar_convnet_distributed_block_momentum(device_id):
params = [ "-e", "2",
"-datadir", data_set_directory(),
"-b", "3200",
params = [ "-n", "2",
"-m", "64",
"-e", "3200",
"-datadir", prepare_CIFAR10_data(),
"-b", "1600",
"-r",
"-device", "0" ]
mpiexec_test(device_id, script_under_test, params, 0.6457, False, 10)
mpiexec_test(device_id, script_under_test, params, 0.78, False, 10)

View file

@@ -30,8 +30,7 @@ def test_cifar_convnet_error(device_id):
# change dir to locate data.zip correctly
os.chdir(base_path)
from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
set_computation_network_trace_level(1)
from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
#force_deterministic_algorithms()
# TODO: do the above; they lead to slightly different results, so not doing it for now

View file

@@ -15,13 +15,14 @@ example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image"
sys.path.append(example_dir)
sys.path.append(abs_path)
from ConvNet_CIFAR10_DataAug_Distributed_test import mpiexec_test, data_set_directory
from prepare_test_data import prepare_CIFAR10_data
from ConvNet_CIFAR10_DataAug_Distributed_test import mpiexec_test
script_under_test = os.path.join(example_dir, "TrainResNet_CIFAR10_Distributed.py")
def test_cifar_resnet_distributed(device_id):
params = [ "-e", "2",
"-datadir", data_set_directory(),
"-datadir", prepare_CIFAR10_data(),
"-q", "32",
"-es", "512",
"-device", "0" ]
@@ -29,7 +30,7 @@ def test_cifar_resnet_distributed(device_id):
def test_cifar_resnet_distributed_1bitsgd(device_id):
params = [ "-e", "2",
"-datadir", data_set_directory(),
"-datadir", prepare_CIFAR10_data(),
"-q", "1",
"-es", "512",
"-device", "0" ]
@@ -38,7 +39,7 @@ def test_cifar_resnet_distributed_1bitsgd(device_id):
def test_cifar_resnet_distributed_block_momentum(device_id):
params = [ "-e", "2",
"-datadir", data_set_directory(),
"-datadir", prepare_CIFAR10_data(),
"-b", "3200",
"-es", "512",
"-device", "0" ]

View file

@@ -0,0 +1,34 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import numpy as np
import os
import sys
import signal
import subprocess
import re
import pytest
from cntk.ops.tests.ops_test_utils import cntk_device
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import set_default_device
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "VGG", "Python")
sys.path.append(example_dir)
from prepare_test_data import prepare_ImageNet_data
from ConvNet_CIFAR10_DataAug_Distributed_test import mpiexec_test
script_under_test = os.path.join(example_dir, "VGG16_ImageNet_Distributed.py")
# def test_vgg16_imagenet_distributed(device_id):
# params = [ "-n", "2",
# "-m", "1",
# "-e", "2",
# "-datadir", prepare_ImageNet_data(),
# "-q", "32",
# "-r",
# "-device", "0" ]
# mpiexec_test(device_id, script_under_test, params, 0.99, True)

View file

@@ -29,16 +29,19 @@ def prepare_CIFAR10_data():
def prepare_ImageNet_data():
base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
*"../../../../Examples/Image/DataSets/ImageNet".split("/"))
*"../../../../Examples/Image/DataSets/ImageNet/test_data".split("/"))
base_path = os.path.normpath(base_path)
if not os.path.isdir(base_path):
os.mkdir(base_path)
# If the map files don't exist locally, copy them from the backup location
if not os.path.isfile(os.path.join(base_path, 'val1024_map.txt')):
if not (os.path.isfile(os.path.join(base_path, 'train_map.txt')) and os.path.isfile(os.path.join(base_path, 'val_map.txt'))):
# copy from backup location
base_path_bak = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
*"Image/ImageNet/2012/v0".split("/"))
base_path_bak = os.path.normpath(base_path_bak)
copyfile(os.path.join(base_path_bak, 'val1024_map.txt'), os.path.join(base_path, 'val1024_map.txt'))
copyfile(os.path.join(base_path_bak, 'val1024_map.txt'), os.path.join(base_path, 'train_map.txt'))
copyfile(os.path.join(base_path_bak, 'val1024_map.txt'), os.path.join(base_path, 'val_map.txt'))
copyfile(os.path.join(base_path_bak, 'val1024.zip'), os.path.join(base_path, 'val1024.zip'))
return base_path

View file

@@ -1,43 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import numpy as np
import os
import sys
import platform
from cntk.io import ReaderConfig, ImageDeserializer, FULL_DATA_SWEEP
from cntk import distributed
from cntk.device import set_default_device, gpu
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python"))
from prepare_test_data import prepare_CIFAR10_data
from ConvNet_CIFAR10_DataAug_Distributed import convnet_cifar10_dataaug
def run_cifar_convnet_distributed():
base_path = prepare_CIFAR10_data()
# change dir to locate data.zip correctly
os.chdir(base_path)
from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
set_computation_network_trace_level(1)
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
#force_deterministic_algorithms()
# TODO: do the above; they lead to slightly different results, so not doing it for now
train_data = os.path.join(base_path, 'train_map.txt')
mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
test_data = os.path.join(base_path, 'test_map.txt')
num_quantization_bits = 32
return convnet_cifar10_dataaug(train_data, test_data, mean_data, num_quantization_bits, epoch_size=512, max_epochs=2)
if __name__=='__main__':
assert distributed.Communicator.rank() < distributed.Communicator.num_workers()
set_default_device(gpu(0)) # force using GPU-0 in test for speed
run_cifar_convnet_distributed()
distributed.Communicator.finalize()

The file diff is not shown because of its large size. Load diff

30
Tests/UnitTests/MathTests/CPUMatrixTests.cpp Normal file → Executable file
View file

@@ -898,6 +898,36 @@ BOOST_FIXTURE_TEST_CASE(CPUMatrixSeedingDouble, RandomSeedFixture)
BOOST_CHECK(m1.IsEqualTo(m2));
}
BOOST_FIXTURE_TEST_CASE(CPUMatrixAdam, RandomSeedFixture)
{
CPUMatrix<double> adamMatrix;
CPUMatrix<double> gradients(2, 1);
CPUMatrix<double> parameters(2, 1);
CPUMatrix<double> expectedParameters(2, 1);
CPUMatrix<double> expectedStates(2, 2);
double gradientValues[] = { 0.1, -0.1 };
double paramValues[] = { 0.1, 0.1 };
double expectedValues[] = { -0.05811338, 0.25811338 };
double expectedStateValues[] = {1e-5, 0.01, 1e-5, -0.01};
gradients.SetValue(2, 1, gradientValues, matrixFormatRowMajor);
parameters.SetValue(2, 1, paramValues, matrixFormatRowMajor);
expectedParameters.SetValue(2, 1, expectedValues, matrixFormatRowMajor);
expectedStates.SetValue(2, 2, expectedStateValues, matrixFormatRowMajor);
adamMatrix.Adam(gradients, parameters, 0.1, 0.9, 0.999, 0.5, true);
BOOST_CHECK(parameters.IsEqualTo(expectedParameters, 1e-6));
BOOST_CHECK(adamMatrix.IsEqualTo(expectedStates, 1e-6));
double expectedValues2[] = { -0.27059249, 0.47059249 };
double expectedStateValues2[] = { 2e-05, 0.019, 2e-05, -0.019 };
expectedParameters.SetValue(2, 1, expectedValues2, matrixFormatRowMajor);
expectedStates.SetValue(2, 2, expectedStateValues2, matrixFormatRowMajor);
adamMatrix.Adam(gradients, parameters, 0.1, 0.9, 0.999, 0.5, true);
BOOST_CHECK(parameters.IsEqualTo(expectedParameters, 1e-6));
BOOST_CHECK(adamMatrix.IsEqualTo(expectedStates, 1e-6));
}
BOOST_AUTO_TEST_SUITE_END()
}
} } }

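The expected values in CPUMatrixAdam follow directly from the update rule shown in CPUMatrix.cpp above; a sketch of the first step for the first element:

import numpy as np

g, p = 0.1, 0.1                              # gradient, parameter
smooth_ada = 0.999 * 0.0 + 0.001 * g * g     # 1e-5, matches expectedStateValues[0]
smooth_mom = 0.9 * 0.0 + (1.0 - 0.9) * g     # 0.01, matches expectedStateValues[1]
w = 0.5 / (np.sqrt(smooth_ada) + 1e-8)       # adaMul = 0.5
p -= smooth_mom * w * 0.1                    # lr = 0.1
print(p)                                     # ~ -0.058113, matches expectedValues[0]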
28
Tests/UnitTests/MathTests/GPUMatrixTests.cpp Normal file → Executable file
View file

@@ -537,6 +537,34 @@ BOOST_FIXTURE_TEST_CASE(GPUMatrixCurandSeedingDouble, RandomSeedFixture)
BOOST_CHECK(m1.IsEqualTo(m2));
}
BOOST_FIXTURE_TEST_CASE(GPUMatrixAdam, RandomSeedFixture)
{
GPUMatrix<double> adamMatrix(c_deviceIdZero);
GPUMatrix<double> gradients(2, 1, c_deviceIdZero);
GPUMatrix<double> parameters(2, 1, c_deviceIdZero);
GPUMatrix<double> expectedParameters(2, 1, c_deviceIdZero);
GPUMatrix<double> expectedStates(2, 2, c_deviceIdZero);
double gradientValues[] = { 0.1, -0.1 };
double paramValues[] = { 0.1, 0.1 };
double expectedValues[] = { -0.05803489, 0.25803488 };
double expectedStateValues[] = { 1e-5, 0.01, 1e-5, -0.01 };
gradients.SetValue(2, 1, c_deviceIdZero, gradientValues, matrixFormatRowMajor);
parameters.SetValue(2, 1, c_deviceIdZero, paramValues, matrixFormatRowMajor);
expectedParameters.SetValue(2, 1, c_deviceIdZero, expectedValues, matrixFormatRowMajor);
expectedStates.SetValue(2, 2, c_deviceIdZero, expectedStateValues, matrixFormatRowMajor);
adamMatrix.Adam(gradients, parameters, 0.1, 0.9, 0.999, 0.5, true);
BOOST_CHECK(parameters.IsEqualTo(expectedParameters, 1e-6));
BOOST_CHECK(adamMatrix.IsEqualTo(expectedStates, 1e-6));
double expectedValues2[] = { -0.27046135, 0.47046134 };
double expectedStateValues2[] = { 2e-05, 0.019, 2e-05, -0.019 };
expectedParameters.SetValue(2, 1, c_deviceIdZero, expectedValues2, matrixFormatRowMajor);
expectedStates.SetValue(2, 2, c_deviceIdZero, expectedStateValues2, matrixFormatRowMajor);
adamMatrix.Adam(gradients, parameters, 0.1, 0.9, 0.999, 0.5, true);
BOOST_CHECK(parameters.IsEqualTo(expectedParameters, 1e-6));
BOOST_CHECK(adamMatrix.IsEqualTo(expectedStates, 1e-6));
}
#if 0 // Temporarily disabling
BOOST_FIXTURE_TEST_CASE(GPUMatrixLargeInequality, RandomSeedFixture)
{

13
Tests/UnitTests/V2LibraryTests/LearnerTests.cpp Normal file → Executable file
View file

@@ -95,6 +95,15 @@ void TestFSAdaGradLearner(size_t numParameters, size_t numMinibatches, bool unit
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestAdamLearner(size_t numParameters, size_t numMinibatches, bool unitGainMomentum, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
auto learner = AdamLearner(parameters, LearningRatePerSampleSchedule({ 0.5 }), MomentumAsTimeConstantSchedule({ 10.0, 100.0, 1000.0 }), unitGainMomentum, MomentumPerSampleSchedule(0.99), false);
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestRMSPropLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
@@ -335,6 +344,8 @@ void LearnerTests()
TestMomentumSGDLearner<float>(numParameters, numMinibatches, unitGain, device);
TestNesterovLearner<float>(numParameters, numMinibatches, unitGain, device);
TestFSAdaGradLearner<double>(numParameters, numMinibatches, unitGain, device);
TestAdamLearner<float>(numParameters, numMinibatches, unitGain, device);
TestAdamLearner<double>(numParameters, numMinibatches, unitGain, device);
}
}
}
}

0
Tests/UnitTests/V2LibraryTests/SerializationTests.cpp Normal file → Executable file
View file

View file

@@ -76,7 +76,7 @@
"name": "Fast R-CNN",
"url": "https://github.com/Microsoft/CNTK/wiki/Object-Detection-using-Fast-R-CNN",
"description": "Train object detection from images by adapting pre-trained classification models on arbitrarily sized regions of interest using ROI pooling.",
"language": ["BrainScript"],
"language": ["Python", "BrainScript"],
"type": ["Tutorial", "Recipe"]
},
{
@@ -204,7 +204,7 @@
"name": "VGG",
"url": "https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/VGG",
"description": "Deep CNN from University of Oxford. This was the winning model for the ILSVRC2014 localization task.",
"language": ["BrainScript"],
"language": ["Python", "BrainScript"],
"type": ["Recipe"]
},
{

File diffs are hidden because one or more lines are too long

0
bindings/csharp/Swig/cntk_cs.i Normal file → Executable file
View file

View file

@@ -292,3 +292,10 @@ class Value(cntk_py.Value):
'''
return self.shape[0]
def user_function(user_func):
'''
Wraps the passed user-defined Function into a composite Function
that represents the graph rooted at it.
'''
from . import as_composite
return as_composite(user_func)

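A usage sketch for the new helper, assuming a UserFunction subclass such as the MyPlus class from the extension tests further below:

from cntk.ops import input_variable, constant

i = input_variable(4, name='i_var')
m = user_function(MyPlus(i, constant(3)))   # wrap the UserFunction first
z = m + 1                                   # then compose it like any op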
View file

@@ -42,6 +42,11 @@ def depth_first_search(node, visitor):
pass
if visitor(node):
if node.is_parameter:
node = node.as_parameter()
elif node.is_constant:
node = node.as_constant()
accum.append(node)
visited.add(node)

View file

@@ -349,7 +349,10 @@ def Recurrence(over, go_backwards=False, initial_state=initial_state_default_or_
f_x_h_c = over(x, prev_state) # apply the recurrent over
# this returns a Function (x, (h_prev, c_prev)) -> (h, c)
h_c = f_x_h_c.outputs
replacements = { value_forward: value for (value_forward, value) in zip(list(_as_tuple(state_forward)), h_c) }
if type(state_forward) is tuple and len(state_forward) > 1:
replacements = { value_forward: value for (value_forward, value) in zip(list(_as_tuple(state_forward)), h_c) }
else:
replacements = {(state_forward,)[0] : h_c[0] }
f_x_h_c.replace_placeholders(replacements) # resolves state_forward := h_c
h = f_x_h_c.outputs[0] # 'h' is a Variable (the output of a Function that computed it)
if _trace_layers:

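The new branch exists because a single recurrent state is one Variable and must not be zipped element-wise like an (h, c) tuple. An equivalent, slightly more explicit sketch of the added logic:

# Zip pairwise only for a genuine multi-output state (e.g. LSTM h and c);
# map a single state Variable directly to the single new output.
if isinstance(state_forward, tuple) and len(state_forward) > 1:
    replacements = dict(zip(state_forward, h_c))
else:
    replacements = {state_forward: h_c[0]}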
View file

@@ -564,9 +564,6 @@ def adam_sgd(parameters, lr, momentum, unit_gain=default_unit_gain_value(),
<http://arxiv.org/abs/1412.6980>`_. International Conference for
Learning Representations, 2015.
'''
if not low_memory:
raise NotImplementedError('adam: low_memory=True currently required')
_verify_learning_rate_type(lr)
_verify_momentum_type(momentum)
_verify_momentum_type(variance_momentum)

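With the guard removed, passing low_memory=False now reaches the paper-exact Adam path added in Learner.cpp above. A hedged usage sketch, where z stands for an arbitrary model function:

from cntk.learner import adam_sgd, learning_rate_schedule, momentum_schedule, UnitType

lr = learning_rate_schedule(0.001, UnitType.sample)
mom = momentum_schedule(0.9)
# low_memory=False selects the original-paper Adam; the default (True)
# keeps the legacy FSAdaGrad behavior for backward compatibility.
learner = adam_sgd(z.parameters, lr, mom, low_memory=False)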
View file

@@ -647,11 +647,7 @@ class UserFunction(Function):
'''
def __init__(self, inputs, name=''):
# FIXME we need to save a reference here so that the function does not
# disappear
self.var_inputs = inputs
super(Function, self).__init__(inputs, name)
super(UserFunction, self).__init__(inputs, name)
# Memory management for user defined functions has to be controlled by
# the C++ side. For more information:
@@ -753,11 +749,22 @@ class UserFunction(Function):
outputs.extend(self.infer_outputs())
def infer_outputs(self):
raise NotImplementedError('infer_outputs has to be overridden')
'''
Returns a list of all output variables this user-defined function
outputs.
Output variables are created by
:meth:`~cntk.ops.functions.output_variable`.
'''
raise NotImplementedError('infer_outputs has to be overridden')
def op_name(self):
'''
Returns the operator name.
'''
return 'UserFunction'
@typemap
def load_model(filename, device=None):
'''

Просмотреть файл

@ -0,0 +1,29 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
"""
Unit tests for the cosine distance class.
"""
import numpy as np
import pytest
from .. import *
from ...axis import Axis
from ... import sequence
def test_cosine_distance():
a = np.reshape(np.arange(25.0, dtype = np.float32), (5,5))
b = np.reshape(np.arange(0, 5, dtype=np.float32), (1,5))
src = input_variable(shape=(5), dynamic_axes=[ Axis.default_batch_axis(), Axis("Seq")])
tgt = input_variable(shape=(5))
tgt_br = sequence.broadcast_as(tgt, src)
cos_seq = cosine_distance(src, tgt_br)
assert len(cos_seq.dynamic_axes)==2
assert cos_seq.dynamic_axes[1].name=="Seq"
val = cos_seq.eval({src:[a], tgt:[b]})
expected = [[ 1., 0.914659, 0.878459, 0.86155, 0.851852]]
assert np.allclose(val, expected)

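The expected similarities in test_cosine_distance can be checked directly with NumPy, comparing each row of a against the single target vector b:

import numpy as np

a = np.reshape(np.arange(25.0, dtype=np.float32), (5, 5))
b = np.arange(0, 5, dtype=np.float32)
cos = a.dot(b) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b))
print(np.round(cos, 6))   # [1. 0.914659 0.878459 0.86155 0.851852]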
View file

@@ -14,7 +14,7 @@ import pytest
from .ops_test_utils import unittest_helper, _test_unary_op, _test_binary_op, AA, I, precision, PRECISION_TO_TYPE, cntk_device
import cntk as C
from cntk.axis import Axis
from ...utils import sanitize_dtype_cntk
from ...utils import sanitize_dtype_cntk, one_hot
from .. import constant
EPS_IN_LOG = 1e-37 # 1e-37 is the highest guaranteed precision
@@ -395,3 +395,39 @@ def test_op_gather_derived_dynamic_axes_equivalence(device_id, precision):
res = z.eval({a: input_data1, b: input_data2})
expected_forward = [[[3.]]]
assert np.array_equal(res, expected_forward)
def test_op_gather_sparse(device_id):
from .. import sequence, times
input_sparse_indices = [[1, 3, 5], [2, 4]]
vocab_size = 6
input_data = one_hot(input_sparse_indices, vocab_size)
a = I(shape=(vocab_size,), is_sparse=True, name='a')
a_last = sequence.last(a)
a_last_dense = times(a_last, np.eye(vocab_size))
res = a_last_dense.eval({a : input_data})
assert np.array_equal(res, [[[0, 0, 0, 0, 0, 1]], [[0, 0, 0, 0, 1, 0]]])
a_last_2 = sequence.slice(a, -2, 0)
a_last_2_dense = times(a_last_2, np.eye(vocab_size))
res = a_last_2_dense.eval({a : input_data})
assert np.array_equal(res, [[[0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1]], [[0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 1, 0]]])
def test_op_scatter_sparse(device_id):
from .. import sequence, times
input_sparse_indices = [[1, 3, 5], [2, 4]]
vocab_size = 6
input_data = one_hot(input_sparse_indices, vocab_size)
a = I(shape=(vocab_size,), is_sparse=True, name='a')
a_last_scatter = sequence.scatter(sequence.last(a), sequence.is_first(a))
a_last_scatter_dense = times(a_last_scatter, np.eye(vocab_size))
res = a_last_scatter_dense.eval({a : input_data})
assert np.array_equal(res[0], np.asarray([[0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]))
assert np.array_equal(res[1], np.asarray([[0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]))

View file

@@ -46,7 +46,8 @@ def linear_layer(input_var, output_dim):
def dense_layer(input, output_dim, nonlinearity):
r = linear_layer(input, output_dim)
r = nonlinearity(r)
if isinstance(nonlinearity, UserFunction):
r = user_function(nonlinearity(r))
return r
def fully_connected_classifier_net(input, num_output_classes, hidden_layer_dim,

View file

@@ -25,7 +25,8 @@ class MyPlus(UserFunction):
self.backward_calls = 0
def infer_outputs(self):
return [output_variable(self.inputs[0].shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes)]
return [output_variable(self.inputs[0].shape,
self.inputs[0].dtype, self.inputs[0].dynamic_axes)]
def forward(self, arguments, device=None, outputs_to_retain=None):
assert len(self.inputs)==2
@@ -45,7 +46,7 @@ def test_ext_eval_1():
dim = 4
p = parameter(shape=(dim,), init=10, name='p')
i = input_variable(dim, needs_gradient=True, name='i_var')
m = MyPlus(i, constant(3))
m = user_function(MyPlus(i, constant(3)))
z = m+p
input_data = np.random.rand(dim)
@@ -56,7 +57,7 @@ def test_ext_eval_2_only_param():
dim = 4
p = parameter(shape=(dim,), init=10, name='p')
i = input_variable(dim, needs_gradient=True, name='i_var')
m = MyPlus(p, constant(3))
m = user_function(MyPlus(p, constant(3)))
# combine does not work
# z = combine([m.output])
z = m+i
@@ -68,7 +69,7 @@ def test_ext_eval_2_only_param():
def test_ext_eval_3_no_input():
dim = 4
p = parameter(shape=(dim,), init=10, name='p')
m = MyPlus(p, constant(3))
m = user_function(MyPlus(p, constant(3)))
z = m+0
result = z.eval()
@@ -79,7 +80,7 @@ def test_ext_eval_4_a_inside_graph():
dim = 4
p_init = 10
p = parameter(shape=(dim,), init=p_init, name='p')
m = MyPlus(p, constant(3))
m = user_function(MyPlus(p, constant(3)))
z = p * m
result = z.eval()
@@ -90,7 +91,7 @@ def test_ext_eval_4_b_inside_graph():
dim = 4
p_init = 10
p = parameter(shape=(dim,), init=p_init, name='p')
z = p * MyPlus(p, constant(3))
z = user_function(p * MyPlus(p, constant(3)))
result = z.eval()
# No batch dimension since we have no input
@@ -100,14 +101,14 @@ def test_ext_eval_5_times():
dim = 2
p_init = 10
p = parameter(shape=(dim,), init=p_init, name='p')
m = MyPlus(p, constant(3))
m = user_function(MyPlus(p, constant(3)))
z = times(m, parameter(shape=(2,50), init=2))
result = z.eval()
# No batch dimension since we have no input
assert np.allclose(result, ((p_init*np.ones_like(result))+3)*2*2)
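(The expected value works out as follows: each of the two components of `m` evaluates to p_init + 3 = 13, and the times with a (2,50) parameter of constant value 2 sums 13·2 over both components, giving 52 in every output slot, i.e. (p_init+3)*2*2.)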
def test_ext_clone():
def test_ext_eval_6_clone():
dim = 4
i = input_variable(dim, needs_gradient=True, name='i_var')
m = i + 3
@@ -115,20 +116,34 @@ def test_ext_clone():
p = parameter(shape=(dim,), init=10, name='p')
z = m + p
m_udf = MyPlus(i, constant(3))
m_udf = user_function(MyPlus(i, constant(3)))
z_clone = z.clone('share', {m : m_udf})
input_data = np.random.rand(dim)
result = z_clone.eval([input_data])
assert np.allclose(result[0][0], input_data+3+10)
def test_ext_eval_7_placeholder():
dim = 4
p = parameter(shape=(dim,), init=10, name='p')
i = input_variable(dim, needs_gradient=True, name='i_var')
pl = placeholder_variable()
m = user_function(MyPlus(pl, constant(3)))
z = m+p
z.replace_placeholder(i)
input_data = np.random.rand(dim)
result = z.eval([input_data])
assert np.allclose(result[0][0], input_data+3+10)
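(The pattern here: building the user function on a `placeholder_variable()` lets it be constructed before its actual input exists; `replace_placeholder(i)` later binds the real input variable.)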
def test_ext_train():
dim = 4
p = parameter(shape=(dim,), init=10)
i = input_variable(dim, needs_gradient=True, name='i_var')
m = MyPlus(i, constant(3))
z = m+p
# keeping m unwrapped since we need to access its member variables
z = user_function(m)+p
momentum_time_constant = momentum_as_time_constant_schedule(1100)
lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
@@ -172,7 +187,7 @@ def test_ext_backpropstate(payload):
p = parameter(shape=(dim,), init=10)
in1 = input_variable(dim, needs_gradient=True, name='i_var')
m = TestBackPropState(in1, payload)
m = user_function(TestBackPropState(in1, payload))
z = m+p
lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
@@ -223,6 +238,7 @@ def test_ext_lambdafunc():
m = LambdaFunc(k,
when=lambda arg: np.sum(arg)>1,
execute=cb.inc)
m = user_function(m)
z = m+0
momentum_time_constant = momentum_as_time_constant_schedule(1100)
@@ -262,7 +278,7 @@ def test_udf_plus_and_last():
x = input_variable(shape=(2,))
y = input_variable(shape=(2,), dynamic_axes=[Axis.default_batch_axis()])
func = as_composite(PlusAndLast(x, y))
func = user_function(PlusAndLast(x, y))
dt_precision = np.float32
operand1 = [AA([[1., 2.], [3., 4.]], dtype=dt_precision)]

View file

@@ -132,6 +132,26 @@ class Variable(VariableMixin, TensorOpsMixin, cntk_py.Variable):
super(Variable, self).__init__(shape, is_sparse, dtype, needs_gradient, name,
dynamic_axes)
@typemap
def as_parameter(self):
'''
Converts this instance into a :class:`Parameter`
'''
if not self.is_parameter:
raise TypeError('cannot be converted into a Parameter')
return cntk_py.Parameter(self)
@typemap
def as_constant(self):
'''
Converts this instance into a :class:`Constant`
'''
if not self.is_constant:
raise TypeError('cannot be converted into a Constant')
return cntk_py.Constant(self)
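A short usage sketch for the two converters above (the helper name `concretize` is made up for illustration):

    def concretize(var):
        # recover the concrete type from a generic Variable handle
        if var.is_parameter:
            return var.as_parameter()
        if var.is_constant:
            return var.as_constant()
        return var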
class Parameter(VariableMixin, TensorOpsMixin, cntk_py.Parameter):
'''

View file

@@ -20,15 +20,13 @@ def _graph_dict():
d['i1'] = input_variable(
shape=(2, 3), dynamic_axes=input_dynamic_axes, name='i1')
d['i2'] = input_variable(
shape=(2, 3), dynamic_axes=input_dynamic_axes, name='i2')
d['c1'] = constant(shape=(2, 3), value=6, name='c1')
d['p1'] = parameter(shape=(3, 2), init=7, name='p1')
d['p1'] = parameter(shape=(3, 2), name='p1')
d['op1'] = plus(d['i1'], d['i2'], name='op1')
d['op1'] = plus(d['i1'], d['c1'], name='op1')
d['op2'] = times(d['op1'], d['p1'], name='op2')
#d['slice'] = slice(d['i2'], Axis.default_dynamic_axis(), 0, 3)
#d['slice'] = slice(d['c1'], Axis.default_dynamic_axis(), 0, 3)
#label_sentence_start = sequence.first(raw_labels)
# no name
@@ -49,9 +47,9 @@ def _simple_dict():
d = {}
d['i1'] = input_variable(shape=(2, 3), name='i1')
d['i2'] = input_variable(shape=(2, 3), name='i2')
d['p1'] = parameter(shape=(3, 2), name='p1')
d['op1'] = plus(d['i1'], d['i2'], name='op1')
d['c1'] = constant(shape=(2, 3), value=6, name='c1')
d['p1'] = parameter(shape=(3, 2), init=7, name='p1')
d['op1'] = plus(d['i1'], d['c1'], name='op1')
d['op2'] = times(d['op1'], d['p1'], name='op2')
d['root'] = d['op2']
@@ -64,7 +62,7 @@ def _simple_dict():
def test_find_nodes():
d = _graph_dict()
for name in ['i1', 'i2', 'p1', 'op1', 'op2', 'past']:
for name in ['i1', 'c1', 'p1', 'op1', 'op2', 'past']:
n = find_all_with_name(d['root'], name)
assert len(n) == 1, name
assert n[0].name == name, name
@@ -89,6 +87,17 @@ def test_find_nodes():
assert find_by_name(d['root'], 'none') is None
def test_find_nodes_returning_proper_types():
d = _graph_dict()
c1 = find_by_name(d['root'], 'c1')
assert isinstance(c1, Constant)
assert np.allclose(c1.value, np.zeros((2,3))+6)
p1 = find_by_name(d['root'], 'p1')
assert isinstance(p1, Parameter)
assert np.allclose(p1.value, np.zeros((3,2))+7)
def test_plot():
d = _simple_dict()
@@ -107,4 +116,4 @@ def test_depth_first_search():
found = depth_first_search(d['op2'], lambda x:True)
found_names = [v.name for v in found]
assert found_names == ['op2', 'op1', 'i1', 'i2', 'p1']
assert found_names == ['op2', 'op1', 'i1', 'c1', 'p1']

View file

@@ -8,6 +8,10 @@ import numpy as np
import pytest
from ..layers import *
from ..blocks import init_default_or_glorot_uniform, Parameter, _INFERRED, Placeholder
from ..utils import _as_tuple
from ..ops import sigmoid, times, tanh, element_times, plus, combine, input_variable
from ..axis import Axis
def test_layers_name(device_id):
from cntk import placeholder_variable, combine
@@ -19,3 +23,45 @@ def test_layers_name(device_id):
q = Convolution((3,3), 3, name='conv33')(I)
assert(q.root_function.name == 'conv33')
def gru_cell(shape, init=init_default_or_glorot_uniform, name=''): # (x, (h,c))
shape = _as_tuple(shape)
if len(shape) != 1 :
raise ValueError("gru_cell: shape must be vectors (rank-1 tensors)")
# determine stacking dimensions
cell_shape_stacked = shape * 2 # (dim, dim): square weight matrices over the recurrent state
# parameters
Wz = Parameter(cell_shape_stacked, init = init, name='Wz')
Wr = Parameter(cell_shape_stacked, init = init, name='Wr')
Wh = Parameter(cell_shape_stacked, init = init, name='Wh')
Uz = Parameter( _INFERRED + shape, init = init, name = 'Uz')
Ur = Parameter( _INFERRED + shape, init = init, name = 'Ur')
Uh = Parameter( _INFERRED + shape, init = init, name = 'Uh')
def create_s_placeholder():
# we pass the known dimensions here, which makes dimension inference easier
return Placeholder(shape=shape, name='S') # previous state s
# parameters to model function
x = Placeholder(name='gru_block_arg')
prev_status = create_s_placeholder()
# formula of model function
Sn_1 = prev_status
z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'), name='z')
r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'), name='r')
h = tanh(times(x, Uh, name='x*Uh') + times(element_times(Sn_1, r, name='Sprev*r'), Wh), name='h')
s = plus(element_times((1-z), h, name='(1-z)*h'), element_times(z, Sn_1, name='z*SPrev'), name=name)
apply_x_s = combine([s])
apply_x_s.create_placeholder = create_s_placeholder
return apply_x_s
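For reference, the cell above implements the standard GRU update (s is the previous state, ∘ denotes the elementwise product):

    z  = sigmoid(x·Uz + s·Wz)
    r  = sigmoid(x·Ur + s·Wr)
    h  = tanh(x·Uh + (s ∘ r)·Wh)
    s' = (1 − z) ∘ h + z ∘ s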
def test_recurrence():
r = Recurrence(gru_cell(5), go_backwards=False)
a = input_variable(shape=(5,), dynamic_axes=[Axis.default_batch_axis(), Axis('Seq')])
x = np.reshape(np.arange(0,25, dtype=np.float32), (1,5,5))
rt = r(a).eval({a:x})
print(rt)

View file

@@ -34,6 +34,8 @@ def sanitize_precision(precision):
return np.float32
elif precision in [cntk_py.DataType_Double, 'double', 'float64', np.float64]:
return np.float64
elif precision in [cntk_py.DataType_Unknown]:
return None
else:
raise ValueError('precision value: "%s" is not supported' % precision)
@@ -112,6 +114,7 @@ def sanitize_input(arg, fallback_dtype=np.float32, reshape=None):
``arg`` is a number or NumPy array. Variable otherwise.
"""
from cntk.ops.functions import UserFunction
from cntk.ops.variables import Constant, Variable, Parameter
from cntk.ops.functions import Function
from cntk.ops import constant
@@ -483,6 +486,8 @@ def sanitize_dtype_cntk(dtype):
return cntk_py.DataType_Float
elif dtype == np.float64:
return cntk_py.DataType_Double
elif dtype == object:
return cntk_py.DataType_Unknown
else:
raise ValueError('data type "%s" is not supported' % dtype)
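Taken together, the two hunks in this file route the new DataType_Unknown through both directions of the dtype mapping. A quick sanity sketch (assuming both helpers are importable from `cntk.utils`, the module patched here):

    import numpy as np
    from cntk import cntk_py
    from cntk.utils import sanitize_dtype_cntk, sanitize_precision

    assert sanitize_dtype_cntk(np.float64) == cntk_py.DataType_Double
    assert sanitize_dtype_cntk(object) == cntk_py.DataType_Unknown
    assert sanitize_precision(cntk_py.DataType_Unknown) is None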

View file

@@ -48,7 +48,10 @@ tuple, strings, etc.)::
This can now be used as a normal operator like::
s = MySigmoid(prev_node)
s = user_function(MySigmoid(prev_node))
Note that we cannot pass the `UserFunction` instance directly into the graph:
it represents a primitive function, which first has to be wrapped through `user_function()`.
In case the operator is initialized with multiple inputs, ``forward()``'s
``arguments`` will be a list of those inputs::
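    # A sketch of such a multi-input operator (the class and node names are
    # made up here; the (state, result) pair is the usual forward() convention):
    class MyBinaryPlus(UserFunction):
        def __init__(self, arg1, arg2, name='MyBinaryPlus'):
            super(MyBinaryPlus, self).__init__([arg1, arg2], name=name)

        def infer_outputs(self):
            return [output_variable(self.inputs[0].shape,
                                    self.inputs[0].dtype, self.inputs[0].dynamic_axes)]

        def forward(self, arguments, device=None, outputs_to_retain=None):
            # with two declared inputs, `arguments` arrives as a list of two values
            return None, arguments[0] + arguments[1]

    s = user_function(MyBinaryPlus(node_a, node_b))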
@@ -133,7 +136,7 @@ interesting behavior, for instance::
debug_node = LambdaFunc(node,
when=lambda arg: np.var(arg)>1,
execute=lambda arg: pdb.set_trace())
# out = ... using debug_node ...
# out = ... using user_function(debug_node) ...
# ... training out
Now, if the variance of the input tensor exceeds 1, we will be dropped into the debugger.
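For instance (a sketch; the `+ 0` trick mirrors `test_ext_lambdafunc` earlier in this diff):

    out = user_function(debug_node) + 0  # identity pass-through that keeps the callback in the graph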