creating final version of ImageHandsOn
This commit is contained in:
@ -1153,6 +1153,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\U
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tutorials", "Tutorials", "{8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}"
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ImageHandsOn", "ImageHandsOn", "{2230BF3D-4317-4A3F-A743-DDD6160503F8}"
ProjectSection(SolutionItems) = preProject
Tutorials\ImageHandsOn\cifar10.cmf = Tutorials\ImageHandsOn\cifar10.cmf
Tutorials\ImageHandsOn\ = Tutorials\ImageHandsOn\
Tutorials\ImageHandsOn\ImageHandsOn.cntk = Tutorials\ImageHandsOn\ImageHandsOn.cntk
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@ -1598,5 +1607,6 @@ Global
{1C6E6C53-1AA7-4B69-913E-B97BB5A872CF} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
{CCC07E8E-F33A-4AF7-9F60-93E2AA61C75E} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
{9F999212-AFC5-4EAC-AA78-F7247D46C456} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{2230BF3D-4317-4A3F-A743-DDD6160503F8} = {8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}
@ -1,15 +1,15 @@
# Simple CIFAR-10 convnet, without and with BatchNormalization.
# CNTK Configuration File for training a simple CIFAR-10 convnet.
# During the hands-on tutorial, this will be fleshed out into a ResNet-20 model.
command = TrainConvNet:Eval
#command = TrainConvNetWithBN:Eval
makeMode = false ; traceLevel = 0 ; deviceId = "auto"
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
rootDir = "." ; dataDir = "$rootDir$" ; modelDir = "$rootDir$/Models"
modelPath = "$ModelDir$/cifar10.cmf"
modelPath = "$modelDir$/cifar10.cmf"
# Training without BN
# Training action for a convolutional network
TrainConvNet = {
action = "train"
@ -18,181 +18,19 @@ TrainConvNet = {
labelDim = 10
# basic model
model_1 (features) =
model (features) = {
featNorm = features - Constant (128)
l1 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
init = "gaussian", initValueScale = 0.0043} (featNorm)
p1 = MaxPoolingLayer {(3:3), stride = (2:2)} (l1)
l2 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
init = "gaussian", initValueScale = 1.414} (p1)
p2 = MaxPoolingLayer {(3:3), stride = (2:2)} (l2)
l3 = ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU,
init = "gaussian", initValueScale = 1.414} (p2)
p3 = MaxPoolingLayer {(3:3), stride = (2:2)} (l3)
d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (p3)
z = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1)
# with self-defined layer
MyLayer (x, dim, initValueScale) =
c = ConvolutionalLayer {dim, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = initValueScale} (x)
p = MaxPoolingLayer {(3:3), stride = (2:2)} (c)
model_f (features) =
featNorm = features - Constant (128)
p1 = MyLayer (featNorm, 32, 0.0043)
p2 = MyLayer (p1, 32, 1.414)
p3 = MyLayer (p2, 64, 1.414)
d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (p3)
d1_d = Dropout (d1)
z = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1_d)
// --- with BatchNorm
# with self-defined layer
MyLayerWithBN (x, dim, initValueScale) =
c = ConvolutionalLayer {dim, (5:5), pad = true, init = "gaussian", initValueScale = initValueScale} (x)
b = BatchNormalizationLayer {spatialRank = 2} (c)
r = ReLU (b)
p = MaxPoolingLayer {(3:3), stride = (2:2)} (r)
model_bn (features) =
featNorm = features - Constant (128)
p1 = MyLayerWithBN (featNorm, 32, 0.0043)
p2 = MyLayerWithBN (p1, 32, 1.414)
p3 = MyLayerWithBN (p2, 64, 1.414)
d1 = DenseLayer {64, init = "gaussian", initValueScale = 12} (p3)
d1_bnr = ReLU (BatchNormalizationLayer {} (d1))
d1_d = Dropout (d1_bnr)
z = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1_d)
// --- ResNet
MyConvBN (x, dim, initValueScale, stride) = # TO BE WRITTEN BY PARTICIPANT
c = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale} (x)
b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (c)
MyConvBNReLU (x, dim, initValueScale, stride) =
c = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale} (x)
b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (c)
r = ReLU (b)
ResNetNode (x, dim) =
c1 = MyConvBNReLU (x, dim, 7.07, 1)
X2 = MyConvBNReLU (c1, dim, 7.07, 1) # wrong
c2 = MyConvBN (c1, dim, 7.07, 1)
r = ReLU (x + c2)
}.r # change to X2
ResNetResample (x, dim) =
x2 = MaxPoolingLayer {(1:1), stride = (2:2)} (x) # sub-sample by 2
pad = ConstantTensor (0, (1:1:dim/2)) # pad with zeroes
p = Splice ((x2 : pad), axis = 3)
ResNetIncNode (x, dim) =
c1 = MyConvBNReLU (x, dim, 7.07, 2)
c2 = MyConvBN (c1, dim, 7.07, 1)
px = ResNetResample (x, dim)
b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (px)
r = ReLU (b + c2) # ReLU between C1 and C2 and after summation
# these are the ones the participants are given upfront
ResNetNode1 (x, dim) =
c1 = MyConvBNReLU (x, dim, 7.07, 1)
c2 = MyConvBNReLU (c1, dim, 7.07, 1)
ResNetIncNode1 (x, dim) =
px = ResNetResample (x, dim) # sub-sample but double the dims
b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (px)
r = ReLU (b)
# this must be written
ResNetNodeStack (x, dim, L) =
if L == 0 then x
else ResNetNode (ResNetNodeStack (x, dim, L-1), dim)
model (features) =
conv1 = MyConvBNReLU (features, 16, 0.26, 1)
#rn1 = ResNetNode1 (ResNetNode1 (ResNetNode1 (conv1, 16), 16), 16)
rn1 = ResNetNodeStack (conv1, 16, 3) # 3 means 3 such nodes
rn2_1 = ResNetIncNode1 (rn1, 32)
#rn2 = ResNetNode1 (ResNetNode1 (rn2_1, 32), 32)
rn2 = ResNetNodeStack (rn2_1, 32, 2)
rn3_1 = ResNetIncNode1 (rn2, 64)
#rn3 = ResNetNode1 (ResNetNode1 (rn3_1, 64), 64)
rn3 = ResNetNodeStack (rn3_1, 64, 2)
pool = AveragePoolingLayer {(8:8)} (rn3)
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
// --- ResNet, functional style
MyConvBNLayer {dim, initValueScale, stride} =
# note: this layer uses a (3:3) kernel, whereas the MyLayer/MyLayerWithBN macros above use (5:5)
C = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale}
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
apply (x) = B(C(x))
ResNetLayer {dim, initValueScale} =
C1 = MyConvBNLayer {dim, initValueScale, 1} # first convolution layer
C2 = MyConvBNLayer {dim, initValueScale, 1} # second convolution layer
#B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
# ^^ Note: Adding an extra BN to 'x' trains slightly better.
apply (x) = ReLU (x + C2(ReLU(C1(x)))) # ReLU between C1 and C2 and after summation
ResNetIncLayer {dim, initValueScale} =
# first branch. This doubles the #channels but halves the image size
C1 = MyConvBNLayer {dim, initValueScale, 2} # first convolution layer, stride = 2
C2 = MyConvBNLayer {dim, initValueScale, 1} # second convolution layer
# second branch:
# sub-sample spatially by a factor of 2
DownSamplingLayer {stride} = MaxPoolingLayer {(1:1), stride = stride}
# append dim/2 zero output channels
pad = ConstantTensor (0, (1:1:dim/2)) # the 1s will broadcast to image size
P(x) = Splice ((DownSamplingLayer {(2:2)} (x) : pad), axis = 3)
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
# layer sums both branches and rectifies the result
apply (x) = ReLU (B(P(x)) + C2(ReLU(C1(x)))) # ReLU between C1 and C2 and after summation
model_resNet (features) =
conv1 = MyConvBNLayer {16, 0.26, 1} (features)
rl1 = ReLU (conv1)
rn1 = LayerStack {3, _ => ResNetLayer {16, 7.07}} (rl1)
rn2_1 = ResNetIncLayer {32, 7.07} (rn1)
rn2 = LayerStack {2, _ => ResNetLayer {32, 7.07}} (rn2_1)
rn3_1 = ResNetIncLayer {64, 7.07} (rn2)
rn3 = LayerStack {2, _ => ResNetLayer {64, 7.07}} (rn3_1)
pool = AveragePoolingLayer {(8:8)} (rn3)
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
l1 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
init="gaussian", initValueScale=0.0043} (featNorm)
p1 = MaxPoolingLayer {(3:3), stride=(2:2)} (l1)
l2 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
init="gaussian", initValueScale=1.414} (p1)
p2 = MaxPoolingLayer {(3:3), stride=(2:2)} (l2)
l3 = ConvolutionalLayer {64, (5:5), pad=true, activation=ReLU,
init="gaussian", initValueScale=1.414} (p2)
p3 = MaxPoolingLayer {(3:3), stride=(2:2)} (l3)
d1 = DenseLayer {64, activation=ReLU, init="gaussian", initValueScale=12} (p3)
z = LinearLayer {10, init="gaussian", initValueScale=1.5} (d1)
# inputs
@ -205,54 +43,37 @@ TrainConvNet = {
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ErrorPrediction (labels, z)
top5Errs = ErrorPrediction (labels, z, topN=5) # only used in Eval action
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs) # top5Errs only used in Eval
evaluationNodes = (errs)
outputNodes = (z)
SGD = {
epochSize = 50000
# without BatchNormalization:
#maxEpochs = 30 ; minibatchSize = 64
#learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
#momentumAsTimeConstant = 600*20:6400
#L2RegWeight = 0.03
#dropoutRate = 0*5:0.5 ##### added
maxEpochs = 30 ; minibatchSize = 64
learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
momentumAsTimeConstant = 600*20:6400
L2RegWeight = 0.03
# with BatchNormalization:
#maxEpochs = 30 ; minibatchSize = 64
#learningRatesPerSample = 0.00046875*7:0.00015625*10:0.000046875*10:0.000015625
#momentumAsTimeConstant = 0
#L2RegWeight = 0
#dropoutRate = 0*5:0.5 ##### added
# ResNet
maxEpochs = 160 ; minibatchSize = 128
learningRatesPerSample = 0.0078125*80:0.00078125*40:0.000078125
momentumAsTimeConstant = 1200
L2RegWeight = 0.0001
firstMBsToShowResult = 10 ; numMBsToShowResult = 500
firstMBsToShowResult = 10 ; numMBsToShowResult = 100
reader = {
verbosity = 0
randomize = true
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/cifar-10-batches-py/train_map.txt"
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
labels = { labelDim = 10 }
labels = { labelDim = 10 }
@ -262,19 +83,18 @@ TrainConvNet = {
Eval = {
action = "eval"
minibatchSize = 16
evalNodeNames = errs:top5Errs # also test top-5 error rate
evalNodeNames = errs
reader = {
verbosity = 0
randomize = true
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/cifar-10-batches-py/test_map.txt"
file = "$dataDir$/cifar-10-batches-py/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
labels = { labelDim = 10 }
labels = { labelDim = 10 }
Binary file not shown.
Reference in new issue