From 98d5b92ef4e6597d4ba29a8ba79f9d53234366cb Mon Sep 17 00:00:00 2001
From: Alexey Kamenev
Date: Tue, 5 Jan 2016 20:55:00 -0800
Subject: [PATCH] Updated samples.

Sample updates: the CIFAR-10 configs now use RootDir/ConfigDir/DataDir/
OutputDir/ModelDir variables instead of WorkDir and enable prefetch;
the NDL/MEL scripts pass an explicit imageLayout = "cudnn" to
Convolution, MaxPooling and BatchNormalization nodes; the two
ConvBNReLULayer variants in Macros.ndl are merged into a single macro;
the MNIST and CIFAR-10 test actions use minibatchSize = 16; and NDL
keyword matching for ImageParameter is fixed in
NetworkDescriptionLanguage.cpp.
---
 .../Image/MNIST/Config/01_OneHidden.config    |  3 ++
 .../Image/MNIST/Config/02_Convolution.config  |  5 ++
 .../Miscellaneous/CIFAR-10/01_Conv.config     | 30 +++++++----
 .../Miscellaneous/CIFAR-10/01_Convolution.ndl | 18 +++----
 .../CIFAR-10/02_BatchNormConv.config          | 40 ++++++++------
 .../CIFAR-10/02_BatchNormConv.mel             |  8 +--
 .../CIFAR-10/02_BatchNormConv.ndl             | 25 ++++-----
 .../Image/Miscellaneous/CIFAR-10/Macros.ndl   | 54 ++++++++-----------
 .../Image/Miscellaneous/CIFAR-10/readme.txt   |  2 +-
 Source/CNTK/NetworkDescriptionLanguage.cpp    |  2 +-
 10 files changed, 97 insertions(+), 90 deletions(-)

diff --git a/Examples/Image/MNIST/Config/01_OneHidden.config b/Examples/Image/MNIST/Config/01_OneHidden.config
index af3c9aea7..eb2593f56 100644
--- a/Examples/Image/MNIST/Config/01_OneHidden.config
+++ b/Examples/Image/MNIST/Config/01_OneHidden.config
@@ -19,6 +19,8 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
 # comment the following line to write logs to the console
 stderr = "$OutputDir$/01_OneHidden_out"
+traceLevel=1
+numMBsToShowResult=500
 
 #######################################
 # TRAINING CONFIG #
 #######################################
@@ -63,6 +65,7 @@ train = [
 
 test = [
     action = "test"
+    minibatchSize = 16
 
     NDLNetworkBuilder=[
         networkDescription = "$ConfigDir$/01_OneHidden.ndl"

diff --git a/Examples/Image/MNIST/Config/02_Convolution.config b/Examples/Image/MNIST/Config/02_Convolution.config
index 266785001..f561ced36 100644
--- a/Examples/Image/MNIST/Config/02_Convolution.config
+++ b/Examples/Image/MNIST/Config/02_Convolution.config
@@ -19,6 +19,10 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
 # comment the following line to write logs to the console
 stderr = "$OutputDir$/02_Convolution_out"
+traceLevel=1
+numMBsToShowResult=500
+
+prefetch=true
 
 #######################################
 # TRAINING CONFIG #
 #######################################
@@ -63,6 +67,7 @@ train = [
 
 test = [
     action = test
+    minibatchSize = 16
 
     NDLNetworkBuilder = [
         networkDescription = "$ConfigDir$/02_Convolution.ndl"

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/01_Conv.config b/Examples/Image/Miscellaneous/CIFAR-10/01_Conv.config
index bd4bebdb4..1909c6d0e 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/01_Conv.config
+++ b/Examples/Image/Miscellaneous/CIFAR-10/01_Conv.config
@@ -1,20 +1,28 @@
-WorkDir=.
-ModelDir=$WorkDir$/_out/$ConfigName$
-stderr=$WorkDir$/_out/$ConfigName$
+RootDir = "."
 
-ndlMacros=$WorkDir$/Macros.ndl
+ConfigDir = "$RootDir$"
+DataDir = "$RootDir$"
+OutputDir = "$RootDir$/Output"
+ModelDir = "$OutputDir$/Models"
+
+ndlMacros=$ConfigDir$/Macros.ndl
 
 precision=float
 deviceId=Auto
+prefetch=true
 
 command=Train:Test
 
+stderr=$OutputDir$/01_Conv
+traceLevel=1
+numMBsToShowResult=500
+
 Train=[
     action=train
     modelPath=$ModelDir$/01_Convolution
 
     NDLNetworkBuilder=[
-        networkDescription=$WorkDir$/01_Convolution.ndl
+        networkDescription=$ConfigDir$/01_Convolution.ndl
     ]
 
     SGD=[
@@ -29,7 +37,7 @@ Train=[
 
     reader=[
         readerType=UCIFastReader
-        file=$WorkDir$/Train.txt
+        file=$DataDir$/Train.txt
         randomize=None
         features=[
             dim=3072
@@ -39,7 +47,7 @@ Train=[
             dim=1
             start=0
             labelDim=10
-            labelMappingFile=$WorkDir$/labelsmap.txt
+            labelMappingFile=$DataDir$/labelsmap.txt
         ]
     ]
 ]
@@ -48,15 +56,15 @@
 Test=[
     action=test
     modelPath=$ModelDir$/01_Convolution
     # Set minibatch size for testing.
-    minibatchSize=128
+    minibatchSize=16
 
     NDLNetworkBuilder=[
-        networkDescription=$WorkDir$/01_Convolution.ndl
+        networkDescription=$ConfigDir$/01_Convolution.ndl
    ]
     reader=[
         readerType=UCIFastReader
-        file=$WorkDir$/Test.txt
+        file=$DataDir$/Test.txt
         randomize=None
         features=[
             dim=3072
@@ -66,7 +74,7 @@ Test=[
             dim=1
             start=0
             labelDim=10
-            labelMappingFile=$WorkDir$/labelsmap.txt
+            labelMappingFile=$DataDir$/labelsmap.txt
         ]
     ]
 ]

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl b/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl
index 3462a335a..8be106761 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl
+++ b/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl
@@ -28,50 +28,50 @@ DNN=[
     # conv1
     kW1 = 5
     kH1 = 5
-    cMap1 = 36
+    cMap1 = 32
     hStride1 = 1
     vStride1 = 1
     # weight[cMap1, kW1 * kH1 * ImageC]
-    conv1_act = ConvReLULayer(featScaled, cMap1, ImageC, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
+    conv1_act = ConvReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
 
     # pool1
     pool1W = 3
     pool1H = 3
     pool1hStride = 2
     pool1vStride = 2
-    pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
+    pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
 
     # conv2
     kW2 = 5
     kH2 = 5
-    cMap2 = 28
+    cMap2 = 32
     hStride2 = 1
     vStride2 = 1
     # weight[cMap2, kW2 * kH2 * cMap1]
-    conv2_act = ConvReLULayer(pool1, cMap2, cMap1, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
+    conv2_act = ConvReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
 
     # pool2
     pool2W = 3
     pool2H = 3
     pool2hStride = 2
     pool2vStride = 2
-    pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
+    pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
 
     # conv3
     kW3 = 5
     kH3 = 5
-    cMap3 = 68
+    cMap3 = 64
     hStride3 = 1
     vStride3 = 1
     # weight[cMap3, kW3 * kH3 * cMap2]
-    conv3_act = ConvReLULayer(pool2, cMap3, cMap2, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
+    conv3_act = ConvReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
 
     # pool3
     pool3W = 3
     pool3H = 3
     pool3hStride = 2
     pool3vStride = 2
-    pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride)
+    pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
 
     hiddenDim = 64
     h1 = DNNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.config b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.config
index a7393eb0e..b77507a3e 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.config
+++ b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.config
@@ -1,37 +1,43 @@
-WorkDir=.
-ModelDir=$WorkDir$/_out/$ConfigName$
-stderr=$WorkDir$/_out/$ConfigName$
+RootDir = "."
 
-ndlMacros=$WorkDir$/Macros.ndl
+ConfigDir = "$RootDir$"
+DataDir = "$RootDir$"
+OutputDir = "$RootDir$/Output"
+ModelDir = "$OutputDir$/Models"
+
+ndlMacros=$ConfigDir$/Macros.ndl
 
 precision=float
 deviceId=Auto
 prefetch=true
-parallelTrain=false
 
 command=Train:AddBNEval:Test
 
+stderr=$OutputDir$/02_BatchNormConv
+traceLevel=1
+numMBsToShowResult=500
+
 Train=[
     action=train
     modelPath=$ModelDir$/02_BatchNormConv
 
     NDLNetworkBuilder=[
-        networkDescription=$WorkDir$/02_BatchNormConv.ndl
+        networkDescription=$ConfigDir$/02_BatchNormConv.ndl
     ]
 
     SGD=[
         epochSize=49984
         minibatchSize=64
-        learningRatesPerMB=0.03*7:0.01*8:0.003
-        #momentumPerMB=0.9*10:0.99
-        maxEpochs=10
-        #L2RegWeight=0.03
+        learningRatesPerMB=0.03*7:0.01
+        momentumPerMB=0
+        maxEpochs=1
+        L2RegWeight=0
         dropoutRate=0*1:0.5
     ]
 
     reader=[
         readerType=UCIFastReader
-        file=$WorkDir$/Train.txt
+        file=$DataDir$/Train.txt
         randomize=None
         features=[
             dim=3072
@@ -41,7 +47,7 @@ Train=[
             dim=1
             start=0
             labelDim=10
-            labelMappingFile=$WorkDir$/labelsmap.txt
+            labelMappingFile=$DataDir$/labelsmap.txt
         ]
     ]
 ]
@@ -50,22 +56,22 @@
 AddBNEval=[
     action=edit
     CurModel=$ModelDir$/02_BatchNormConv
     NewModel=$ModelDir$/02_BatchNormConv.Eval
-    editPath=$WorkDir$/02_BatchNormConv.mel
+    editPath=$ConfigDir$/02_BatchNormConv.mel
 ]
 
 Test=[
     action=test
     modelPath=$ModelDir$/02_BatchNormConv.Eval
     # Set minibatch size for testing.
-    minibatchSize=128
+    minibatchSize=16
 
     NDLNetworkBuilder=[
-        networkDescription=$WorkDir$/02_BatchNormConv.ndl
+        networkDescription=$ConfigDir$/02_BatchNormConv.ndl
     ]
     reader=[
         readerType=UCIFastReader
-        file=$WorkDir$/Test.txt
+        file=$DataDir$/Test.txt
         randomize=None
         features=[
             dim=3072
@@ -75,7 +81,7 @@ Test=[
             dim=1
             start=0
             labelDim=10
-            labelMappingFile=$WorkDir$/labelsmap.txt
+            labelMappingFile=$DataDir$/labelsmap.txt
         ]
     ]
 ]

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.mel b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.mel
index ca23bbbca..55a75fbc9 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.mel
+++ b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.mel
@@ -1,16 +1,16 @@
 m=LoadModel($CurModel$, format=cntk)
 SetDefaultModel(m)
 
-ibn_e = BatchNormalization(featScaled, isc, ib, im, iisd, eval = true, spatial = true)
+ibn_e = BatchNormalization(featScaled, isc, ib, im, iisd, eval = true, spatial = true, imageLayout = "cudnn")
 SetNodeInput(conv1.c, 1, ibn_e)
 
-conv2.bn_e = BatchNormalization(pool1, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true)
+conv2.bn_e = BatchNormalization(pool1, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true, imageLayout = "cudnn")
 SetNodeInput(conv2.c, 1, conv2.bn_e)
 
-conv3.bn_e = BatchNormalization(pool2, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true)
+conv3.bn_e = BatchNormalization(pool2, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true, imageLayout = "cudnn")
 SetNodeInput(conv3.c, 1, conv3.bn_e)
 
-h1.bn_e = BatchNormalization(pool3, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false)
+h1.bn_e = BatchNormalization(pool3, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false, imageLayout = "cudnn")
 SetNodeInput(h1.t, 1, h1.bn_e)
 
 SaveModel(m, $NewModel$, format=cntk)
\ No newline at end of file

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.ndl b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.ndl
index 8d89b0a7b..4b39fb52b 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.ndl
+++ b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.ndl
@@ -7,8 +7,8 @@ ndlMnistMacros = [
     ImageC = 3
     LabelDim = 10
 
-    features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
-    featOffs = Const(128, rows = 3072)
+    features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
+    featOffs = Const(128)
     featScaled = Minus(features, featOffs)
     labels = Input(LabelDim, tag = label)
 
@@ -18,6 +18,9 @@ ndlMnistMacros = [
     conv2BValue = 0
     conv3WScale = 1.414
     conv3BValue = 0
+
+    scScale = 0.03
+
     fc1WScale = 12
     fc1BValue = 0
     fc2WScale = 1.5
@@ -25,12 +28,6 @@ ndlMnistMacros = [
 ]
 
 DNN=[
-    ib = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
-    isc = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
-    im = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
-    iisd = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
-    ibn = BatchNormalization(featScaled, isc, ib, im, iisd, eval = false, spatial = true)
-
     # conv1
     kW1 = 5
     kH1 = 5
@@ -38,14 +35,14 @@ DNN=[
     hStride1 = 1
     vStride1 = 1
     # weight[cMap1, kW1 * kH1 * ImageC]
-    conv1 = ConvReLULayer(ibn, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
+    conv1 = ConvBNReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue, scScale)
 
     # pool1
     pool1W = 3
     pool1H = 3
     pool1hStride = 2
     pool1vStride = 2
-    pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride)
+    pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
 
     # conv2
     kW2 = 5
@@ -54,14 +51,14 @@ DNN=[
     hStride2 = 1
     vStride2 = 1
     # weight[cMap2, kW2 * kH2 * cMap1]
-    conv2 = ConvBNReLULayer(pool1, cMap1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
+    conv2 = ConvBNReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue, scScale)
 
     # pool2
     pool2W = 3
     pool2H = 3
     pool2hStride = 2
     pool2vStride = 2
-    pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride)
+    pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
 
     # conv3
     kW3 = 5
@@ -70,14 +67,14 @@ DNN=[
     hStride3 = 1
     vStride3 = 1
     # weight[cMap3, kW3 * kH3 * cMap2]
-    conv3 = ConvBNReLULayer(pool2, cMap2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
+    conv3 = ConvBNReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue, scScale)
 
     # pool3
     pool3W = 3
     pool3H = 3
     pool3hStride = 2
     pool3vStride = 2
-    pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride)
+    pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
 
     hiddenDim = 64
     h1 = DnnBNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl b/Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
index a39fa0154..278e84569 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
+++ b/Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
@@ -1,83 +1,71 @@
-ConvReLULayer(inp, outMap, inMap, kW, kH, hStride, vStride, wScale, bValue)
+ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
 {
-    W = ImageParameter(kW, kH, inMap, init = Gaussian, initValueScale = wScale, imageLayout = "cudnn")
+    W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
     b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
     c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
     p = Plus(c, b);
     y = RectifiedLinear(p);
 }
 
-ConvBNReLULayer(inp, inMap, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
-{
-    W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
-    b = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
-    sc = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
-    m = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
-    isd = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
-    bn = BatchNormalization(inp, sc, b, m, isd, eval = false, spatial = true)
-    c = Convolution(W, bn, kW, kH, outMap, hStride, vStride, zeroPadding = true)
-    y = RectifiedLinear(c);
-}
-
-ConvBNReLULayer2(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
+ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scScale)
 {
     W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
     b = Parameter(outMap, 1, init = fixedValue, value = bValue)
-    sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
+    sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
     m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
     isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
-    c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
-    bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0)
+    c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
+    bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
     y = RectifiedLinear(bn);
 }
 
-ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
+ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
 {
     W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
     b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
-    sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
+    sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
     m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
     isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
-    c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
-    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
+    c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
+    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
     y1 = RectifiedLinear(bn1);
 
     W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
     b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
-    sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
+    sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
     m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
     isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
-    c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
-    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
+    c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
+    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
     p = Plus(bn2, inp)
     y2 = RectifiedLinear(p);
 }
 
-ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
+ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, Wproj)
 {
     W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
     b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
-    sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
+    sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
     m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
     isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
-    c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
-    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
+    c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
+    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
     y1 = RectifiedLinear(bn1);
 
     W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
     b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
-    sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
+    sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
     m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
     isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
-    c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
-    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
+    c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
+    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
 
-    cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
+    cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
     p = Plus(bn2, cproj)
     y2 = RectifiedLinear(p);
 }

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/readme.txt b/Examples/Image/Miscellaneous/CIFAR-10/readme.txt
index 433678dba..ea57413fc 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/readme.txt
+++ b/Examples/Image/Miscellaneous/CIFAR-10/readme.txt
@@ -15,7 +15,7 @@ Short description of the network:
 01_Convolution.ndl is a convolutional network which has 3 convolutional and 3 max pooling layers and resembles the network described here:
 https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-80sec.cfg
 (main differences are usage of max pooling layers everywhere rather than mix of max and average pooling, as well as dropout in fully-connected layer).
-The network produces 22% of error after training for about 4 minutes on GPU.
+The network produces 21% error after training for about 3 minutes on a GPU.
 
 To run the sample, navigate to this folder and run the following command:
 configFile=01_Conv.config configName=01_Conv

diff --git a/Source/CNTK/NetworkDescriptionLanguage.cpp b/Source/CNTK/NetworkDescriptionLanguage.cpp
index 3b6976d56..5beb57708 100644
--- a/Source/CNTK/NetworkDescriptionLanguage.cpp
+++ b/Source/CNTK/NetworkDescriptionLanguage.cpp
@@ -152,7 +152,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
         ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(SparseInputValue), L"SparseInput"))
         ret = true;
-    else if (EqualInsensitive(nodeType, OperationNameOf(LearnableParameter), L"Parameter"), L"ImageParameter")
+    else if (EqualInsensitive(nodeType, OperationNameOf(LearnableParameter), L"Parameter"))
         ret = true;
     else if (EqualInsensitive(nodeType, L"ImageParameter"))
         ret = true;
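-- 
A short reference sketch for reviewers, not part of the patch itself:
after this change both CIFAR-10 networks build their convolution blocks
through the single ConvBNReLULayer macro, whose arguments are
(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scScale),
where inWCount is the flattened kernel size kW * kH * inMap (5 * 5 * 3 = 75
for the first layer, hence the literal 75 in the hunks above; 5 * 5 * 32 = 800
for the later layers). A minimal NDL fragment in the style of
02_BatchNormConv.ndl, with illustrative parameter values:

    # 32x32x3 CIFAR-10 image in cuDNN layout, with a fixed mean offset removed
    features   = ImageInput(32, 32, 3, tag = feature, imageLayout = "cudnn")
    featOffs   = Const(128)
    featScaled = Minus(features, featOffs)
    # conv + batch-norm + ReLU: 32 output maps, 5x5 kernel, stride 1,
    # inWCount = 5 * 5 * 3 = 75; wScale/scScale values are illustrative only
    conv1 = ConvBNReLULayer(featScaled, 32, 75, 5, 5, 1, 1, 1.414, 0, 0.03)
    # 3x3 max pooling with stride 2, also in cuDNN layout
    pool1 = MaxPooling(conv1, 3, 3, 2, 2, imageLayout = "cudnn")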