creating final version of ImageHandsOn

2016-08-13 11:21:37 -07:00 · 2016-08-13 11:21:37 -07:00 · c7df94d68c
--- a/CNTK.sln
+++ b/CNTK.sln
@@ -1153,6 +1153,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\U
 		{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
 	EndProjectSection
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tutorials", "Tutorials", "{8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ImageHandsOn", "ImageHandsOn", "{2230BF3D-4317-4A3F-A743-DDD6160503F8}"
+	ProjectSection(SolutionItems) = preProject
+		Tutorials\ImageHandsOn\cifar10.cmf = Tutorials\ImageHandsOn\cifar10.cmf
+		Tutorials\ImageHandsOn\CifarConverter.py = Tutorials\ImageHandsOn\CifarConverter.py
+		Tutorials\ImageHandsOn\ImageHandsOn.cntk = Tutorials\ImageHandsOn\ImageHandsOn.cntk
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@@ -1598,5 +1607,6 @@ Global
 		{1C6E6C53-1AA7-4B69-913E-B97BB5A872CF} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
 		{CCC07E8E-F33A-4AF7-9F60-93E2AA61C75E} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
 		{9F999212-AFC5-4EAC-AA78-F7247D46C456} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
+		{2230BF3D-4317-4A3F-A743-DDD6160503F8} = {8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}
 	EndGlobalSection
 EndGlobal
--- a/Tutorials/ImageHandsOn/ImageHandsOn.cntk
+++ b/Tutorials/ImageHandsOn/ImageHandsOn.cntk
@@ -1,15 +1,15 @@
-# Simple CIFAR-10 convnet, without and with BatchNormalization.
+# CNTK Configuration File for training a simple CIFAR-10 convnet.
+# During the hands-on tutorial, this will be fleshed out into a ResNet-20 model.

 command = TrainConvNet:Eval
-#command = TrainConvNetWithBN:Eval

 makeMode = false ; traceLevel = 0 ; deviceId = "auto"

-RootDir = "." ; DataDir  = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
+rootDir = "." ; dataDir  = "$rootDir$" ; modelDir = "$rootDir$/Models"

-modelPath = "$ModelDir$/cifar10.cmf"
+modelPath = "$modelDir$/cifar10.cmf"

-# Training without BN
+# Training action for a convolutional network
 TrainConvNet = {
    action = "train"

@@ -18,181 +18,19 @@ TrainConvNet = {
        labelDim = 10

        # basic model
-        model_1 (features) =
-        {
+        model (features) = {
            featNorm = features - Constant (128)
-            l1 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
-                                     init = "gaussian", initValueScale = 0.0043} (featNorm)
-            p1 = MaxPoolingLayer {(3:3), stride = (2:2)} (l1)
-            l2 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
-                                     init = "gaussian", initValueScale = 1.414} (p1)
-            p2 = MaxPoolingLayer {(3:3), stride = (2:2)} (l2)
-            l3 = ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU,
-                                     init = "gaussian", initValueScale = 1.414} (p2)
-            p3 = MaxPoolingLayer {(3:3), stride = (2:2)} (l3)
-            d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (p3)
-            z  = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1)
-        }.z
-
-        # with self-defined layer
-        MyLayer (x, dim, initValueScale) =
-        {
-            c = ConvolutionalLayer {dim, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = initValueScale} (x)
-            p = MaxPoolingLayer {(3:3), stride = (2:2)} (c)
-        }.p
-        model_f (features) =
-        {
-            featNorm = features - Constant (128)
-            p1 = MyLayer (featNorm, 32, 0.0043)
-            p2 = MyLayer (p1,       32, 1.414)
-            p3 = MyLayer (p2,       64, 1.414)
-            d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (p3)
-            d1_d = Dropout (d1)
-            z  = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1_d)
-        }.z
-
-        // --- with BatchNorm
-        # with self-defined layer
-        MyLayerWithBN (x, dim, initValueScale) =
-        {
-            c = ConvolutionalLayer {dim, (5:5), pad = true, init = "gaussian", initValueScale = initValueScale} (x)
-            b = BatchNormalizationLayer {spatialRank = 2} (c)
-            r = ReLU (b)
-            p = MaxPoolingLayer {(3:3), stride = (2:2)} (r)
-        }.p
-        model_bn (features) =
-        {
-            featNorm = features - Constant (128)
-            p1 = MyLayerWithBN (featNorm, 32, 0.0043)
-            p2 = MyLayerWithBN (p1,       32, 1.414)
-            p3 = MyLayerWithBN (p2,       64, 1.414)
-            d1 = DenseLayer {64, init = "gaussian", initValueScale = 12} (p3)
-            d1_bnr = ReLU (BatchNormalizationLayer {} (d1))
-            d1_d = Dropout (d1_bnr)
-            z  = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1_d)
-        }.z
-
-        // --- ResNet
-        MyConvBN (x, dim, initValueScale, stride) =  # TO BE WRITTEN BY PARTICIPANT
-        {
-            c = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale} (x)
-            b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (c)
-        }.b
-        MyConvBNReLU (x, dim, initValueScale, stride) =
-        {
-            c = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale} (x)
-            b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (c)
-            r = ReLU (b)
-        }.r
-        ResNetNode (x, dim) =
-        {
-            c1 = MyConvBNReLU (x,  dim, 7.07, 1)
-            X2 = MyConvBNReLU (c1, dim, 7.07, 1)     # wrong
-            c2 = MyConvBN     (c1, dim, 7.07, 1)
-            r = ReLU (x + c2)
-        }.r   # change to X2
-        ResNetResample (x, dim) =
-        {
-            x2 = MaxPoolingLayer {(1:1), stride = (2:2)} (x)  # sub-sample by 2
-            pad = ConstantTensor (0, (1:1:dim/2))             # pad with zeroes
-            p = Splice ((x2 : pad), axis = 3)
-        }.p
-        ResNetIncNode (x, dim) =
-        {
-            c1 = MyConvBNReLU (x,  dim, 7.07, 2)
-            c2 = MyConvBN     (c1, dim, 7.07, 1)
-
-            px = ResNetResample (x, dim)
-            b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (px)
-
-            r = ReLU (b + c2)  # ReLU between C1 and C2 and after summation
-        }.r
-
-        # these are the ones the participants are given upfront
-        ResNetNode1 (x, dim) =
-        {
-            c1 = MyConvBNReLU (x,  dim, 7.07, 1)
-            c2 = MyConvBNReLU (c1, dim, 7.07, 1)
-        }.c2
-        ResNetIncNode1 (x, dim) =
-        {
-            px = ResNetResample (x, dim)  # sub-sample but double the dims
-            b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (px)
-            r = ReLU (b)
-        }.r
-
-        # this must be written
-        ResNetNodeStack (x, dim, L) =
-            if L == 0 then x
-            else ResNetNode (ResNetNodeStack (x, dim, L-1), dim)
-
-        model (features) =
-        {
-            conv1 = MyConvBNReLU (features, 16, 0.26, 1)
-            #rn1 = ResNetNode1 (ResNetNode1 (ResNetNode1 (conv1, 16), 16), 16)
-            rn1   = ResNetNodeStack (conv1, 16, 3)  # 3 means 3 such nodes
-
-            rn2_1 = ResNetIncNode1 (rn1, 32)
-            #rn2 = ResNetNode1 (ResNetNode1 (rn2_1, 32), 32)
-            rn2   = ResNetNodeStack (rn2_1, 32, 2)
-
-            rn3_1 = ResNetIncNode1 (rn2, 64)
-            #rn3 = ResNetNode1 (ResNetNode1 (rn3_1, 64), 64)
-            rn3   = ResNetNodeStack (rn3_1, 64, 2)
-
-            pool = AveragePoolingLayer {(8:8)} (rn3)
-
-            z = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
-        }.z
-
-        // --- ResNet, functional style
-        MyConvBNLayer {dim, initValueScale, stride} =
-        {
-            # note: (3:3), while the macro above is (5:5)
-            C = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale}
-            B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
-            apply (x) = B(C(x))
-        }.apply
-        ResNetLayer {dim, initValueScale} =
-        {
-            C1 = MyConvBNLayer {dim, initValueScale, 1}  # first convolution layer
-            C2 = MyConvBNLayer {dim, initValueScale, 1}  # second convolution layer
-            #B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
-            # ^^ Note: Adding an exra BN to 'x' trains slightly better.
-            apply (x) = ReLU (x + C2(ReLU(C1(x))))  # ReLU between C1 and C2 and after summation
-        }.apply
-        ResNetIncLayer {dim, initValueScale} =
-        {
-            # first branch. This doubles the #channels but halves the image size
-            C1 = MyConvBNLayer {dim, initValueScale, 2}  # first convolution layer, stride = 2
-            C2 = MyConvBNLayer {dim, initValueScale, 1}  # second convolution layer
-
-            # second branch:
-            # sub-sample spatially by a factor of 2
-            DownSamplingLayer {stride} = MaxPoolingLayer {(1:1), stride = stride}
-            # append dim/2 zero output channels
-            pad = ConstantTensor (0, (1:1:dim/2))  # the 1s will broadcast to image size
-            P(x) = Splice ((DownSamplingLayer {(2:2)} (x) : pad), axis = 3)
-            B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
-
-            # layer sums both branches and rectifies the result
-            apply (x) = ReLU (B(P(x)) + C2(ReLU(C1(x))))  # ReLU between C1 and C2 and after summation
-        }.apply
-        model_resNet (features) =
-        {
-            conv1 = MyConvBNLayer {16, 0.26, 1} (features)
-            rl1   = ReLU (conv1)
-            rn1   = LayerStack {3, _ => ResNetLayer {16, 7.07}} (rl1)
-
-            rn2_1 = ResNetIncLayer {32, 7.07} (rn1)
-            rn2   = LayerStack {2, _ => ResNetLayer {32, 7.07}} (rn2_1)
-
-            rn3_1 = ResNetIncLayer {64, 7.07} (rn2)
-            rn3   = LayerStack {2, _ => ResNetLayer {64, 7.07}} (rn3_1)
-
-            pool = AveragePoolingLayer {(8:8)} (rn3)
-
-            z = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
+            l1 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
+                                     init="gaussian", initValueScale=0.0043} (featNorm)
+            p1 = MaxPoolingLayer {(3:3), stride=(2:2)} (l1)
+            l2 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
+                                     init="gaussian", initValueScale=1.414} (p1)
+            p2 = MaxPoolingLayer {(3:3), stride=(2:2)} (l2)
+            l3 = ConvolutionalLayer {64, (5:5), pad=true, activation=ReLU,
+                                     init="gaussian", initValueScale=1.414} (p2)
+            p3 = MaxPoolingLayer {(3:3), stride=(2:2)} (l3)
+            d1 = DenseLayer {64, activation=ReLU, init="gaussian", initValueScale=12} (p3)
+            z  = LinearLayer {10, init="gaussian", initValueScale=1.5} (d1)
        }.z

        # inputs
@@ -205,54 +43,37 @@ TrainConvNet = {
        # connect to system
        ce       = CrossEntropyWithSoftmax (labels, z)
        errs     = ErrorPrediction         (labels, z)
-        top5Errs = ErrorPrediction         (labels, z, topN=5)  # only used in Eval action

        featureNodes    = (features)
        labelNodes      = (labels)
        criterionNodes  = (ce)
-        evaluationNodes = (errs)  # top5Errs only used in Eval
+        evaluationNodes = (errs)
        outputNodes     = (z)
    }

    SGD = {
        epochSize = 50000

-        # without BatchNormalization:
-        #maxEpochs = 30 ; minibatchSize = 64
-        #learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
-        #momentumAsTimeConstant = 600*20:6400
-        #L2RegWeight = 0.03
-        #dropoutRate = 0*5:0.5   ##### added
+        maxEpochs = 30 ; minibatchSize = 64
+        learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
+        momentumAsTimeConstant = 600*20:6400
+        L2RegWeight = 0.03

-        # with BatchNormalization:
-        #maxEpochs = 30 ; minibatchSize = 64
-        #learningRatesPerSample = 0.00046875*7:0.00015625*10:0.000046875*10:0.000015625
-        #momentumAsTimeConstant = 0
-        #L2RegWeight = 0
-        #dropoutRate = 0*5:0.5   ##### added
-
-        # ResNet
-        maxEpochs = 160 ; minibatchSize = 128
-        learningRatesPerSample = 0.0078125*80:0.00078125*40:0.000078125
-        momentumAsTimeConstant = 1200
-        L2RegWeight = 0.0001
-
-        firstMBsToShowResult = 10 ; numMBsToShowResult = 500
+        firstMBsToShowResult = 10 ; numMBsToShowResult = 100
    }

    reader = {
-        verbosity = 0
-        randomize = true
+        verbosity = 0 ; randomize = true
        deserializers = ({
            type = "ImageDeserializer" ; module = "ImageReader"
-            file = "$DataDir$/cifar-10-batches-py/train_map.txt"
+            file = "$dataDir$/cifar-10-batches-py/train_map.txt"
            input = {
                features = { transforms = (
                    { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
                    { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
                    { type = "Transpose" }
                )}
-                labels =   { labelDim = 10 }
+                labels = { labelDim = 10 }
            }
        })
    }
@@ -262,19 +83,18 @@ TrainConvNet = {
 Eval = {
    action = "eval"
    minibatchSize = 16
-    evalNodeNames = errs:top5Errs  # also test top-5 error rate
+    evalNodeNames = errs
    reader = {
-        verbosity = 0
-        randomize = true
+        verbosity = 0 ; randomize = true
        deserializers = ({
            type = "ImageDeserializer" ; module = "ImageReader"
-            file = "$DataDir$/cifar-10-batches-py/test_map.txt"
+            file = "$dataDir$/cifar-10-batches-py/test_map.txt"
            input = {
                features = { transforms = (
                   { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
                   { type = "Transpose" }
                )}
-                labels =   { labelDim = 10 }
+                labels = { labelDim = 10 }
            }
        })
    }
--- a/Tutorials/ImageHandsOn/cifar10.cmf
+++ b/Tutorials/ImageHandsOn/cifar10.cmf