Integrate v-lelu/bn-inception-bs into master

2017-01-19 10:42:13 -08:00 · 2017-01-19 10:42:13 -08:00 · a745f3489f
--- a/Examples/Image/Classification/GoogLeNet/BN-Inception/BrainScript/BN-Inception.bs
+++ b/Examples/Image/Classification/GoogLeNet/BN-Inception/BrainScript/BN-Inception.bs
@ -0,0 +1,47 @@
+#
+# BN-Inception network definition
+# Details are in https://arxiv.org/pdf/1502.03167v3.pdf
+#
+
+BN_Inception(input, labelDim, bnScale, bnTimeConst) =
+{
+    # Builds the network from input to the linear output z. Each size comment gives the input of the layer below it: 224 x 224 x 3
+    conv1 = ConvBNReLULayer{64, (7:7), (2:2), true, bnScale, bnTimeConst}(input)
+    # 112 x 112 x 64
+    pool1 = MaxPoolingLayer{(3:3), stride = (2:2), pad = true}(conv1)
+    # 56 x 56 x 64
+    conv2a = ConvBNReLULayer{64, (1:1), (1:1), true, bnScale, bnTimeConst}(pool1)
+    # 56 x 56 x 64
+    conv2b = ConvBNReLULayer{192, (3:3), (1:1), true, bnScale, bnTimeConst}(conv2a)
+    # 56 x 56 x 192
+    pool2 = MaxPoolingLayer{(3:3), stride = (2:2), pad = true}(conv2b)
+    
+    # Inception blocks: the output depth is the sum of the spliced branch widths (pass-through blocks also carry their input depth)
+    # 28 x 28 x 192
+    inception3a = InceptionWithAvgPoolLayer{64, 64, 64, 64, 96, 32, bnScale, bnTimeConst}(pool2) 
+    # 28 x 28 x 256 (64 + 64 + 96 + 32)
+    inception3b = InceptionWithAvgPoolLayer{64, 64, 96, 64, 96, 64, bnScale, bnTimeConst}(inception3a) 
+    # 28 x 28 x 320 (64 + 96 + 96 + 64)
+    inception3c = InceptionPassThroughLayer{0, 128, 160, 64, 96, 0, bnScale, bnTimeConst}(inception3b) 
+    # 14 x 14 x 576 (160 + 96 + 320 passed through)
+    inception4a = InceptionWithAvgPoolLayer{224, 64, 96, 96, 128, 128, bnScale, bnTimeConst}(inception3c) 
+    # 14 x 14 x 576 (224 + 96 + 128 + 128)
+    inception4b = InceptionWithAvgPoolLayer{192, 96, 128, 96, 128, 128, bnScale, bnTimeConst}(inception4a) 
+    # 14 x 14 x 576 (192 + 128 + 128 + 128)
+    inception4c = InceptionWithAvgPoolLayer{160, 128, 160, 128, 160, 128, bnScale, bnTimeConst}(inception4b) 
+    # 14 x 14 x 608 (160 + 160 + 160 + 128)
+    inception4d = InceptionWithAvgPoolLayer{96, 128, 192, 160, 192, 128, bnScale, bnTimeConst}(inception4c) 
+    # 14 x 14 x 608 (96 + 192 + 192 + 128)
+    inception4e = InceptionPassThroughLayer{0, 128, 192, 192, 256, 0, bnScale, bnTimeConst}(inception4d)
+    # 7 x 7 x 1056 (192 + 256 + 608 passed through)
+    inception5a = InceptionWithAvgPoolLayer{352, 192, 320, 160, 224, 128, bnScale, bnTimeConst}(inception4e) 
+    # 7 x 7 x 1024 (352 + 320 + 224 + 128)
+    inception5b = InceptionWithMaxPoolLayer{352, 192, 320, 192, 224, 128, bnScale, bnTimeConst}(inception5a) 
+    
+    # Global average pooling over the whole 7 x 7 map
+    # 7 x 7 x 1024 (352 + 320 + 224 + 128)
+    pool3 = AveragePoolingLayer{(7:7)}(inception5b)
+    # 1 x 1 x 1024
+    z = LinearLayer{labelDim, init = 'heNormal'}(pool3)  # linear layer with labelDim outputs; no activation is applied here
+}
+
--- a/Examples/Image/Classification/GoogLeNet/BN-Inception/BrainScript/BN-Inception.cntk
+++ b/Examples/Image/Classification/GoogLeNet/BN-Inception/BrainScript/BN-Inception.cntk
@ -0,0 +1,153 @@
+#
+# BN-Inception network
+# Details are in https://arxiv.org/pdf/1502.03167v3.pdf
+#
+
+RootDir = "."
+ParentDir = ".."
+
+ConfigDir = "$RootDir$"
+DataDir = "$ParentDir$/Data"  # the readers below take train_map.txt and val_map.txt from here
+OutputDir = "$ParentDir$/Output"
+ModelDir = "$OutputDir$/Models"  # Train and Eval both use ModelDir/BN-Inception as modelPath
+MeanDir = "$ConfigDir$"  # the readers below take ImageNet1K_mean.xml from here
+
+stderr = "$OutputDir$/BN-Inception"
+
+precision = "float"
+deviceId = "Auto"
+
+command = Train:Eval  # names the Train and Eval sections defined further down in this file
+
+parallelTrain = "true"
+
+traceLevel = 1
+numMBsToShowResult = 100  # the same value is set again inside the SGD block of Train
+
+###################
+# TRAINING CONFIG #
+###################
+
+Train = [
+    action = "train"
+    modelPath = "$ModelDir$/BN-Inception"
+    
+    BrainScriptNetworkBuilder = {
+        include "InceptionLayers.bs"
+        include "BN-Inception.bs"
+        
+        imageShape  = 224:224:3                 # image dimensions
+        labelDim    = 1000                      # number of distinct labels
+        bnScale     = 1                         # forwarded to BN_Inception as bnScale
+        bnTimeConst = 4096                      # forwarded to BN_Inception as bnTimeConst
+        
+        # inputs
+        features = Input {imageShape}
+        labels   = Input {labelDim}
+        
+        # apply model to features
+        model = BN_Inception(features, labelDim, bnScale, bnTimeConst)
+        z     = model.z
+        
+        # connect to system: training criterion plus top-1 and top-5 error
+        ce       = CrossEntropyWithSoftmax (labels, z)
+        errs     = ClassificationError     (labels, z)
+        top5Errs = ClassificationError     (labels, z, topN = 5)
+
+        # define special nodes
+        featureNodes    = (features)
+        labelNodes      = (labels)
+        criterionNodes  = (ce)
+        evaluationNodes = (errs : top5Errs)
+        outputNodes     = (z)
+        
+    }
+
+    SGD = [
+        epochSize = 0  # NOTE review: presumably 0 means one full pass over the data per epoch - confirm
+        minibatchSize = 256 # 8 GPUs
+        learningRatesPerMB = 3.6*2:3.384  # schedule: 3.6 for the first 2 epochs, then 3.384
+        momentumPerMB = 0.9
+        maxEpochs = 300
+        gradUpdateType = "None"
+        L2RegWeight = 0.0001
+        numMBsToShowResult = 100
+        
+        autoAdjust = [
+            autoAdjustLR = "adjustAfterEpoch"
+            reduceLearnRateIfImproveLessThan = 1000  # NOTE review: the very large threshold presumably forces the decrease at every adjustment - confirm
+            learnRateAdjustInterval = 2
+            learnRateDecreaseFactor = 0.94
+            loadBestModel = false
+        ]
+
+        ParallelTrain = [
+            parallelizationMethod = "DataParallelSGD"
+            distributedMBReading = "true"
+            parallelizationStartEpoch = 1
+            DataParallelSGD = [
+                gradientBits = 32
+            ]
+        ]
+    ]
+    
+    reader = [
+        readerType = "ImageReader"
+        file = "$DataDir$/train_map.txt"
+        randomize = "Auto"
+        features = [
+            width = 224
+            height = 224
+            channels = 3
+            cropType = "Random"
+            cropRatio = 0.46666:0.875  # a range of crop ratios; the validation readers use the single value 0.875
+            jitterType = "UniRatio"
+            meanFile = "$MeanDir$/ImageNet1K_mean.xml"
+        ]
+        labels = [
+            labelDim = 1000
+        ]
+    ]
+
+
+    cvreader = [
+        readerType = "ImageReader"
+        file = "$DataDir$/val_map.txt"
+        randomize = "None"
+        features = [
+            width = 224
+            height = 224
+            channels = 3
+            cropType = "Center"
+            cropRatio = 0.875
+            meanFile = "$MeanDir$/ImageNet1K_mean.xml"
+        ]
+        labels = [
+            labelDim = 1000
+        ]
+    ]        
+]
+
+Eval = [
+    action = "test"
+    modelPath = "$ModelDir$/BN-Inception"  # same modelPath as the Train section
+    evalNodeNames = errs:top5Errs  # evaluate both the top-1 and the top-5 error nodes of the network
+    minibatchSize = 256
+
+    reader = [
+        readerType = "ImageReader"
+        file = "$DataDir$/val_map.txt"  # same map file as the cvreader of the Train section
+        randomize = "None"
+        features = [
+            width = 224
+            height = 224
+            channels = 3
+            cropType = "Center"
+            cropRatio = 0.875
+            meanFile = "$MeanDir$/ImageNet1K_mean.xml"
+        ]
+        labels = [
+            labelDim = 1000
+        ]
+    ]    
+]
--- a/Examples/Image/Classification/GoogLeNet/BN-Inception/BrainScript/InceptionLayers.bs
+++ b/Examples/Image/Classification/GoogLeNet/BN-Inception/BrainScript/InceptionLayers.bs
@ -0,0 +1,88 @@
+#
+# BN-Inception network components
+# Details are in https://arxiv.org/pdf/1502.03167v3.pdf
+#
+
+ConvBNReLULayer {outChannels, kernel, stride, pad, bnScale, bnTimeConst} = Sequential(  # convolution, then batch normalization, then ReLU
+    ConvolutionalLayer{outChannels, kernel, init = 'heNormal', stride = stride, pad = pad, bias = false} :  # created without a bias term
+    BatchNormalizationLayer{spatialRank = 2, normalizationTimeConstant = bnTimeConst, initialScale = bnScale} :  # bnTimeConst and bnScale are consumed here
+    ReLU
+)
+
+InceptionWithAvgPoolLayer {num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnScale, bnTimeConst} = {
+    apply(x) = {
+        # Four branches are applied to the same input x and spliced. Branch 1: 1x1 convolution with num1x1 outputs
+        branch1x1 = ConvBNReLULayer{num1x1, (1:1), (1:1), true, bnScale, bnTimeConst}(x)
+
+        # Branch 2: 1x1 convolution with num3x3r outputs, then 3x3 convolution with num3x3 outputs
+        branch3x3 = Sequential( 
+            ConvBNReLULayer{num3x3r, (1:1), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3,  (3:3), (1:1), true, bnScale, bnTimeConst}
+        ) (x)
+
+        # Branch 3: 1x1 convolution with num3x3dblr outputs, then two 3x3 convolutions with num3x3dbl outputs each
+        branch3x3dbl = Sequential(
+            ConvBNReLULayer{num3x3dblr, (1:1), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3dbl,  (3:3), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3dbl,  (3:3), (1:1), true, bnScale, bnTimeConst}
+        ) (x)
+
+        # Branch 4: 3x3 average pooling with stride 1 and padding, then 1x1 convolution with numPool outputs
+        branch_pool = Sequential(
+            AveragePoolingLayer{(3:3), stride = (1:1), pad = true} :
+            ConvBNReLULayer{numPool, (1:1), (1:1), true, bnScale, bnTimeConst}
+        ) (x)
+
+        out = Splice((branch1x1:branch3x3:branch3x3dbl:branch_pool), axis=3)  # depth along axis 3: num1x1 + num3x3 + num3x3dbl + numPool
+    }.out
+}.apply
+
+InceptionWithMaxPoolLayer {num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnScale, bnTimeConst} = {
+    apply(x) = {
+        # Four branches are applied to the same input x and spliced. Branch 1: 1x1 convolution with num1x1 outputs
+        branch1x1 = ConvBNReLULayer{num1x1, (1:1), (1:1), true, bnScale, bnTimeConst}(x)
+
+        # Branch 2: 1x1 convolution with num3x3r outputs, then 3x3 convolution with num3x3 outputs
+        branch3x3 = Sequential( 
+            ConvBNReLULayer{num3x3r, (1:1), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3,  (3:3), (1:1), true, bnScale, bnTimeConst}
+        ) (x)
+
+        # Branch 3: 1x1 convolution with num3x3dblr outputs, then two 3x3 convolutions with num3x3dbl outputs each
+        branch3x3dbl = Sequential(
+            ConvBNReLULayer{num3x3dblr, (1:1), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3dbl,  (3:3), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3dbl,  (3:3), (1:1), true, bnScale, bnTimeConst}
+        ) (x)
+
+        # Branch 4: 3x3 max pooling with stride 1 and padding, then 1x1 convolution with numPool outputs
+        branch_pool = Sequential(
+            MaxPoolingLayer{(3:3), stride=(1:1), pad=true} :
+            ConvBNReLULayer{numPool, (1:1), (1:1), true, bnScale, bnTimeConst}
+        ) (x)
+
+        out = Splice((branch1x1:branch3x3:branch3x3dbl:branch_pool), axis=3)  # depth along axis 3: num1x1 + num3x3 + num3x3dbl + numPool
+    }.out
+}.apply
+
+InceptionPassThroughLayer {num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnScale, bnTimeConst} = {
+    apply(x) = {
+        # Stride-2 block with three branches; num1x1 and numPool are accepted but not used. Branch 1: 1x1 convolution with num3x3r outputs, then stride-2 3x3 convolution with num3x3 outputs
+        branch3x3 = Sequential( 
+            ConvBNReLULayer{num3x3r, (1:1), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3,  (3:3), (2:2), true, bnScale, bnTimeConst}
+        ) (x)
+
+        # Branch 2: 1x1 convolution with num3x3dblr outputs, then two 3x3 convolutions with num3x3dbl outputs each; only the last one has stride 2
+        branch3x3dbl = Sequential(
+            ConvBNReLULayer{num3x3dblr, (1:1), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3dbl,  (3:3), (1:1), true, bnScale, bnTimeConst} :
+            ConvBNReLULayer{num3x3dbl,  (3:3), (2:2), true, bnScale, bnTimeConst}
+        ) (x)
+        
+        # Branch 3: stride-2 3x3 max pooling applied directly to x with no convolution, so the depth of x is carried through unchanged
+        branch_pool = MaxPoolingLayer{(3:3), stride=(2:2), pad=true}(x)
+
+        out = Splice((branch3x3:branch3x3dbl:branch_pool), axis=3)  # depth along axis 3: num3x3 + num3x3dbl + depth of x
+    }.out
+}.apply
--- a/Examples/Image/Classification/GoogLeNet/BN-Inception/README.md
+++ b/Examples/Image/Classification/GoogLeNet/BN-Inception/README.md
@ -0,0 +1,24 @@
+# CNTK Examples: Image/Classification/GoogLeNet/BN-Inception
+
+## Overview
+
+|Data:     |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
+|:---------|:---
+|Purpose   |This folder contains examples that demonstrate how to use CNTK to define BN-Inception (https://arxiv.org/abs/1502.03167) for image classification.
+|Network   |Deep convolutional neural networks codenamed "Inception" (GoogLeNet) with Batch Normalization.
+|Training  |Stochastic gradient descent with momentum.
+|Comments  |See below.
+
+## Running the example
+
+### Getting the data
+We use the ILSVRC2012 datasets to demonstrate how to train a BN-Inception network. BN-Inception was published by researchers at Google Inc. and was first described in the Batch Normalization paper (https://arxiv.org/abs/1502.03167) to demonstrate the power of Batch Normalization with only minor changes to the original GoogLeNet. It has been shown to train faster and achieve better accuracy than GoogLeNet v1, which is well known for winning first place in the [ILSVRC](http://www.image-net.org/challenges/LSVRC/) 2014 detection challenge.
+
+
+The ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain them from http://image-net.org.
+
+## Details
+
+We currently offer the BN-Inception model (https://arxiv.org/abs/1502.03167). Only a BrainScript version is available at the moment.
+
+### [BrainScript](./BrainScript)
--- a/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/InceptionBlocks.bs
+++ b/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/InceptionBlocks.bs
--- a/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/InceptionV3.bs
+++ b/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/InceptionV3.bs
--- a/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/InceptionV3.cntk
+++ b/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/InceptionV3.cntk
--- a/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/README.md
+++ b/Examples/Image/Classification/GoogLeNet/InceptionV3/BrainScript/README.md
@ -1,4 +1,4 @@
-# CNTK Examples: Image/Classification/GoogLeNet
+# CNTK Examples: Image/Classification/GoogLeNet/InceptionV3

 ## BrainScript

--- a/Examples/Image/Classification/GoogLeNet/InceptionV3/README.md
+++ b/Examples/Image/Classification/GoogLeNet/InceptionV3/README.md
@ -0,0 +1,24 @@
+# CNTK Examples: Image/Classification/GoogLeNet/InceptionV3
+
+## Overview
+
+|Data:     |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
+|:---------|:---
+|Purpose   |This folder contains examples that demonstrate how to use CNTK to define Inception V3 (https://arxiv.org/abs/1512.00567) for image classification.
+|Network   |Deep convolutional neural networks codenamed "Inception" (GoogLeNet) version 3.
+|Training  |RMSProp.
+|Comments  |See below.
+
+## Running the example
+
+### Getting the data
+We use the ILSVRC2012 datasets to demonstrate how to train an Inception V3 network. Inception V3 was initially published by Researchers at Google Inc., and it is fine-tuned to have excellent classification accuracy and low computation cost. Its original version, GoogLeNet, won first place in the [ILSVRC](http://www.image-net.org/challenges/LSVRC/) 2014 detection challenge.
+
+
+The ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain them from http://image-net.org.
+
+## Details
+
+We currently offer the Inception V3 model, published in December 2015 (https://arxiv.org/abs/1512.00567). Only a BrainScript version is available at the moment.
+
+### [BrainScript](./BrainScript)
--- a/Examples/Image/Classification/GoogLeNet/README.md
+++ b/Examples/Image/Classification/GoogLeNet/README.md
@ -4,9 +4,9 @@

 |Data:     |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
 |:---------|:---
-|Purpose   |This folder contains examples that demonstrate how to use CNTK to define GoogLeNet (https://arxiv.org/abs/1409.4842) for image classification.
+|Purpose   |This folder contains examples that demonstrate how to use CNTK to define GoogLeNet (https://arxiv.org/abs/1409.4842) and its derivatives for image classification.
 |Network   |Deep convolutional neural networks codenamed "Inception" (GoogLeNet).
-|Training  |RMSProp.
+|Training  |See the details.
 |Comments  |See below.

 ## Running the example
@ -19,6 +19,8 @@ ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain it

 ## Details

-We currently offer the Inception V3 model, published in December 2015 (https://arxiv.org/abs/1512.00567). Only BrainScript version is available at this moment.
+We currently offer the BN-Inception (https://arxiv.org/abs/1502.03167) and Inception V3 (https://arxiv.org/abs/1512.00567) models.

-### [BrainScript](./BrainScript)
+### [BN-Inception](./BN-Inception)
+
+### [Inception V3](./InceptionV3)
--- a/Tools/samples.json
+++ b/Tools/samples.json
@ -71,10 +71,18 @@
        "language":  ["BrainScript"],
        "type":  ["Tutorial", "Recipe"]
    },
+    {
+        "category": ["Image"],
+        "name":  "GoogLeNet (BN-Inception)",
+        "url":  "https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/GoogLeNet/BN-Inception#cntk-examples-imageclassificationgooglenetbn-inception",
+        "description":  "GoogLeNet (BN-Inception) network for image classification.",
+        "language":  ["BrainScript"],
+        "type":  ["Recipe"]
+    },
    {
        "category": ["Image"],
        "name":  "GoogLeNet (Inception V3)",
-        "url":  "https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/GoogLeNet#cntk-examples-imageclassificationgooglenet",
+        "url":  "https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/GoogLeNet/InceptionV3#cntk-examples-imageclassificationgooglenetinceptionv3",
        "description":  "GoogLeNet (Inception V3) network for image classification.",
        "language":  ["BrainScript"],
        "type":  ["Recipe"]