created SLUHandsOn.md

2016-08-13 20:04:44 -07:00 · 2016-08-13 20:04:44 -07:00 · f483334ff0
--- a/Tutorials/ImageHandsOn/30/cifar10.cmf
+++ b/Tutorials/ImageHandsOn/30/cifar10.cmf
--- a/Tutorials/ImageHandsOn/ImageHandsOn_Solution3.cntk
+++ b/Tutorials/ImageHandsOn/ImageHandsOn_Solution3.cntk
@ -22,7 +22,7 @@ TrainConvNet = {
            {
                c = ConvolutionalLayer {dim, (5:5), pad = true,  ##### no activation=ReLU
                                        init = "gaussian", initValueScale = initValueScale} (x)
-                b = BatchNormalizationLayer {spatialRank = 2} (c)
+                b = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} (c)
                r = ReLU (b)   ##### now called explicitly
                p = MaxPoolingLayer {(3:3), stride = (2:2)} (r)
            }.p
@ -31,7 +31,7 @@ TrainConvNet = {
            p2 = MyLayer (p1,       32, 1.414)
            p3 = MyLayer (p2,       64, 1.414)
            d1 = DenseLayer {64, init = "gaussian", initValueScale = 12} (p3)
-            d1_bnr = ReLU (BatchNormalizationLayer {} (d1))  ##### added BN and explicit ReLU
+            d1_bnr = ReLU (BatchNormalizationLayer {normalizationTimeConstant = 4096} (d1))  ##### added BN and explicit ReLU
            d1_d = Dropout (d1_bnr)                          ##### d1 -> d1_bnr
            z  = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1_d)
        }.z
--- a/Tutorials/SLUHandsOn/SLUHandsOn.cntk
+++ b/Tutorials/SLUHandsOn/SLUHandsOn.cntk
@ -20,12 +20,28 @@ TrainTagger = {
        hiddenDim = 300
        #hiddenDim = 150

+        TrigramLayer{} = {    # context-window layer factory; evaluates to the record's apply function (see .apply below)
+            Left  = DelayLayer{T=-2}    # NOTE(review): unused while commented out in apply; the inline version this replaces used T=1 -- confirm intended offset
+            Right = DelayLayer{T=-1}
+            apply (x) = Splice (/*Left (x) :*/ x : Right(x))    # currently splices only x and Right(x); the Left(x) term is commented out
+        }.apply
+
+        BiRecurrentLSTMLayer {hiddenDim} =    # layer factory combining two RecurrentLSTMLayer instances, each of size hiddenDim
+        {
+            L2R = RecurrentLSTMLayer {hiddenDim, goBackwards=false}    # recurrence with goBackwards disabled
+            R2L = RecurrentLSTMLayer {hiddenDim, goBackwards=true}     # recurrence with goBackwards enabled
+            apply (x) = Splice (L2R (x) : R2L (x))    # feeds the same input x to both layers and splices their outputs
+        }.apply
+
        model = Sequential (
-            Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) :  # 3-word window
+            TrigramLayer{} :                                                      # context window; replaces the inline Parallel/Splice line
            EmbeddingLayer {embDim} :                                             # embedding
-            RecurrentLSTMLayer {hiddenDim} :              # LSTM
+            BatchNormalizationLayer {normalizationTimeConstant=2048} :            # batch normalization between embedding and LSTM
+            RecurrentLSTMLayer {hiddenDim, goBackwards=false} :              # LSTM
+            #BiRecurrentLSTMLayer {hiddenDim} :                               # disabled alternative to the single-direction LSTM above
            #Parallel ((RecurrentLSTMLayer {hiddenDim} : RecurrentLSTMLayer {hiddenDim, goBackwards=true}), Splice) :  # bidirectional LSTM
            #Parallel ((RecurrentLSTMLayer {hiddenDim} : RecurrentLSTMLayer {hiddenDim, goBackwards=true}), Splice) :  # bidirectional LSTM
+            BatchNormalizationLayer {normalizationTimeConstant=2048} :            # batch normalization between LSTM and output layer
            DenseLayer {labelDim, initValueScale=7}        # output layer
        )

@ -83,7 +99,7 @@ TrainTagger = {
    }

    SGD = {
-        maxEpochs = 20 ; epochSize = 36000
+        maxEpochs = 8 ; epochSize = 36000

        minibatchSize = 70