updated SLUHandsOn tests
This commit is contained in:
Parent: 9e73e6fda5
Commit: 769b2602a2
@@ -46,11 +46,12 @@ DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueSca
 # EmbeddingLayer -- create a linear embedding layer
 EmbeddingLayer {outDim,                            # dimension of embedding
+                init='heNormal', initValueScale=1,
                 embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
 {
     shape = if transpose then (Inferred : outDim) else (outDim : Inferred)
     E = if embeddingPath == ''
-        then ParameterTensor {shape, init='heNormal'} # learnable
+        then ParameterTensor {shape, init=init, initValueScale=initValueScale} # learnable
         else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
     TimesOp = if transpose then TransposeTimes else Times
     apply (x) = TimesOp (E, x) # x is expected to be sparse one-hot
 
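With this change a caller can override the embedding's initializer, which was previously hard-coded to heNormal. A minimal usage sketch (the dimension 150 and the names embed/w are hypothetical, not from the commit):

    # sketch: custom initializer is passed through to the underlying ParameterTensor
    embed = EmbeddingLayer {150, init='uniform', initValueScale=1}
    e = embed (w)   # w is expected to be a sparse one-hot word vector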
@@ -23,7 +23,7 @@ struct RnnAttributes
     RnnAttributes(bool bidirectional, size_t numLayers, size_t hiddenSize, const wstring& recurrentOp, int axis) :
         m_bidirectional(bidirectional), m_numLayers(numLayers), m_hiddenSize(hiddenSize), m_recurrentOp(recurrentOp), m_axis(axis)
     {
         if (m_recurrentOp != wstring(L"lstm")    && m_recurrentOp != wstring(L"gru") &&
             m_recurrentOp != wstring(L"rnnReLU") && m_recurrentOp != wstring(L"rnnTanh"))
         {
             InvalidArgument("Unknown cell type '%ls'. Supported values are 'lstm', 'gru', 'rnnReLU', 'rnnTanh'.", m_recurrentOp.c_str());
@@ -45,12 +45,12 @@ struct RnnAttributes
         for (size_t i = 0; i < m_numLayers; i++)
         {
             size_t oneNetTotal =
                 numNetworks * m_hiddenSize   // 1, 3, or 4 networks producing hidden-dim output
                 * (inputDim + m_hiddenSize)  // each network has these two inputs
                 + numNetworks * m_hiddenSize // biases
                 * 2;                         // for unknown reasons, cudnn5 uses 2 bias terms everywhere
             total += oneNetTotal * bidirFactor;    // 1 or 2 directions
             inputDim = bidirFactor * m_hiddenSize; // next layer continues with this as input
         }
         return make_pair(m_hiddenSize, total / m_hiddenSize);
     }
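To make the parameter count concrete, a worked example under assumed settings (an lstm cell, hence numNetworks = 4; a single unidirectional layer, so bidirFactor = 1; inputDim = 150 and m_hiddenSize = 300 are hypothetical values, not from the commit):

$$\text{oneNetTotal} = 4 \cdot 300 \cdot (150 + 300) + 4 \cdot 300 \cdot 2 = 540000 + 2400 = 542400$$

so $\text{total} = 542400$ and the function returns $\text{make\_pair}(300,\ 542400 / 300) = (300,\ 1808)$.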
@@ -59,10 +59,10 @@ struct RnnAttributes
     {
         return
             m_bidirectional == other.m_bidirectional &&
             m_numLayers     == other.m_numLayers     &&
             m_hiddenSize    == other.m_hiddenSize    &&
             m_recurrentOp   == other.m_recurrentOp   &&
             m_axis          == other.m_axis;
     }
 
     void Read(File& stream, bool readAxis)
@@ -21,9 +21,9 @@ TrainTagger = {
     hiddenDim = 300
 
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
 
     # features
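Because of the EmbeddingLayer change in the first hunk, init='uniform' now reaches the underlying ParameterTensor. A sketch of what the updated embedding call expands to (the value 150 for embDim is hypothetical; initValueScale defaults to 1):

    # sketch: with the new definition, these two forms build the same weight
    emb = EmbeddingLayer {150, init='uniform'}
    W   = ParameterTensor {(150 : Inferred), init='uniform', initValueScale=1}
    embExplicit (x) = Times (W, x)   # x sparse one-hot, as in the layer definition above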
@@ -20,11 +20,11 @@ TrainTagger = {
     hiddenDim = 300
 
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
         BatchNormalizationLayer {normalizationTimeConstant=2048} :         ##### added
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
         BatchNormalizationLayer {normalizationTimeConstant=2048} :         ##### added
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
 
     # features
@@ -22,12 +22,12 @@ TrainTagger = {
     LookaheadLayer (x) = Splice (x : FutureValue (0, x, defaultHiddenActivation=0))
 
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
         LookaheadLayer :                                                   ##### added
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
 
     # features
@@ -20,17 +20,17 @@ TrainTagger = {
     hiddenDim = 150
 
     BiRecurrentLSTMLayer {outDim} = {
-        F = RecurrentLSTMLayer {outDim, goBackwards=false}
-        G = RecurrentLSTMLayer {outDim, goBackwards=true}
+        F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
+        G = RecurrentLSTMLayer {outDim, goBackwards=true,  init='uniform'}
         apply (x) = Splice (F(x):G(x))
     }.apply
 
     model = Sequential (
-        EmbeddingLayer {embDim} :
+        EmbeddingLayer {embDim, init='uniform'} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
         BiRecurrentLSTMLayer {hiddenDim} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {labelDim, initValueScale=7}
+        DenseLayer {labelDim, init='uniform', initValueScale=7}
     )
 
     # features
@@ -20,17 +20,17 @@ TrainTagger = {
     hiddenDim = 150
 
     BiRecurrentLSTMLayer {outDim} = {
-        F = RecurrentLSTMLayer {outDim, goBackwards=false}
-        G = RecurrentLSTMLayer {outDim, goBackwards=true}
+        F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
+        G = RecurrentLSTMLayer {outDim, goBackwards=true,  init='uniform'}
         apply (x) = Splice (BS.Sequences.Last(F(x)):BS.Sequences.First(G(x)))
     }.apply
 
     model = Sequential (
-        EmbeddingLayer {embDim} :
+        EmbeddingLayer {embDim, init='uniform'} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
         BiRecurrentLSTMLayer {hiddenDim} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {intentDim, initValueScale=7}
+        DenseLayer {intentDim, init='uniform', initValueScale=7}
     )
 
     # features
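Unlike the slot-tagging variants above, this intent model reduces each input sequence to a single vector before the dense layer. A short shape sketch (hiddenDim = 150 as in this config):

    # F(x), G(x)               : sequences of 150-dim vectors (forward / backward LSTM states)
    # BS.Sequences.Last (F(x)) : final forward state, one 150-dim vector per sequence
    # BS.Sequences.First (G(x)): first backward state, i.e. the backward pass over the whole input
    # Splice (...)             : one 300-dim vector per sequence, fed to DenseLayer {intentDim, ...}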