Add E2E test for binary MLF
This commit is contained in:
Parent: 7c59a63d6c
Commit: cb3d08ac51

Binary file not shown.
File diff suppressed because it is too large.
File diff suppressed because it is too large.

cntk.cntk
@@ -0,0 +1,165 @@
# end-to-end test for recurrent LSTM for speech

precision = "float"
deviceId = $DeviceId$

command = speechTrain

# Note: These options are overridden from the command line in some test cases.
frameMode = false
truncated = true
parallelTrain = false
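# For example, the run-test script below runs this config as:
#   cntkrun cntk.cntk 'Truncated=false speechTrain=[SGD=[epochSize=2560]] ...'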

speechTrain = [
    action = "train"
    modelPath = "$RunDir$/models/cntkSpeech.dnn"
    #deviceId = $DeviceId$
    traceLevel = 1

    SGD = [
        epochSize = 20480
        minibatchSize = 20
        learningRatesPerMB = 0.5
        numMBsToShowResult = 10
        momentumPerMB = 0:0.9
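        # 0:0.9 is a per-epoch schedule: no momentum in epoch 1, then 0.9 from epoch 2 on.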
        maxEpochs = 4
        keepCheckPointFiles = true
    ]

    reader = [
        verbosity = 0
        randomize = true

        # A list of deserializers to use.
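        # The (first : second) syntax below is a BrainScript array; the two deserializers
        # are composed so that the reader exposes both the "features" and "labels" streams.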
        deserializers = (
            [
                type = "HTKFeatureDeserializer"
                module = "HTKDeserializers"

                # Description of input streams
                input = [
                    # Description of a particular input stream
                    features = [
                        dim = 363
                        scpFile = "$DataDir$/glob_0000.scp"
                    ]
                ]
            ]:
            [
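                # Binary MLF label deserializer: the code path this E2E test was added to exercise.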
type = "HTKMLFBinaryDeserializer"
|
||||||
|
module = "HTKDeserializers"
|
||||||
|
chunkSizeInBytes = 36
|
||||||
|
input = [
|
||||||
|
labels = [
|
||||||
|
mlfFile = "$DataDir$/mlf2.bin"
|
||||||
|
dim = 132
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||

    # TODO share this section ../../../LSTM/cntk.cntk
    # define network using BrainScript
    BrainScriptNetworkBuilder = [

        useSelfStabilization = true

        // define basic I/O
        baseFeatDim = 33
        featDim = 11 * baseFeatDim
        labelDim = 132

        // hidden dimensions
        innerCellDim = 1024
        hiddenDim = 256
        numLSTMLayers = 3 // number of hidden LSTM model layers

        // features
        features = Input((1 : featDim), tag='feature') // TEST: Artificially reading data transposed
        realFeatures = Transpose (features)            // and swapping them back to (featDim:1), for testing Transpose()
        labels = Input(labelDim, tag='label')
        feashift = RowSlice(featDim - baseFeatDim, baseFeatDim, realFeatures);
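        // RowSlice(start, num, x) selects the last baseFeatDim (33) rows of the stacked
        // 11-frame input (featDim = 11 * 33 = 363), i.e. the features of a single frame.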

        featNorm = MeanVarNorm(feashift)

        // we define the LSTM locally for now, since the one in CNTK.core.bs has a slightly changed configuration that breaks this test
        Stabilize (x, enabled=true) =
            if enabled
            then [
                beta = Exp (BS.Parameters.BiasParam ((1))) # init value is 0
                result = beta .* x
            ].result
            else x
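        // Self-stabilization: beta = Exp(bias) starts at exp(0) = 1, and learning the
        // scale in log-space keeps beta positive.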

        LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =
        [
            _privateInnards = [ // encapsulate the inner workings
                dh = prevState.h // previous values
                dc = prevState.c

                // parameter macros--these carry their own weight matrices
                B() = BS.Parameters.BiasParam (cellDim)

                W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * Stabilize (v, enabled=enableSelfStabilization)  // input-to-hidden
                H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
                C(c) = BS.Parameters.DiagWeightParam (cellDim) .* Stabilize (c, enabled=enableSelfStabilization)       // cell-to-hidden (note: applied elementwise)

                // note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
                it  = Sigmoid (W(x) + B() + H(dh) + C(dc)) // input gate(t)
                bit = it .* Tanh (W(x) + (H(dh) + B()))    // applied to tanh of input network

                ft  = Sigmoid (W(x) + B() + H(dh) + C(dc)) // forget-me-not gate(t)
                bft = ft .* dc                             // applied to cell(t-1)

                ct  = bft + bit                            // c(t) is sum of both

                ot  = Sigmoid (W(x) + B() + H(dh) + C(ct)) // output gate(t)
                ht  = ot .* Tanh (ct)                      // applied to tanh(cell(t))
            ]

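            // The C() terms are diagonal "peephole" connections from the cell state into
            // the gates; together with the output projection below this is the LSTMP
            // architecture commonly used for acoustic models.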
            # our return values
            c = _privateInnards.ct      // cell value
            h = if outputDim != cellDim // output/hidden state
                then [                  // project
                    Wmr = BS.Parameters.WeightParam (outputDim, cellDim);
                    htp = Wmr * Stabilize (_privateInnards.ht, enabled=enableSelfStabilization)
                ].htp // TODO: ^^ extend BS syntax to allow to say: then [ Wmr = WeightParam(outputDim, cellDim) ] in Wmr * Stabilize (...)
                else _privateInnards.ht // no projection
            dim = outputDim
        ]

        RecurrentLSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, previousHook=BS.RNNs.PreviousHC, enableSelfStabilization=false) =
        [
            prevState = previousHook (lstmState) // recurrence: the previous time step's (h,c)
            inputDim1 = inputDim ; cellDim1 = cellDim ; enableSelfStabilization1 = enableSelfStabilization // local copies, renamed so the named-argument call below does not clash
            lstmState = LSTMP (outputDim, cellDim=cellDim1, x, inputDim=inputDim1, prevState, enableSelfStabilization=enableSelfStabilization1)
        ].lstmState // we return the state record (h,c)

        // define the stack of hidden LSTM layers --TODO: change to RecurrentLSTMPStack(), change stabilizer config
        S(x) = Stabilize (x, enabled=useSelfStabilization)
        LSTMoutput[k:1..numLSTMLayers] =
            if k == 1
            then /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (featNorm),        inputDim=baseFeatDim, enableSelfStabilization=useSelfStabilization).h
            else /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (LSTMoutput[k-1]), inputDim=hiddenDim,   enableSelfStabilization=useSelfStabilization).h
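        // LSTMoutput[k:1..numLSTMLayers] is a BrainScript array comprehension: it unrolls
        // into numLSTMLayers (3) stacked layers, each reading the previous layer's output h.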

        // and add a softmax layer on top
        W = BS.Parameters.WeightParam (labelDim, hiddenDim)
        B = BS.Parameters.BiasParam (labelDim)

        z = W * S(LSTMoutput[numLSTMLayers]) + B; // top-level input to Softmax

        // training
        # this shows how both CE and frame error rate can be constructed as BS expressions
        # BUGBUG: The per-sample criterion will trigger a bug fix in momentum computation
        # which leads to a slightly better objective value than the baseline.
        # For now, we will use SumElements() to neutralize this. Once we have a chance to update
        # the baselines, we should remove SumElements() below.
        ce  = /*Pass*/ SumElements (ReduceLogSum (z) - TransposeTimes (labels, z),           tag='criterion')  // manually-defined per-sample objective
        err = /*Pass*/ SumElements (BS.Constants.One - TransposeTimes (labels, Hardmax (z)), tag='evaluation') // also track frame errors
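        // With one-hot labels, TransposeTimes (labels, z) picks z_correct, so ce is
        // log(sum_j exp(z_j)) - z_correct: cross entropy with softmax, written out by hand.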

        // decoding
        logPrior = LogPrior(labels)
        ScaledLogLikelihood = Pass (z - logPrior, tag='output') // using Pass() since we can't assign a tag to x - y
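        // Subtracting the log label prior converts the softmax posterior into a scaled
        // acoustic likelihood, which is what an HMM decoder consumes.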
    ]
]

run-test
@@ -0,0 +1,8 @@
#!/bin/bash

. $TEST_ROOT_DIR/run-test-common

ConfigDir=$TEST_DIR/../../../HTKDeserializers/LSTM/FullUtteranceBinaryMLF

# cntkrun <CNTK config file name> <additional CNTK args>
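# Truncated=false switches training to whole utterances (no truncated BPTT), matching the
# FullUtteranceBinaryMLF directory name; the SGD overrides shrink the run for testing.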
cntkrun cntk.cntk 'Truncated=false speechTrain=[SGD=[epochSize=2560]] speechTrain=[SGD=[maxEpochs=2]] speechTrain=[SGD=[numMBsToShowResult=1]] shareNodeValueMatrices=true' || exit $?

testcases.yml
@@ -0,0 +1,31 @@
dataDir: ../../../Data
tags:
  - bvt-e (build_sku == 'gpu') and ((flavor == 'release') if (os == 'windows') else ((flavor == 'debug') ^ (device == 'cpu')))
  - nightly-e (build_sku == 'gpu')
  - weekly-e (build_sku == 'gpu')
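# The tag expressions gate where this test runs: GPU builds only; on Windows only the
# release flavor, and elsewhere the XOR picks the debug/GPU and release/CPU combinations.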

testCases:
  CNTK Run must be completed:
    patterns:
      - __COMPLETED__

  Must train epochs in exactly same order and parameters:
    patterns:
      - Starting Epoch {{integer}}
      - learning rate per sample = {{float}}
      - momentum = {{float}}

# Epochs must be finished with expected results:
#   patterns:
#     - ^Finished Epoch[{{integer}} of {{integer}}]
#     - CrossEntropyWithSoftmax = {{float,tolerance=.1%}}
#     - EvalClassificationError = {{float,tolerance=.1%}}
#     - learningRatePerSample = {{float,tolerance=0.001%}}
#
# Per-minibatch training results must match:
#   patterns:
#     - ^ Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
#     - " * {{integer}}; "
#     - CrossEntropyWithSoftmax = {{float,tolerance=.1%}}
#     - EvalClassificationError = {{float,tolerance=.1%}}
#