Add E2E test for binary MLF
Parent: 7c59a63d6c
Commit: cb3d08ac51
Binary file not shown.
File diff suppressed because it is too large.
File diff suppressed because it is too large.
@@ -0,0 +1,165 @@
# end-to-end test for recurrent LSTM for speech

precision = "float"
deviceId = $DeviceId$

command = speechTrain

# Note: These options are overridden from the command line in some test cases.
frameMode = false
truncated = true
parallelTrain = false

speechTrain = [
    action = "train"
    modelPath = "$RunDir$/models/cntkSpeech.dnn"
    #deviceId = $DeviceId$
    traceLevel = 1

    SGD = [
        epochSize = 20480
        minibatchSize = 20
        learningRatesPerMB = 0.5
        numMBsToShowResult = 10
        momentumPerMB = 0:0.9
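        # schedule syntax: momentum 0 for the first epoch, then 0.9 for the remaining epochs
        # (':'-separated values are per-epoch; the last value repeats)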
        maxEpochs = 4
        keepCheckPointFiles = true
    ]

    reader = [
        verbosity = 0
        randomize = true

        # A list of deserializers to use.
        deserializers = (
            [
                type = "HTKFeatureDeserializer"
                module = "HTKDeserializers"

                # Description of input streams
                input = [
                    # Description of a particular input stream
                    features = [
                        dim = 363
                        scpFile = "$DataDir$/glob_0000.scp"
                    ]
                ]
            ]:
            [
                type = "HTKMLFBinaryDeserializer"
                module = "HTKDeserializers"
                chunkSizeInBytes = 36
                input = [
                    labels = [
                        mlfFile = "$DataDir$/mlf2.bin"
                        dim = 132
                    ]
                ]
            ]
        )
    ]

    # TODO: share this section with ../../../LSTM/cntk.cntk
    # define the network using BrainScript
    BrainScriptNetworkBuilder = [

        useSelfStabilization = true

        // define basic I/O
        baseFeatDim = 33
        featDim = 11 * baseFeatDim
        labelDim = 132

        // hidden dimensions
        innerCellDim = 1024
        hiddenDim = 256
        numLSTMLayers = 3 // number of hidden LSTM model layers

        // features
        features = Input ((1 : featDim), tag='feature') // TEST: Artificially reading data transposed
        realFeatures = Transpose (features)             // and swapping them back to (featDim:1), for testing Transpose()
        labels = Input (labelDim, tag='label')
        feashift = RowSlice (featDim - baseFeatDim, baseFeatDim, realFeatures);
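        # dimension check: featDim = 11 * 33 = 363 matches the feature stream's dim in the reader above;
        # RowSlice (featDim - baseFeatDim, baseFeatDim, x) keeps the last 33 rows, i.e. the last frame
        # of the 11-frame stacked context window.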

        featNorm = MeanVarNorm (feashift)

        // we define the LSTM locally for now, since the one in CNTK.core.bs has a slightly changed configuration that breaks this test
        Stabilize (x, enabled=true) =
            if enabled
            then [
                beta = Exp (BS.Parameters.BiasParam ((1))) # init value is 0
                result = beta .* x
            ].result
            else x

        LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =
        [
            _privateInnards = [ // encapsulate the inner workings
                dh = prevState.h // previous values
                dc = prevState.c

                // parameter macros--these carry their own weight matrices
                B() = BS.Parameters.BiasParam (cellDim)

                W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * Stabilize (v, enabled=enableSelfStabilization)  // input-to-hidden
                H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
                C(c) = BS.Parameters.DiagWeightParam (cellDim) .* Stabilize (c, enabled=enableSelfStabilization)       // cell-to-hidden (note: applied elementwise)

                // note: each W(x) below is a separate instance with its own set of weights; the same holds for H(dh), C(dc), and B()
                it = Sigmoid (W(x) + B() + H(dh) + C(dc)) // input gate(t)
                bit = it .* Tanh (W(x) + (H(dh) + B()))   // applied to tanh of input network

                ft = Sigmoid (W(x) + B() + H(dh) + C(dc)) // forget-me-not gate(t)
                bft = ft .* dc                            // applied to cell(t-1)

                ct = bft + bit                            // c(t) is the sum of both

                ot = Sigmoid (W(x) + B() + H(dh) + C(ct)) // output gate(t)
                ht = ot .* Tanh (ct)                      // applied to tanh(cell(t))
            ]

            # our return values
            c = _privateInnards.ct      // cell value
            h = if outputDim != cellDim // output/hidden state
                then [                  // project
                    Wmr = BS.Parameters.WeightParam (outputDim, cellDim);
                    htp = Wmr * Stabilize (_privateInnards.ht, enabled=enableSelfStabilization)
                ].htp // TODO: extend BS syntax to allow saying: then [ Wmr = WeightParam (outputDim, cellDim) ] in Wmr * Stabilize (...)
                else _privateInnards.ht // no projection
            dim = outputDim
        ]

        RecurrentLSTMP (outputDim, cellDim=outputDim.dim, x, inputDim=x.dim, previousHook=BS.RNNs.PreviousHC, enableSelfStabilization=false) =
        [
            prevState = previousHook (lstmState)
            inputDim1 = inputDim ; cellDim1 = cellDim ; enableSelfStabilization1 = enableSelfStabilization
            lstmState = LSTMP (outputDim, cellDim=cellDim1, x, inputDim=inputDim1, prevState, enableSelfStabilization=enableSelfStabilization1)
        ].lstmState // we return the state record (h,c)

        // define the stack of hidden LSTM layers --TODO: change to RecurrentLSTMPStack(), change stabilizer config
        S(x) = Stabilize (x, enabled=useSelfStabilization)
        LSTMoutput[k:1..numLSTMLayers] =
            if k == 1
            then /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (featNorm), inputDim=baseFeatDim, enableSelfStabilization=useSelfStabilization).h
            else /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (LSTMoutput[k-1]), inputDim=hiddenDim, enableSelfStabilization=useSelfStabilization).h

        // and add a softmax layer on top
        W = BS.Parameters.WeightParam (labelDim, hiddenDim)
        B = BS.Parameters.BiasParam (labelDim)

        z = W * S(LSTMoutput[numLSTMLayers]) + B; // top-level input to Softmax

        // training
        # this shows how both CE and frame error rate can be constructed as BS expressions
        # BUGBUG: The per-sample criterion will trigger a bug fix in the momentum computation,
        # which leads to a slightly better objective value than the baseline.
        # For now, we use SumElements() to neutralize this. Once we have a chance to update
        # the baselines, we should remove SumElements() below.
        ce  = /*Pass*/ SumElements (ReduceLogSum (z) - TransposeTimes (labels, z), tag='criterion')            // manually-defined per-sample objective
        err = /*Pass*/ SumElements (BS.Constants.One - TransposeTimes (labels, Hardmax (z)), tag='evaluation') // also track frame errors

        // decoding
        logPrior = LogPrior (labels)
        ScaledLogLikelihood = Pass (z - logPrior, tag='output') // using Pass() since we can't assign a tag to x - y
    ]
]
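For readers cross-checking the BrainScript above: the gate expressions in _privateInnards implement a peephole LSTM, and ce spells out cross-entropy with softmax by hand. Restated in conventional notation (an editorial aid, not part of the committed config; each gate owns its own W, H, and b, and the C terms are the diagonal peephole weights):

    i_t = \sigma (W_i x_t + H_i h_{t-1} + C_i \odot c_{t-1} + b_i)    % it
    \tilde{c}_t = \tanh (W_c x_t + H_c h_{t-1} + b_c)                 % inner term of bit
    f_t = \sigma (W_f x_t + H_f h_{t-1} + C_f \odot c_{t-1} + b_f)    % ft
    c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t                   % ct = bft + bit
    o_t = \sigma (W_o x_t + H_o h_{t-1} + C_o \odot c_t + b_o)        % ot
    h_t = o_t \odot \tanh (c_t)                                       % ht

With one-hot labels y, the per-frame criterion is the usual negative log-softmax:

    \mathrm{ce} = \log \textstyle\sum_j e^{z_j} - y^\top z = -\log \mathrm{softmax}(z)_y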
@@ -0,0 +1,8 @@
#!/bin/bash

. $TEST_ROOT_DIR/run-test-common

ConfigDir=$TEST_DIR/../../../HTKDeserializers/LSTM/FullUtteranceBinaryMLF

# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk.cntk 'Truncated=false speechTrain=[SGD=[epochSize=2560]] speechTrain=[SGD=[maxEpochs=2]] speechTrain=[SGD=[numMBsToShowResult=1]] shareNodeValueMatrices=true' || exit $?
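The three speechTrain=[SGD=[...]] arguments each override a single key of the same nested SGD section. Assuming CNTK's usual command-line override syntax, where semicolon-separated assignments can share one bracket pair, a merged single-override form should be equivalent (a sketch, not the committed script):

    # sketch: the same SGD overrides merged into one bracketed assignment
    cntkrun cntk.cntk 'Truncated=false speechTrain=[SGD=[epochSize=2560;maxEpochs=2;numMBsToShowResult=1]] shareNodeValueMatrices=true' || exit $?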
@@ -0,0 +1,31 @@
dataDir: ../../../Data
tags:
  - bvt-e (build_sku == 'gpu') and ((flavor == 'release') if (os == 'windows') else ((flavor == 'debug') ^ (device == 'cpu')))
  - nightly-e (build_sku == 'gpu')
  - weekly-e (build_sku == 'gpu')

testCases:
  CNTK Run must be completed:
    patterns:
      - __COMPLETED__

  Must train epochs in exactly the same order and with the same parameters:
    patterns:
      - Starting Epoch {{integer}}
      - learning rate per sample = {{float}}
      - momentum = {{float}}

#  Epochs must be finished with expected results:
#    patterns:
#      - ^Finished Epoch[{{integer}} of {{integer}}]
#      - CrossEntropyWithSoftmax = {{float,tolerance=.1%}}
#      - EvalClassificationError = {{float,tolerance=.1%}}
#      - learningRatePerSample = {{float,tolerance=0.001%}}
#
#  Per-minibatch training results must match:
#    patterns:
#      - ^ Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
#      - " * {{integer}}; "
#      - CrossEntropyWithSoftmax = {{float,tolerance=.1%}}
#      - EvalClassificationError = {{float,tolerance=.1%}}
#