Vadim Mazalov 2017-12-25 10:33:59 -08:00
Parent 60c299647f
Commit d38c060f07
9 changed files with 373 additions and 2 deletions

View file

@@ -140,7 +140,7 @@ public:
tempmatrix = loglikelihood.ColumnSlice(ts, numframes);
// if (m_deviceid == CPUDEVICE)
{
-    CopyFromCNTKMatrixToSSEMatrix(tempmatrix, numframes, predstripe, false);
+    CopyFromCNTKMatrixToSSEMatrix(tempmatrix, numframes, predstripe);
}
if (m_deviceid != CPUDEVICE)
@@ -176,7 +176,7 @@
// if (doreferencealign || m_deviceid == CPUDEVICE)
{
-    CopyFromCNTKMatrixToSSEMatrix(tempmatrix, numframes, predstripe, false);
+    CopyFromCNTKMatrixToSSEMatrix(tempmatrix, numframes, predstripe);
}
if (m_deviceid != CPUDEVICE)

View file

@@ -0,0 +1,7 @@
m1 = LoadModel("$currModel$", format="cntk")
SetDefaultModel(m1)
HDim = 512
# Create a new 512x512 hidden layer fed by the current top hidden layer.
HL$newLayer$ = DNNLayer(HDim, HDim, HL$currLayer$.y)
# Rewire the output layer to read from the new layer instead.
SetInput(OL.t, 1, HL$newLayer$.y)
SetInput(HL$newLayer$.t, 1, HL$currLayer$.y)
SaveModel(m1, "$newModel$", format="cntk")
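Taken together with the DNNLayer macro in macros.txt further down, this edit script splices one new sigmoid layer into the stack. The effect for currLayer=1, newLayer=2, sketched (illustrative only):

# before: featNorm -> HL1 -> OL
# after:  featNorm -> HL1 -> HL2 -> OL   (HL2 is the new 512x512 layer)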

View file

@@ -0,0 +1,193 @@
precision = "float"
deviceId = $DeviceId$
command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain:replaceCriterionNode:sequenceTrain
ndlMacros = "$ConfigDir$/macros.txt"
globalMeanPath = "GlobalStats/mean.363"
globalInvStdPath = "GlobalStats/var.363"
globalPriorPath = "GlobalStats/prior.132"
traceLevel = 1
truncated = false
# Default SGD settings, used by the pre-training stages.
SGD = [
epochSize = 81920
minibatchSize = 256
learningRatesPerMB = 0.8
numMBsToShowResult = 10
momentumPerMB = 0.9
dropoutRate = 0.0
maxEpochs = 2
]
dptPre1 = [
action = "train"
modelPath = "$RunDir$/models/Pre1/cntkSpeech"
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/dnn_1layer.txt"
]
]
addLayer2 = [
action = "edit"
currLayer = 1
newLayer = 2
currModel = "$RunDir$/models/Pre1/cntkSpeech"
newModel = "$RunDir$/models/Pre2/cntkSpeech.0"
editPath = "$ConfigDir$/add_layer.mel"
]
dptPre2 = [
action = "train"
modelPath = "$RunDir$/models/Pre2/cntkSpeech"
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/dnn_1layer.txt"
]
]
addLayer3 = [
action = "edit"
currLayer = 2
newLayer = 3
currModel = "$RunDir$/models/Pre2/cntkSpeech"
newModel = "$RunDir$/models/cntkSpeech.0"
editPath = "$ConfigDir$/add_layer.mel"
]
speechTrain = [
action = "train"
modelPath = "$RunDir$/models/cntkSpeech"
#deviceId = $DeviceId$
traceLevel = 1
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/dnn.txt"
]
SGD = [
epochSize = 81920
minibatchSize = 256:512
learningRatesPerMB = 0.8:1.6
numMBsToShowResult = 10
momentumPerSample = 0.999589
dropoutRate = 0.0
maxEpochs = 4
gradUpdateType = "none"
normWithAveMultiplier = true
clippingThresholdPerSample = 1#INF
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
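# Sequence-discriminative training stage (runs after replaceCriterionNode has swapped the criterion node into the model).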
sequenceTrain = [
action = "train"
modelPath = "$RunDir$/models/cntkSpeech.sequence"
traceLevel = 1
SGD = [
epochSize = 6000000
minibatchSize = 800000
learningRatesPerSample = 0.000002
momentumPerSample = 0.999589
dropoutRate = 0.0
maxEpochs = 3
gradientClippingWithTruncation = true
clippingThresholdPerSample = 1.0
]
reader = [
verbosity = 0
randomize = false
# A list of deserializers the reader uses.
deserializers = (
[
type = "HTKFeatureDeserializer"
module = "HTKDeserializers"
input = [
# Description of input stream to feed the Input node named "features"
features = [
dim=363
scpFile = "$DataDir$/glob_0000.scp"
]
]
]:
[
type = "HTKMLFDeserializer"
module = "HTKDeserializers"
input = [
# Description of input stream to feed the Input node named "labels"
labels = [
dim = 132
mlfFile="$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
]
]
]:
[
type = "LatticeDeserializer"
module = "HTKDeserializers"
input = [
lattice=[
latticeIndexFile="$DataDir$/latticeIndex.txt"
]
]
]
)
]
BrainScriptNetworkBuilder = {
baseFeatDim = 33
featDim = 11 * baseFeatDim
labelDim = 132
latticeAxis = DynamicAxis()
features = Input{featDim}
labels = Input{labelDim}
lattice = Input{1,dynamicAxis=latticeAxis}
featExtNetwork = BS.Network.Load("$RunDir$/models/cntkSpeech")
featExt = BS.Network.CloneFunction (
(featExtNetwork.features),
[ netEval = featExtNetwork.OL_z ; scaledLogLikelihood = featExtNetwork.scaledLogLikelihood ],
parameters="learnable")
clonedmodel = featExt(features)
cr = SequenceWithLattice(labels, clonedmodel.netEval, clonedmodel.scaledLogLikelihood, lattice, "$DataDir$/CY2SCH010061231_1369712653.numden.lats.symlist", "$DataDir$/model.overalltying", "$DataDir$/state.list", "$DataDir$/model.transprob", tag="criterion")
Err = ClassificationError(labels, clonedmodel.netEval, tag="evaluation");
}
]
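For reference, every $...$ token in this config is a variable that the CNTK v1 binary resolves from key=value arguments on the command line. A minimal invocation sketch, with the config saved as cntk_sequence.cntk (as the test driver below expects) and hypothetical paths:

cntk configFile=cntk_sequence.cntk DeviceId=0 \
    RunDir=/tmp/seqtrain DataDir=/data/an4 ConfigDir=.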

View file

@@ -0,0 +1,42 @@
load = ndlMacroDefine
run = DNN
ndlMacroDefine = [
# Macro definitions
MeanVarNorm(x) = [
xMean = Mean(x);
xStdDev = InvStdDev(x);
xNorm = PerDimMeanVarNormalization(x, xMean, xStdDev);
]
]
DNN = [
#define basic i/o
featDim = 363
labelDim = 132
hiddenDim = 512
features = Input(featDim, tag="feature")
labels = Input(labelDim, tag="label")
globalMean = Parameter(featDim, 1, init="fromFile", initFromFilePath="$globalMeanPath$", computeGradient=false)
globalInvStd = Parameter(featDim, 1, init="fromFile", initFromFilePath="$globalInvStdPath$", computeGradient=false)
globalPrior = Parameter(labelDim, 1, init="fromFile", initFromFilePath="$globalPriorPath$", computeGradient=false)
logPrior = Log(globalPrior)
# define network
featNorm = PerDimMeanVarNormalization(features, globalMean, globalInvStd)
# layer 1: 363 x 512
z1 = DNNLayer(featDim, hiddenDim, featNorm);
# layer 2: 512 x 512
z2 = DNNLayer(hiddenDim, hiddenDim, z1);
# layer 3: 512 x 512
z3 = DNNLayer(hiddenDim, hiddenDim, z2);
# last layer: 512 x 132
z4 = DNNLastLayer(hiddenDim, labelDim, z3);
ce = CrossEntropyWithSoftmax(labels, z4, tag="criterion");
err = ClassificationError(labels, z4, tag="evaluation");
scaledLogLikelihood = Minus(z4, logPrior, tag="output")
]
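The scaledLogLikelihood output implements the usual hybrid DNN-HMM conversion from state posteriors to scaled acoustic likelihoods. By Bayes' rule,

log p(x|s) = log p(s|x) - log p(s) + log p(x)

Since log p(x) is identical for every state s within a frame, and the softmax normalizer of z4 is likewise a per-frame constant, Minus(z4, logPrior) differs from the true log-likelihood only by a per-frame offset, to which decoding and the lattice-based criterion are insensitive.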

View file

@@ -0,0 +1,38 @@
load = ndlMacroDefine
run = DNN
ndlMacroDefine = [
# Macro definitions
MeanVarNorm(x) = [
xMean = Mean(x);
xStdDev = InvStdDev(x);
xNorm = PerDimMeanVarNormalization(x, xMean, xStdDev);
]
]
DNN = [
#define basic i/o
featDim = 363
labelDim = 132
hiddenDim = 512
features = Input(featDim, tag="feature")
labels = Input(labelDim, tag="label")
globalMean = Parameter(featDim, 1, init="fromFile", initFromFilePath="$globalMeanPath$", computeGradient=false)
globalInvStd = Parameter(featDim, 1, init="fromFile", initFromFilePath="$globalInvStdPath$", computeGradient=false)
globalPrior = Parameter(labelDim, 1, init="fromFile", initFromFilePath="$globalPriorPath$", computeGradient=false)
logPrior = Log(globalPrior)
# define network
featNorm = PerDimMeanVarNormalization(features, globalMean, globalInvStd)
# layer 1: 363 x 512
HL1 = DNNLayer(featDim, hiddenDim, featNorm);
# last layer: 512 x 132
OL = DNNLastLayer(hiddenDim, labelDim, HL1);
ce = CrossEntropyWithSoftmax(labels, OL, tag="criterion");
err = ClassificationError(labels, OL, tag="evaluation");
scaledLogLikelihood = Minus(OL, logPrior, tag="output")
]
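This one-hidden-layer network is the seed for the discriminative pre-training schedule in the main config: each dptPre* pass trains it, and each add_layer.mel edit grows it, until the topology matches the three-hidden-layer dnn.txt. The progression, sketched:

# dptPre1:                 featNorm -> HL1 -> OL
# addLayer2 + dptPre2:     featNorm -> HL1 -> HL2 -> OL
# addLayer3 + speechTrain: featNorm -> HL1 -> HL2 -> HL3 -> OL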

View file

@@ -0,0 +1,18 @@
DNNLayer(inDim, outDim, x) = [
#W = Parameter(outDim, inDim, init="uniform", initValueScale=1, initOnCPUOnly=true, randomSeed=1); # randomizing on CPU with a fixed seed to get reproducible results across configurations
#b = Parameter(outDim, 1, init="uniform", initValueScale=1, initOnCPUOnly=true, randomSeed=1);
W = Parameter(outDim, inDim);
b = Parameter(outDim, 1);
t = Times(W, x);
z = Plus(t, b);
y = Sigmoid(z);
]
DNNLastLayer(hiddenDim, LabelDim, x) = [
#W = Parameter(LabelDim, hiddenDim, init="uniform", initValueScale=1, initOnCPUOnly=true, randomSeed=1);
#b = Parameter(LabelDim, 1, init="uniform", initValueScale=1, initOnCPUOnly=true, randomSeed=1);
W = Parameter(LabelDim, hiddenDim);
b = Parameter(LabelDim, 1);
t = Times(W, x);
z = Plus(t, b);
]
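Note the asymmetry between the two macros: hidden layers end in a sigmoid, y = Sigmoid(W x + b), while DNNLastLayer stops at the affine output z = W x + b. The softmax is deliberately left out of the network itself, because CrossEntropyWithSoftmax (and the sequence criterion that later replaces it) folds the softmax into the criterion node, which is the numerically stable formulation.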

View file

@@ -0,0 +1,11 @@
m1 = LoadModel("$currModel$", format="cntk")
SetDefaultModel(m1)
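# Swap the frame-level CE criterion for the sequence-level one: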
SetProperty(ce, "criterion", false)
Remove(ce)
# note: we reuse the same node name, since Jenkins does not allow specifying two different node names
ce = SequenceWithSoftmax(labels, OL.z, scaledLogLikelihood)
SetProperty(ce, "criterion", true)
SaveModel(m1, "$newModel$", format="cntk")
Dump(m1, "$newModel$.dump.txt")

View file

@@ -0,0 +1,31 @@
#!/bin/bash
. $TEST_ROOT_DIR/run-test-common
# This test uses a large dataset which is not part of the CNTK repository itself
# We use the dataset from an external location specified using an environment variable
if [[ "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" == "" || ! -d "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" ]]; then
echo 'This test uses external data that is not part of the CNTK repository. Environment variable CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY must be set to point to the external test data location'
exit 1
fi
if [ "$OS" == "Windows_NT" ]; then
DataSourceDir=`cygpath -au $CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY`/Speech/AN4Corpus/v0
else
DataSourceDir=$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY/Speech/AN4Corpus/v0
fi
# Copy the test data to the test run directory
DataDir=$TEST_RUN_DIR/TestData
mkdir "$DataDir"
cp -R "$DataSourceDir"/* "$DataDir" || exit $?
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk_sequence.cntk
ExitCode=$?
# Delete the test data
rm -rf $DataDir
exit $ExitCode

View file

@@ -0,0 +1,31 @@
dataDir: ../../Data
tags:
# Note: Sequence training is currently not supported on the CPU
- bvt-s (build_sku == 'gpu') and (device == 'gpu') and ((flavor == 'debug') ^ (os == 'windows'))
- nightly-s (build_sku == 'gpu') and (device == 'gpu')
- weekly-s (build_sku == 'gpu') and (device == 'gpu')
testCases:
CNTK Run must be completed:
patterns:
- __COMPLETED__
Must train epochs in exactly the same order and with the same parameters:
patterns:
- Starting Epoch {{integer}}
- learning rate per sample = {{float}}
- momentum = {{float}}
Epochs must be finished with expected results:
patterns:
- Finished Epoch[{{integer}} of {{integer}}]
- ce = {{float,tolerance=.1%}}
- err = {{float,tolerance=.25%}}
- learningRatePerSample = {{float,tolerance=0.001%}}
Per-minibatch training results must match:
patterns:
- Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
- " * {{integer}}; "
- ce = {{float,tolerance=.2%}}