cleaned up the Jenkins test configs w.r.t. indentation, spacing, casing. Also quoted all strings in config (not yet in NDL) for BS compat
This commit is contained in:
Родитель
cf407d2b3f
Коммит
db20043bb5
22
CNTK.sln
22
CNTK.sln
|
@ -194,7 +194,8 @@ EndProject
|
|||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LSTM", "LSTM", "{19EE975B-232D-49F0-94C7-6F1C6424FB53}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tests\Speech\LSTM\cntk.config = Tests\Speech\LSTM\cntk.config
|
||||
..\..\..\..\..\work\cntk-public\Tests\Speech\LSTM\lstm.bs = ..\..\..\..\..\work\cntk-public\Tests\Speech\LSTM\lstm.bs
|
||||
Tests\Speech\LSTM\lstm.bs = Tests\Speech\LSTM\lstm.bs
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\macros.txt = Tests\Speech\DNN\DiscriminativePreTraining\macros.txt
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ParseConfig", "MachineLearning\ParseConfig\ParseConfig.vcxproj", "{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}"
|
||||
|
@ -379,11 +380,15 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelNoQuantization", "P
|
|||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DiscriminativePreTraining", "DiscriminativePreTraining", "{39B9BB97-D0E8-439A-8A1B-8DB8E7CF73C3}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\add_layer.mel = Tests\Speech\DNN\DiscriminativePreTraining\add_layer.mel
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\baseline.cpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.cpu.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\baseline.gpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.gpu.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.cpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.cpu.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.gpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.gpu.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\cntk_dpt.config = Tests\Speech\DNN\DiscriminativePreTraining\cntk_dpt.config
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\dnn.txt = Tests\Speech\DNN\DiscriminativePreTraining\dnn.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\dnn_1layer.txt = Tests\Speech\DNN\DiscriminativePreTraining\dnn_1layer.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\macros.txt = Tests\Speech\DNN\DiscriminativePreTraining\macros.txt
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\run-test = Tests\Speech\DNN\DiscriminativePreTraining\run-test
|
||||
Tests\Speech\DNN\DiscriminativePreTraining\testcases.yml = Tests\Speech\DNN\DiscriminativePreTraining\testcases.yml
|
||||
EndProjectSection
|
||||
|
@ -413,6 +418,20 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UCIFastReaderTests", "Tests
|
|||
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceTraining", "SequenceTraining", "{BB8B9FC5-C4B3-477F-80E2-665DC8E431BD}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tests\Speech\DNN\SequenceTraining\add_layer.mel = Tests\Speech\DNN\SequenceTraining\add_layer.mel
|
||||
Tests\Speech\DNN\SequenceTraining\baseline.gpu.txt = Tests\Speech\DNN\SequenceTraining\baseline.gpu.txt
|
||||
Tests\Speech\DNN\SequenceTraining\baseline.windows.gpu.txt = Tests\Speech\DNN\SequenceTraining\baseline.windows.gpu.txt
|
||||
Tests\Speech\DNN\SequenceTraining\cntk_sequence.config = Tests\Speech\DNN\SequenceTraining\cntk_sequence.config
|
||||
Tests\Speech\DNN\SequenceTraining\dnn.txt = Tests\Speech\DNN\SequenceTraining\dnn.txt
|
||||
Tests\Speech\DNN\SequenceTraining\dnn_1layer.txt = Tests\Speech\DNN\SequenceTraining\dnn_1layer.txt
|
||||
Tests\Speech\DNN\SequenceTraining\macros.txt = Tests\Speech\DNN\SequenceTraining\macros.txt
|
||||
Tests\Speech\DNN\SequenceTraining\replace_ce_with_sequence_criterion.mel = Tests\Speech\DNN\SequenceTraining\replace_ce_with_sequence_criterion.mel
|
||||
Tests\Speech\DNN\SequenceTraining\run-test = Tests\Speech\DNN\SequenceTraining\run-test
|
||||
Tests\Speech\DNN\SequenceTraining\testcases.yml = Tests\Speech\DNN\SequenceTraining\testcases.yml
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Mixed Platforms = Debug|Mixed Platforms
|
||||
|
@ -699,5 +718,6 @@ Global
|
|||
{4701E678-5E6F-470D-B348-9CD1A2C095D1} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
|
||||
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
|
||||
{BB8B9FC5-C4B3-477F-80E2-665DC8E431BD} = {6994C86D-A672-4254-824A-51F4DFEB807F}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
||||
// ParseCommandLine - parse the command line parameters
|
||||
// argc - count of arguments
|
||||
// argv - array of argument parameters
|
||||
|
@ -113,10 +112,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Ensure that the same config file isn't included twice, by keeping track of the config
|
||||
// files that have already been resolved in the resolvedPaths vector.
|
||||
resolvedConfigFiles.push_back(filePath);
|
||||
newConfigString += ResolveIncludeStatements(
|
||||
ReadConfigFile(filePath),
|
||||
resolvedConfigFiles
|
||||
);
|
||||
newConfigString += ResolveIncludeStatements(ReadConfigFile(filePath), resolvedConfigFiles);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -396,35 +396,35 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
std::string propName = params[1];
|
||||
MELProperty prop=melPropNull;
|
||||
if (EqualInsensitive(propName, "ComputeGradient", "NeedsGradient"))
|
||||
if (EqualInsensitive(propName, "computeGradient", "needsGradient"))
|
||||
{
|
||||
prop = melPropComputeGradient;
|
||||
}
|
||||
else if (EqualInsensitive(propName, "Feature"))
|
||||
else if (EqualInsensitive(propName, "feature"))
|
||||
{
|
||||
prop = melPropFeature;
|
||||
}
|
||||
else if (EqualInsensitive(propName, "Label"))
|
||||
else if (EqualInsensitive(propName, "label"))
|
||||
{
|
||||
prop = melPropLabel;
|
||||
}
|
||||
else if (EqualInsensitive(propName, "FinalCriterion", "Criteria"))
|
||||
else if (EqualInsensitive(propName, "finalCriterion", "criterion") || EqualInsensitive(propName, "finalCriterion", "Criteria"))
|
||||
{
|
||||
prop = melPropFinalCriterion;
|
||||
}
|
||||
else if (EqualInsensitive(propName, "MultiSeq", "ReqMultiSeqHandling"))
|
||||
else if (EqualInsensitive(propName, "multiSeq", "reqMultiSeqHandling"))
|
||||
{
|
||||
fprintf(stderr, "WARNING: '%s' property is defunct and will be ignored.\n", propName.c_str());
|
||||
}
|
||||
else if (EqualInsensitive(propName, "Evaluation", "Eval"))
|
||||
else if (EqualInsensitive(propName, "evaluation", "eval"))
|
||||
{
|
||||
prop = melPropEvaluation;
|
||||
}
|
||||
else if (EqualInsensitive(propName, "Output"))
|
||||
else if (EqualInsensitive(propName, "output"))
|
||||
{
|
||||
prop = melPropOutput;
|
||||
}
|
||||
else if (EqualInsensitive(propName, "Recurrent"))
|
||||
else if (EqualInsensitive(propName, "recurrent"))
|
||||
{
|
||||
prop = melPropRecurrent;
|
||||
}
|
||||
|
|
|
@ -282,13 +282,13 @@ public:
|
|||
{
|
||||
SetOutputNode(m_net->LabelNodes(), compNode);
|
||||
}
|
||||
else if (!_stricmp(value.c_str(), "criteria"))
|
||||
else if (!_stricmp(value.c_str(), "criterion") || !_stricmp(value.c_str(), "criteria"))
|
||||
{
|
||||
SetOutputNode(m_net->FinalCriterionNodes(), compNode);
|
||||
}
|
||||
else if (!_stricmp(value.c_str(), "multiseq"))
|
||||
else if (!_stricmp(value.c_str(), "multiSeq"))
|
||||
{
|
||||
fprintf(stderr, "'multiseq' tag is defunct.\n");
|
||||
fprintf(stderr, "'multiSeq' tag is defunct.\n");
|
||||
}
|
||||
else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters
|
||||
{
|
||||
|
|
|
@ -1,63 +1,63 @@
|
|||
deviceId=$DeviceId$
|
||||
command=SimpleMultiGPU
|
||||
precision=float
|
||||
deviceId = $DeviceId$
|
||||
command = simpleMultiGPU
|
||||
precision = "float"
|
||||
|
||||
parallelTrain=true
|
||||
parallelTrain = true
|
||||
|
||||
SimpleMultiGPU=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/Simple.dnn
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
simpleMultiGPU = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/Simple.dnn"
|
||||
#deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
|
||||
SimpleNetworkBuilder=[
|
||||
SimpleNetworkBuilder = [
|
||||
# 2 input, 2 50-element hidden, 2 output
|
||||
layerSizes=2:50*2:2
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
evalCriterion=ErrorPrediction
|
||||
layerTypes=Sigmoid
|
||||
initValueScale=1.0
|
||||
applyMeanVarNorm=true
|
||||
uniformInit=true
|
||||
needPrior=true
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
uniformInit = true
|
||||
needPrior = true
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=25
|
||||
learningRatesPerMB=0.5:0.2*20:0.1
|
||||
momentumPerMB=0.9
|
||||
dropoutRate=0.0
|
||||
maxEpochs=4
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 25
|
||||
learningRatesPerMB = 0.5:0.2*20:0.1
|
||||
momentumPerMB = 0.9
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 4
|
||||
|
||||
ParallelTrain=[
|
||||
parallelizationMethod=DataParallelSGD
|
||||
DataParallelSGD=[
|
||||
gradientBits=1
|
||||
ParallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
DataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
# Parameter values for the reader
|
||||
reader=[
|
||||
# reader to use
|
||||
readerType=UCIFastReader
|
||||
file=$DataDir$/SimpleDataTrain.txt
|
||||
reader = [
|
||||
# reader to use
|
||||
readerType = "UCIFastReader"
|
||||
file = "$DataDir$/SimpleDataTrain.txt"
|
||||
|
||||
miniBatchMode=Partial
|
||||
randomize=None
|
||||
verbosity=1
|
||||
miniBatchMode = "partial"
|
||||
randomize = "none"
|
||||
verbosity = 1
|
||||
|
||||
features=[
|
||||
dim=2 # two-dimensional input data
|
||||
start=0 # Start with first element on line
|
||||
]
|
||||
features = [
|
||||
dim = 2 # two-dimensional input data
|
||||
start = 0 # Start with first element on line
|
||||
]
|
||||
|
||||
labels=[
|
||||
start=2 # Skip two elements
|
||||
dim=1 # One label dimension
|
||||
labelDim=2 # Two labels possible
|
||||
labelMappingFile=$DataDir$/SimpleMapping.txt
|
||||
]
|
||||
labels = [
|
||||
start = 2 # Skip two elements
|
||||
dim = 1 # One label dimension
|
||||
labelDim = 2 # Two labels possible
|
||||
labelMappingFile = "$DataDir$/SimpleMapping.txt"
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
m1=LoadModel($CurrModel$, format=cntk)
|
||||
m1 = LoadModel($CurrModel$, format=cntk)
|
||||
SetDefaultModel(m1)
|
||||
HDim=512
|
||||
HL$NewLayer$=DNNLayer(HDim, HDim, HL$CurrLayer$.y)
|
||||
HDim = 512
|
||||
HL$NewLayer$ = DNNLayer(HDim, HDim, HL$CurrLayer$.y)
|
||||
SetInput(OL.t, 1, HL$NewLayer$.y)
|
||||
SetInput(HL$NewLayer$.t, 1, HL$CurrLayer$.y)
|
||||
SaveModel(m1, $NewModel$, format=cntk)
|
||||
|
|
|
@ -1,104 +1,106 @@
|
|||
precision=float
|
||||
deviceId=$DeviceId$
|
||||
command=DPT_Pre1:AddLayer2:DPT_Pre2:AddLayer3:speechTrain
|
||||
precision = "float"
|
||||
deviceId = $DeviceId$
|
||||
command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain
|
||||
|
||||
ndlMacros=$ConfigDir$/macros.txt
|
||||
ndlMacros = "$ConfigDir$/macros.txt"
|
||||
|
||||
GlobalMean=GlobalStats/mean.363
|
||||
GlobalInvStd=GlobalStats/var.363
|
||||
GlobalPrior=GlobalStats/prior.132
|
||||
# TODO: are these used? Where?
|
||||
globalMean = "GlobalStats/mean.363"
|
||||
globalInvStd = "GlobalStats/var.363"
|
||||
globalPrior = "GlobalStats/prior.132"
|
||||
|
||||
traceLevel=1
|
||||
traceLevel = 1
|
||||
|
||||
# Default SGD value used for pre-training.
|
||||
SGD=[
|
||||
epochSize=81920
|
||||
minibatchSize=256
|
||||
learningRatesPerMB=0.8
|
||||
numMBsToShowResult=10
|
||||
momentumPerMB=0.9
|
||||
dropoutRate=0.0
|
||||
maxEpochs=2
|
||||
SGD = [
|
||||
epochSize = 81920
|
||||
minibatchSize = 256
|
||||
learningRatesPerMB = 0.8
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 2
|
||||
]
|
||||
|
||||
DPT_Pre1=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/Pre1/cntkSpeech
|
||||
dptPre1 = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/Pre1/cntkSpeech"
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/dnn_1layer.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/dnn_1layer.txt"
|
||||
]
|
||||
]
|
||||
|
||||
AddLayer2=[
|
||||
action=edit
|
||||
CurrLayer=1
|
||||
NewLayer=2
|
||||
CurrModel=$RunDir$/models/Pre1/cntkSpeech
|
||||
NewModel=$RunDir$/models/Pre2/cntkSpeech.0
|
||||
editPath=$ConfigDir$/add_layer.mel
|
||||
addLayer2 = [
|
||||
action = "edit"
|
||||
currLayer = 1
|
||||
newLayer = 2
|
||||
currModel = "$RunDir$/models/Pre1/cntkSpeech"
|
||||
newModel = "$RunDir$/models/Pre2/cntkSpeech.0"
|
||||
editPath = "$ConfigDir$/add_layer.mel"
|
||||
]
|
||||
|
||||
DPT_Pre2=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/Pre2/cntkSpeech
|
||||
dptPre2 = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/Pre2/cntkSpeech"
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/dnn_1layer.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/dnn_1layer.txt"
|
||||
]
|
||||
]
|
||||
|
||||
AddLayer3=[
|
||||
action=edit
|
||||
CurrLayer=2
|
||||
NewLayer=3
|
||||
CurrModel=$RunDir$/models/Pre2/cntkSpeech
|
||||
NewModel=$RunDir$/models/cntkSpeech.0
|
||||
editPath=$ConfigDir$/add_layer.mel
|
||||
addLayer3 = [
|
||||
action = "edit"
|
||||
currLayer = 2
|
||||
newLayer = 3
|
||||
currModel = "$RunDir$/models/Pre2/cntkSpeech"
|
||||
newModel = "$RunDir$/models/cntkSpeech.0"
|
||||
editPath = "$ConfigDir$/add_layer.mel"
|
||||
]
|
||||
|
||||
speechTrain=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/cntkSpeech
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/cntkSpeech"
|
||||
deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/dnn.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/dnn.txt"
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=81920
|
||||
minibatchSize=256:512
|
||||
learningRatesPerMB=0.8:1.6
|
||||
numMBsToShowResult=10
|
||||
momentumPerSample=0.999589
|
||||
dropoutRate=0.0
|
||||
maxEpochs=4
|
||||
|
||||
gradUpdateType=None
|
||||
normWithAveMultiplier=true
|
||||
clippingThresholdPerSample=1#INF
|
||||
|
||||
SGD = [
|
||||
epochSize = 81920
|
||||
minibatchSize = 256:512
|
||||
learningRatesPerMB = 0.8:1.6
|
||||
numMBsToShowResult = 10
|
||||
momentumPerSample = 0.999589
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 4
|
||||
|
||||
gradUpdateType = "none"
|
||||
normWithAveMultiplier = true
|
||||
clippingThresholdPerSample = 1#INF
|
||||
]
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=HTKMLFReader
|
||||
readMethod=blockRandomize
|
||||
miniBatchMode=Partial
|
||||
randomize=Auto
|
||||
verbosity=0
|
||||
features=[
|
||||
dim=363
|
||||
type=Real
|
||||
scpFile=$DataDir$/glob_0000.scp
|
||||
]
|
||||
|
||||
labels=[
|
||||
mlfFile=$DataDir$/glob_0000.mlf
|
||||
labelMappingFile=$DataDir$/state.list
|
||||
|
||||
labelDim=132
|
||||
labelType=Category
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
|
|
|
@ -1,45 +1,42 @@
|
|||
load=ndlMacroDefine
|
||||
run=DNN
|
||||
load = ndlMacroDefine
|
||||
run = DNN
|
||||
|
||||
ndlMacroDefine=[
|
||||
ndlMacroDefine = [
|
||||
# Macro definitions
|
||||
MeanVarNorm(x)=[
|
||||
xMean = Mean(x);
|
||||
xStdDev = InvStdDev(x)
|
||||
xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
|
||||
MeanVarNorm(x) = [
|
||||
xMean = Mean(x);
|
||||
xStdDev = InvStdDev(x)
|
||||
xNorm = PerDimMeanVarNormalization(x, xMean, xStdDev)
|
||||
]
|
||||
]
|
||||
|
||||
DNN=[
|
||||
|
||||
DNN = [
|
||||
#define basic i/o
|
||||
featDim=363
|
||||
LabelDim=132
|
||||
hiddenDim=512
|
||||
featDim = 363
|
||||
labelDim = 132
|
||||
hiddenDim = 512
|
||||
|
||||
features=Input(featDim, tag=feature)
|
||||
labels=Input(LabelDim, tag=label)
|
||||
|
||||
GlobalMean=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
GlobalInvStd=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
GlobalPrior=Parameter(LabelDim, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior=Log(GlobalPrior)
|
||||
features = Input(featDim, tag=feature)
|
||||
labels = Input(labelDim, tag=label)
|
||||
|
||||
globalMean = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
globalInvStd = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
globalPrior = Parameter(labelDim, 1, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior = Log(globalPrior)
|
||||
|
||||
# define network
|
||||
featNorm=PerDimMeanVarNormalization(features, GlobalMean, GlobalInvStd)
|
||||
featNorm = PerDimMeanVarNormalization(features, globalMean, globalInvStd)
|
||||
|
||||
# layer 1 363 X 512
|
||||
z1=DNNLayer(featDim, hiddenDim, featNorm);
|
||||
z1 = DNNLayer(featDim, hiddenDim, featNorm);
|
||||
# layer 2 512 X 512
|
||||
z2=DNNLayer(hiddenDim, hiddenDim, z1);
|
||||
z2 = DNNLayer(hiddenDim, hiddenDim, z1);
|
||||
# layer 3 512 X 512
|
||||
z3=DNNLayer(hiddenDim, hiddenDim, z2);
|
||||
z3 = DNNLayer(hiddenDim, hiddenDim, z2);
|
||||
# last layer 512 X 132
|
||||
z4=DNNLastLayer(hiddenDim, LabelDim, z3);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, z4, tag=Criteria);
|
||||
Err = ErrorPrediction(labels, z4, tag=Eval);
|
||||
ScaledLogLikelihood=Minus(z4, logPrior, tag=Output)
|
||||
]
|
||||
z4 = DNNLastLayer(hiddenDim, labelDim, z3);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, z4, tag=criterion);
|
||||
err = ErrorPrediction(labels, z4, tag=eval);
|
||||
scaledLogLikelihood = Minus(z4, logPrior, tag=output)
|
||||
]
|
||||
|
|
|
@ -1,38 +1,38 @@
|
|||
load=ndlMacroDefine
|
||||
run=DNN
|
||||
load = ndlMacroDefine
|
||||
run = DNN
|
||||
|
||||
ndlMacroDefine=[
|
||||
ndlMacroDefine = [
|
||||
# Macro definitions
|
||||
MeanVarNorm(x)=[
|
||||
MeanVarNorm(x) = [
|
||||
xMean = Mean(x);
|
||||
xStdDev = InvStdDev(x)
|
||||
xNorm=PerDimMeanVarNormalization(x, xMean, xStdDev)
|
||||
xNorm = PerDimMeanVarNormalization(x, xMean, xStdDev)
|
||||
]
|
||||
]
|
||||
|
||||
DNN=[
|
||||
DNN = [
|
||||
#define basic i/o
|
||||
featDim=363
|
||||
LabelDim=132
|
||||
hiddenDim=512
|
||||
featDim = 363
|
||||
LabelDim = 132
|
||||
hiddenDim = 512
|
||||
|
||||
features=Input(featDim, tag=feature)
|
||||
labels=Input(LabelDim, tag=label)
|
||||
features = Input(featDim, tag=feature)
|
||||
labels = Input(LabelDim, tag=label)
|
||||
|
||||
GlobalMean=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
GlobalInvStd=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
GlobalPrior=Parameter(LabelDim, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior=Log(GlobalPrior)
|
||||
globalMean = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
globalInvStd = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
globalPrior = Parameter(LabelDim, 1, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior = Log(globalPrior)
|
||||
|
||||
# define network
|
||||
featNorm=PerDimMeanVarNormalization(features, GlobalMean, GlobalInvStd)
|
||||
featNorm = PerDimMeanVarNormalization(features, globalMean, globalInvStd)
|
||||
|
||||
# layer 1 363 X 512
|
||||
HL1=DNNLayer(featDim, hiddenDim, featNorm);
|
||||
HL1 = DNNLayer(featDim, hiddenDim, featNorm);
|
||||
# last layer 512 X 132
|
||||
OL=DNNLastLayer(hiddenDim, LabelDim, HL1);
|
||||
OL = DNNLastLayer(hiddenDim, LabelDim, HL1);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, OL, tag=Criteria);
|
||||
Err = ErrorPrediction(labels, OL, tag=Eval);
|
||||
ScaledLogLikelihood=Minus(OL, logPrior, tag=Output)
|
||||
cr = CrossEntropyWithSoftmax(labels, OL, tag=criterion);
|
||||
err = ErrorPrediction(labels, OL, tag=Eval);
|
||||
scaledLogLikelihood = Minus(OL, logPrior, tag=Output)
|
||||
]
|
||||
|
|
|
@ -1,20 +1,18 @@
|
|||
DNNLayer(inDim, outDim, x)
|
||||
{
|
||||
DNNLayer(inDim, outDim, x) = [
|
||||
#W = Parameter(outDim, inDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1); # randomizing on CPU with fixed seed to get reproducable results across configurations
|
||||
#b = Parameter(outDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
#b = Parameter(outDim, 1, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
W = Parameter(outDim, inDim);
|
||||
b = Parameter(outDim);
|
||||
b = Parameter(outDim, 1);
|
||||
t = Times(W, x);
|
||||
z = Plus(t, b);
|
||||
y = sigmoid(z);
|
||||
}
|
||||
]
|
||||
|
||||
DNNLastLayer(hiddenDim, LabelDim, x)
|
||||
{
|
||||
DNNLastLayer(hiddenDim, LabelDim, x) = [
|
||||
#W = Parameter(LabelDim, hiddenDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
#b = Parameter(LabelDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
#b = Parameter(LabelDim, 1, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
W = Parameter(LabelDim, hiddenDim);
|
||||
b = Parameter(LabelDim);
|
||||
b = Parameter(LabelDim, 1);
|
||||
t = Times(W, x);
|
||||
z = Plus(t, b);
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1,172 +1,174 @@
|
|||
precision=float
|
||||
deviceId=$DeviceId$
|
||||
command=DPT_Pre1:AddLayer2:DPT_Pre2:AddLayer3:speechTrain:replaceCriterionNode:sequenceTrain
|
||||
precision = "float"
|
||||
deviceId = $DeviceId$
|
||||
command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain:replaceCriterionNode:sequenceTrain
|
||||
|
||||
ndlMacros=$ConfigDir$/macros.txt
|
||||
ndlMacros = "$ConfigDir$/macros.txt"
|
||||
|
||||
GlobalMean=GlobalStats/mean.363
|
||||
GlobalInvStd=GlobalStats/var.363
|
||||
GlobalPrior=GlobalStats/prior.132
|
||||
globalMean = "GlobalStats/mean.363"
|
||||
globalInvStd = "GlobalStats/var.363"
|
||||
globalPrior = "GlobalStats/prior.132"
|
||||
|
||||
traceLevel=1
|
||||
Truncated=false
|
||||
traceLevel = 1
|
||||
truncated = false
|
||||
|
||||
# Default SGD value used for pre-training.
|
||||
SGD=[
|
||||
epochSize=81920
|
||||
minibatchSize=256
|
||||
learningRatesPerMB=0.8
|
||||
numMBsToShowResult=10
|
||||
momentumPerMB=0.9
|
||||
dropoutRate=0.0
|
||||
maxEpochs=2
|
||||
SGD = [
|
||||
epochSize = 81920
|
||||
minibatchSize = 256
|
||||
learningRatesPerMB = 0.8
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 2
|
||||
]
|
||||
|
||||
DPT_Pre1=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/Pre1/cntkSpeech
|
||||
dptPre1 = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/Pre1/cntkSpeech"
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/dnn_1layer.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/dnn_1layer.txt"
|
||||
]
|
||||
]
|
||||
|
||||
AddLayer2=[
|
||||
action=edit
|
||||
CurrLayer=1
|
||||
NewLayer=2
|
||||
CurrModel=$RunDir$/models/Pre1/cntkSpeech
|
||||
NewModel=$RunDir$/models/Pre2/cntkSpeech.0
|
||||
editPath=$ConfigDir$/add_layer.mel
|
||||
addLayer2 = [
|
||||
action = "edit"
|
||||
currLayer = 1
|
||||
newLayer = 2
|
||||
currModel = "$RunDir$/models/Pre1/cntkSpeech"
|
||||
newModel = "$RunDir$/models/Pre2/cntkSpeech.0"
|
||||
editPath = "$ConfigDir$/add_layer.mel"
|
||||
]
|
||||
|
||||
DPT_Pre2=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/Pre2/cntkSpeech
|
||||
dptPre2 = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/Pre2/cntkSpeech"
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/dnn_1layer.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/dnn_1layer.txt"
|
||||
]
|
||||
]
|
||||
|
||||
AddLayer3=[
|
||||
action=edit
|
||||
CurrLayer=2
|
||||
NewLayer=3
|
||||
CurrModel=$RunDir$/models/Pre2/cntkSpeech
|
||||
NewModel=$RunDir$/models/cntkSpeech.0
|
||||
editPath=$ConfigDir$/add_layer.mel
|
||||
addLayer3 = [
|
||||
action = "edit"
|
||||
currLayer = 2
|
||||
newLayer = 3
|
||||
currModel = "$RunDir$/models/Pre2/cntkSpeech"
|
||||
newModel = "$RunDir$/models/cntkSpeech.0"
|
||||
editPath = "$ConfigDir$/add_layer.mel"
|
||||
]
|
||||
|
||||
speechTrain=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/cntkSpeech
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/cntkSpeech"
|
||||
#deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/dnn.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/dnn.txt"
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=81920
|
||||
minibatchSize=256:512
|
||||
learningRatesPerMB=0.8:1.6
|
||||
numMBsToShowResult=10
|
||||
momentumPerSample=0.999589
|
||||
dropoutRate=0.0
|
||||
maxEpochs=4
|
||||
SGD = [
|
||||
epochSize = 81920
|
||||
minibatchSize = 256:512
|
||||
learningRatesPerMB = 0.8:1.6
|
||||
numMBsToShowResult = 10
|
||||
momentumPerSample = 0.999589
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 4
|
||||
|
||||
gradUpdateType=None
|
||||
normWithAveMultiplier=true
|
||||
clippingThresholdPerSample=1#INF
|
||||
gradUpdateType = "none"
|
||||
normWithAveMultiplier = true
|
||||
clippingThresholdPerSample = 1#INF
|
||||
]
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=HTKMLFReader
|
||||
readMethod=blockRandomize
|
||||
miniBatchMode=Partial
|
||||
randomize=Auto
|
||||
verbosity=0
|
||||
features=[
|
||||
dim=363
|
||||
type=Real
|
||||
scpFile=$DataDir$/glob_0000.scp
|
||||
]
|
||||
|
||||
labels=[
|
||||
mlfFile=$DataDir$/glob_0000.mlf
|
||||
labelMappingFile=$DataDir$/state.list
|
||||
|
||||
labelDim=132
|
||||
labelType=Category
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
|
||||
replaceCriterionNode=[
|
||||
action=edit
|
||||
CurrModel=$RunDir$/models/cntkSpeech
|
||||
NewModel=$RunDir$/models/cntkSpeech.sequence.0
|
||||
editPath=$ConfigDir$/replace_ce_with_sequence_criterion.mel
|
||||
replaceCriterionNode = [
|
||||
action = "edit"
|
||||
currModel = "$RunDir$/models/cntkSpeech"
|
||||
newModel = "$RunDir$/models/cntkSpeech.sequence.0"
|
||||
editPath = "$ConfigDir$/replace_ce_with_sequence_criterion.mel"
|
||||
]
|
||||
|
||||
sequenceTrain=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/cntkSpeech.sequence
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
sequenceTrain = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/cntkSpeech.sequence"
|
||||
#deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
|
||||
# This path is not really used since we use the seed model
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/nonexistentfile.txt
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/nonexistentfile.txt"
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=81920
|
||||
minibatchSize=10
|
||||
learningRatesPerSample=0.000002
|
||||
momentumPerSample=0.999589
|
||||
dropoutRate=0.0
|
||||
maxEpochs=3
|
||||
hsmoothingWeight=0.95
|
||||
frameDropThresh=1e-10
|
||||
numMBsToShowResult=10
|
||||
gradientClippingWithTruncation=true
|
||||
clippingThresholdPerSample=1.0
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=HTKMLFReader
|
||||
readMethod=blockRandomize
|
||||
|
||||
SGD = [
|
||||
epochSize = 81920
|
||||
minibatchSize = 10
|
||||
learningRatesPerSample = 0.000002
|
||||
momentumPerSample = 0.999589
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 3
|
||||
hsmoothingWeight = 0.95
|
||||
frameDropThresh = 1e-10
|
||||
numMBsToShowResult = 10
|
||||
gradientClippingWithTruncation = true
|
||||
clippingThresholdPerSample = 1.0
|
||||
]
|
||||
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
|
||||
frameMode=false
|
||||
nbruttsineachrecurrentiter=2
|
||||
frameMode = false
|
||||
nbruttsineachrecurrentiter = 2
|
||||
|
||||
miniBatchMode=Partial
|
||||
randomize=Auto
|
||||
verbosity=0
|
||||
features=[
|
||||
dim=363
|
||||
type=Real
|
||||
scpFile=$DataDir$/glob_0000.scp
|
||||
]
|
||||
|
||||
labels=[
|
||||
mlfFile=$DataDir$/glob_0000.mlf
|
||||
labelMappingFile=$DataDir$/state.list
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
|
||||
labelDim=132
|
||||
labelType=Category
|
||||
]
|
||||
|
||||
hmms=[
|
||||
phoneFile=$DataDir$/model.overalltying
|
||||
transpFile=$DataDir$/model.transprob
|
||||
]
|
||||
|
||||
lattices=[
|
||||
denlatTocFile=$DataDir$/*.lats.toc
|
||||
]
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
|
||||
hmms = [
|
||||
phoneFile = "$DataDir$/model.overalltying"
|
||||
transpFile = "$DataDir$/model.transprob"
|
||||
]
|
||||
|
||||
lattices = [
|
||||
denlatTocFile = "$DataDir$/*.lats.toc"
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -1,45 +1,42 @@
|
|||
load=ndlMacroDefine
|
||||
run=DNN
|
||||
load = ndlMacroDefine
|
||||
run = DNN
|
||||
|
||||
ndlMacroDefine=[
|
||||
ndlMacroDefine = [
|
||||
# Macro definitions
|
||||
MeanVarNorm(x)=[
|
||||
xMean = Mean(x);
|
||||
xStdDev = InvStdDev(x)
|
||||
xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
|
||||
MeanVarNorm(x) = [
|
||||
xMean = Mean(x);
|
||||
xStdDev = InvStdDev(x)
|
||||
xNorm = PerDimMeanVarNormalization(x,xMean,xStdDev)
|
||||
]
|
||||
]
|
||||
|
||||
DNN=[
|
||||
|
||||
DNN = [
|
||||
#define basic i/o
|
||||
featDim=363
|
||||
LabelDim=132
|
||||
hiddenDim=512
|
||||
featDim = 363
|
||||
labelDim = 132
|
||||
hiddenDim = 512
|
||||
|
||||
features=Input(featDim, tag=feature)
|
||||
labels=Input(LabelDim, tag=label)
|
||||
|
||||
GlobalMean=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
GlobalInvStd=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
GlobalPrior=Parameter(LabelDim, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior=Log(GlobalPrior)
|
||||
features = Input(featDim, tag=feature)
|
||||
labels = Input(labelDim, tag=label)
|
||||
|
||||
globalMean = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
globalInvStd = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
globalPrior = Parameter(labelDim, 1, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior = Log(globalPrior)
|
||||
|
||||
# define network
|
||||
featNorm=PerDimMeanVarNormalization(features, GlobalMean, GlobalInvStd)
|
||||
featNorm = PerDimMeanVarNormalization(features, globalMean, globalInvStd)
|
||||
|
||||
# layer 1 363 X 512
|
||||
z1=DNNLayer(featDim, hiddenDim, featNorm);
|
||||
z1 = DNNLayer(featDim, hiddenDim, featNorm);
|
||||
# layer 2 512 X 512
|
||||
z2=DNNLayer(hiddenDim, hiddenDim, z1);
|
||||
z2 = DNNLayer(hiddenDim, hiddenDim, z1);
|
||||
# layer 3 512 X 512
|
||||
z3=DNNLayer(hiddenDim, hiddenDim, z2);
|
||||
z3 = DNNLayer(hiddenDim, hiddenDim, z2);
|
||||
# last layer 512 X 132
|
||||
z4=DNNLastLayer(hiddenDim, LabelDim, z3);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, z4, tag=Criteria);
|
||||
Err = ErrorPrediction(labels, z4, tag=Eval);
|
||||
ScaledLogLikelihood=Minus(z4, logPrior, tag=Output)
|
||||
]
|
||||
z4 = DNNLastLayer(hiddenDim, labelDim, z3);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, z4, tag=criterion);
|
||||
err = ErrorPrediction(labels, z4, tag=eval);
|
||||
scaledLogLikelihood = Minus(z4, logPrior, tag=output)
|
||||
]
|
||||
|
|
|
@ -1,38 +1,38 @@
|
|||
load=ndlMacroDefine
|
||||
run=DNN
|
||||
load = ndlMacroDefine
|
||||
run = DNN
|
||||
|
||||
ndlMacroDefine=[
|
||||
ndlMacroDefine = [
|
||||
# Macro definitions
|
||||
MeanVarNorm(x)=[
|
||||
xMean = Mean(x);
|
||||
MeanVarNorm(x) = [
|
||||
xMean = Mean(x);
|
||||
xStdDev = InvStdDev(x)
|
||||
xNorm=PerDimMeanVarNormalization(x, xMean, xStdDev)
|
||||
xNorm = PerDimMeanVarNormalization(x, xMean, xStdDev)
|
||||
]
|
||||
]
|
||||
|
||||
DNN=[
|
||||
DNN = [
|
||||
#define basic i/o
|
||||
featDim=363
|
||||
LabelDim=132
|
||||
hiddenDim=512
|
||||
featDim = 363
|
||||
labelDim = 132
|
||||
hiddenDim = 512
|
||||
|
||||
features=Input(featDim, tag=feature)
|
||||
labels=Input(LabelDim, tag=label)
|
||||
features = Input(featDim, tag=feature)
|
||||
labels = Input(labelDim, tag=label)
|
||||
|
||||
GlobalMean=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
GlobalInvStd=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
GlobalPrior=Parameter(LabelDim, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior=Log(GlobalPrior)
|
||||
globalMean = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
|
||||
globalInvStd = Parameter(featDim, 1, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
|
||||
globalPrior = Parameter(labelDim, 1, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
|
||||
logPrior = Log(globalPrior)
|
||||
|
||||
# define network
|
||||
featNorm=PerDimMeanVarNormalization(features, GlobalMean, GlobalInvStd)
|
||||
featNorm = PerDimMeanVarNormalization(features, globalMean, globalInvStd)
|
||||
|
||||
# layer 1 363 X 512
|
||||
HL1=DNNLayer(featDim, hiddenDim, featNorm);
|
||||
HL1 = DNNLayer(featDim, hiddenDim, featNorm);
|
||||
# last layer 512 X 132
|
||||
OL=DNNLastLayer(hiddenDim, LabelDim, HL1);
|
||||
OL = DNNLastLayer(hiddenDim, labelDim, HL1);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, OL, tag=Criteria);
|
||||
Err = ErrorPrediction(labels, OL, tag=Eval);
|
||||
ScaledLogLikelihood=Minus(OL, logPrior, tag=Output)
|
||||
cr = CrossEntropyWithSoftmax(labels, OL, tag=criterion);
|
||||
err = ErrorPrediction(labels, OL, tag=eval);
|
||||
scaledLogLikelihood = Minus(OL, logPrior, tag=output)
|
||||
]
|
||||
|
|
|
@ -1,20 +1,18 @@
|
|||
DNNLayer(inDim, outDim, x)
|
||||
{
|
||||
DNNLayer(inDim, outDim, x) = [
|
||||
#W = Parameter(outDim, inDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1); # randomizing on CPU with fixed seed to get reproducible results across configurations
|
||||
#b = Parameter(outDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
#b = Parameter(outDim, 1, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
W = Parameter(outDim, inDim);
|
||||
b = Parameter(outDim);
|
||||
b = Parameter(outDim, 1);
|
||||
t = Times(W, x);
|
||||
z = Plus(t, b);
|
||||
y = sigmoid(z);
|
||||
}
|
||||
]
|
||||
|
||||
DNNLastLayer(hiddenDim, LabelDim, x)
|
||||
{
|
||||
DNNLastLayer(hiddenDim, LabelDim, x) = [
|
||||
#W = Parameter(LabelDim, hiddenDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
#b = Parameter(LabelDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
#b = Parameter(LabelDim, 1, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
W = Parameter(LabelDim, hiddenDim);
|
||||
b = Parameter(LabelDim);
|
||||
b = Parameter(LabelDim, 1);
|
||||
t = Times(W, x);
|
||||
z = Plus(t, b);
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
m1=LoadModel($CurrModel$, format=cntk)
|
||||
m1 = LoadModel($CurrModel$, format=cntk)
|
||||
SetDefaultModel(m1)
|
||||
SetProperty(cr, Criteria, false)
|
||||
Remove(cr)
|
||||
|
||||
SEwithSoftmax=SequenceWithSoftmax(labels, OL.z, ScaledLogLikelihood)
|
||||
SetProperty(SEwithSoftmax, Criteria, true)
|
||||
SEwithSoftmax = SequenceWithSoftmax(labels, OL.z, scaledLogLikelihood)
|
||||
SetProperty(SEwithSoftmax, criterion, true)
|
||||
|
||||
SaveModel(m1, $NewModel$, format=cntk)
|
||||
Dump(m1,$NewModel$.dump.txt)
|
||||
|
||||
Dump(m1, $NewModel$.dump.txt)
|
||||
|
|
|
@ -1,31 +1,32 @@
|
|||
precision=float
|
||||
command=speechTrain
|
||||
deviceId=$DeviceId$
|
||||
precision = "float"
|
||||
command = speechTrain
|
||||
deviceId = $DeviceId$
|
||||
|
||||
parallelTrain=true
|
||||
parallelTrain = true
|
||||
|
||||
speechTrain=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/cntkSpeech.dnn
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
SimpleNetworkBuilder=[
|
||||
layerSizes=363:512:512:132
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
evalCriterion=ErrorPrediction
|
||||
layerTypes=Sigmoid
|
||||
initValueScale=1.0
|
||||
applyMeanVarNorm=true
|
||||
uniformInit=true
|
||||
needPrior=true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/cntkSpeech.dnn"
|
||||
deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
uniformInit = true
|
||||
needPrior = true
|
||||
]
|
||||
|
||||
ExperimentalNetworkBuilder=[ // the same as above but with BS
|
||||
layerSizes=363:512:512:132
|
||||
trainingCriterion='CE'
|
||||
evalCriterion='Err'
|
||||
ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = 'CE'
|
||||
evalCriterion = 'Err'
|
||||
|
||||
applyMeanVarNorm=true
|
||||
applyMeanVarNorm = true
|
||||
|
||||
L = Length(layerSizes)-1 // number of model layers
|
||||
features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label')
|
||||
|
@ -48,52 +49,52 @@ speechTrain=[
|
|||
ScaledLogLikelihood = Minus (outZ, logPrior, tag='output')
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=20480
|
||||
minibatchSize=64:256:1024
|
||||
learningRatesPerMB=1.0:0.5:0.1
|
||||
numMBsToShowResult=10
|
||||
momentumPerMB=0.9:0.656119
|
||||
dropoutRate=0.0
|
||||
maxEpochs=3
|
||||
keepCheckPointFiles=true
|
||||
|
||||
ParallelTrain=[
|
||||
parallelizationMethod=DataParallelSGD
|
||||
distributedMBReading=true
|
||||
DataParallelSGD=[
|
||||
gradientBits=32
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 64:256:1024
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
clippingThresholdPerSample = 1#INF
|
||||
|
||||
ParallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
DataParallelSGD = [
|
||||
gradientBits = 32
|
||||
]
|
||||
]
|
||||
|
||||
AutoAdjust=[
|
||||
reduceLearnRateIfImproveLessThan=0
|
||||
loadBestModel=true
|
||||
increaseLearnRateIfImproveMoreThan=1000000000
|
||||
learnRateDecreaseFactor=0.5
|
||||
learnRateIncreaseFactor=1.382
|
||||
autoAdjustLR=AdjustAfterEpoch
|
||||
|
||||
AutoAdjust = [
|
||||
reduceLearnRateIfImproveLessThan = 0
|
||||
loadBestModel = true
|
||||
increaseLearnRateIfImproveMoreThan = 1000000000
|
||||
learnRateDecreaseFactor = 0.5
|
||||
learnRateIncreaseFactor = 1.382
|
||||
autoAdjustLR = "adjustAfterEpoch"
|
||||
]
|
||||
clippingThresholdPerSample=1#INF
|
||||
]
|
||||
reader=[
|
||||
readerType=HTKMLFReader
|
||||
readMethod=blockRandomize
|
||||
miniBatchMode=Partial
|
||||
randomize=Auto
|
||||
verbosity=0
|
||||
features=[
|
||||
dim=363
|
||||
type=Real
|
||||
scpFile=glob_0000.scp
|
||||
]
|
||||
|
||||
labels=[
|
||||
mlfFile=$DataDir$/glob_0000.mlf
|
||||
labelMappingFile=$DataDir$/state.list
|
||||
|
||||
labelDim=132
|
||||
labelType=Category
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "glob_0000.scp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -1,47 +1,48 @@
|
|||
precision=float
|
||||
command=speechTrain
|
||||
deviceId=$DeviceId$
|
||||
precision = "float"
|
||||
command = speechTrain
|
||||
deviceId = $DeviceId$
|
||||
|
||||
parallelTrain=false
|
||||
parallelTrain = false
|
||||
|
||||
frameMode=false
|
||||
Truncated=true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
|
||||
speechTrain=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/cntkSpeech.dnn
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/cntkSpeech.dnn"
|
||||
#deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
|
||||
SGD=[
|
||||
epochSize=20480
|
||||
minibatchSize=20
|
||||
learningRatesPerMB=0.5
|
||||
numMBsToShowResult=10
|
||||
momentumPerMB=0:0.9
|
||||
maxEpochs=4
|
||||
keepCheckPointFiles=true
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 20
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader=[
|
||||
readerType=HTKMLFReader
|
||||
readMethod=blockRandomize
|
||||
miniBatchMode=Partial
|
||||
nbruttsineachrecurrentiter=32
|
||||
randomize=Auto
|
||||
verbosity=0
|
||||
features=[
|
||||
dim=363
|
||||
type=Real
|
||||
scpFile=$DataDir$/glob_0000.scp
|
||||
]
|
||||
|
||||
labels=[
|
||||
mlfFile=$DataDir$/glob_0000.mlf
|
||||
labelMappingFile=$DataDir$/state.list
|
||||
|
||||
labelDim=132
|
||||
labelType=Category
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
nbruttsineachrecurrentiter = 32
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
|
||||
# define network using BrainScript
|
||||
|
|
|
@ -1,27 +1,27 @@
|
|||
precision=float
|
||||
command=speechTrain
|
||||
deviceId=$DeviceId$
|
||||
precision = "float"
|
||||
command = speechTrain
|
||||
deviceId = $DeviceId$
|
||||
|
||||
parallelTrain=false
|
||||
makeMode=false
|
||||
parallelTrain = false
|
||||
makeMode = false
|
||||
|
||||
speechTrain=[
|
||||
action=train
|
||||
modelPath=$RunDir$/models/cntkSpeech.dnn
|
||||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
SimpleNetworkBuilder=[
|
||||
layerSizes=363:512:512:132
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
evalCriterion=ErrorPrediction
|
||||
layerTypes=Sigmoid
|
||||
applyMeanVarNorm=true
|
||||
initValueScale=1.0
|
||||
uniformInit=true
|
||||
needPrior=true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
modelPath = "$RunDir$/models/cntkSpeech.dnn"
|
||||
deviceId = $DeviceId$
|
||||
traceLevel = 1
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
initValueScale = 1.0
|
||||
uniformInit = true
|
||||
needPrior = true
|
||||
]
|
||||
|
||||
ExperimentalNetworkBuilder=[ // the same as above but with BS
|
||||
ExperimentalNetworkBuilder = [ // the same as above but with BS. Currently not used. Enable by removing the SimpleNetworkBuilder above.
|
||||
// note: this does not produce identical results because of different initialization order of random-initialized LearnableParameters
|
||||
layerSizes=363:512:512:132 // [0..]
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
|
@ -51,45 +51,44 @@ speechTrain=[
|
|||
ScaledLogLikelihood = Minus (outZ, logPrior, tag='output')
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=20480
|
||||
minibatchSize=64:256:1024:
|
||||
learningRatesPerMB=1.0:0.5:0.1
|
||||
numMBsToShowResult=10
|
||||
momentumPerMB=0.9:0.656119
|
||||
dropoutRate=0.0
|
||||
maxEpochs=3
|
||||
keepCheckPointFiles=true
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 64:256:1024:
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
dropoutRate = 0.0
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
|
||||
AutoAdjust=[
|
||||
reduceLearnRateIfImproveLessThan=0
|
||||
loadBestModel=true
|
||||
increaseLearnRateIfImproveMoreThan=1000000000
|
||||
learnRateDecreaseFactor=0.5
|
||||
learnRateIncreaseFactor=1.382
|
||||
autoAdjustLR=AdjustAfterEpoch
|
||||
AutoAdjust = [
|
||||
reduceLearnRateIfImproveLessThan = 0
|
||||
loadBestModel = true
|
||||
increaseLearnRateIfImproveMoreThan = 1000000000
|
||||
learnRateDecreaseFactor = 0.5
|
||||
learnRateIncreaseFactor = 1.382
|
||||
autoAdjustLR = "adjustAfterEpoch"
|
||||
]
|
||||
clippingThresholdPerSample=1#INF
|
||||
clippingThresholdPerSample = 1#INF
|
||||
]
|
||||
reader=[
|
||||
readerType=HTKMLFReader
|
||||
readMethod=blockRandomize
|
||||
miniBatchMode=Partial
|
||||
randomize=Auto
|
||||
verbosity=0
|
||||
features=[
|
||||
dim=363
|
||||
type=Real
|
||||
scpFile=glob_0000.scp
|
||||
]
|
||||
|
||||
labels=[
|
||||
mlfFile=$DataDir$/glob_0000.mlf
|
||||
labelMappingFile=$DataDir$/state.list
|
||||
|
||||
labelDim=132
|
||||
labelType=Category
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "glob_0000.scp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче