cleaned up seq-2-seq sample further and added the CMUDict corpus
This commit is contained in:
Родитель
1a7ecaecfa
Коммит
0ba41da07b
37
CNTK.sln
37
CNTK.sln
|
@ -1116,14 +1116,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelBM", "ParallelBM",
|
||||||
EndProject
|
EndProject
|
||||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceToSequence", "SequenceToSequence", "{A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}"
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceToSequence", "SequenceToSequence", "{A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{85A05261-41D0-41DF-80B5-ADB6ABB54632}"
|
|
||||||
EndProject
|
|
||||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "G2P", "G2P", "{4AD12278-9705-4BBA-B2C3-D6D5856AADC3}"
|
|
||||||
ProjectSection(SolutionItems) = preProject
|
|
||||||
Examples\SequenceToSequence\Miscellaneous\G2P\G2P.cntk = Examples\SequenceToSequence\Miscellaneous\G2P\G2P.cntk
|
|
||||||
Examples\SequenceToSequence\Miscellaneous\G2P\README.txt = Examples\SequenceToSequence\Miscellaneous\G2P\README.txt
|
|
||||||
EndProjectSection
|
|
||||||
EndProject
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Source\Extensibility\CPPEvalClient\CPPEvalClient.vcxproj", "{578D52A0-3928-4405-A016-F016E8B49031}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Source\Extensibility\CPPEvalClient\CPPEvalClient.vcxproj", "{578D52A0-3928-4405-A016-F016E8B49031}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ExperimentalHtkmlfReader", "ExperimentalHtkmlfReader", "{977ECCB7-598D-4548-B95B-BACA9CC7D98B}"
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ExperimentalHtkmlfReader", "ExperimentalHtkmlfReader", "{977ECCB7-598D-4548-B95B-BACA9CC7D98B}"
|
||||||
|
@ -1149,6 +1141,30 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUttera
|
||||||
EndProject
|
EndProject
|
||||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Truncated", "Truncated", "{1141DC61-E014-4DEC-9157-F6B1FC055C7A}"
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Truncated", "Truncated", "{1141DC61-E014-4DEC-9157-F6B1FC055C7A}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CMUDict", "CMUDict", "{EC7298E3-AAA9-4672-941F-0B342C494CB3}"
|
||||||
|
ProjectSection(SolutionItems) = preProject
|
||||||
|
Examples\SequenceToSequence\CMUDict\README.md = Examples\SequenceToSequence\CMUDict\README.md
|
||||||
|
EndProjectSection
|
||||||
|
EndProject
|
||||||
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{ECED747C-86D7-4009-B2A9-0525FE5DF4EB}"
|
||||||
|
ProjectSection(SolutionItems) = preProject
|
||||||
|
Examples\SequenceToSequence\CMUDict\Config\G2P.cntk = Examples\SequenceToSequence\CMUDict\Config\G2P.cntk
|
||||||
|
EndProjectSection
|
||||||
|
EndProject
|
||||||
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{25E144C1-0B7C-4CD4-811A-2E9F4943120D}"
|
||||||
|
ProjectSection(SolutionItems) = preProject
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.test = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.test
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.test.txt = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.test.txt
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-1-21 = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-1-21
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-1-21.txt = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-1-21.txt
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-20-21 = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-20-21
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-20-21.txt = Examples\SequenceToSequence\CMUDict\Data\cmudict-0.7b.train-dev-20-21.txt
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\README.txt = Examples\SequenceToSequence\CMUDict\Data\README.txt
|
||||||
|
Examples\SequenceToSequence\CMUDict\Data\ThirdPartyNotice.md = Examples\SequenceToSequence\CMUDict\Data\ThirdPartyNotice.md
|
||||||
|
EndProjectSection
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
|
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
|
||||||
|
@ -1546,8 +1562,6 @@ Global
|
||||||
{4D6F731C-4A6D-4E21-AC3C-9E1F26E5547E} = {6994C86D-A672-4254-824A-51F4DFEB807F}
|
{4D6F731C-4A6D-4E21-AC3C-9E1F26E5547E} = {6994C86D-A672-4254-824A-51F4DFEB807F}
|
||||||
{36C42845-0D48-4A46-9C67-2B593A80A09C} = {6994C86D-A672-4254-824A-51F4DFEB807F}
|
{36C42845-0D48-4A46-9C67-2B593A80A09C} = {6994C86D-A672-4254-824A-51F4DFEB807F}
|
||||||
{A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26} = {47755F2E-D674-4175-9E38-8EA053455072}
|
{A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26} = {47755F2E-D674-4175-9E38-8EA053455072}
|
||||||
{85A05261-41D0-41DF-80B5-ADB6ABB54632} = {A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}
|
|
||||||
{4AD12278-9705-4BBA-B2C3-D6D5856AADC3} = {85A05261-41D0-41DF-80B5-ADB6ABB54632}
|
|
||||||
{578D52A0-3928-4405-A016-F016E8B49031} = {60F87E25-BC87-4782-8E20-1621AAEBB113}
|
{578D52A0-3928-4405-A016-F016E8B49031} = {60F87E25-BC87-4782-8E20-1621AAEBB113}
|
||||||
{977ECCB7-598D-4548-B95B-BACA9CC7D98B} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
|
{977ECCB7-598D-4548-B95B-BACA9CC7D98B} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
|
||||||
{1DBB2575-F5C8-43F4-B982-D05D6ADC2F9B} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
|
{1DBB2575-F5C8-43F4-B982-D05D6ADC2F9B} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
|
||||||
|
@ -1556,5 +1570,8 @@ Global
|
||||||
{BA6A65C5-92A2-4040-ADC3-0727A45694F6} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
|
{BA6A65C5-92A2-4040-ADC3-0727A45694F6} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
|
||||||
{3BDF52CD-7F3C-42BC-AB78-CF5BBC5F4AB4} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
|
{3BDF52CD-7F3C-42BC-AB78-CF5BBC5F4AB4} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
|
||||||
{1141DC61-E014-4DEC-9157-F6B1FC055C7A} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
|
{1141DC61-E014-4DEC-9157-F6B1FC055C7A} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
|
||||||
|
{EC7298E3-AAA9-4672-941F-0B342C494CB3} = {A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}
|
||||||
|
{ECED747C-86D7-4009-B2A9-0525FE5DF4EB} = {EC7298E3-AAA9-4672-941F-0B342C494CB3}
|
||||||
|
{25E144C1-0B7C-4CD4-811A-2E9F4943120D} = {EC7298E3-AAA9-4672-941F-0B342C494CB3}
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
EndGlobal
|
EndGlobal
|
||||||
|
|
|
@ -10,81 +10,85 @@
|
||||||
|
|
||||||
# directory defaults (if not overridden)
|
# directory defaults (if not overridden)
|
||||||
|
|
||||||
RunRootDir = "../.." # default if not overridden
|
RunRootDir = "../.." # default if not overridden
|
||||||
DataDir = "$RunRootDir$/Data"
|
DataDir = "$RunRootDir$/Data"
|
||||||
CacheDir = "$DataDir$/cache" # (not used currently)
|
OutDir = "$RunRootDir$/Out"
|
||||||
ExpRootDir = "$RunRootDir$"
|
|
||||||
|
|
||||||
# command to execute
|
# command to execute
|
||||||
|
|
||||||
command = train
|
command = train
|
||||||
#command = write
|
#command = write
|
||||||
#command = dump
|
#command = dump
|
||||||
makeMode = false
|
|
||||||
|
makeMode = false # set this to true to enable restarting from checkpoint
|
||||||
|
traceLevel = 1
|
||||||
|
|
||||||
# experiment id
|
# experiment id
|
||||||
deviceId = 0 # set the GPU device here, or "auto" to auto-select; or override from the command line.
|
|
||||||
ExpId = g2p-1-$deviceId$ # choose a meaningful id here. This is used for unique directory and filenames.
|
|
||||||
#ExpId = g2p-1-0 # change to different id when decoding a different model
|
|
||||||
|
|
||||||
# directories
|
deviceId = 0 # set the GPU device here, or "auto" to auto-select; or override from the command line.
|
||||||
ExpDir = "$ExpRootDir$/$ExpId$"
|
ExpId = g2p-01-$deviceId$ # choose a meaningful id here. This is used for unique directory and filenames.
|
||||||
ModelDir = "$ExpDir$/Models"
|
#ExpId = g2p-01-0 # change to different id when decoding a different model
|
||||||
|
|
||||||
stderr = $ExpDir$/G2P
|
# model
|
||||||
|
|
||||||
precision = "float"
|
modelPath = "$OutDir$/$ExpId$/G2P.dnn"
|
||||||
traceLevel = 1
|
stderr = "$OutDir$/$ExpId$/G2P"
|
||||||
modelPath = "$ModelDir$/G2P.dnn"
|
|
||||||
|
|
||||||
# decoding config --used by the "write" command ("write" decodes and writes the result)
|
# decoding config --used by the "write" command ("write" decodes and writes the result)
|
||||||
|
|
||||||
beamDepth = 3 # 0=predict; 1=greedy; >1=beam
|
beamDepth = 3 # 0=predict; 1=greedy; >1=beam
|
||||||
decodeModel = 9
|
decodeModel = 9
|
||||||
decodeModelPath = "$modelPath$.$decodeModel$" # note: epoch to decode is appended to the model path
|
decodeModelPath = "$modelPath$.$decodeModel$" # note: epoch to decode is appended to the model path
|
||||||
decodeOutputPath = "$decodeModelPath$.$beamDepth$" # results are written next to the model, with beamDepth appended
|
decodeOutputPath = "$decodeModelPath$.$beamDepth$" # results are written next to the model, with beamDepth appended
|
||||||
|
|
||||||
# dump config --used by the "dump" command, for inspecting the model parameters
|
# dump config --used by the "dump" command, for inspecting the model parameters
|
||||||
|
|
||||||
dumpModelPath = "$modelPath$.2" # put the epoch id here
|
dumpModelPath = "$modelPath$.2" # put the epoch id here
|
||||||
|
|
||||||
# top-level model configuration
|
# top-level model configuration
|
||||||
|
|
||||||
hiddenDim = 512
|
hiddenDim = 512
|
||||||
|
precision = "float"
|
||||||
maxLayer = 2
|
maxLayer = 2
|
||||||
isBidirectional = false
|
isBidirectional = false
|
||||||
|
|
||||||
# comment/uncomment this or the next block to switch between readers
|
# comment/uncomment this or the next block to switch between readers
|
||||||
# Note: Currently this configuration cannot reach the same result with CNTKTextFormatReader.
|
|
||||||
# This is being investigated. For now, please use the LMSequenceReader.
|
|
||||||
# --- begin uncomment for LMSequenceReader ---
|
# --- begin uncomment for LMSequenceReader ---
|
||||||
readerType = "LMSequenceReader"
|
readerType = "LMSequenceReader"
|
||||||
useCNTKTextFormatReader = false
|
useCNTKTextFormatReader = false
|
||||||
inputVocabSize = 69
|
inputVocabSize = 69
|
||||||
labelVocabSize = 69
|
labelVocabSize = 69
|
||||||
|
mbSizes = 144:144:288:576
|
||||||
shareEmbeddings = true
|
shareEmbeddings = true
|
||||||
fileExt = "joint"
|
fileExt = "txt"
|
||||||
# --- end uncomment ---
|
# --- end uncomment ---
|
||||||
|
|
||||||
# --- begin uncomment for CNTKTextFormatReader ---
|
# --- begin uncomment for CNTKTextFormatReader ---
|
||||||
|
# Note: Currently this configuration cannot reach the same result with CNTKTextFormatReader.
|
||||||
|
# This is being investigated. For now, please use the LMSequenceReader.
|
||||||
#readerType = "CNTKTextFormatReader"
|
#readerType = "CNTKTextFormatReader"
|
||||||
#useCNTKTextFormatReader = true
|
#useCNTKTextFormatReader = true
|
||||||
#inputVocabSize = 29 # 26 letters plus start, end, apostrophe
|
#inputVocabSize = 29 # 26 letters plus start, end, apostrophe
|
||||||
#labelVocabSize = 41 # 39 phonemes (~AX missing), plus start and end symbol (in index 0)
|
#labelVocabSize = 41 # 39 phonemes (~AX missing), plus start and end symbol (in index 0)
|
||||||
|
#mbSizes = 72:72:144:288 # new reader is based on max(stream lengths) instead of sum(stream lengths)
|
||||||
#shareEmbeddings = false
|
#shareEmbeddings = false
|
||||||
#fileExt = "ctf"
|
#fileExt = "bsf.ctf"
|
||||||
# --- end uncomment ---
|
# --- end uncomment ---
|
||||||
|
|
||||||
# corpus
|
# corpus
|
||||||
|
|
||||||
maxLength = 20 # 0 disables attention
|
maxLength = 20 # 0 disables attention
|
||||||
isAutoEncoder=false
|
startSymbol = "<s>" # (need to override the default which is </s>)
|
||||||
startSymbol = "<s>"
|
trainFile = "cmudict-0.7b.train-dev-20-21.$fileExt$"
|
||||||
trainFile = "g014b2b.train-dev-20-21.bsf.$fileExt$"
|
validFile = "cmudict-0.7b.train-dev-1-21.$fileExt$"
|
||||||
validFile = "g014b2b.train-dev-1-21.bsf.$fileExt$"
|
testFile = "cmudict-0.7b.test.$fileExt$"
|
||||||
testFile = "g014b2b.test.bsf.$fileExt$"
|
mappingFile = "cmudict-0.7b.mapping"
|
||||||
vocabFile = "g014b2b.wl"
|
|
||||||
|
|
||||||
# some reader variables that occur multiple times
|
# some reader variables that occur multiple times
|
||||||
cntkReaderInputDef = [ rawInput = [ alias = "s" ; dim = $inputVocabSize$ ; format = "sparse" ] ; rawLabels = [ alias = "t" ; dim = $labelVocabSize$ ; format = "sparse" ] ]
|
|
||||||
lmSequenceReaderInputDef = [ dim = 0 ]
|
cntkReaderInputDef = [ rawInput = [ alias = "s" ; dim = $inputVocabSize$ ; format = "sparse" ] ; rawLabels = [ alias = "t" ; dim = $labelVocabSize$ ; format = "sparse" ] ]
|
||||||
lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ; labelDim = "$inputVocabSize$" ; labelMappingFile = "$DataDir$/$vocabFile$" ; beginSequence = "</s>" ; endSequence = "</s>" ]
|
lmSequenceReaderInputDef = [ dim = 0 ]
|
||||||
|
lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ; labelDim = "$inputVocabSize$" ; labelMappingFile = "$DataDir$/$mappingFile$" ; beginSequence = "</s>" ; endSequence = "</s>" ]
|
||||||
|
|
||||||
#######################################
|
#######################################
|
||||||
# network definition #
|
# network definition #
|
||||||
|
@ -98,7 +102,7 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
|
||||||
inputVocabDim = $inputVocabSize$
|
inputVocabDim = $inputVocabSize$
|
||||||
labelVocabDim = $labelVocabSize$
|
labelVocabDim = $labelVocabSize$
|
||||||
|
|
||||||
isAutoencoder = $isAutoEncoder$ # input is only one sequence, meant to reproduce itself
|
isAutoencoder = false # input is only one sequence, meant to reproduce itself (not used for this task)
|
||||||
attentionSpan = $maxLength$ # attention window, must be large enough for largest input sequence. 0 to disable. Exactly 20 is needed for the g2p CMUDict task
|
attentionSpan = $maxLength$ # attention window, must be large enough for largest input sequence. 0 to disable. Exactly 20 is needed for the g2p CMUDict task
|
||||||
useBidirectionalEncoder = $isBidirectional$ # bi-directional LSTM for encoder
|
useBidirectionalEncoder = $isBidirectional$ # bi-directional LSTM for encoder
|
||||||
|
|
||||||
|
@ -161,9 +165,10 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
|
||||||
isFirstLabel = BS.Loop.IsFirst (labelSequence)
|
isFirstLabel = BS.Loop.IsFirst (labelSequence)
|
||||||
|
|
||||||
#############################################################
|
#############################################################
|
||||||
# embeddings --as long as we cannot read multiple sequences, we got one embedding
|
# embeddings
|
||||||
#############################################################
|
#############################################################
|
||||||
|
|
||||||
|
# Note: when reading input and labels from a single text file, we share the token mapping and embedding.
|
||||||
# Note: Embeddings are linear. Should we use BatchNormalization?
|
# Note: Embeddings are linear. Should we use BatchNormalization?
|
||||||
|
|
||||||
# note: this is assumed to be applied transposed, hence the swapped dimensions. Actually--why? Still needed?
|
# note: this is assumed to be applied transposed, hence the swapped dimensions. Actually--why? Still needed?
|
||||||
|
@ -183,24 +188,20 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
|
||||||
# encoder (processes inputEmbedded)
|
# encoder (processes inputEmbedded)
|
||||||
#############################################################
|
#############################################################
|
||||||
|
|
||||||
# TODO: do not reverse our inputs; instead, if needed, use a backwards-running loop here
|
# Note: We reverse our input by running the recurrence from right to left.
|
||||||
|
|
||||||
encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
|
encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
|
||||||
encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
|
encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
|
||||||
previousHook=BS.RNNs.PreviousHC,
|
previousHook=if useBidirectionalEncoder then BS.RNNs.NextHC else BS.RNNs.PreviousHC,
|
||||||
enableSelfStabilization=useStabilizer)
|
enableSelfStabilization=useStabilizer)
|
||||||
encoderOutput = encoder[Length (encoderDims)-1]
|
encoderOutput = encoder[Length (encoderDims)-1]
|
||||||
|
|
||||||
# There are three ways of passing encoder state:
|
# get the final encoder state for use as the initial state (not used with attention model)
|
||||||
# 1. as initial state for decoder (Google style)
|
# Since we run right-to-left, the final state is the first, not the last.
|
||||||
# 2. as side information for every decoder step (NYU style)
|
|
||||||
# 3. attention
|
|
||||||
|
|
||||||
# get the final encoder state for use as the initial state
|
|
||||||
# For beam decoding, we will also inject a second dimension.
|
# For beam decoding, we will also inject a second dimension.
|
||||||
thoughtVector = [
|
thoughtVector = [
|
||||||
h = ReshapeDimension (BS.Sequences.Last (encoderOutput.h), 1, (dim:1))
|
h = ReshapeDimension (BS.Sequences.First (encoderOutput.h), 1, (dim:1))
|
||||||
c = ReshapeDimension (BS.Sequences.Last (encoderOutput.c), 1, (dim:1))
|
c = ReshapeDimension (BS.Sequences.First (encoderOutput.c), 1, (dim:1))
|
||||||
dim = encoderOutput.dim
|
dim = encoderOutput.dim
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -253,6 +254,11 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
|
||||||
# decoder
|
# decoder
|
||||||
#############################################################
|
#############################################################
|
||||||
|
|
||||||
|
# There are three ways of passing encoder state:
|
||||||
|
# 1. as initial state for decoder (Google style)
|
||||||
|
# 2. as side information for every decoder step (NYU style)
|
||||||
|
# 3. attention
|
||||||
|
|
||||||
decoderInput = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
|
decoderInput = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
|
||||||
decoderInputDim = labelEmbeddingDim
|
decoderInputDim = labelEmbeddingDim
|
||||||
|
|
||||||
|
@ -304,12 +310,6 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
|
||||||
# training criteria
|
# training criteria
|
||||||
#############################################################
|
#############################################################
|
||||||
|
|
||||||
#ce = Pass (ReduceLogSum (z) - ReduceSum (labelSequence .* z ), tag='criterion')
|
|
||||||
#errs = Pass (BS.Constants.One - ReduceSum (labelSequence .* Hardmax (z)), tag='evaluation')
|
|
||||||
#ce2 = Negate (ReduceSum (labelSequence .* LogSoftmax (z)), tag='evaluation')
|
|
||||||
#ce1 = CrossEntropyWithSoftmax (labelSequence, z, tag='evaluation') // this is the training objective
|
|
||||||
#errs = ErrorPrediction (labelSequence, z, tag='evaluation') // this also gets tracked
|
|
||||||
|
|
||||||
ce = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence, z), tag='criterion')
|
ce = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence, z), tag='criterion')
|
||||||
errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')
|
errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')
|
||||||
|
|
||||||
|
@ -340,22 +340,17 @@ train = [
|
||||||
# BrainScriptNetworkBuilder is defined in outer scope
|
# BrainScriptNetworkBuilder is defined in outer scope
|
||||||
|
|
||||||
SGD = [
|
SGD = [
|
||||||
minibatchSize = 144:144:288:576
|
minibatchSize = $mbSizes$
|
||||||
learningRatesPerSample = 0.007*2:0.0035
|
learningRatesPerSample = 0.007*2:0.0035 # works well for LMSequenceReader config
|
||||||
momentumAsTimeConstant = 1100
|
momentumAsTimeConstant = 1100
|
||||||
gradientClippingWithTruncation = true # (as opposed to clipping the Frobenius norm of the matrix)
|
gradientClippingWithTruncation = true # (as opposed to clipping the Frobenius norm of the matrix)
|
||||||
clippingThresholdPerSample = 2.3 # visibly impacts objectives, but not final result, so keep it for safety
|
clippingThresholdPerSample = 2.3 # visibly impacts objectives, but not final result, so keep it for safety
|
||||||
maxEpochs = 50
|
maxEpochs = 50
|
||||||
numMBsToShowResult = 100
|
numMBsToShowResult = 100
|
||||||
firstMBsToShowResult = 10
|
firstMBsToShowResult = 10
|
||||||
gradUpdateType = "none" # FSAdaGrad?
|
gradUpdateType = "none" # TODO: Try FSAdaGrad?
|
||||||
loadBestModel = false # true # broken for some models (rereading overwrites something that got set by validation)
|
loadBestModel = false # true # broken for some models (rereading overwrites something that got set by validation)
|
||||||
|
|
||||||
# tracing (enable these for debugging)
|
|
||||||
#traceNodeNamesReal = labelsEmbedded:decoderInput:"decoder[0].lstmState._privateInnards.ht":z.Plus_left.Times_right.result:z:ce
|
|
||||||
#traceNodeNamesReal = labelsEmbedded:decoderInput:z:ce
|
|
||||||
#traceNodeNamesCategory = inputSequence.out:labelSequence
|
|
||||||
|
|
||||||
dropoutRate = 0.0
|
dropoutRate = 0.0
|
||||||
|
|
||||||
# settings for Auto Adjust Learning Rate
|
# settings for Auto Adjust Learning Rate
|
||||||
|
@ -461,7 +456,7 @@ write = [
|
||||||
format = [
|
format = [
|
||||||
type = "category"
|
type = "category"
|
||||||
transpose = false
|
transpose = false
|
||||||
labelMappingFile = "$DataDir$/$vocabFile$"
|
labelMappingFile = "$DataDir$/$mappingFile$"
|
||||||
]
|
]
|
||||||
|
|
||||||
minibatchSize = 8192 # choose this to be big enough for the longest sentence
|
minibatchSize = 8192 # choose this to be big enough for the longest sentence
|
|
@ -0,0 +1,5 @@
|
||||||
|
...document source of corpus and post-processing
|
||||||
|
|
||||||
|
http://www.speech.cs.cmu.edu/cgi-bin/cmudict
|
||||||
|
|
||||||
|
File: http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b
|
|
@ -0,0 +1,19 @@
|
||||||
|
The contents of this folder is based on or incorporates material from the projects listed below. Microsoft is not the original author of the Third Party Code. The original copyright notice and the license under which Microsoft received such Third Party Code, are set forth below. Such licenses and notices are provided for informational purposes only. Microsoft, not the third party, licenses the Third Party Code to you under the terms set forth in the EULA for the Microsoft Product. Microsoft reserves all rights not expressly granted under this agreement, whether by implication, estoppel or otherwise.
|
||||||
|
|
||||||
|
Provided for Informational Purposes Only
|
||||||
|
|
||||||
|
Carnegie Mellon University Pronouncing Dictionary
|
||||||
|
|
||||||
|
Copyright (C) 1993-2015 Carnegie Mellon University. All rights reserved.
|
||||||
|
|
||||||
|
BSD License
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,23 +0,0 @@
|
||||||
|
|
||||||
This example demonstrates the use of CNTK for letter-to-sound conversion using a
|
|
||||||
sequence-to-sequence model with attention.
|
|
||||||
|
|
||||||
The code supports a number of alternative configurations. As configured currently, it implements
|
|
||||||
* a 3-hidden layer unidirectional LSTM encoder network, all hidden dimensions are 512
|
|
||||||
* a 3-hidden layer unidirectional LSTM decoder network, all hidden dimensions are 512
|
|
||||||
* encoder state is passed to the decoder by means of attention, with projection dimension 128 and maximum input length of 20 tokens
|
|
||||||
* embedding disabled (the vocabulary is very small)
|
|
||||||
* beam decoder with beam width 3
|
|
||||||
|
|
||||||
This example uses the CMUDict as a corpus. The data or a conversion script will be included soon.
|
|
||||||
|
|
||||||
To Use:
|
|
||||||
=======
|
|
||||||
|
|
||||||
Modify the following in G2P.cntk:
|
|
||||||
* pathnames
|
|
||||||
* deviceId to specify CPU (-1) or GPU (>=0 or "auto")
|
|
||||||
|
|
||||||
Run:
|
|
||||||
* command line: cntk configFile=Examples/SequenceToSequence/Miscellaneous/G2P/G2P.cntk RunRootDir=g2p
|
|
||||||
* VS Debugger: configFile=$(SolutionDir)Examples/SequenceToSequence/Miscellaneous/G2P/G2P.cntk RunRootDir=$(SolutionDir)g2p
|
|
Загрузка…
Ссылка в новой задаче