Adding support for parallelized sequence training in Kaldi2Reader
This commit is contained in:
Родитель
fc676c579a
Коммит
73c6db513d
|
@ -225,20 +225,28 @@ void DataReader<ElemType>::SetRandomSeed(int seed)
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool DataReader<ElemType>::GetForkedUtterance(std::wstring& uttID, std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
bool DataReader<ElemType>::GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
Matrix<ElemType>& sentenceBegin,
|
||||
std::vector<MinibatchPackingFlag>& minibatchPackingFlag)
|
||||
{
|
||||
bool ans = false;
|
||||
for (size_t i = 0; i < m_ioNames.size(); i++)
|
||||
ans = (m_dataReader[m_ioNames[i]]->GetForkedUtterance(uttID, matrices) || ans);
|
||||
ans = (m_dataReader[m_ioNames[i]]->GetMinibatchCopy(uttInfo, matrices, sentenceBegin, minibatchPackingFlag) || ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool DataReader<ElemType>::ComputeDerivativeFeatures(const std::wstring& uttID, const Matrix<ElemType>& outputs)
|
||||
bool DataReader<ElemType>::SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag)
|
||||
{
|
||||
bool ans = false;
|
||||
for (size_t i = 0; i < m_ioNames.size(); i++)
|
||||
ans = (m_dataReader[m_ioNames[i]]->ComputeDerivativeFeatures(uttID, outputs) || ans);
|
||||
ans = (m_dataReader[m_ioNames[i]]->SetNetOutput(uttInfo, outputs, sentenceBegin, minibatchPackingFlag) || ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,14 +85,27 @@ public:
|
|||
|
||||
void SetDoRandomize(bool b){ mDoRandomize = b; }
|
||||
|
||||
// Gets utterance before getting the actual minibatch, which will not affect
|
||||
// getting the minibatches. This can be useful in sequence training.
|
||||
virtual bool GetForkedUtterance(std::wstring& , std::map<std::wstring, Matrix<ElemType>*>& ) { return false; }
|
||||
// Gets a copy of the minibatch for the forward computation. This can be
|
||||
// useful if some of the computation has to happen in the reader.
|
||||
virtual bool GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& /*uttInfo*/,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& /*matrices*/,
|
||||
Matrix<ElemType>& /*sentenceBegin*/,
|
||||
std::vector<MinibatchPackingFlag>& /*minibatchPackingFlag*/)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Computes certain derivatives given outputs from neural networks, which
|
||||
// will later be fed to the neural network as features. This can be useful
|
||||
// in sequence training.
|
||||
virtual bool ComputeDerivativeFeatures(const std::wstring& , const Matrix<ElemType>& ) { return false; }
|
||||
// Sets the neural network output to the reader. This can be useful if some
|
||||
// of the computation has to happen in the reader.
|
||||
virtual bool SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& /*uttInfo*/,
|
||||
const Matrix<ElemType>& /*outputs*/,
|
||||
const Matrix<ElemType>& /*sentenceBegin*/,
|
||||
const std::vector<MinibatchPackingFlag>& /*minibatchPackingFlag*/)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// GetReader - get a reader type from the DLL
|
||||
|
@ -193,14 +206,21 @@ public:
|
|||
|
||||
virtual bool DataEnd(EndDataType endDataType);
|
||||
|
||||
// Gets utterance before getting the actual minibatch, which will not affect
|
||||
// getting the minibatches. This can be useful in sequence training.
|
||||
virtual bool GetForkedUtterance(std::wstring& uttID, std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
// Gets a copy of the minibatch for the forward computation. This can be
|
||||
// useful if some of the computation has to happen in the reader.
|
||||
virtual bool GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
Matrix<ElemType>& sentenceBegin,
|
||||
std::vector<MinibatchPackingFlag>& minibatchPackingFlag);
|
||||
|
||||
// Computes certain derivatives given outputs from neural networks, which
|
||||
// will later be fed to the neural network as features. This can be useful
|
||||
// in sequence training.
|
||||
virtual bool ComputeDerivativeFeatures(const std::wstring& uttID, const Matrix<ElemType>& outputs);
|
||||
// Sets the neural network output to the reader. This can be useful if some
|
||||
// of the computation has to happen in the reader.
|
||||
virtual bool SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag);
|
||||
|
||||
void SetSentenceSegBatch(Matrix<ElemType> & sentenceBegin, vector<MinibatchPackingFlag>& minibatchPackingFlag);
|
||||
|
||||
|
|
|
@ -49,9 +49,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_frameSource = NULL;
|
||||
m_lattices = NULL;
|
||||
m_sequenceTrainingIO = NULL;
|
||||
m_minibatchBuffer.resize(0);
|
||||
m_minibatchBufferIndex = 0;
|
||||
m_minibatchBufferLeftovers = 0;
|
||||
m_noData = false;
|
||||
m_convertLabelsToTargets = false;
|
||||
m_doSeqTrain = false;
|
||||
m_getMinibatchCopy = false;
|
||||
|
||||
if (readerConfig.Exists("legacyMode"))
|
||||
{
|
||||
|
@ -60,7 +64,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// If <m_framemode> is false, throw away any utterance that is longer
|
||||
// than the specified <m_maxUtteranceLength>.
|
||||
m_maxUtteranceLength = readerConfig("maxUtteranceLength", "1500");
|
||||
m_maxUtteranceLength = readerConfig("maxUtteranceLength", "10000");
|
||||
|
||||
// m_truncated:
|
||||
// If true, truncate utterances to fit the minibatch size. Otherwise
|
||||
|
@ -172,7 +176,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_sequenceTrainingIO = new KaldiSequenceTrainingIO<ElemType>(
|
||||
denlatRspecifier, aliRspecifier, transModelFilename,
|
||||
silencePhoneStr, m_seqTrainCriterion, oldAcousticScale,
|
||||
acousticScale, lmScale, oneSilenceClass);
|
||||
acousticScale, lmScale,
|
||||
oneSilenceClass, m_numberOfuttsPerMinibatch);
|
||||
|
||||
// Scans the configurations to get "seqTrainDeriv" type input and
|
||||
// "seqTrainObj" type input. Both are feature nodes, we feed derivatives
|
||||
|
@ -293,6 +298,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
m_featureNameToIdMap[featureNames[i]] = iFeat;
|
||||
assert(iFeat == m_featureIdToNameMap.size());
|
||||
m_featureIdToNameMap.push_back(featureNames[i]);
|
||||
scriptpaths.push_back(new msra::asr::FeatureSection(thisFeature("scpFile"), thisFeature("rx"), thisFeature("featureTransform", "")));
|
||||
m_featureNameToDimMap[featureNames[i]] = m_featDims[i];
|
||||
|
||||
|
@ -334,6 +341,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
statelistpaths.push_back(thisLabel("labelMappingFile",L""));
|
||||
|
||||
m_labelNameToIdMap[labelNames[i]] = iLabel;
|
||||
assert(iLabel == m_labelIdToNameMap.size());
|
||||
m_labelIdToNameMap.push_back(labelNames[i]);
|
||||
m_labelNameToDimMap[labelNames[i]] = m_labelDims[i];
|
||||
mlfpaths.clear();
|
||||
mlfpaths.push_back(thisLabel("mlfFile"));
|
||||
|
@ -599,6 +608,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
m_featureNameToIdMap[featureNames[i]]= iFeat;
|
||||
assert(iFeat == m_featureIdToNameMap.size());
|
||||
m_featureIdToNameMap.push_back(featureNames[i]);
|
||||
scriptpaths.push_back(new msra::asr::FeatureSection(thisFeature("scpFile"), thisFeature("rx"), thisFeature("featureTransform", "")));
|
||||
m_featureNameToDimMap[featureNames[i]] = realDims[i];
|
||||
|
||||
|
@ -736,6 +747,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void HTKMLFReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
|
||||
{
|
||||
m_mbSize = mbSize;
|
||||
m_currentMBSize = mbSize;
|
||||
|
||||
if (m_trainOrTest)
|
||||
{
|
||||
|
@ -788,7 +800,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_mbiter = NULL;
|
||||
}
|
||||
msra::dbn::minibatchsource* source = m_frameSource;
|
||||
m_mbiter = new msra::dbn::minibatchiterator(*source, epoch, requestedEpochSamples, mbSize, datapasses);
|
||||
size_t currentMBSize = (m_framemode == true) ? mbSize : 1;
|
||||
m_mbiter = new msra::dbn::minibatchiterator(*source, epoch, requestedEpochSamples, currentMBSize, datapasses);
|
||||
|
||||
// Clears feature and label buffer.
|
||||
if (!m_featuresBufferMultiIO.empty())
|
||||
|
@ -882,7 +895,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// if startFrame = 5, endFrame = 10, then we copy frames 5, 6, 7, 8, 9.
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::PopulateUtteranceInMinibatch(
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
const std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
size_t uttIndex, size_t startFrame,
|
||||
size_t endFrame, size_t mbSize, size_t mbOffset)
|
||||
{
|
||||
|
@ -897,15 +910,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
return false;
|
||||
}
|
||||
if (m_doSeqTrain && m_numberOfuttsPerMinibatch > 1)
|
||||
{
|
||||
LogicError("nbrUttsInEachRecurrentIter has to be 1 in sequence training.\n");
|
||||
}
|
||||
|
||||
size_t numOfFea = m_featuresBufferMultiIO.size();
|
||||
size_t numOfLabel = m_labelsBufferMultiIO.size();
|
||||
typename std::map<std::wstring, Matrix<ElemType>*>::iterator iter;
|
||||
for (iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
for (auto iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
{
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{ // Features.
|
||||
|
@ -972,65 +980,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (m_doSeqTrain)
|
||||
{
|
||||
// TODO(GUOGUO): if we are going to allow "m_truncate" for
|
||||
// sequence training, we will have to modify the
|
||||
// following -- the following always assume we
|
||||
// start filling the minibatch from index 0.
|
||||
// If we do sequence training we have to populate the derivative
|
||||
// features as well as the objective features. But unlike the
|
||||
// features and labels, we put them in to <matrices> directly.
|
||||
// We assume we only process one utterance at a time in the
|
||||
// current implementation.
|
||||
assert(uttIndex == 0);
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::seqTrainDeriv)
|
||||
{
|
||||
wstring uttID = m_uttInfo[uttIndex][0].first;
|
||||
Matrix<ElemType>& data = *matrices[iter->first];
|
||||
if (m_sequenceTrainingIO->HasDerivatives(uttID))
|
||||
m_sequenceTrainingIO->GetDerivatives(startFrame, endFrame, mbSize, uttID, data);
|
||||
else
|
||||
{
|
||||
data.Resize(data.GetNumRows(), mbSize);
|
||||
data.SetValue(0);
|
||||
}
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::seqTrainObj)
|
||||
{
|
||||
wstring uttID = m_uttInfo[uttIndex][0].first;
|
||||
Matrix<ElemType>& data = *matrices[iter->first];
|
||||
if (m_sequenceTrainingIO->HasDerivatives(uttID))
|
||||
m_sequenceTrainingIO->GetObjectives(startFrame, endFrame, uttID, data);
|
||||
else
|
||||
data.SetValue(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
bool HTKMLFReader<ElemType>::GetOneMinibatchToTrainOrTestDataBuffer(
|
||||
const std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
{
|
||||
bool skip = false;
|
||||
|
||||
// On first minibatch, check if we have input for given names.
|
||||
if (m_checkDictionaryKeys)
|
||||
{
|
||||
std::map<std::wstring,size_t>::iterator iter;
|
||||
for (auto iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
{
|
||||
if (m_nameToTypeMap.find(iter->first) == m_nameToTypeMap.end())
|
||||
{
|
||||
throw std::runtime_error(msra::strfun::strprintf("minibatch requested for input node %S not found in reader - cannot generate input\n", iter->first.c_str()));
|
||||
throw std::runtime_error(msra::strfun::strprintf(
|
||||
"minibatch requested for input node %S not found in"
|
||||
"reader - cannot generate input\n", iter->first.c_str()));
|
||||
}
|
||||
|
||||
}
|
||||
m_checkDictionaryKeys=false;
|
||||
}
|
||||
|
||||
size_t currentMBSize = m_mbSize;
|
||||
// If we are doing sequence training, we need to keep the utterance
|
||||
// information.
|
||||
if (m_doSeqTrain)
|
||||
{
|
||||
m_minibatchUttInfo.assign(m_numberOfuttsPerMinibatch,
|
||||
std::vector<std::pair<wstring, size_t>>(0));
|
||||
}
|
||||
|
||||
m_currentMBSize = m_mbSize;
|
||||
do
|
||||
{
|
||||
// Checks if we have finished all the utterances.
|
||||
|
@ -1050,28 +1034,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
// If <m_truncated> is true, <currentMBSize> is <m_mbSize>
|
||||
// If <m_truncated> is false, <currentMBSize> equals to the longest
|
||||
// If <m_truncated> is true, <m_currentMBSize> is <m_mbSize>
|
||||
// If <m_truncated> is false, <m_currentMBSize> equals to the longest
|
||||
// utterance in the minibatch.
|
||||
if (!m_truncated)
|
||||
{
|
||||
currentMBSize = 0;
|
||||
m_currentMBSize = 0;
|
||||
for (size_t i = 0; i < m_numberOfuttsPerMinibatch; i++)
|
||||
{
|
||||
if (m_currentBufferFrames[i] > currentMBSize)
|
||||
if (m_currentBufferFrames[i] > m_currentMBSize)
|
||||
{
|
||||
currentMBSize = m_currentBufferFrames[i];
|
||||
m_currentMBSize = m_currentBufferFrames[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We initialize the sentence boundary information before we process
|
||||
// the utterances.
|
||||
m_sentenceBegin.Resize(m_numberOfuttsPerMinibatch, currentMBSize);
|
||||
m_minibatchPackingFlag.resize(currentMBSize);
|
||||
m_sentenceBegin.Resize(m_numberOfuttsPerMinibatch, m_currentMBSize);
|
||||
m_minibatchPackingFlag.resize(m_currentMBSize);
|
||||
for (size_t i = 0; i < m_numberOfuttsPerMinibatch; i++)
|
||||
{
|
||||
for (size_t j = 0; j < currentMBSize; j++)
|
||||
for (size_t j = 0; j < m_currentMBSize; j++)
|
||||
{
|
||||
m_sentenceBegin.SetValue(i, j, (ElemType) SENTENCE_MIDDLE);
|
||||
}
|
||||
|
@ -1085,7 +1069,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t startFrame = m_processedFrame[i];
|
||||
size_t endFrame = 0;
|
||||
|
||||
if ((startFrame + currentMBSize) < m_toProcess[i])
|
||||
if ((startFrame + m_currentMBSize) < m_toProcess[i])
|
||||
{
|
||||
// There is only 1 case:
|
||||
// 1. <m_framemode> is false, and <m_truncated> is true.
|
||||
|
@ -1099,11 +1083,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_minibatchPackingFlag[0] |= MinibatchPackingFlag::UtteranceStart;
|
||||
}
|
||||
|
||||
endFrame = startFrame + currentMBSize;
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(matrices, i, startFrame, endFrame, currentMBSize);
|
||||
m_processedFrame[i] += currentMBSize;
|
||||
endFrame = startFrame + m_currentMBSize;
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(matrices, i, startFrame, endFrame, m_currentMBSize);
|
||||
if (m_doSeqTrain && populateSucc) { m_minibatchUttInfo[i].push_back(m_uttInfo[i][0]); }
|
||||
m_processedFrame[i] += m_currentMBSize;
|
||||
}
|
||||
else if ((startFrame + currentMBSize) == m_toProcess[i])
|
||||
else if ((startFrame + m_currentMBSize) == m_toProcess[i])
|
||||
{
|
||||
// There are 3 cases:
|
||||
// 1. <m_framemode> is false, and <m_truncated> is true,
|
||||
|
@ -1132,9 +1117,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// Now puts the utterance into the minibatch, and loads the
|
||||
// next one.
|
||||
endFrame = startFrame + currentMBSize;
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(matrices, i, startFrame, endFrame, currentMBSize);
|
||||
m_processedFrame[i] += currentMBSize;
|
||||
endFrame = startFrame + m_currentMBSize;
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(matrices, i, startFrame, endFrame, m_currentMBSize);
|
||||
if (m_doSeqTrain && populateSucc) { m_minibatchUttInfo[i].push_back(m_uttInfo[i][0]); }
|
||||
m_processedFrame[i] += m_currentMBSize;
|
||||
bool reNewSucc = ReNewBufferForMultiIO(i);
|
||||
}
|
||||
else
|
||||
|
@ -1151,7 +1137,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Checks if we have reached the end of the minibatch.
|
||||
if (startFrame == m_toProcess[i])
|
||||
{
|
||||
for (size_t k = 0; k < currentMBSize; k++)
|
||||
for (size_t k = 0; k < m_currentMBSize; k++)
|
||||
{
|
||||
m_sentenceBegin.SetValue(i, k, (ElemType) NO_LABELS);
|
||||
m_minibatchPackingFlag[k] |= MinibatchPackingFlag::NoLabel;
|
||||
|
@ -1159,7 +1145,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Populates <NO_LABELS> with real features, the
|
||||
// following implementation is not efficient...
|
||||
assert(m_toProcess[i] > 0);
|
||||
PopulateUtteranceInMinibatch(matrices, i, 0, 1, currentMBSize, k);
|
||||
PopulateUtteranceInMinibatch(matrices, i, 0, 1, m_currentMBSize, k);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -1194,13 +1180,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
endFrame = m_toProcess[i];
|
||||
size_t currentMBFilled = endFrame - startFrame;
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(matrices, i, startFrame, endFrame, currentMBSize);
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(matrices, i, startFrame, endFrame, m_currentMBSize);
|
||||
if (m_doSeqTrain && populateSucc) { m_minibatchUttInfo[i].push_back(m_uttInfo[i][0]); }
|
||||
m_processedFrame[i] += currentMBFilled;
|
||||
bool reNewSucc = ReNewBufferForMultiIO(i);
|
||||
|
||||
// Third, if the next utterance can fit into the current
|
||||
// minibatch, we also pack the next utterance.
|
||||
while (reNewSucc && (currentMBFilled + m_toProcess[i] <= currentMBSize))
|
||||
while (reNewSucc && (currentMBFilled + m_toProcess[i] <= m_currentMBSize))
|
||||
{
|
||||
// Sets the utterance boundary.
|
||||
assert(currentMBFilled + m_toProcess[i] <= m_sentenceBegin.GetNumCols());
|
||||
|
@ -1208,7 +1195,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_minibatchPackingFlag[currentMBFilled] |= MinibatchPackingFlag::UtteranceStart;
|
||||
m_sentenceBegin.SetValue(i, currentMBFilled + m_toProcess[i] - 1, (ElemType)SENTENCE_END);
|
||||
m_minibatchPackingFlag[currentMBFilled + m_toProcess[i] - 1] |= MinibatchPackingFlag::UtteranceEnd;
|
||||
populateSucc = PopulateUtteranceInMinibatch(matrices, i, 0, m_toProcess[i], currentMBSize, currentMBFilled);
|
||||
populateSucc = PopulateUtteranceInMinibatch(matrices, i, 0, m_toProcess[i], m_currentMBSize, currentMBFilled);
|
||||
if (m_doSeqTrain && populateSucc) { m_minibatchUttInfo[i].push_back(m_uttInfo[i][0]); }
|
||||
assert(m_processedFrame[i] == 0);
|
||||
m_processedFrame[i] = m_toProcess[i];
|
||||
currentMBFilled += m_toProcess[i];
|
||||
|
@ -1219,9 +1207,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// minibatch is not full.
|
||||
if (reNewSucc && !m_framemode && m_truncated)
|
||||
{
|
||||
populateSucc = PopulateUtteranceInMinibatch(matrices, i, 0, currentMBSize - currentMBFilled, currentMBSize, currentMBFilled);
|
||||
m_processedFrame[i] += currentMBSize - currentMBFilled;
|
||||
if (currentMBFilled < currentMBSize)
|
||||
populateSucc = PopulateUtteranceInMinibatch(matrices, i, 0, m_currentMBSize - currentMBFilled, m_currentMBSize, currentMBFilled);
|
||||
if (m_doSeqTrain && populateSucc) { m_minibatchUttInfo[i].push_back(m_uttInfo[i][0]); }
|
||||
m_processedFrame[i] += m_currentMBSize - currentMBFilled;
|
||||
if (currentMBFilled < m_currentMBSize)
|
||||
{
|
||||
m_sentenceBegin.SetValue(i, currentMBFilled, (ElemType)SENTENCE_BEGIN);
|
||||
m_minibatchPackingFlag[currentMBFilled] |= MinibatchPackingFlag::UtteranceStart;
|
||||
|
@ -1229,7 +1218,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else
|
||||
{
|
||||
for (size_t k = currentMBFilled; k < currentMBSize; k++)
|
||||
for (size_t k = currentMBFilled; k < m_currentMBSize; k++)
|
||||
{
|
||||
m_sentenceBegin.SetValue(i, k, (ElemType) NO_LABELS);
|
||||
m_minibatchPackingFlag[k] |= MinibatchPackingFlag::NoLabel;
|
||||
|
@ -1237,29 +1226,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Populates <NO_LABELS> with real features, the
|
||||
// following implementation is not efficient...
|
||||
assert(m_toProcess[i] > 0);
|
||||
PopulateUtteranceInMinibatch(matrices, i, 0, 1, currentMBSize, k);
|
||||
PopulateUtteranceInMinibatch(matrices, i, 0, 1, m_currentMBSize, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typename std::map<std::wstring, Matrix<ElemType>*>::iterator iter;
|
||||
for (iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
{
|
||||
Matrix<ElemType>& data = *matrices[iter->first];
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
size_t id = m_featureNameToIdMap[iter->first];
|
||||
size_t dim = m_featureNameToDimMap[iter->first];
|
||||
data.SetValue(dim, currentMBSize * m_numberOfuttsPerMinibatch, m_featuresBufferMultiIO[id] , matrixFlagNormal);
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::category)
|
||||
{
|
||||
size_t id = m_labelNameToIdMap[iter->first];
|
||||
size_t dim = m_labelNameToDimMap[iter->first];
|
||||
data.SetValue(dim, currentMBSize * m_numberOfuttsPerMinibatch, m_labelsBufferMultiIO[id], matrixFlagNormal);
|
||||
}
|
||||
}
|
||||
skip=false;
|
||||
}
|
||||
while(skip);
|
||||
|
@ -1267,6 +1239,209 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return true;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::ShouldCopyMinibatchFromBuffer()
|
||||
{
|
||||
if (m_doSeqTrain)
|
||||
{
|
||||
// If <m_getMinibatchCopy> is false, then we should copy data from
|
||||
// buffer for back-propagation.
|
||||
if (m_getMinibatchCopy == false && m_minibatchBuffer.size() > 0)
|
||||
{
|
||||
m_minibatchBufferIndex = 0;
|
||||
m_minibatchBufferLeftovers = m_minibatchBuffer.size() - 1; // Will pop one more.
|
||||
return true;
|
||||
}
|
||||
|
||||
// If <m_getMinibatchCopy> is true, we first have to re-compute
|
||||
// the likelihood for the frames that are already in the buffer.
|
||||
if (m_getMinibatchCopy == true && m_minibatchBufferLeftovers > 0)
|
||||
{
|
||||
if (m_minibatchBufferLeftovers == m_minibatchBuffer.size())
|
||||
{
|
||||
m_minibatchBufferIndex = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_minibatchBufferIndex += 1;
|
||||
}
|
||||
m_minibatchBufferLeftovers -= 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void HTKMLFReader<ElemType>::CopyMinibatchToBuffer()
|
||||
{
|
||||
MinibatchBufferUnit currentMinibatch;
|
||||
|
||||
// Stores variables realted to the current minibatch.
|
||||
currentMinibatch.sentenceBegin.SetValue(m_sentenceBegin);
|
||||
currentMinibatch.minibatchPackingFlag = m_minibatchPackingFlag;
|
||||
currentMinibatch.currentMBSize = m_currentMBSize;
|
||||
currentMinibatch.minibatchUttInfo = m_minibatchUttInfo;
|
||||
|
||||
size_t size = m_currentMBSize * m_numberOfuttsPerMinibatch;
|
||||
|
||||
// Copies features.
|
||||
currentMinibatch.features.resize(0);
|
||||
for (size_t i = 0; i < m_featuresBufferMultiIO.size(); ++i)
|
||||
{
|
||||
std::vector<ElemType> tmpFeatures(m_featuresBufferMultiIO[i],
|
||||
m_featuresBufferMultiIO[i] + size * m_featureNameToDimMap[m_featureIdToNameMap[i]]);
|
||||
currentMinibatch.features.push_back(tmpFeatures);
|
||||
}
|
||||
|
||||
// Copies labels.
|
||||
currentMinibatch.labels.resize(0);
|
||||
for (size_t i = 0; i < m_labelsBufferMultiIO.size(); ++i)
|
||||
{
|
||||
std::vector<ElemType> tmpLabels(m_labelsBufferMultiIO[i],
|
||||
m_labelsBufferMultiIO[i] + size * m_labelNameToDimMap[m_labelIdToNameMap[i]]);
|
||||
currentMinibatch.labels.push_back(tmpLabels);
|
||||
}
|
||||
|
||||
m_minibatchBuffer.push_back(currentMinibatch);
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void HTKMLFReader<ElemType>::CopyMinibatchFromBufferToMatrix(
|
||||
size_t index,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
{
|
||||
assert(m_minibatchBuffer.size() > index);
|
||||
|
||||
// Restores the variables related to the minibatch.
|
||||
m_sentenceBegin.SetValue(m_minibatchBuffer[index].sentenceBegin);
|
||||
m_minibatchPackingFlag = m_minibatchBuffer[index].minibatchPackingFlag;
|
||||
m_currentMBSize = m_minibatchBuffer[index].currentMBSize;
|
||||
m_minibatchUttInfo = m_minibatchBuffer[index].minibatchUttInfo;
|
||||
|
||||
// Copies data to the matrix.
|
||||
for (auto iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
{
|
||||
Matrix<ElemType>& data = *matrices[iter->first];
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
size_t id = m_featureNameToIdMap[iter->first];
|
||||
size_t dim = m_featureNameToDimMap[iter->first];
|
||||
assert(id < m_minibatchBuffer[index].features.size());
|
||||
data.SetValue(dim,
|
||||
m_minibatchBuffer[index].features[id].size() / dim,
|
||||
m_minibatchBuffer[index].features[id].data(),
|
||||
matrixFlagNormal);
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::category)
|
||||
{
|
||||
size_t id = m_labelNameToIdMap[iter->first];
|
||||
size_t dim = m_labelNameToDimMap[iter->first];
|
||||
assert(id < m_minibatchBuffer[index].labels.size());
|
||||
data.SetValue(dim,
|
||||
m_minibatchBuffer[index].labels[id].size() / dim,
|
||||
m_minibatchBuffer[index].labels[id].data(),
|
||||
matrixFlagNormal);
|
||||
}
|
||||
else if (m_doSeqTrain && !m_getMinibatchCopy)
|
||||
{
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::seqTrainDeriv)
|
||||
{
|
||||
m_sequenceTrainingIO->GetDerivative(
|
||||
m_minibatchUttInfo, m_sentenceBegin,
|
||||
m_minibatchPackingFlag, matrices[iter->first]);
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::seqTrainObj)
|
||||
{
|
||||
m_sequenceTrainingIO->GetObjective(m_minibatchUttInfo,
|
||||
matrices[iter->first]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we are not in the minibatch copy mode, then we can remove the
|
||||
// minibatch from buffer.
|
||||
if (m_getMinibatchCopy == false)
|
||||
{
|
||||
assert(index == 0);
|
||||
m_minibatchBuffer.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void HTKMLFReader<ElemType>::CopyMinibatchToMatrix(
|
||||
size_t size,
|
||||
const vector<ElemType*>& featureBuffer,
|
||||
const vector<ElemType*>& labelBuffer,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices) const
|
||||
{
|
||||
for (auto iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
{
|
||||
Matrix<ElemType>& data = *matrices[iter->first];
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
size_t id = m_featureNameToIdMap[iter->first];
|
||||
size_t dim = m_featureNameToDimMap[iter->first];
|
||||
assert(id < featureBuffer.size());
|
||||
data.SetValue(dim, size, featureBuffer[id] , matrixFlagNormal);
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::category)
|
||||
{
|
||||
size_t id = m_labelNameToIdMap[iter->first];
|
||||
size_t dim = m_labelNameToDimMap[iter->first];
|
||||
assert(id < labelBuffer.size());
|
||||
data.SetValue(dim, size, labelBuffer[id], matrixFlagNormal);
|
||||
}
|
||||
else if (m_doSeqTrain)
|
||||
{
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::seqTrainDeriv)
|
||||
{
|
||||
data.Resize(data.GetNumRows(), m_currentMBSize);
|
||||
data.SetValue(0);
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::seqTrainObj)
|
||||
{
|
||||
data.SetValue(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::GetMinibatchToTrainOrTest(
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
{
|
||||
// We either copy a new minibatch from buffer or read one from minibatch
|
||||
// iterator.
|
||||
bool success = false;
|
||||
if (ShouldCopyMinibatchFromBuffer())
|
||||
{
|
||||
CopyMinibatchFromBufferToMatrix(m_minibatchBufferIndex, matrices);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
success = GetOneMinibatchToTrainOrTestDataBuffer(matrices);
|
||||
if (success)
|
||||
{
|
||||
CopyMinibatchToMatrix(
|
||||
m_currentMBSize * m_numberOfuttsPerMinibatch,
|
||||
m_featuresBufferMultiIO, m_labelsBufferMultiIO, matrices);
|
||||
}
|
||||
|
||||
// Checks if we need to move the current minibatch to buffer.
|
||||
if (success && m_getMinibatchCopy)
|
||||
{
|
||||
CopyMinibatchToBuffer();
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
{
|
||||
|
@ -1567,82 +1742,60 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (!(*m_mbiter))
|
||||
m_noData = true;
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Gets a copy of the utterance that corresponds to the current minibatches,
|
||||
// which will be used to do a neural network forward computation.
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::GetForkedUtterance(std::wstring& uttID,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
bool HTKMLFReader<ElemType>::GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
Matrix<ElemType>& sentenceBegin,
|
||||
std::vector<MinibatchPackingFlag>& minibatchPackingFlag)
|
||||
{
|
||||
if (!m_doSeqTrain)
|
||||
// We need to get a "copy" of the minibatch to do the forward
|
||||
// computation for sequence training.
|
||||
if (m_doSeqTrain)
|
||||
{
|
||||
assert(m_framemode == false);
|
||||
if (m_sequenceTrainingIO->NeedLikelihoodToComputeDerivative())
|
||||
{
|
||||
m_getMinibatchCopy = true;
|
||||
if (GetMinibatchToTrainOrTest(matrices))
|
||||
{
|
||||
sentenceBegin.SetValue(m_sentenceBegin);
|
||||
minibatchPackingFlag = m_minibatchPackingFlag;
|
||||
uttInfo = m_minibatchUttInfo;
|
||||
m_getMinibatchCopy = false;
|
||||
return true;
|
||||
}
|
||||
m_getMinibatchCopy = false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
assert(m_framemode == false);
|
||||
|
||||
// For the moment we only support single utterance.
|
||||
if (m_numberOfuttsPerMinibatch != 1)
|
||||
{
|
||||
RuntimeError("The current sequence training implementation does not support multiple utterances.\n");
|
||||
}
|
||||
|
||||
// Under our current assumption, we only have one utterance at a time.
|
||||
uttID = m_uttInfo[0][0].first;
|
||||
if (!m_sequenceTrainingIO->HasDerivatives(uttID))
|
||||
{
|
||||
size_t startFrame = 0;
|
||||
size_t endFrame = m_uttInfo[0][0].second;
|
||||
size_t currentMBSize = endFrame - startFrame;
|
||||
bool populateSucc = PopulateUtteranceInMinibatch(
|
||||
matrices, 0, startFrame, endFrame, currentMBSize);
|
||||
if (!populateSucc)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Sets sentence boundary.
|
||||
m_sentenceBegin.Resize(1, currentMBSize);
|
||||
m_minibatchPackingFlag.resize(currentMBSize);
|
||||
for (size_t i = 0; i < currentMBSize; i++)
|
||||
{
|
||||
m_sentenceBegin.SetValue(0, i, (ElemType) SENTENCE_MIDDLE);
|
||||
}
|
||||
std::fill(m_minibatchPackingFlag.begin(), m_minibatchPackingFlag.end(), MinibatchPackingFlag::None);
|
||||
m_sentenceBegin.SetValue(0, 0, (ElemType)SENTENCE_BEGIN);
|
||||
m_sentenceBegin.SetValue(0, m_sentenceBegin.GetNumCols() - 1, (ElemType) SENTENCE_END);
|
||||
m_minibatchPackingFlag[0] = MinibatchPackingFlag::UtteranceStart;
|
||||
m_minibatchPackingFlag[m_sentenceBegin.GetNumCols() - 1] = MinibatchPackingFlag::UtteranceEnd;
|
||||
|
||||
typename std::map<std::wstring, Matrix<ElemType>*>::iterator iter;
|
||||
for (iter = matrices.begin(); iter != matrices.end(); iter++)
|
||||
{
|
||||
Matrix<ElemType>& data = *matrices[iter->first];
|
||||
if (m_nameToTypeMap[iter->first] == InputOutputTypes::real)
|
||||
{
|
||||
size_t id = m_featureNameToIdMap[iter->first];
|
||||
size_t dim = m_featureNameToDimMap[iter->first];
|
||||
data.SetValue(dim, currentMBSize * m_numberOfuttsPerMinibatch, m_featuresBufferMultiIO[id] , matrixFlagNormal);
|
||||
}
|
||||
else if (m_nameToTypeMap[iter->first] == InputOutputTypes::category)
|
||||
{
|
||||
size_t id = m_labelNameToIdMap[iter->first];
|
||||
size_t dim = m_labelNameToDimMap[iter->first];
|
||||
data.SetValue(dim, currentMBSize * m_numberOfuttsPerMinibatch, m_labelsBufferMultiIO[id], matrixFlagNormal);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool HTKMLFReader<ElemType>::ComputeDerivativeFeatures(const std::wstring& uttID,
|
||||
const Matrix<ElemType>& outputs)
|
||||
bool HTKMLFReader<ElemType>::SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag)
|
||||
{
|
||||
return m_sequenceTrainingIO->ComputeDerivatives(uttID, outputs);
|
||||
// Set the likelihoods for the utterance with which we can comput the
|
||||
// derivatives. Note that the minibatch may only contain partial output
|
||||
// for the utterance, <m_sequenceTrainingIO> takes care of "pasting"
|
||||
// them together.
|
||||
if (m_doSeqTrain)
|
||||
{
|
||||
assert(m_framemode == false);
|
||||
return m_sequenceTrainingIO->SetLikelihood(uttInfo, outputs,
|
||||
sentenceBegin,
|
||||
minibatchPackingFlag);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,14 +24,25 @@ private:
|
|||
msra::dbn::latticesource* m_lattices;
|
||||
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
|
||||
|
||||
// Sequence training related. Note that for now we only support single
|
||||
// utterance in sequence training. But the utterance information holders
|
||||
// are designed as if they support multiple utterances -- in case we will
|
||||
// extend this soon.
|
||||
// Sequence training related members.
|
||||
// One buffered minibatch; instances are stored in <m_minibatchBuffer>.
// NOTE(review): the per-field notes below are inferred from the field names
// and from the reader members they appear to mirror (m_sentenceBegin,
// m_minibatchPackingFlag, m_minibatchUttInfo) -- confirm against
// CopyMinibatchToBuffer() / CopyMinibatchFromBufferToMatrix().
struct MinibatchBufferUnit
|
||||
{
|
||||
// presumably one flat value buffer per feature stream -- TODO confirm
std::vector<std::vector<ElemType>> features;
|
||||
// presumably one flat value buffer per label stream -- TODO confirm
std::vector<std::vector<ElemType>> labels;
|
||||
Matrix<ElemType> sentenceBegin;
|
||||
vector<MinibatchPackingFlag> minibatchPackingFlag;
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> minibatchUttInfo;
|
||||
// NOTE(review): not initialized by the struct itself; whoever fills the
// unit must set it.
size_t currentMBSize;
|
||||
};
|
||||
bool m_doSeqTrain;
|
||||
bool m_getMinibatchCopy;
|
||||
size_t m_minibatchBufferIndex;
|
||||
size_t m_minibatchBufferLeftovers;
|
||||
wstring m_seqTrainCriterion;
|
||||
KaldiSequenceTrainingIO<ElemType>* m_sequenceTrainingIO;
|
||||
std::deque<MinibatchBufferUnit> m_minibatchBuffer;
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> m_uttInfo;
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> m_minibatchUttInfo;
|
||||
|
||||
vector<bool> m_sentenceEnd;
|
||||
bool m_readAhead;
|
||||
|
@ -42,6 +53,7 @@ private:
|
|||
size_t m_numberOfuttsPerMinibatch;
|
||||
size_t m_actualnumberOfuttsPerMinibatch;
|
||||
size_t m_mbSize;
|
||||
size_t m_currentMBSize;
|
||||
vector<size_t> m_currentBufferFrames;
|
||||
vector<size_t> m_toProcess;
|
||||
vector<size_t> m_switchFrame;
|
||||
|
@ -72,6 +84,8 @@ private:
|
|||
std::map<std::wstring,size_t> m_nameToTypeMap;
|
||||
std::map<std::wstring,size_t> m_featureNameToDimMap;
|
||||
std::map<std::wstring,size_t> m_labelNameToDimMap;
|
||||
std::vector<std::wstring> m_featureIdToNameMap;
|
||||
std::vector<std::wstring> m_labelIdToNameMap;
|
||||
// for writing outputs to files (standard single input/output network) - deprecate eventually
|
||||
bool m_checkDictionaryKeys;
|
||||
bool m_convertLabelsToTargets;
|
||||
|
@ -89,10 +103,22 @@ private:
|
|||
void PrepareForSequenceTraining(const ConfigParameters& config);
|
||||
|
||||
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
bool GetOneMinibatchToTrainOrTestDataBuffer(const std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
bool PopulateUtteranceInMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices, size_t uttIndex, size_t startFrame, size_t endFrame, size_t mbSize, size_t mbOffset = 0);
|
||||
bool PopulateUtteranceInMinibatch(const std::map<std::wstring, Matrix<ElemType>*>& matrices, size_t uttIndex, size_t startFrame, size_t endFrame, size_t mbSize, size_t mbOffset = 0);
|
||||
|
||||
//-void GetCurrentUtteranceInfo(size_t uttIndex, size_t startFrame, size_t endFrame, wstring& uttID, size_t& startFrameInUtt, size_t& endFrameInUtt);
|
||||
// If we have to read the current minibatch from buffer, return true,
|
||||
// otherwise return false.
|
||||
bool ShouldCopyMinibatchFromBuffer();
|
||||
|
||||
// Copies the current minibatch to buffer.
|
||||
void CopyMinibatchToBuffer();
|
||||
|
||||
// Copies one minibatch from buffer to matrix.
|
||||
void CopyMinibatchFromBufferToMatrix(size_t index, std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
|
||||
// Copies one minibatch from <m_featuresBufferMultiIO> to matrix.
|
||||
void CopyMinibatchToMatrix(size_t size, const std::vector<ElemType*>& featureBuffer, const std::vector<ElemType*>& labelBuffer, std::map<std::wstring, Matrix<ElemType>*>& matrices) const;
|
||||
|
||||
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
|
||||
void StartMinibatchLoopToWrite(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
|
||||
|
@ -157,9 +183,16 @@ public:
|
|||
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
|
||||
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
|
||||
|
||||
virtual bool GetForkedUtterance(std::wstring& uttID, std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
virtual bool ComputeDerivativeFeatures(const std::wstring& uttID, const Matrix<ElemType>& outputs);
|
||||
|
||||
virtual bool GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
Matrix<ElemType>& sentenceBegin,
|
||||
vector<MinibatchPackingFlag>& sentenceExistsBeginOrNoLabels);
|
||||
virtual bool SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const vector<MinibatchPackingFlag>& sentenceExistsBeginOrNoLabels);
|
||||
|
||||
virtual bool DataEnd(EndDataType endDataType);
|
||||
void SetSentenceEndInBatch(vector<size_t> &/*sentenceEnd*/);
|
||||
|
|
|
@ -11,7 +11,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const wstring& transModelFilename, const wstring& silencePhoneStr,
|
||||
const wstring& trainCriterion,
|
||||
ElemType oldAcousticScale, ElemType acousticScale,
|
||||
ElemType lmScale, bool oneSilenceClass)
|
||||
ElemType lmScale, bool oneSilenceClass, size_t numberOfuttsPerMinibatch)
|
||||
{
|
||||
using namespace msra::asr;
|
||||
assert(denlatRspecifier != L"");
|
||||
|
@ -26,8 +26,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_lmScale = lmScale;
|
||||
m_trainCriterion = trainCriterion;
|
||||
m_oneSilenceClass = oneSilenceClass;
|
||||
m_objective = 0;
|
||||
m_posteriors.clear();
|
||||
m_numUttsPerMinibatch = numberOfuttsPerMinibatch;
|
||||
m_needLikelihood = true;
|
||||
m_currentObj = 0;
|
||||
m_minibatchIndex = 1;
|
||||
m_lastCompleteMinibatch.assign(m_numUttsPerMinibatch, 0);
|
||||
if (!kaldi::SplitStringToIntegers(toStr(silencePhoneStr),
|
||||
":", false, &m_silencePhones))
|
||||
{
|
||||
|
@ -35,13 +38,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
if (m_trainCriterion != L"mpfe" && m_trainCriterion != L"smbr")
|
||||
{
|
||||
LogicError("Supported sequence training criterion are: mpfe, smbr.\n");
|
||||
LogicError("Supported sequence training criterion: mpfe, smbr.\n");
|
||||
}
|
||||
m_derivRead = false;
|
||||
m_objRead = false;
|
||||
m_currentUttHasDeriv = false;
|
||||
m_currentUttID = L"";
|
||||
m_currentUttLength = 0;
|
||||
}
|
||||
|
||||
// Destructor.
|
||||
|
@ -61,50 +59,43 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool KaldiSequenceTrainingIO<ElemType>::HasDerivatives(const wstring& uttID)
|
||||
bool KaldiSequenceTrainingIO<ElemType>::ComputeDerivative(
|
||||
const wstring& uttID)
|
||||
{
|
||||
if (uttID == m_currentUttID && m_currentUttHasDeriv)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool KaldiSequenceTrainingIO<ElemType>::ComputeDerivatives(
|
||||
const wstring& uttID, const Matrix<ElemType>& logLikelihoodIn)
|
||||
{
|
||||
// Checks if we need to move data to CPU.
|
||||
Matrix<ElemType> logLikelihood(logLikelihoodIn);
|
||||
if (logLikelihood.GetDeviceId() >= 0)
|
||||
logLikelihood.TransferFromDeviceToDevice(logLikelihood.GetDeviceId(), CPUDEVICE, true, false, false);
|
||||
assert(m_uttPool.find(uttID) != m_uttPool.end());
|
||||
assert(m_uttPool[uttID].hasDerivative == false);
|
||||
Matrix<ElemType>& logLikelihood = m_uttPool[uttID].logLikelihood;
|
||||
|
||||
std::string uttIDStr = msra::asr::toStr(uttID);
|
||||
|
||||
// Sanity check.
|
||||
if (m_transModel.NumPdfs() != logLikelihood.GetNumRows())
|
||||
{
|
||||
RuntimeError("Number of labels in logLikelihood does not match that in the Kaldi model for utterance %S: %d v.s. %d\n", uttID.c_str(), logLikelihood.GetNumRows(), m_transModel.NumPdfs());
|
||||
RuntimeError("Number of labels in logLikelihood does not match that"
|
||||
" in the Kaldi model for utterance %S: %d v.s. %d\n",
|
||||
uttID.c_str(), logLikelihood.GetNumRows(),
|
||||
m_transModel.NumPdfs());
|
||||
}
|
||||
|
||||
// Reads alignment.
|
||||
if (!m_aliReader->HasKey(uttIDStr))
|
||||
{
|
||||
RuntimeError("Alignment not found for utterance %s\n", uttIDStr.c_str());
|
||||
RuntimeError("Alignment not found for utterance %s\n",
|
||||
uttIDStr.c_str());
|
||||
}
|
||||
const std::vector<int32> ali = m_aliReader->Value(uttIDStr);
|
||||
if (ali.size() != logLikelihood.GetNumCols())
|
||||
{
|
||||
RuntimeError("Number of frames in logLikelihood does not match that in the alignment for utterance %S: %d v.s. %d\n", uttID.c_str(), logLikelihood.GetNumCols(), ali.size());
|
||||
RuntimeError("Number of frames in logLikelihood does not match that"
|
||||
" in the alignment for utterance %S: %d v.s. %d\n",
|
||||
uttID.c_str(), logLikelihood.GetNumCols(), ali.size());
|
||||
}
|
||||
|
||||
// Reads denominator lattice.
|
||||
if (!m_denlatReader->HasKey(uttIDStr))
|
||||
{
|
||||
RuntimeError("Denominator lattice not found for utterance %S\n", uttID.c_str());
|
||||
RuntimeError("Denominator lattice not found for utterance %S\n",
|
||||
uttID.c_str());
|
||||
}
|
||||
kaldi::CompactLattice clat = m_denlatReader->Value(uttIDStr);
|
||||
fst::CreateSuperFinal(&clat); /* One final state with weight One() */
|
||||
|
@ -115,7 +106,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// acoustic scale to 0.
|
||||
if (m_oldAcousticScale != 1.0)
|
||||
{
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(m_oldAcousticScale), &lat);
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(m_oldAcousticScale),
|
||||
&lat);
|
||||
}
|
||||
|
||||
// Topsort lattice.
|
||||
|
@ -133,7 +125,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
kaldi::int32 maxTime = kaldi::LatticeStateTimes(lat, &stateTimes);
|
||||
if (maxTime != logLikelihood.GetNumCols())
|
||||
{
|
||||
RuntimeError("Number of frames in the logLikelihood does not match that in the denominator lattice for utterance %S\n", uttID.c_str(), logLikelihood.GetNumRows(), maxTime);
|
||||
RuntimeError("Number of frames in the logLikelihood does not match"
|
||||
" that in the denominator lattice for utterance %S\n",
|
||||
uttID.c_str(), logLikelihood.GetNumRows(), maxTime);
|
||||
}
|
||||
|
||||
// Does lattice acoustic rescoring with the new posteriors from the
|
||||
|
@ -143,7 +137,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Second pass acoustic and language model scale.
|
||||
if (m_acousticScale != 1.0 || m_lmScale != 1.0)
|
||||
{
|
||||
fst::ScaleLattice(fst::LatticeScale(m_lmScale, m_acousticScale), &lat);
|
||||
fst::ScaleLattice(fst::LatticeScale(m_lmScale, m_acousticScale),
|
||||
&lat);
|
||||
}
|
||||
|
||||
// Forward-backward on the lattice.
|
||||
|
@ -152,39 +147,39 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (m_trainCriterion == L"smbr")
|
||||
{
|
||||
thisObj = kaldi::LatticeForwardBackwardMpeVariants(
|
||||
m_transModel, m_silencePhones, lat, ali, "smbr", m_oneSilenceClass, &post);
|
||||
m_transModel, m_silencePhones, lat,
|
||||
ali, "smbr", m_oneSilenceClass, &post);
|
||||
}
|
||||
else if (m_trainCriterion == L"mpfe")
|
||||
{
|
||||
thisObj = kaldi::LatticeForwardBackwardMpeVariants(
|
||||
m_transModel, m_silencePhones, lat, ali, "mpfe", m_oneSilenceClass, &post);
|
||||
m_transModel, m_silencePhones, lat,
|
||||
ali, "mpfe", m_oneSilenceClass, &post);
|
||||
}
|
||||
|
||||
kaldi::ConvertPosteriorToPdfs(m_transModel, post, &m_posteriors);
|
||||
kaldi::ConvertPosteriorToPdfs(m_transModel,
|
||||
post, &(m_uttPool[uttID].posterior));
|
||||
|
||||
// Uses "expected error rate" instead of "expected accuracy".
|
||||
m_objective = logLikelihood.GetNumCols() - thisObj;
|
||||
m_uttPool[uttID].objective = logLikelihood.GetNumCols() - thisObj;
|
||||
|
||||
assert(m_posteriors.size() == logLikelihood.GetNumCols());
|
||||
assert(m_uttPool[uttID].posterior.size() == logLikelihood.GetNumCols());
|
||||
|
||||
m_derivRead = false;
|
||||
m_objRead = false;
|
||||
m_currentUttHasDeriv = true;
|
||||
m_currentUttID = uttID;
|
||||
m_currentUttLength = logLikelihood.GetNumCols();
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void KaldiSequenceTrainingIO<ElemType>::LatticeAcousticRescore(
|
||||
const std::vector<kaldi::int32>& stateTimes,
|
||||
const Matrix<ElemType>& logLikelihood, kaldi::Lattice* lat)
|
||||
const Matrix<ElemType>& logLikelihood, kaldi::Lattice* lat) const
|
||||
{
|
||||
std::vector<std::vector<kaldi::int32>> timeStateMap(logLikelihood.GetNumCols());
|
||||
std::vector<std::vector<kaldi::int32>> timeStateMap(
|
||||
logLikelihood.GetNumCols());
|
||||
size_t num_states = lat->NumStates();
|
||||
for (size_t s = 0; s < num_states; s++)
|
||||
{
|
||||
assert(stateTimes[s] >= 0 && stateTimes[s] <= logLikelihood.GetNumCols());
|
||||
assert(stateTimes[s] >= 0
|
||||
&& stateTimes[s] <= logLikelihood.GetNumCols());
|
||||
if (stateTimes[s] < logLikelihood.GetNumCols())
|
||||
{
|
||||
timeStateMap[stateTimes[s]].push_back(s);
|
||||
|
@ -196,14 +191,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (size_t i = 0; i < timeStateMap[t].size(); ++i)
|
||||
{
|
||||
kaldi::int32 state = timeStateMap[t][i];
|
||||
for (fst::MutableArcIterator<kaldi::Lattice> aiter(lat, state); !aiter.Done(); aiter.Next())
|
||||
for (fst::MutableArcIterator<kaldi::Lattice> aiter(lat, state);
|
||||
!aiter.Done(); aiter.Next())
|
||||
{
|
||||
kaldi::LatticeArc arc = aiter.Value();
|
||||
kaldi::int32 trans_id = arc.ilabel;
|
||||
if (trans_id != 0)
|
||||
{
|
||||
kaldi::int32 pdf_id = m_transModel.TransitionIdToPdf(trans_id);
|
||||
arc.weight.SetValue2(-logLikelihood(pdf_id, t) + arc.weight.Value2());
|
||||
kaldi::int32 pdf_id =
|
||||
m_transModel.TransitionIdToPdf(trans_id);
|
||||
arc.weight.SetValue2(-logLikelihood(pdf_id, t)
|
||||
+ arc.weight.Value2());
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
|
@ -219,97 +217,285 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
void KaldiSequenceTrainingIO<ElemType>::GetDerivatives(size_t startFrame,
|
||||
size_t endFrame,
|
||||
size_t mbSize,
|
||||
const std::wstring& uttID,
|
||||
Matrix<ElemType>& derivativesIn)
|
||||
void KaldiSequenceTrainingIO<ElemType>::ProcessUttInfo(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag,
|
||||
std::vector<std::vector<std::pair<wstring, std::pair<size_t, size_t>>>>* uttInfoInMinibatch) const
|
||||
{
|
||||
Matrix<ElemType> derivatives(CPUDEVICE);
|
||||
|
||||
// Does some sanity check first.
|
||||
if (uttID != m_currentUttID)
|
||||
assert(uttInfoInMinibatch != NULL);
|
||||
assert(uttInfo.size() == m_numUttsPerMinibatch);
|
||||
assert(sentenceBegin.GetNumRows() == m_numUttsPerMinibatch);
|
||||
assert(minibatchPackingFlag.size() == sentenceBegin.GetNumCols());
|
||||
uttInfoInMinibatch->clear();
|
||||
uttInfoInMinibatch->resize(uttInfo.size());
|
||||
for (size_t i = 0; i < uttInfo.size(); ++i)
|
||||
{
|
||||
RuntimeError("Requested utterance does not matched the utterance that we have computed derivatives for: %S v.s. %S\n", uttID.c_str(), m_currentUttID.c_str());
|
||||
}
|
||||
if (!m_currentUttHasDeriv)
|
||||
{
|
||||
RuntimeError("Derivatives have not been computed, you have to call KaldiSequenceTrainingIO::ComputeDerivative() before using it.\n");
|
||||
}
|
||||
assert(startFrame >= 0);
|
||||
assert(endFrame <= m_currentUttLength);
|
||||
|
||||
derivatives.Resize(m_transModel.NumPdfs(), mbSize);
|
||||
derivatives.SetValue(0);
|
||||
for (size_t t = startFrame; t < endFrame; ++t)
|
||||
{
|
||||
for (size_t i = 0; i < m_posteriors[t].size(); ++i)
|
||||
size_t startFrameIndexInMinibatch = 0;
|
||||
size_t numFrames = 0;
|
||||
for (size_t j = 0; j < sentenceBegin.GetNumCols(); ++j)
|
||||
{
|
||||
size_t pdf_id = m_posteriors[t][i].first;
|
||||
assert(pdf_id < m_transModel.NumPdfs());
|
||||
derivatives(pdf_id, t - startFrame) -= m_posteriors[t][i].second; /* Flip the sign */
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if we need to move data to GPU.
|
||||
if (derivativesIn.GetDeviceId() >= 0)
|
||||
derivatives.TransferFromDeviceToDevice(CPUDEVICE, derivativesIn.GetDeviceId(), true, false, false);
|
||||
|
||||
derivativesIn.SetValue(derivatives);
|
||||
|
||||
// We've used up all the derivatives, reset it.
|
||||
if (endFrame >= m_currentUttLength)
|
||||
{
|
||||
m_derivRead = true;
|
||||
if (m_objRead)
|
||||
{
|
||||
m_currentUttID = L"";
|
||||
m_currentUttHasDeriv = false;
|
||||
m_currentUttLength = 0;
|
||||
if (((size_t)sentenceBegin(i, j) & NO_LABELS) == NO_LABELS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
numFrames += 1;
|
||||
if ((((size_t)sentenceBegin(i, j) & SENTENCE_END) == SENTENCE_END)
|
||||
|| j == sentenceBegin.GetNumCols() - 1)
|
||||
{
|
||||
size_t uttIndex = (*uttInfoInMinibatch)[i].size();
|
||||
wstring uttID = uttInfo[i][uttIndex].first;
|
||||
(*uttInfoInMinibatch)[i].push_back(
|
||||
make_pair(uttID, make_pair(startFrameIndexInMinibatch, numFrames)));
|
||||
startFrameIndexInMinibatch = j + 1;
|
||||
numFrames = 0;
|
||||
}
|
||||
}
|
||||
assert(uttInfo[i].size() == (*uttInfoInMinibatch)[i].size());
|
||||
}
|
||||
}
|
||||
|
||||
// Suppose we have a, b, c 3 streams, the <logLikelihoodIn> is in the
|
||||
// following format:
|
||||
// 1: a11 b11 c11 a12 b12 c12...
|
||||
// 2: a21 b21 c21 a22 b22 c22...
|
||||
// 3: a31 b31 c31 a32 b32 c32...
|
||||
template<class ElemType>
|
||||
void KaldiSequenceTrainingIO<ElemType>::GetObjectives(size_t startFrame,
|
||||
size_t endFrame,
|
||||
const std::wstring& uttID,
|
||||
Matrix<ElemType>& objectivesIn)
|
||||
bool KaldiSequenceTrainingIO<ElemType>::SetLikelihood(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& logLikelihoodIn,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag)
|
||||
{
|
||||
Matrix<ElemType> objectives(CPUDEVICE);
|
||||
assert(m_needLikelihood == true);
|
||||
std::vector<std::vector<
|
||||
std::pair<wstring, std::pair<size_t, size_t>>>> uttInfoInMinibatch;
|
||||
ProcessUttInfo(uttInfo, sentenceBegin,
|
||||
minibatchPackingFlag, &uttInfoInMinibatch);
|
||||
|
||||
// Does some sanity check first.
|
||||
if (uttID != m_currentUttID)
|
||||
// Checks if we need to move data to CPU.
|
||||
Matrix<ElemType> logLikelihood(logLikelihoodIn);
|
||||
if (logLikelihood.GetDeviceId() >= 0)
|
||||
{
|
||||
RuntimeError("Requested utterance does not matched the utterance that we have computed objectives for: %S v.s. %S\n", uttID.c_str(), m_currentUttID.c_str());
|
||||
logLikelihood.TransferFromDeviceToDevice(
|
||||
logLikelihood.GetDeviceId(), CPUDEVICE, true, false, false);
|
||||
}
|
||||
if (!m_currentUttHasDeriv)
|
||||
|
||||
bool minibatchComplete = true;
|
||||
size_t currentMBSize = minibatchPackingFlag.size();
|
||||
for (size_t i = 0; i < uttInfo.size(); ++i)
|
||||
{
|
||||
RuntimeError("Objectives have not been computed, you have to call KaldiSequenceTrainingIO::ComputeDerivative() before using it.\n");
|
||||
}
|
||||
assert(startFrame >= 0);
|
||||
assert(endFrame <= m_currentUttLength);
|
||||
|
||||
objectives.Resize(1, 1);
|
||||
objectives.SetValue(m_objective * static_cast<ElemType>(endFrame - startFrame) / static_cast<ElemType>(m_currentUttLength));
|
||||
|
||||
// Checks if we need to move data to GPU.
|
||||
if (objectivesIn.GetDeviceId() >= 0)
|
||||
objectives.TransferFromDeviceToDevice(CPUDEVICE, objectivesIn.GetDeviceId(), true, false, false);
|
||||
|
||||
objectivesIn.SetValue(objectives);
|
||||
|
||||
// We've used up all the objectives, reset it.
|
||||
if (endFrame >= m_currentUttLength)
|
||||
{
|
||||
m_objRead = true;
|
||||
if (m_derivRead)
|
||||
assert(uttInfo[i].size() == uttInfoInMinibatch[i].size());
|
||||
for (size_t j = 0; j < uttInfo[i].size(); ++j)
|
||||
{
|
||||
m_currentUttID = L"";
|
||||
m_currentUttHasDeriv = false;
|
||||
m_currentUttLength = 0;
|
||||
wstring uttID = uttInfo[i][j].first;
|
||||
if (m_uttPool.find(uttID) == m_uttPool.end())
|
||||
{
|
||||
UtteranceDerivativeUnit tmpUttUnit;
|
||||
tmpUttUnit.hasDerivative = false;
|
||||
tmpUttUnit.uttLength = uttInfo[i][j].second;
|
||||
tmpUttUnit.progress = 0;
|
||||
tmpUttUnit.streamID = i;
|
||||
tmpUttUnit.logLikelihood.Resize(m_transModel.NumPdfs(),
|
||||
tmpUttUnit.uttLength);
|
||||
m_uttPool[uttID] = tmpUttUnit;
|
||||
}
|
||||
|
||||
// Sets the likelihood and computes derivatives.
|
||||
assert(m_uttPool.find(uttID) != m_uttPool.end());
|
||||
if (m_uttPool[uttID].hasDerivative == false)
|
||||
{
|
||||
assert(uttID == uttInfoInMinibatch[i][j].first);
|
||||
size_t startFrame = uttInfoInMinibatch[i][j].second.first;
|
||||
size_t numFrames = uttInfoInMinibatch[i][j].second.second;
|
||||
assert(m_uttPool[uttID].progress + numFrames
|
||||
<= m_uttPool[uttID].uttLength);
|
||||
|
||||
// Sets the likelihood.
|
||||
for (size_t k = 0; k < numFrames; ++k)
|
||||
{
|
||||
m_uttPool[uttID].logLikelihood.SetColumn(
|
||||
logLikelihood.ColumnSlice(
|
||||
(startFrame + k) * m_numUttsPerMinibatch + i, 1),
|
||||
m_uttPool[uttID].progress + k);
|
||||
}
|
||||
|
||||
m_uttPool[uttID].progress += numFrames;
|
||||
if (m_uttPool[uttID].progress == m_uttPool[uttID].uttLength)
|
||||
{
|
||||
ComputeDerivative(uttID);
|
||||
m_uttPool[uttID].hasDerivative = true;
|
||||
m_uttPool[uttID].progress = 0;
|
||||
if (startFrame + numFrames == currentMBSize)
|
||||
{
|
||||
m_lastCompleteMinibatch[m_uttPool[uttID].streamID]
|
||||
= m_minibatchIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_lastCompleteMinibatch[m_uttPool[uttID].streamID]
|
||||
= m_minibatchIndex - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if we are ready to provide derivatives.
|
||||
m_minCompleteMinibatchIndex = *std::min_element(
|
||||
m_lastCompleteMinibatch.begin(), m_lastCompleteMinibatch.end());
|
||||
m_needLikelihood = (m_minCompleteMinibatchIndex >= 1) ? false : true;
|
||||
m_minibatchIndex += 1;
|
||||
}
|
||||
|
||||
// Suppose we have a, b, c 3 streams, the <derivativesOut> should be in the
|
||||
// following format:
|
||||
// 1: a11 b11 c11 a12 b12 c12...
|
||||
// 2: a21 b21 c21 a22 b22 c22...
|
||||
// 3: a31 b31 c31 a32 b32 c32...
|
||||
template<class ElemType>
|
||||
bool KaldiSequenceTrainingIO<ElemType>::GetDerivative(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag,
|
||||
Matrix<ElemType>* derivativesOut)
|
||||
{
|
||||
assert(derivativesOut != NULL);
|
||||
std::vector<std::vector<
|
||||
std::pair<wstring, std::pair<size_t, size_t>>>> uttInfoInMinibatch;
|
||||
ProcessUttInfo(uttInfo, sentenceBegin,
|
||||
minibatchPackingFlag, &uttInfoInMinibatch);
|
||||
|
||||
Matrix<ElemType> derivatives(CPUDEVICE);
|
||||
derivatives.Resize(m_transModel.NumPdfs(),
|
||||
sentenceBegin.GetNumCols() * sentenceBegin.GetNumRows());
|
||||
derivatives.SetValue(0);
|
||||
|
||||
m_currentObj = 0;
|
||||
for (size_t i = 0; i < uttInfo.size(); ++i)
|
||||
{
|
||||
assert(uttInfo[i].size() == uttInfoInMinibatch[i].size());
|
||||
for (size_t j = 0; j < uttInfo[i].size(); ++j)
|
||||
{
|
||||
wstring uttID = uttInfo[i][j].first;
|
||||
|
||||
// Checks if we have derivatives.
|
||||
if (m_uttPool.find(uttID) == m_uttPool.end()
|
||||
|| (m_uttPool.find(uttID) != m_uttPool.end()
|
||||
&& m_uttPool[uttID].hasDerivative == false))
|
||||
{
|
||||
RuntimeError("Derivatives are not ready for utterance:"
|
||||
" %S\n", uttID.c_str());
|
||||
}
|
||||
|
||||
// Assign the derivatives.
|
||||
assert(uttID == uttInfoInMinibatch[i][j].first);
|
||||
size_t startFrame = uttInfoInMinibatch[i][j].second.first;
|
||||
size_t startFrameInUtt = m_uttPool[uttID].progress;
|
||||
size_t numFrames = uttInfoInMinibatch[i][j].second.second;
|
||||
for (size_t k = 0; k < numFrames; ++k)
|
||||
{
|
||||
size_t posStart = startFrameInUtt + k;
|
||||
for (size_t l = 0;
|
||||
l < m_uttPool[uttID].posterior[posStart].size(); ++l)
|
||||
{
|
||||
size_t pdf_id =
|
||||
m_uttPool[uttID].posterior[posStart][l].first;
|
||||
assert(pdf_id < m_transModel.NumPdfs());
|
||||
derivatives(pdf_id,
|
||||
(startFrame + k) * m_numUttsPerMinibatch + i) -=
|
||||
m_uttPool[uttID].posterior[posStart][l].second;
|
||||
}
|
||||
}
|
||||
m_currentObj += m_uttPool[uttID].objective
|
||||
* numFrames / m_uttPool[uttID].uttLength;
|
||||
m_uttPool[uttID].progress += numFrames;
|
||||
assert(m_uttPool[uttID].progress <= m_uttPool[uttID].uttLength);
|
||||
if (m_uttPool[uttID].progress == m_uttPool[uttID].uttLength)
|
||||
{
|
||||
m_uttPool.erase(uttID);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if we need to move data to GPU.
|
||||
if (derivativesOut->GetDeviceId() >= 0)
|
||||
{
|
||||
derivatives.TransferFromDeviceToDevice(
|
||||
CPUDEVICE, derivativesOut->GetDeviceId(), true, false, false);
|
||||
}
|
||||
derivativesOut->SetValue(derivatives);
|
||||
|
||||
// Keeps the utterance information so we can check next time when we
|
||||
// gives the objectives.
|
||||
m_currentUttInfo = uttInfo;
|
||||
|
||||
// Checks if we need to read more loglikelihoods.
|
||||
m_needLikelihood = false;
|
||||
m_minCompleteMinibatchIndex -= 1;
|
||||
if (m_minCompleteMinibatchIndex <= 0)
|
||||
{
|
||||
m_needLikelihood = true;
|
||||
m_minibatchIndex = 1;
|
||||
m_lastCompleteMinibatch.assign(m_numUttsPerMinibatch, 0);
|
||||
|
||||
// Un-do the logLikelihood for partial utterances.
|
||||
for (auto iter = m_uttPool.begin(); iter != m_uttPool.end(); ++iter)
|
||||
{
|
||||
if (iter->second.hasDerivative == false)
|
||||
{
|
||||
iter->second.progress = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
bool KaldiSequenceTrainingIO<ElemType>::GetObjective(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
Matrix<ElemType>* objectivesIn)
|
||||
{
|
||||
assert(objectivesIn != NULL);
|
||||
|
||||
// Checks utterance information.
|
||||
bool match = true;
|
||||
if (uttInfo.size() == m_currentUttInfo.size())
|
||||
{
|
||||
for (size_t i = 0; i < uttInfo.size(); ++i)
|
||||
{
|
||||
if (uttInfo[i].size() != m_currentUttInfo[i].size())
|
||||
{
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
for (size_t j = 0; j < uttInfo[i].size(); ++j)
|
||||
{
|
||||
if (uttInfo[i][j].first != m_currentUttInfo[i][j].first ||
|
||||
uttInfo[i][j].second != m_currentUttInfo[i][j].second)
|
||||
{
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
match = false;
|
||||
}
|
||||
if (!match)
|
||||
{
|
||||
RuntimeError("Current objective does not correspond to the"
|
||||
" minibatch utterance information, perhaps you did not"
|
||||
" run GetObjective() right after GetDerivatives()?");
|
||||
}
|
||||
|
||||
// Sets the objectives...
|
||||
objectivesIn->Resize(1, 1);
|
||||
objectivesIn->SetValue(m_currentObj);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template class KaldiSequenceTrainingIO<float>;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "kaldi.h"
|
||||
#include "Matrix.h"
|
||||
#include "basetypes.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -12,50 +13,93 @@ class KaldiSequenceTrainingIO
|
|||
{
|
||||
private:
|
||||
bool m_oneSilenceClass;
|
||||
bool m_currentUttHasDeriv;
|
||||
bool m_derivRead;
|
||||
bool m_objRead;
|
||||
bool m_needLikelihood;
|
||||
size_t m_numUttsPerMinibatch;
|
||||
wstring m_trainCriterion;
|
||||
wstring m_currentUttID;
|
||||
ElemType m_oldAcousticScale;
|
||||
ElemType m_acousticScale;
|
||||
ElemType m_lmScale;
|
||||
ElemType m_objective;
|
||||
std::vector<kaldi::int32> m_silencePhones;
|
||||
size_t m_currentUttLength;
|
||||
kaldi::TransitionModel m_transModel;
|
||||
kaldi::Posterior m_posteriors;
|
||||
kaldi::RandomAccessCompactLatticeReader* m_denlatReader; /*denominator lattices*/
|
||||
kaldi::RandomAccessInt32VectorReader* m_aliReader; /*alignment*/
|
||||
kaldi::RandomAccessCompactLatticeReader* m_denlatReader;
|
||||
kaldi::RandomAccessInt32VectorReader* m_aliReader;
|
||||
|
||||
struct UtteranceDerivativeUnit
|
||||
{
|
||||
bool hasDerivative;
|
||||
size_t uttLength;
|
||||
size_t progress;
|
||||
size_t streamID;
|
||||
Matrix<ElemType> logLikelihood;
|
||||
kaldi::Posterior posterior;
|
||||
ElemType objective;
|
||||
|
||||
UtteranceDerivativeUnit() : logLikelihood(CPUDEVICE)
|
||||
{
|
||||
hasDerivative = false;
|
||||
uttLength = 0;
|
||||
progress = 0;
|
||||
streamID = 0;
|
||||
}
|
||||
};
|
||||
ElemType m_currentObj;
|
||||
int m_minCompleteMinibatchIndex;
|
||||
size_t m_minibatchIndex;
|
||||
std::vector<size_t> m_lastCompleteMinibatch;
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> m_currentUttInfo;
|
||||
unordered_map<wstring, UtteranceDerivativeUnit> m_uttPool;
|
||||
|
||||
// Rescores the lattice with the latest posteriors from the neural network.
|
||||
void LatticeAcousticRescore(const std::vector<kaldi::int32>& stateTimes,
|
||||
const Matrix<ElemType>& outputs, kaldi::Lattice* lat);
|
||||
void LatticeAcousticRescore(
|
||||
const std::vector<kaldi::int32>& stateTimes,
|
||||
const Matrix<ElemType>& outputs, kaldi::Lattice* lat) const;
|
||||
|
||||
// <uttInfoInMinibatch> is a vector of vector of the following:
|
||||
// uttID startFrameIndexInMinibatch numFrames
|
||||
void ProcessUttInfo(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag,
|
||||
std::vector<std::vector<std::pair<
|
||||
wstring, std::pair<size_t, size_t>>>>* uttInfoInMinibatch) const;
|
||||
|
||||
bool ComputeDerivative(const wstring& uttID);
|
||||
|
||||
public:
|
||||
// Constructor.
|
||||
KaldiSequenceTrainingIO(const wstring& denlatRspecifier, const wstring& aliRspecifier,
|
||||
const wstring& transModelFilename, const wstring& silencePhoneStr,
|
||||
KaldiSequenceTrainingIO(const wstring& denlatRspecifier,
|
||||
const wstring& aliRspecifier,
|
||||
const wstring& transModelFilename,
|
||||
const wstring& silencePhoneStr,
|
||||
const wstring& trainCriterion,
|
||||
ElemType oldAcousticScale,
|
||||
ElemType acousticScale,
|
||||
ElemType lmScale,
|
||||
bool oneSilenceClass);
|
||||
bool oneSilenceClass,
|
||||
size_t numberOfuttsPerMinibatch);
|
||||
|
||||
// Destructor.
|
||||
~KaldiSequenceTrainingIO();
|
||||
|
||||
bool HasDerivatives(const wstring& uttID);
|
||||
// Returns the current value of m_needLikelihood without modifying the object.
// NOTE(review): judging by the name, a true result means the caller still has
// to supply network outputs (see SetLikelihood) before derivatives can be
// computed -- confirm against the .cpp implementation.
bool NeedLikelihoodToComputeDerivative() const { return m_needLikelihood; }
|
||||
|
||||
bool ComputeDerivatives(const wstring& uttID, const Matrix<ElemType>& outputs);
|
||||
bool SetLikelihood(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag);
|
||||
|
||||
// Gets the computed derivatives for given utterance.
|
||||
void GetDerivatives(size_t startFrame, size_t endFrame, size_t mbSize,
|
||||
const std::wstring& uttID, Matrix<ElemType>& derivatives);
|
||||
bool GetDerivative(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlag>& minibatchPackingFlag,
|
||||
Matrix<ElemType>* derivativesOut);
|
||||
|
||||
// Gets the computed objectives for given utterance.
|
||||
void GetObjectives(size_t startFrame, size_t endFrame,
|
||||
const std::wstring& uttID, Matrix<ElemType>& derivatives);
|
||||
bool GetObjective(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
Matrix<ElemType>* objectivesIn);
|
||||
};
|
||||
|
||||
}}}
|
||||
|
|
|
@ -1631,13 +1631,17 @@ protected:
|
|||
// Tries to read an utterance and run forward computation on the
|
||||
// whole utterance.
|
||||
assert(trainSetDataReader != NULL);
|
||||
std::wstring uttID;
|
||||
if (trainSetDataReader->GetForkedUtterance(uttID, *inputMatrices))
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> uttInfo;
|
||||
Matrix<ElemType> sentenceBoundary;
|
||||
std::vector<MinibatchPackingFlag> minibatchPackingFlag;
|
||||
while (trainSetDataReader->GetMinibatchCopy(uttInfo, *inputMatrices,
|
||||
sentenceBoundary,
|
||||
minibatchPackingFlag))
|
||||
{
|
||||
UpdateEvalTimeStamps(FeatureNodes);
|
||||
|
||||
std::vector<ComputationNodePtr>* outputNodes = net.OutputNodes();
|
||||
if (outputNodes->size() < 1)
|
||||
std::vector<ComputationNodePtr>* outputNodes = net.OutputNodes();
|
||||
if (outputNodes->size() < 1)
|
||||
{
|
||||
throw std::logic_error("no output node was found.");
|
||||
}
|
||||
|
@ -1645,8 +1649,11 @@ protected:
|
|||
net.SetActualMiniBatchSize(actualMBSize);
|
||||
net.SetActualNbrSlicesInEachRecIter(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
trainSetDataReader->SetSentenceSegBatch(net.SentenceBoundary(), net.MinibatchPackingFlags());
|
||||
net.Evaluate((*outputNodes)[0]); // Only evaluate the first output
|
||||
trainSetDataReader->ComputeDerivativeFeatures(uttID, (*outputNodes)[0]->FunctionValues());
|
||||
net.Evaluate((*outputNodes)[0]); // Only evaluate the first output
|
||||
trainSetDataReader->SetNetOutput(uttInfo,
|
||||
(*outputNodes)[0]->FunctionValues(),
|
||||
sentenceBoundary,
|
||||
minibatchPackingFlag);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче