changed MinibatchPackingFlags from unsigned char to char, so we can use a Matrix<char> for that in the future;
updated GetMinibatchCopy() to use MBLayout; added a runtime check to Evaluate() in order to verify invariants of MBLayout; renamed SetActualNbrSlicesInEachRecIter() to SetActualNbrSlicesInEachRecurentIteration() for consistency
This commit is contained in:
Родитель
0c1111bb01
Коммит
9575edc2c4
|
@ -259,12 +259,11 @@ template<class ElemType>
|
|||
bool DataReader<ElemType>::GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
Matrix<float>& sentenceBegin,
|
||||
std::vector<MinibatchPackingFlags>& minibatchPackingFlags)
|
||||
MBLayoutPtr pMBLayout)
|
||||
{
|
||||
bool ans = false;
|
||||
for (size_t i = 0; i < m_ioNames.size(); i++)
|
||||
ans = (m_dataReader[m_ioNames[i]]->GetMinibatchCopy(uttInfo, matrices, sentenceBegin, minibatchPackingFlags) || ans);
|
||||
ans = (m_dataReader[m_ioNames[i]]->GetMinibatchCopy(uttInfo, matrices, pMBLayout) || ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
@ -272,12 +271,11 @@ template<class ElemType>
|
|||
bool DataReader<ElemType>::SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<float>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlags>& minibatchPackingFlags)
|
||||
const MBLayoutPtr pMBLayout)
|
||||
{
|
||||
bool ans = false;
|
||||
for (size_t i = 0; i < m_ioNames.size(); i++)
|
||||
ans = (m_dataReader[m_ioNames[i]]->SetNetOutput(uttInfo, outputs, sentenceBegin, minibatchPackingFlags) || ans);
|
||||
ans = (m_dataReader[m_ioNames[i]]->SetNetOutput(uttInfo, outputs, pMBLayout) || ans);
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
|
|
@ -102,22 +102,22 @@ public:
|
|||
|
||||
// Gets a copy of the minibatch for the forward computation. This can be
|
||||
// useful if some of the computation has to happen in the reader.
|
||||
// TODO: No, there should be no computation in the reader.
|
||||
virtual bool GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& /*uttInfo*/,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& /*matrices*/,
|
||||
Matrix<float>& /*sentenceBegin*/,
|
||||
std::vector<MinibatchPackingFlags>& /*minibatchPackingFlags*/)
|
||||
MBLayoutPtr /*data copied here*/)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Sets the neural network output to the reader. This can be useful if some
|
||||
// of the computation has to happen in the reader.
|
||||
// TODO: No, there should be no computation in the reader.
|
||||
virtual bool SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& /*uttInfo*/,
|
||||
const Matrix<ElemType>& /*outputs*/,
|
||||
const Matrix<float>& /*sentenceBegin*/,
|
||||
const std::vector<MinibatchPackingFlags>& /*minibatchPackingFlags*/)
|
||||
const MBLayoutPtr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -227,16 +227,14 @@ public:
|
|||
virtual bool GetMinibatchCopy(
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
std::map<std::wstring, Matrix<ElemType>*>& matrices,
|
||||
Matrix<float>& sentenceBegin,
|
||||
std::vector<MinibatchPackingFlags>& minibatchPackingFlags);
|
||||
MBLayoutPtr);
|
||||
|
||||
// Sets the neural network output to the reader. This can be useful if some
|
||||
// of the computation has to happen in the reader.
|
||||
virtual bool SetNetOutput(
|
||||
const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
|
||||
const Matrix<ElemType>& outputs,
|
||||
const Matrix<float>& sentenceBegin,
|
||||
const std::vector<MinibatchPackingFlags>& minibatchPackingFlags);
|
||||
const MBLayoutPtr);
|
||||
|
||||
void CopyMBLayoutTo(MBLayoutPtr pMBLayout);
|
||||
|
||||
|
|
|
@ -1052,20 +1052,9 @@ public:
|
|||
#define EPSILON 1e-5
|
||||
#define ISCLOSE(a, b, threshold) (abs(a - b) < threshold)?true:false
|
||||
|
||||
// why are the following in basetypes.h?
|
||||
/**
|
||||
These macros are used for sentence segmentation information.
|
||||
TODO: get rid of this, no need
|
||||
*/
|
||||
//#define ((int) MinibatchPackingFlags::SequenceStart) ((int) MinibatchPackingFlags::SequenceStart)
|
||||
//#define ((int) MinibatchPackingFlags::None) ((int) MinibatchPackingFlags::None)
|
||||
//#define ((int) MinibatchPackingFlags::SequenceEnd) ((int) MinibatchPackingFlags::SequenceEnd)
|
||||
//#define ((int) MinibatchPackingFlags::NoInput) ((int) MinibatchPackingFlags::NoInput)
|
||||
//#define ((int) MinibatchPackingFlags::NoFeature) ((int) MinibatchPackingFlags::NoFeature)
|
||||
//#define ((int) MinibatchPackingFlags::NoLabel) ((int) MinibatchPackingFlags::NoLabel)
|
||||
|
||||
// why is this in basetypes.h?
|
||||
// boundary flags for a frame
|
||||
enum class MinibatchPackingFlags : unsigned char
|
||||
enum class MinibatchPackingFlags : char // (note: not using unsigned char because these go into a matrix, and we use Matrix<char>, since we use it as a data holder)
|
||||
{
|
||||
None = 0,
|
||||
SequenceStart = 1 << 0, // binary 0001 frame is first of an utterance
|
||||
|
|
|
@ -594,6 +594,13 @@ public:
|
|||
// TODO: rename to ForwardProp()? To make it very clear?
|
||||
void Evaluate(const ComputationNodeBasePtr rootNode)
|
||||
{
|
||||
// Temporary consistency checks that will disappear once we complete the refactoring. If these pass for a while, we will eliminate one of the two redundant values compared in each check.
|
||||
// If this fails, comment this out (it is safe) and tell fseide@microsoft.com.
|
||||
if (m_pMBLayout && m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumStreams())
|
||||
LogicError("Evaluate: detected that m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumStreams()");
|
||||
if (m_pMBLayout && m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize())
|
||||
LogicError("Evaluate: detected that m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize()");
|
||||
|
||||
// prepare to compute with the subnetwork that this rootNode depends on, including
|
||||
// - auto-detecting recurrent loops
|
||||
// - calling Validate() on all nodes, which, among other things, resizes the matrices
|
||||
|
@ -673,7 +680,7 @@ public:
|
|||
// returns the result from SetActualMiniBatchSize(). Note GetActualMBSize() also exists but returns a value derived from the inputs' dimensions
|
||||
size_t GetMaxMBSize() { return m_actMiniBSize; }
|
||||
|
||||
void SetActualNbrSlicesInEachRecIter(const size_t aSize)
|
||||
void SetActualNbrSlicesInEachRecurentIteration(const size_t aSize)
|
||||
{
|
||||
m_nbrSlicesInEachRecurrentIteration = aSize;
|
||||
}
|
||||
|
|
|
@ -1160,7 +1160,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t actualMBSize = encoderNet->GetActualMBSize();
|
||||
|
||||
encoderNet->SetActualMiniBatchSize(actualMBSize);
|
||||
encoderNet->SetActualNbrSlicesInEachRecIter(encoderTrainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
encoderNet->SetActualNbrSlicesInEachRecurentIteration(encoderTrainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
encoderTrainSetDataReader->CopyMBLayoutTo(encoderNet->GetMBLayoutPtr());
|
||||
|
||||
encoderNet->Evaluate(encoderEvaluationNodes[0]);
|
||||
|
@ -1168,7 +1168,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
actualMBSize = decoderNet->GetActualMBSize();
|
||||
|
||||
decoderNet->SetActualMiniBatchSize(actualMBSize);
|
||||
decoderNet->SetActualNbrSlicesInEachRecIter(decoderTrainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderTrainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
|
||||
/// not the sentence beginning, because the initial hidden layer activity is from the encoder network
|
||||
decoderTrainSetDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr());
|
||||
|
|
|
@ -1320,7 +1320,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
size_t actualMBSize = net.GetActualMBSize();
|
||||
net.SetActualMiniBatchSize(actualMBSize);
|
||||
net.SetActualNbrSlicesInEachRecIter(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr());
|
||||
|
||||
// TODO: Exactly this loop should be INSIDE ComputationNetwork--pass the nodes array instead!
|
||||
|
@ -1769,11 +1769,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// whole utterance.
|
||||
assert(trainSetDataReader != NULL);
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> uttInfo;
|
||||
Matrix<float> sentenceBoundary;
|
||||
std::vector<MinibatchPackingFlags> minibatchPackingFlags;
|
||||
while (trainSetDataReader->GetMinibatchCopy(uttInfo, *inputMatrices,
|
||||
sentenceBoundary,
|
||||
minibatchPackingFlags))
|
||||
auto pMBLayout = make_shared<MBLayout>();
|
||||
while (trainSetDataReader->GetMinibatchCopy(uttInfo, *inputMatrices, pMBLayout))
|
||||
{
|
||||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
|
||||
|
@ -1783,13 +1780,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
size_t actualMBSize = net.GetActualMBSize();
|
||||
net.SetActualMiniBatchSize(actualMBSize);
|
||||
net.SetActualNbrSlicesInEachRecIter(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr());
|
||||
net.Evaluate(outputNodes[0]); // Only evaluate the first output
|
||||
trainSetDataReader->SetNetOutput(uttInfo,
|
||||
dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[0])->FunctionValues(),
|
||||
sentenceBoundary,
|
||||
minibatchPackingFlags);
|
||||
pMBLayout);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1964,7 +1960,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
nSamplesSinceLastModelSync += actualMBSize;
|
||||
net.SetActualMiniBatchSize(actualMBSize);
|
||||
net.SetActualNbrSlicesInEachRecIter(nSlices);
|
||||
net.SetActualNbrSlicesInEachRecurentIteration(nSlices);
|
||||
|
||||
if (!useDistributedMBReading && useParallelTrain && trainSetDataReader->RequireSentenceSeg())
|
||||
{
|
||||
|
@ -1987,7 +1983,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr)
|
||||
{
|
||||
refNet.SetActualMiniBatchSize(actualMBSize);
|
||||
refNet.SetActualNbrSlicesInEachRecIter(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
refNet.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter());
|
||||
refNet.Evaluate(refNode);
|
||||
Matrix<ElemType>::ScaleAndAdd((ElemType)m_adaptationRegWeight,
|
||||
dynamic_pointer_cast<ComputationNode<ElemType>>(refNode)->FunctionValues(),
|
||||
|
|
|
@ -129,7 +129,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
actualMBSize = m_net.GetActualMBSize();
|
||||
m_net.SetActualMiniBatchSize(actualMBSize);
|
||||
m_net.SetActualNbrSlicesInEachRecIter(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
dataReader->CopyMBLayoutTo(m_net.GetMBLayoutPtr());
|
||||
|
||||
//for now since we share the same label masking flag we call this on one node only
|
||||
|
@ -450,7 +450,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
LogicError("decoderTrainSetDataReader read data but encoderNet reports no data read");
|
||||
|
||||
(*ptr)->SetActualMiniBatchSize(actualMBSize);
|
||||
(*ptr)->SetActualNbrSlicesInEachRecIter((*preader)->NumberSlicesInEachRecurrentIter());
|
||||
(*ptr)->SetActualNbrSlicesInEachRecurentIteration((*preader)->NumberSlicesInEachRecurrentIter());
|
||||
(*preader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr());
|
||||
|
||||
const auto & pairs = (*ptr)->PairNodes();
|
||||
|
@ -464,7 +464,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
decoderNet->SetActualMiniBatchSize(actualMBSize);
|
||||
if (actualMBSize == 0)
|
||||
LogicError("decoderTrainSetDataReader read data but decoderNet reports no data read");
|
||||
decoderNet->SetActualNbrSlicesInEachRecIter(decoderDataReader->NumberSlicesInEachRecurrentIter());
|
||||
decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderDataReader->NumberSlicesInEachRecurrentIter());
|
||||
decoderDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr());
|
||||
|
||||
size_t i = 0;
|
||||
|
@ -667,7 +667,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
(*ptr)->SetActualMiniBatchSize(actualMBSize);
|
||||
mNutt = (*ptrreader)->NumberSlicesInEachRecurrentIter();
|
||||
(*ptr)->SetActualNbrSlicesInEachRecIter(mNutt);
|
||||
(*ptr)->SetActualNbrSlicesInEachRecurentIteration(mNutt);
|
||||
(*ptrreader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr());
|
||||
|
||||
const auto & pairs = (*ptr)->PairNodes();
|
||||
|
@ -679,7 +679,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
/// not the sentence beginning, because the initial hidden layer activity is from the encoder network
|
||||
decoderNet->SetActualMiniBatchSize(actualMBSize);
|
||||
decoderNet->SetActualNbrSlicesInEachRecIter(mNutt);
|
||||
decoderNet->SetActualNbrSlicesInEachRecurentIteration(mNutt);
|
||||
encoderDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr());
|
||||
|
||||
FindBestPathWithVariableLength(decoderNet, actualMBSize, decoderDataReader, dataWriter, outputNodes, writeNodes, decoderFeatureNodes, beam, &decoderInputMatrices, best_path);
|
||||
|
@ -910,7 +910,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
/// use reader to initialize evalnet's sentence start information to let it know that this
|
||||
/// is the beginning of sentence
|
||||
evalnet->SetActualMiniBatchSize(mbSize);
|
||||
evalnet->SetActualNbrSlicesInEachRecIter(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr());
|
||||
|
||||
clock_t start, now;
|
||||
|
@ -1069,7 +1069,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
/// use reader to initialize evalnet's sentence start information to let it know that this
|
||||
/// is the beginning of sentence
|
||||
evalnet->SetActualMiniBatchSize(mbSize);
|
||||
evalnet->SetActualNbrSlicesInEachRecIter(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
|
||||
clock_t start, now;
|
||||
start = clock();
|
||||
|
|
|
@ -74,7 +74,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
size_t actualMBSize = m_net.GetActualMBSize();
|
||||
m_net.SetActualMiniBatchSize(actualMBSize);
|
||||
m_net.SetActualNbrSlicesInEachRecIter(dataReader.NumberSlicesInEachRecurrentIter());
|
||||
m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader.NumberSlicesInEachRecurrentIter());
|
||||
dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
|
||||
|
||||
for (int i=0; i<outputNodes.size(); i++)
|
||||
|
|
|
@ -523,7 +523,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
MBLayout() : m_sentenceBoundaryFlags(CPUDEVICE) { }
|
||||
|
||||
Matrix<float> m_sentenceBoundaryFlags; // (t,stream)
|
||||
// ^^ float -> MinibatchPackingFlags, right?
|
||||
// ^^ float -> MinibatchPackingFlags, right? Or unsigned char; or change that to 'char' because Matrix<char> already exists
|
||||
// This matrix ^^ is always in CPU memory --TODO: should rather be a matrix of some int
|
||||
/// conditionally point to either a pointer to that provided by network, or point to
|
||||
/// an individual sentence boundary info, which happens if timeStep > 1 is required for PastValue node
|
||||
|
@ -538,7 +538,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
#if 0 // we have this pattern often:
|
||||
// TODO: mbSize and #slices must also move into MBLayout
|
||||
evalnet->SetActualMiniBatchSize(mbSize);
|
||||
evalnet->SetActualNbrSlicesInEachRecIter(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter());
|
||||
dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr());
|
||||
#endif
|
||||
#if 0 // a VERY TELLING piece of code
|
||||
|
|
Загрузка…
Ссылка в новой задаче