bug fix: LMSequenceReader randomization must be deterministic (seed = epoch)

Frank Seide 2016-03-09 22:22:24 -08:00
Parent ba2238d215
Commit 4209d9df10
10 changed files with 75 additions and 53 deletions
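The premise of the fix, as a minimal illustrative C++ sketch (not CNTK's actual reader code; EpochOrder is a hypothetical name): when the shuffle RNG is seeded with the epoch number, restarting training from an epoch checkpoint replays the identical data order.

#include <algorithm>
#include <random>
#include <vector>

// Hypothetical helper: compute the sentence order for one epoch.
std::vector<size_t> EpochOrder(unsigned int epoch, size_t numSentences)
{
    std::vector<size_t> order(numSentences);
    for (size_t i = 0; i < numSentences; i++)
        order[i] = i;
    std::mt19937 g(epoch); // deterministic: the same epoch always yields the same permutation
    std::shuffle(order.begin(), order.end(), g);
    return order;
}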

View file

@@ -268,8 +268,8 @@ Parameters =
 Stabilize (x, enabled=true) =
     if enabled
     then [
-        beta = Exp(ScalarParam())
-        result = Scale(beta, x)
+        beta = Exp (ScalarParam())
+        result = Scale (beta, x)
     ].result
     else x
 ]

View file

@@ -18,14 +18,14 @@ using namespace std;
 namespace Microsoft { namespace MSR { namespace CNTK {
 #define FUNCTIONOPEN "("
-#define OPENBRACES "[{(\""
-#define CLOSINGBRACES "]})\""
+#define OPENBRACES "[{(\""    // all opening braces
+#define CLOSINGBRACES "]})\"" // and matching closing ones
 static const std::string::size_type npos = (std::string::size_type) -1;
 // These are the constants associated with the "ResolveVariables" method.
-static const char* openBraceVar = "$";
-static const char* closingBraceVar = "$";
+static const char* openBraceVar = "$";    // beginning of a var
+static const char* closingBraceVar = "$"; // end of a var
 static const char* forbiddenCharactersInVarName = ",/<>?;':\"[]{}\\|!@#%^&*()+=~` \t\n";
 static const char* forbiddenCharactersInVarNameEscapeWhitespace = ",/<>?;':\"[]{}\\|!@#%^&*()+=~` \\t\\n";
 static const std::size_t openBraceVarSize = strlen(openBraceVar);
@@ -357,23 +357,19 @@ public:
 // str - string to search
 // tokenStart - start location in the string to search
 // returns: character position of matching closing brace, string::npos if no brace present at start position
-// BUGBUG: This seems to only work for one kind of braces at a time. Nested other braces are not
-// understood. Also, braces in strings are not protected. [fseide]
-static std::string::size_type FindBraces(const std::string& str, std::string::size_type tokenStart)
+static size_t FindBraces(const std::string& str, const size_t tokenStart)
 {
     const auto len = str.length();
+    // start is outside (or rather, at end of string): no brace here
     if (tokenStart >= len)
     {
         return npos;
     }
     // open braces and quote
-    static const std::string openBraces = OPENBRACES;
+    static const std::string openBraces = OPENBRACES; // currently "[{(\""
     // close braces and quote
     static const std::string closingBraces = CLOSINGBRACES;
-    const auto charsToLookFor = closingBraces + openBraces; // all chars we match for
+    static const auto charsToLookFor = closingBraces + openBraces; // all chars we match for
     // get brace index for first character of input string
     const auto braceFound = openBraces.find(str[tokenStart]);
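For orientation, a self-contained sketch of what a FindBraces-style helper does (hypothetical and simplified; quote handling omitted): it scans for the closing brace matching the opening brace at tokenStart, tracking nesting depth of that one brace kind only, which is exactly the limitation the removed BUGBUG comment described.

#include <string>

size_t FindMatchingBrace(const std::string& str, size_t tokenStart)
{
    static const std::string open  = "[{(";
    static const std::string close = "]})";
    if (tokenStart >= str.length())
        return std::string::npos; // start is at/past end of string
    const auto kind = open.find(str[tokenStart]);
    if (kind == std::string::npos)
        return std::string::npos; // no opening brace at the start position
    size_t depth = 0;
    for (size_t i = tokenStart; i < str.length(); i++)
    {
        if (str[i] == open[kind])
            depth++; // nested brace of the same kind
        else if (str[i] == close[kind])
            depth--;
        if (depth == 0)
            return i; // position of the matching closing brace
    }
    return std::string::npos; // unbalanced input
}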

View file

@@ -218,7 +218,17 @@ void ComputationNetwork::ReadPersistableParameters(File& fstream, bool create)
         if (create) // loaded from scratch
             AddNodeToNet(node);
         else // reloaded existing
-            node->Validate(true); // nothing that propagates should have changed --TODO: have a more rigid mechanism to prevent resizing; this should only reload the model parameters
+        {
+            let old = node->GetSampleLayout();
+            let changed = ValidateNode(node, /*isFinalValidationPass=*/true);
+            if (changed)
+            {
+                let upd = node->GetSampleLayout();
+                fprintf(stderr, "ValidateSubNetwork: %ls %ls operation changed, from [%s] to [%s].", node->NodeName().c_str(), node->OperationName().c_str(),
+                        string(old).c_str(), string(upd).c_str());
+                //LogicError("ValidateSubNetwork: %ls %ls operation changed during reload or re-validation.", node->NodeName().c_str(), node->OperationName().c_str());
+            }
+        }
     }
     fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");

View file

@@ -165,6 +165,7 @@ public:
 private:
     void ValidateNetwork();
     void ValidateNodes(list<ComputationNodeBasePtr> nodes, bool isFinalValidationPass, size_t& todo);
+    bool ValidateNode(ComputationNodeBasePtr node, bool isFinalValidationPass) const;
     void MarkValueNonSharableNodes();
 private:

View file

@@ -579,6 +579,36 @@ static pair<TensorShape, bool> GetDims(const ComputationNodeBasePtr& node)
     return make_pair(node->GetSampleLayout(), node->HasMBLayout());
 }
+bool ComputationNetwork::ValidateNode(ComputationNodeBasePtr node, bool isFinalValidationPass) const
+{
+    const auto& children = node->GetInputs();
+    // keep state
+    MBLayoutPtr oldMBLayoutPtr = node->GetMBLayout();
+    auto dim = GetDims(node);
+    vector<pair<TensorShape, bool>> childDims;
+    for (auto& child : children)
+        childDims.push_back(GetDims(child));
+    auto sampleLayout = node->GetSampleLayout();
+    // We do call validate(final) as many times as needed, since stuff may have changed underneath.
+    node->Validate(isFinalValidationPass /*final*/); // all nodes have been visited: do verification instead of just inference
+    // also take the opportunity to propagate m_needsGradient
+    auto needsGradient = node->m_needsGradient;
+    for (auto& child : children) // TODO: do we need a check that this is stable if isFinalValidationPass?
+        node->m_needsGradient |= child->m_needsGradient;
+    // check state --node will be valid if all nodes have been visited and node has not been updated
+    bool unchanged = true;
+    unchanged &= (oldMBLayoutPtr == node->GetMBLayout());
+    unchanged &= (dim == GetDims(node));
+    vector<pair<TensorShape, bool>> newChildDims;
+    for (auto& child : children)
+        newChildDims.push_back(GetDims(child));
+    unchanged &= (childDims == newChildDims);
+    unchanged &= (sampleLayout == node->GetSampleLayout());
+    unchanged &= (needsGradient == node->m_needsGradient);
+    return !unchanged;
+}
 void ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, bool isFinalValidationPass, size_t& todo)
 {
     todo = 0; // returns how many nodes are to be redone
@@ -596,35 +626,15 @@ void ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, bool
         }
         // if there is not at least one visited child
         bool valid = false;
-        if (hasVisitedChild || isLeaf)
+        if (hasVisitedChild || isLeaf) // got at least one child: it makes sense to call Validate()
         {
-            // got at least one child: it makes sense to call Validate()
-            // keep state
-            MBLayoutPtr oldMBLayoutPtr = node->GetMBLayout();
-            auto dim = GetDims(node);
-            vector<pair<TensorShape, bool>> childDims;
-            for (auto& child : children)
-                childDims.push_back(GetDims(child));
-            auto sampleLayout = node->GetSampleLayout();
-            // We do call validate(final) as many times as needed, since stuff may have changed underneath.
+            // TODO: PrintSelfBeforeValidation() into a function returning a string, and print all in a single line (also when it throws; print & rethrow).
             node->PrintSelfBeforeValidation();
-            node->Validate(isFinalValidationPass /*final*/); // all nodes have been visited: do verification instead of just inference
-            fprintf(stderr, " -> [%s%s]", string(node->GetSampleLayout()).c_str(), node->HasMBLayout() ? " x *" : "");
+            bool unchanged = !ValidateNode(node, isFinalValidationPass);
             node->m_visited = true;
-            // also take the opportunity to propagate m_needsGradient
-            auto needsGradient = node->m_needsGradient;
-            for (auto& child : children) // TODO: do we need a check that this is stable if isFinalValidationPass?
-                node->m_needsGradient |= child->m_needsGradient;
-            // check state --node will be valid if all nodes have been visited and node has not been updated
-            bool unchanged = true;
-            unchanged &= (oldMBLayoutPtr == node->GetMBLayout());
-            unchanged &= (dim == GetDims(node));
-            vector<pair<TensorShape, bool>> newChildDims;
-            for (auto& child : children)
-                newChildDims.push_back(GetDims(child));
-            unchanged &= (childDims == newChildDims);
-            unchanged &= (sampleLayout == node->GetSampleLayout());
-            unchanged &= (needsGradient == node->m_needsGradient);
+            fprintf(stderr, "[%s%s]", string(node->GetSampleLayout()).c_str(), node->HasMBLayout() ? " x *" : "");
+            // print the new type
             // sanity checks
             if (isFinalValidationPass && !unchanged)
                 LogicError("ValidateSubNetwork: %ls %ls operation changed during final validation.", node->NodeName().c_str(), node->OperationName().c_str());
             if (isFinalValidationPass && !allChildrenVisited)

View file

@@ -307,7 +307,7 @@ void ComputationNode<ElemType>::WriteMinibatchWithFormatting(FILE* f, size_t onl
             fprintfOrDie(f, "%s", sampleSeparator.c_str());
             if (j == jstop)
             {
-                fprintf(f, "..."); // 'nuff said
+                fprintf(f, "... (%d more)", (int)(jend - jstop)); // 'nuff said
                 break;
             }
             for (size_t i = 0; i < iend; i++)

View file

@@ -167,8 +167,8 @@ struct ComputationNetworkOwnedNodeState
     // These are public since you are meant to set these flags manually in the debugger or temporarily poke into them from code as needed.
     bool m_traceNodeValue = false;
     bool m_traceNodeValueAsCategoryLabel = false;
-    size_t m_traceNodeValueUpToDim = 5;
-    size_t m_traceNodeValueUpToT = 5;
+    size_t m_traceNodeValueUpToDim = 3; // 3 should be enough to see simple patterns such as all values are identical or out of range
+    size_t m_traceNodeValueUpToT = 8;   // 8 time steps fit comfortably into a normal-sized console
     void EnableNodeTracing(bool isCategoryLabel) { m_traceNodeValue = true; m_traceNodeValueAsCategoryLabel = isCategoryLabel; }
 protected: // TODO: should be fully encapsulated here
@@ -1513,8 +1513,9 @@ public:
     {
         if (m_traceNodeValue)
         {
-            fprintf(stderr, "Trace --> %ls = %ls -> [%s%s]\n", NodeName().c_str(), OperationName().c_str(), string(GetSampleLayout()).c_str(), HasMBLayout() ? " x *" : "");
-            WriteMinibatchWithFormatting(stderr, m_traceNodeValueUpToDim, m_traceNodeValueUpToT, true/*transpose*/, m_traceNodeValueAsCategoryLabel, std::vector<std::string>(),
+            const auto shape = GetTensorShape(DetermineElementwiseTensorRank());
+            fprintf(stderr, "Trace --> %ls = %ls -> [%s]\n", NodeName().c_str(), OperationName().c_str(), string(shape).c_str());
+            WriteMinibatchWithFormatting(stderr, m_traceNodeValueUpToDim, m_traceNodeValueUpToT, false/*transpose*/, m_traceNodeValueAsCategoryLabel, std::vector<std::string>(),
                 ""/*sequenceSeparator*/, " "/*sequencePrologue*/, "\n"/*sequenceEpilogue*/, " "/*elementSeparator*/, "\n "/*sampleSeparator*/,
                 "%13.10f"/*valueFormatString*/);
         }

View file

@@ -167,7 +167,8 @@ public:
         // BUGBUG: I got an error when reloading persistent parameters for a model that had dimension specified as 0, which did not get re-inferred correctly.
         // We should either simply not write this parameter out at all (since it can always be inferred), or write the tensor shape.
-        SetDims(TensorShape(rows), HasMBLayout() /*may be true on reload (roll-back)*/); // tensor shape will be overwritten in Validate() --TODO: We should serialize it here.
+        if (GetSampleLayout().GetNumElements() != rows) // legacy format: if #rows matches then assume current tensor shape is up to date
+            SetDims(TensorShape(rows), HasMBLayout() /*may be true on reload (roll-back)*/); // tensor shape will be overwritten in Validate() --TODO: We should serialize it here.
         m_delayedValue.Resize(rows, 0); // Note: If we try to access history in first minibatch, we shall crash. It would be a consequence of a missing sentence-begin flag
         if (modelVersion >= CNTK_MODEL_VERSION_2)

View file

@@ -1578,7 +1578,7 @@ void BatchSequenceReader<ElemType>::Reset()
 {
     mProcessed.clear();
     mToProcess.clear();
-    mLastProcssedSentenceId = 0;
+    mLastProcessedSentenceId = 0;
     mPosInSentence = 0;
     mLastPosInSentence = 0;
     mNumRead = 0;
@@ -1651,6 +1651,7 @@ void BatchSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epo
     // we use epochSize, which might not be set yet, so use a default value for allocations if not yet set
     size_t epochSize = m_epochSize == requestDataSize ? 1000 : m_epochSize;
     m_epoch = epoch;
+    m_randomSeed = (unsigned int)m_epoch;
     m_mbStartSample = epoch * m_epochSize;
     m_epochSamplesReturned = 0; // counter to know when we returned one epoch
@@ -1700,7 +1701,7 @@ size_t BatchSequenceReader<ElemType>::DetermineSequencesToProcess()
     int mp = (int) mToProcess[s];
     if (mProcessed[mp])
     {
-        mLastProcssedSentenceId = mp;
+        mLastProcessedSentenceId = mp;
         mLastPosInSentence = 0;
         allDone = true;
         break;
@@ -1722,7 +1723,7 @@ size_t BatchSequenceReader<ElemType>::DetermineSequencesToProcess()
     size_t maxToProcess = mRequestedNumParallelSequences > 0 ? mRequestedNumParallelSequences : SIZE_MAX; // if mRequestedNumParallelSequences is 0 then we go by MB size
     size_t maxTokens = mRequestedNumParallelSequences > 0 ? SIZE_MAX : m_mbSize;
     size_t numTokens = 0; // token counter
-    for (size_t seq = mLastProcssedSentenceId;
+    for (size_t seq = mLastProcessedSentenceId;
          seq < mNumRead &&                // hit end of buffer
          mToProcess.size() < maxToProcess; // hit parallel-sequence limit
          seq++)
@@ -1791,14 +1792,14 @@ bool BatchSequenceReader<ElemType>::GetMinibatchData(size_t& /*out*/ firstPosInS
 #ifdef _MSC_VER // make some old configurations reproducible (m_cacheBlockSize used to be a constant) --TODO: remove in a few months
     if (m_cacheBlockSize == 50000)
     {
+        srand(++m_randomSeed); // TODO: older code did not have that; so no idea what random seed was used
         std::random_shuffle(m_parser.mSentenceIndex2SentenceInfo.begin(), m_parser.mSentenceIndex2SentenceInfo.end());
         // Note: random_shuffle is deprecated since C++14.
     }
     else // new configs use a wider randomization
 #endif
     {
-        std::random_device rd;
-        std::mt19937 g(rd());
+        std::mt19937 g(++m_randomSeed); // random seed is initialized to epoch, but gets incremented for intermediate reshuffles
         std::shuffle(m_parser.mSentenceIndex2SentenceInfo.begin(), m_parser.mSentenceIndex2SentenceInfo.end(), g);
     }
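The effect of the new seeding, shown as a standalone illustrative program (simplified; only the m_randomSeed behavior is taken from the diff above): the seed starts at the epoch number and is incremented before every reshuffle, so successive cache blocks within an epoch get distinct but reproducible orders, whereas the old std::random_device seed differed on every run.

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

int main()
{
    unsigned int randomSeed = 3; // as in StartMinibatchLoop(): initialized to the epoch number
    for (int block = 0; block < 2; block++) // two cache-block reshuffles within one epoch
    {
        std::vector<int> order(5);
        std::iota(order.begin(), order.end(), 0);
        // old: std::mt19937 g(std::random_device{}()); -- different on every run
        std::mt19937 g(++randomSeed); // new: epoch + 1, epoch + 2, ... reproducible
        std::shuffle(order.begin(), order.end(), g);
        for (int i : order)
            printf("%d ", i);
        printf("\n"); // prints the same two permutations on every run
    }
}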

View file

@@ -354,7 +354,9 @@ public:
     using Base::mRequestedNumParallelSequences; // IDataReader<ElemType>
 private:
-    size_t mLastProcssedSentenceId;
+    unsigned int m_randomSeed = 0; // deterministic random seed
+    size_t mLastProcessedSentenceId;
     size_t mNumRead;          // number of sentences in current cache block
     vector<bool> mProcessed;  // [mNumRead] true if sequence has already been returned in this cache block
@@ -379,7 +381,7 @@ public:
     BatchSequenceReader()
         : m_pMBLayout(make_shared<MBLayout>())
     {
-        mLastProcssedSentenceId = 0;
+        mLastProcessedSentenceId = 0;
         mRequestedNumParallelSequences = 1;
         mLastPosInSentence = 0;
         mNumRead = 0;