bug fix: LMSequenceReader randomization must be deterministic (seed = epoch)
Parent: ba2238d215
Commit: 4209d9df10
@@ -268,8 +268,8 @@ Parameters =
 Stabilize (x, enabled=true) =
     if enabled
     then [
-        beta = Exp(ScalarParam())
-        result = Scale(beta, x)
+        beta = Exp (ScalarParam())
+        result = Scale (beta, x)
     ].result
     else x
 ]
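
The change above is whitespace only (BrainScript allows a space before the argument list), but the function itself deserves a note: Stabilize() is a scalar self-stabilizer that multiplies its input by a learned positive factor. A minimal C++ sketch of the computation it expresses (illustrative, not CNTK code):

    #include <cmath>

    // Scale x by exp(beta), where beta is a learned scalar parameter.
    // Going through Exp keeps the scale positive, and beta = 0 yields the
    // neutral scale 1, a convenient starting point for training.
    float Stabilize(float x, float beta /* learned ScalarParam */)
    {
        return std::exp(beta) * x; // Scale (Exp (beta), x)
    }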
@@ -18,14 +18,14 @@ using namespace std;
 namespace Microsoft { namespace MSR { namespace CNTK {

 #define FUNCTIONOPEN "("
-#define OPENBRACES "[{(\""
-#define CLOSINGBRACES "]})\""
+#define OPENBRACES "[{(\""    // all opening braces
+#define CLOSINGBRACES "]})\"" // and matching closing ones

 static const std::string::size_type npos = (std::string::size_type) -1;

 // These are the constants associated with the "ResolveVariables" method.
-static const char* openBraceVar = "$";
-static const char* closingBraceVar = "$";
+static const char* openBraceVar = "$";    // beginning of a var
+static const char* closingBraceVar = "$"; // end of a var
 static const char* forbiddenCharactersInVarName = ",/<>?;':\"[]{}\\|!@#%^&*()+=~` \t\n";
 static const char* forbiddenCharactersInVarNameEscapeWhitespace = ",/<>?;':\"[]{}\\|!@#%^&*()+=~` \\t\\n";
 static const std::size_t openBraceVarSize = strlen(openBraceVar);
@@ -357,23 +357,19 @@ public:
 // str - string to search
 // tokenStart - start location in the string to search
 // returns: character position of matching closing brace, string::npos if no brace present at start position
 // BUGBUG: This seems to only work for one kind of braces at a time. Nested other braces are not
 // understood. Also, braces in strings are not protected. [fseide]
-static std::string::size_type FindBraces(const std::string& str, std::string::size_type tokenStart)
+static size_t FindBraces(const std::string& str, const size_t tokenStart)
 {
     const auto len = str.length();
     // start is outside (or rather, at end of string): no brace here
     if (tokenStart >= len)
     {
         return npos;
     }

-    // open braces and quote
-    static const std::string openBraces = OPENBRACES;
-    // close braces and quote
+    static const std::string openBraces = OPENBRACES; // currently "[{(\""
     static const std::string closingBraces = CLOSINGBRACES;

-    const auto charsToLookFor = closingBraces + openBraces; // all chars we match for
+    static const auto charsToLookFor = closingBraces + openBraces; // all chars we match for

     // get brace index for first character of input string
     const auto braceFound = openBraces.find(str[tokenStart]);
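
FindBraces() returns the position of the closing brace that matches the opening brace at tokenStart. As the BUGBUG note records, it only handles one kind of brace at a time and does not protect braces inside strings. The same scan, reduced to a self-contained sketch (a hypothetical helper, not the class member above):

    #include <string>

    // Scan forward from an opening brace, counting the nesting depth of that
    // same brace kind, and return the index of the matching closing brace.
    static size_t FindMatchingBrace(const std::string& str, size_t tokenStart)
    {
        static const std::string open = "[{(";
        static const std::string close = "]})";
        if (tokenStart >= str.length())
            return std::string::npos;     // start is past the end: no brace here
        const auto kind = open.find(str[tokenStart]);
        if (kind == std::string::npos)
            return std::string::npos;     // no opening brace at this position
        size_t depth = 0;
        for (size_t i = tokenStart; i < str.length(); i++)
        {
            if (str[i] == open[kind])
                depth++;                  // another brace of the same kind opens
            else if (str[i] == close[kind] && --depth == 0)
                return i;                 // found the matching close
        }
        return std::string::npos;         // unbalanced input
    }

Like the original, this ignores other brace kinds and quoted strings, which is exactly the limitation the BUGBUG comment describes.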
@@ -218,7 +218,17 @@ void ComputationNetwork::ReadPersistableParameters(File& fstream, bool create)
     if (create) // loaded from scratch
         AddNodeToNet(node);
     else // reloaded existing
-        node->Validate(true); // nothing that propagates should have changed --TODO: have a more rigid mechanism to prevent resizing; this should only reload the model parameters
+    {
+        let old = node->GetSampleLayout();
+        let changed = ValidateNode(node, /*isFinalValidationPass=*/true);
+        if (changed)
+        {
+            let upd = node->GetSampleLayout();
+            fprintf(stderr, "ValidateSubNetwork: %ls %ls operation changed, from [%s] to [%s].", node->NodeName().c_str(), node->OperationName().c_str(),
+                    string(old).c_str(), string(upd).c_str());
+            //LogicError("ValidateSubNetwork: %ls %ls operation changed during reload or re-validation.", node->NodeName().c_str(), node->OperationName().c_str());
+        }
+    }
 }

 fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@@ -165,6 +165,7 @@ public:
 private:
     void ValidateNetwork();
     void ValidateNodes(list<ComputationNodeBasePtr> nodes, bool isFinalValidationPass, size_t& todo);
+    bool ValidateNode(ComputationNodeBasePtr node, bool isFinalValidationPass) const;
     void MarkValueNonSharableNodes();

 private:
@@ -579,6 +579,36 @@ static pair<TensorShape, bool> GetDims(const ComputationNodeBasePtr& node)
     return make_pair(node->GetSampleLayout(), node->HasMBLayout());
 }

+bool ComputationNetwork::ValidateNode(ComputationNodeBasePtr node, bool isFinalValidationPass) const
+{
+    const auto& children = node->GetInputs();
+
+    // keep state
+    MBLayoutPtr oldMBLayoutPtr = node->GetMBLayout();
+    auto dim = GetDims(node);
+    vector<pair<TensorShape, bool>> childDims;
+    for (auto& child : children)
+        childDims.push_back(GetDims(child));
+    auto sampleLayout = node->GetSampleLayout();
+
+    // We do call validate(final) as many times as needed, since stuff may have changed underneath.
+    node->Validate(isFinalValidationPass /*final*/); // all nodes have been visited: do verification instead of just inference
+
+    // also take the opportunity to propagate m_needsGradient
+    auto needsGradient = node->m_needsGradient;
+    for (auto& child : children) // TODO: do we need a check that this is stable if isFinalValidationPass?
+        node->m_needsGradient |= child->m_needsGradient;
+
+    // check state --node will be valid if all nodes have been visited and node has not been updated
+    bool unchanged = true;
+    unchanged &= (oldMBLayoutPtr == node->GetMBLayout());
+    unchanged &= (dim == GetDims(node));
+    vector<pair<TensorShape, bool>> newChildDims;
+    for (auto& child : children)
+        newChildDims.push_back(GetDims(child));
+    unchanged &= (childDims == newChildDims);
+    unchanged &= (sampleLayout == node->GetSampleLayout());
+    unchanged &= (needsGradient == node->m_needsGradient);
+    return !unchanged;
+}
+
 void ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, bool isFinalValidationPass, size_t& todo)
 {
     todo = 0; // returns how many nodes are to be redone
@@ -596,35 +626,15 @@ void ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, bool
         }
         // if there is not at least one visited child
         bool valid = false;
-        if (hasVisitedChild || isLeaf)
+        if (hasVisitedChild || isLeaf) // got at least one child: it makes sense to call Validate()
         {
-            // got at least one child: it makes sense to call Validate()
-            // keep state
-            MBLayoutPtr oldMBLayoutPtr = node->GetMBLayout();
-            auto dim = GetDims(node);
-            vector<pair<TensorShape, bool>> childDims;
-            for (auto& child : children)
-                childDims.push_back(GetDims(child));
-            auto sampleLayout = node->GetSampleLayout();
-            // We do call validate(final) as many times as needed, since stuff may have changed underneath.
+            // TODO: PrintSelfBeforeValidation() into a function returning a string, and print all in a single line (also when it throws; print & rethrow).
             node->PrintSelfBeforeValidation();
-            node->Validate(isFinalValidationPass /*final*/); // all nodes have been visited: do verification instead of just inference
-            // also take the opportunity to propagate m_needsGradient
-            auto needsGradient = node->m_needsGradient;
-            for (auto& child : children) // TODO: do we need a check that this is stable if isFinalValidationPass?
-                node->m_needsGradient |= child->m_needsGradient;
-            // check state --node will be valid if all nodes have been visited and node has not been updated
-            bool unchanged = true;
-            unchanged &= (oldMBLayoutPtr == node->GetMBLayout());
-            unchanged &= (dim == GetDims(node));
-            vector<pair<TensorShape, bool>> newChildDims;
-            for (auto& child : children)
-                newChildDims.push_back(GetDims(child));
-            unchanged &= (childDims == newChildDims);
-            unchanged &= (sampleLayout == node->GetSampleLayout());
-            unchanged &= (needsGradient == node->m_needsGradient);
-            fprintf(stderr, "[%s%s]", string(node->GetSampleLayout()).c_str(), node->HasMBLayout() ? " x *" : "");
-            // print the new type
+            bool unchanged = !ValidateNode(node, isFinalValidationPass);
+            node->m_visited = true;
+            fprintf(stderr, " -> [%s%s]", string(node->GetSampleLayout()).c_str(), node->HasMBLayout() ? " x *" : "");
             // sanity checks
             if (isFinalValidationPass && !unchanged)
                 LogicError("ValidateSubNetwork: %ls %ls operation changed during final validation.", node->NodeName().c_str(), node->OperationName().c_str());
             if (isFinalValidationPass && !allChildrenVisited)
@@ -307,7 +307,7 @@ void ComputationNode<ElemType>::WriteMinibatchWithFormatting(FILE* f, size_t onl
     fprintfOrDie(f, "%s", sampleSeparator.c_str());
     if (j == jstop)
    {
-        fprintf(f, "..."); // 'nuff said
+        fprintf(f, "... (%d more)", (int)(jend - jstop)); // 'nuff said
         break;
     }
     for (size_t i = 0; i < iend; i++)
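
A small usability tweak: instead of a bare "...", the truncation message now reports how many samples were cut off. The pattern, reduced to a standalone sketch (hypothetical function, same message format):

    #include <cstdio>

    // Print at most `limit` values, then say how many were omitted.
    static void PrintTruncated(const float* values, size_t count, size_t limit)
    {
        const size_t shown = count < limit ? count : limit;
        for (size_t j = 0; j < shown; j++)
            printf("%13.10f ", values[j]);
        if (count > shown)
            printf("... (%d more)", (int)(count - shown));
        printf("\n");
    }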
@@ -167,8 +167,8 @@ struct ComputationNetworkOwnedNodeState
     // These are public since you are meant to set these flags manually in the debugger or temporarily poke into them from code as needed.
     bool m_traceNodeValue = false;
     bool m_traceNodeValueAsCategoryLabel = false;
-    size_t m_traceNodeValueUpToDim = 5;
-    size_t m_traceNodeValueUpToT = 5;
+    size_t m_traceNodeValueUpToDim = 3; // 3 should be enough to see simple patterns such as all values are identical or out of range
+    size_t m_traceNodeValueUpToT = 8;   // 8 time steps fit comfortably into a normal-sized console
     void EnableNodeTracing(bool isCategoryLabel) { m_traceNodeValue = true; m_traceNodeValueAsCategoryLabel = isCategoryLabel; }

 protected: // TODO: should be fully encapsulated here
@@ -1513,8 +1513,9 @@ public:
     {
         if (m_traceNodeValue)
         {
-            fprintf(stderr, "Trace --> %ls = %ls -> [%s%s]\n", NodeName().c_str(), OperationName().c_str(), string(GetSampleLayout()).c_str(), HasMBLayout() ? " x *" : "");
-            WriteMinibatchWithFormatting(stderr, m_traceNodeValueUpToDim, m_traceNodeValueUpToT, true/*transpose*/, m_traceNodeValueAsCategoryLabel, std::vector<std::string>(),
+            const auto shape = GetTensorShape(DetermineElementwiseTensorRank());
+            fprintf(stderr, "Trace --> %ls = %ls -> [%s]\n", NodeName().c_str(), OperationName().c_str(), string(shape).c_str());
+            WriteMinibatchWithFormatting(stderr, m_traceNodeValueUpToDim, m_traceNodeValueUpToT, false/*transpose*/, m_traceNodeValueAsCategoryLabel, std::vector<std::string>(),
                 ""/*sequenceSeparator*/, " "/*sequencePrologue*/, "\n"/*sequenceEpilogue*/, " "/*elementSeparator*/, "\n "/*sampleSeparator*/,
                 "%13.10f"/*valueFormatString*/);
         }
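
Taken together with the defaults above (m_traceNodeValueUpToDim = 3, m_traceNodeValueUpToT = 8), node tracing now prints the full tensor shape and an untransposed value dump. Per the comment in the header, the flags are meant to be set from the debugger or poked temporarily from code, roughly like this (assuming `node` is a ComputationNode pointer you already hold):

    node->EnableNodeTracing(/*isCategoryLabel=*/false); // dump raw values, not argmax labels
    node->m_traceNodeValueUpToDim = 3; // show the first 3 dimensions of each sample
    node->m_traceNodeValueUpToT = 8;   // and the first 8 time steps of each sequence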
@@ -167,7 +167,8 @@ public:

     // BUGBUG: I got an error when reloading persistent parameters for a model that had a dimension specified as 0, which did not get re-inferred correctly.
     // We should either simply not write this parameter out at all (since it can always be inferred), or write the tensor shape.
-    SetDims(TensorShape(rows), HasMBLayout() /*may be true on reload (roll-back)*/); // tensor shape will be overwritten in Validate() --TODO: We should serialize it here.
+    if (GetSampleLayout().GetNumElements() != rows) // legacy format: if #rows matches then assume current tensor shape is up to date
+        SetDims(TensorShape(rows), HasMBLayout() /*may be true on reload (roll-back)*/); // tensor shape will be overwritten in Validate() --TODO: We should serialize it here.
     m_delayedValue.Resize(rows, 0); // Note: If we try to access history in first minibatch, we shall crash. It would be a consequence of a missing sentence-begin flag

     if (modelVersion >= CNTK_MODEL_VERSION_2)
@@ -1578,7 +1578,7 @@ void BatchSequenceReader<ElemType>::Reset()
 {
     mProcessed.clear();
     mToProcess.clear();
-    mLastProcssedSentenceId = 0;
+    mLastProcessedSentenceId = 0;
     mPosInSentence = 0;
     mLastPosInSentence = 0;
     mNumRead = 0;
@@ -1651,6 +1651,7 @@ void BatchSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epo
     // we use epochSize, which might not be set yet, so use a default value for allocations if not yet set
     size_t epochSize = m_epochSize == requestDataSize ? 1000 : m_epochSize;
     m_epoch = epoch;
+    m_randomSeed = (unsigned int)m_epoch;
     m_mbStartSample = epoch * m_epochSize;
     m_epochSamplesReturned = 0; // counter to know when we returned one epoch

@@ -1700,7 +1701,7 @@ size_t BatchSequenceReader<ElemType>::DetermineSequencesToProcess()
     int mp = (int) mToProcess[s];
     if (mProcessed[mp])
     {
-        mLastProcssedSentenceId = mp;
+        mLastProcessedSentenceId = mp;
         mLastPosInSentence = 0;
         allDone = true;
         break;
@@ -1722,7 +1723,7 @@ size_t BatchSequenceReader<ElemType>::DetermineSequencesToProcess()
     size_t maxToProcess = mRequestedNumParallelSequences > 0 ? mRequestedNumParallelSequences : SIZE_MAX; // if mRequestedNumParallelSequences is 0 then we go by MB size
     size_t maxTokens = mRequestedNumParallelSequences > 0 ? SIZE_MAX : m_mbSize;
     size_t numTokens = 0; // token counter
-    for (size_t seq = mLastProcssedSentenceId;
+    for (size_t seq = mLastProcessedSentenceId;
          seq < mNumRead &&                 // hit end of buffer
          mToProcess.size() < maxToProcess; // hit parallel-sequence limit
          seq++)
@@ -1791,14 +1792,14 @@ bool BatchSequenceReader<ElemType>::GetMinibatchData(size_t& /*out*/ firstPosInS
 #ifdef _MSC_VER // make some old configurations reproducible (m_cacheBlockSize used to be a constant) --TODO: remove in a few months
     if (m_cacheBlockSize == 50000)
     {
+        srand(++m_randomSeed); // TODO: older code did not have that; so no idea what random seed was used
         std::random_shuffle(m_parser.mSentenceIndex2SentenceInfo.begin(), m_parser.mSentenceIndex2SentenceInfo.end());
         // Note: random_shuffle is deprecated since C++14.
     }
     else // new configs use a wider randomization
 #endif
     {
-        std::random_device rd;
-        std::mt19937 g(rd());
+        std::mt19937 g(++m_randomSeed); // random seed is initialized to epoch, but gets incremented for intermediate reshuffles
         std::shuffle(m_parser.mSentenceIndex2SentenceInfo.begin(), m_parser.mSentenceIndex2SentenceInfo.end(), g);
     }
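
This is the heart of the fix: seeding from std::random_device made every run shuffle differently, so epoch results were not reproducible. Seeding from the epoch (set in StartMinibatchLoop() above) makes the shuffle order a pure function of the epoch number, and bumping the seed per reshuffle keeps successive cache blocks within an epoch distinct. A standalone sketch of the scheme (illustrative names, not the reader's members):

    #include <algorithm>
    #include <cstdio>
    #include <random>
    #include <vector>

    int main()
    {
        std::vector<int> sentenceIds = {0, 1, 2, 3, 4, 5, 6, 7};
        const unsigned int epoch = 3;
        unsigned int randomSeed = epoch;  // seed = epoch: rerunning epoch 3 shuffles identically
        for (int block = 0; block < 2; block++)
        {
            std::mt19937 g(++randomSeed); // incremented per reshuffle, as in the patch
            std::shuffle(sentenceIds.begin(), sentenceIds.end(), g);
            for (int id : sentenceIds)
                printf("%d ", id);
            printf("\n");              // two blocks, two different but repeatable orders
        }
        return 0;
    }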
@@ -354,7 +354,9 @@ public:
     using Base::mRequestedNumParallelSequences; // IDataReader<ElemType>

 private:
-    size_t mLastProcssedSentenceId;
+    unsigned int m_randomSeed = 0; // deterministic random seed
+
+    size_t mLastProcessedSentenceId;

     size_t mNumRead; // number of sentences in current cache block
     vector<bool> mProcessed; // [mNumRead] true if sequence has already been returned in this cache block
@@ -379,7 +381,7 @@ public:
     BatchSequenceReader()
         : m_pMBLayout(make_shared<MBLayout>())
     {
-        mLastProcssedSentenceId = 0;
+        mLastProcessedSentenceId = 0;
         mRequestedNumParallelSequences = 1;
         mLastPosInSentence = 0;
         mNumRead = 0;