Fixed SGD logging so that the same criterion is no longer output multiple times
This commit is contained in:
Родитель
efafd7c795
Коммит
ea87db0cad
|
@ -422,6 +422,7 @@ void BatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordType & re
|
|||
|
||||
const LabelInfo& labelIn = m_labelInfo[labelInfoIn];
|
||||
const LabelInfo& labelOut = m_labelInfo[labelInfoOut];
|
||||
fprintf(stderr, "BatchLUSequenceReader: Input file is %ls\n", m_file.c_str());
|
||||
m_parser.ParseInit(m_file.c_str(), labelIn.dim, labelOut.dim, labelIn.beginSequence, labelIn.endSequence, labelOut.beginSequence, labelOut.endSequence, mUnkStr);
|
||||
|
||||
mRequestedNumParallelSequences = readerConfig(L"nbruttsineachrecurrentiter", (size_t)1);
|
||||
|
|
|
@ -414,29 +414,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
Base(deviceId, name)
|
||||
{ }
|
||||
|
||||
//void ComputeInputPartialMap(const size_t inputIndex)
|
||||
//{
|
||||
// if (inputIndex > 1)
|
||||
// InvalidArgument("LookupTable operation only takes two inputs.");
|
||||
//
|
||||
// //DEVICEID_TYPE input1DeviceId = Inputs(1)->FunctionValues().GetDeviceId();
|
||||
// //DEVICEID_TYPE input0DeviceId = Inputs(0)->FunctionValues().GetDeviceId();
|
||||
// //Inputs(1)->FunctionValues().TransferFromDeviceToDevice(input1DeviceId, input0DeviceId);
|
||||
//
|
||||
// if (inputIndex == 0) //left derivative
|
||||
// {
|
||||
// ComputeInputPartialLeft(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues());
|
||||
// }
|
||||
// else //right derivative
|
||||
// {
|
||||
// ComputeInputPartialRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues());
|
||||
// }
|
||||
// //Inputs(1)->FunctionValues().TransferFromDeviceToDevice(input0DeviceId, input1DeviceId);
|
||||
//}
|
||||
|
||||
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & t) override
|
||||
{
|
||||
//if (t.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
|
||||
if (inputIndex == 0) // left derivative (embedding matrix)
|
||||
{
|
||||
// This is a reduction operation, hence we need to mask out gaps.
|
||||
|
@ -501,18 +480,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
auto input1Reshaped = input1.Reshaped(rows1 / wordsInEachSample, cols1 * wordsInEachSample);
|
||||
|
||||
//DEVICEID_TYPE input1DeviceId = input1.GetDeviceId();
|
||||
//DEVICEID_TYPE input0DeviceId = input0.GetDeviceId();
|
||||
//input1.TransferFromDeviceToDevice(input1DeviceId, input0DeviceId);
|
||||
|
||||
auto functionValuesReshaped = functionValues.Reshaped(input0.GetNumRows(), input1Reshaped.GetNumCols());
|
||||
functionValuesReshaped.AssignProductOf(input0, false, input1Reshaped, false);
|
||||
//size_t rows = functionValues.GetNumRows();
|
||||
//functionValues.Reshape(rows * wordsInEachSample, cols1);
|
||||
|
||||
//input1.TransferFromDeviceToDevice(input0DeviceId, input1DeviceId);
|
||||
|
||||
//input1.Reshape(rows1, cols1);
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
|
||||
|
|
|
@ -42,7 +42,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_timeStep = 1;
|
||||
CreateMatrixIfNull(m_functionValues);
|
||||
SetDims(row_size, col_size);
|
||||
//m_delayedActivation.Resize(row_size, col_size); // TODO: relevance of col_size? Why not timeStep?
|
||||
m_isHistoryCarryOverManagedExternally = false; // used for PairNetworkNode/PastValueNode combination
|
||||
}
|
||||
protected:
|
||||
|
@ -61,10 +60,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_timeStep = (int)timeStep;
|
||||
|
||||
m_functionValues->SetValue(m_initialActivationValue);
|
||||
//m_delayedActivation.SetValue(m_initialActivationValue);
|
||||
|
||||
//m_gradientValues->Resize(row_size, col_size);
|
||||
//m_gradientValues->SetValue(0.0f);
|
||||
}
|
||||
DelayedValueNodeBase(const ScriptableObjects::IConfigRecordPtr configp) :
|
||||
DelayedValueNodeBase(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"defaultHiddenActivation"), configp->Get(L"rows"), configp->Get(L"cols"), configp->Get(L"timeStep"))
|
||||
|
@ -303,9 +298,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
out.SetValue(inp);
|
||||
}
|
||||
|
||||
//MaskMissingValuesColumnsToZero(t); // fix gaps if any --TODO: make this take a FrameRange
|
||||
// TODO: why is masking needed here? We should never carry over data from those into valid regions, right?
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
|
||||
|
@ -314,7 +306,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
// this function is only used for PairNetworkNode (on PastValueNode)
|
||||
// BUGBUG: Need to transfer the layout as well. PairNetworkNod will go away.
|
||||
// BUGBUG: Need to transfer the layout as well. PairNetworkNode will go away.
|
||||
bool GetHistory(Matrix<ElemType>& hist, bool)
|
||||
{
|
||||
DEVICEID_TYPE device = hist.GetDeviceId();
|
||||
|
@ -375,7 +367,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep; \
|
||||
using Base::m_pShiftedMBLayout; using Base::m_isHistoryCarryOverManagedExternally;
|
||||
|
||||
// =======================================================================
|
||||
// -----------------------------------------------------------------------
|
||||
// PastValueNode (input) -- delay node
|
||||
// TODO: Can this just be a typedef?
|
||||
|
|
|
@ -827,7 +827,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
timer.Stop();
|
||||
double epochTime = timer.ElapsedSeconds();
|
||||
|
||||
if (m_useEvalCriterionControlLR)
|
||||
if (m_useEvalCriterionControlLR && epochEvalErrors.size() > 0)
|
||||
{
|
||||
lrControlCriterion = epochEvalErrors[0];
|
||||
}
|
||||
|
@ -840,12 +840,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
"Finished Epoch[%2d of %d]: [Training Set] TrainLossPerSample = %.8g; ",
|
||||
i + 1, (int) m_maxEpochs, epochCriterion);
|
||||
|
||||
if (epochEvalErrors.size() == 1)
|
||||
if (epochEvalErrors.size() == 0) // no eval criterion, only train criterion itself
|
||||
{
|
||||
fprintf(stderr,
|
||||
"EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.10g; EpochTime=%.8g\n",
|
||||
"Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
|
||||
learnRatePerSample, epochTime);
|
||||
m_lastFinishedEpochEvalErr = epochCriterion;
|
||||
}
|
||||
else if (epochEvalErrors.size() == 1)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
|
||||
epochEvalErrors[0], learnRatePerSample, epochTime);
|
||||
m_lastFinishedEpochEvalErr = epochEvalErrors[0];
|
||||
m_lastFinishedEpochEvalErr = epochEvalErrors.back();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -853,13 +860,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (size_t j = 0; j < epochEvalErrors.size(); j++)
|
||||
{
|
||||
fprintf(stderr, "[%lu]=%.8g; ", j, epochEvalErrors[j]);
|
||||
m_lastFinishedEpochEvalErr = epochEvalErrors[j];
|
||||
|
||||
}
|
||||
m_lastFinishedEpochEvalErr = epochEvalErrors.back();
|
||||
|
||||
fprintf(stderr, "Ave LearnRatePerSample = %.10g; Epoch Time=%.8g\n",
|
||||
fprintf(stderr, "Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
|
||||
learnRatePerSample, epochTime);
|
||||
|
||||
// TODO: why these extra log messages here and not for 1 eval criterion?
|
||||
fprintf(stderr, "Finished Epoch[%2d of %d]: Criterion Node [%ls] Per Sample = %.8g\n",
|
||||
i + 1, (int) m_maxEpochs, criterionNodes[0]->NodeName().c_str(), epochCriterion);
|
||||
|
||||
|
@ -876,22 +883,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
SimpleEvaluator<ElemType> evalforvalidation(net);
|
||||
vector<wstring> cvSetTrainAndEvalNodes;
|
||||
cvSetTrainAndEvalNodes.push_back(criterionNodes[0]->NodeName());
|
||||
cvSetTrainAndEvalNodes.push_back(evaluationNodes[0]->NodeName());
|
||||
if (criterionNodes.size() > 0)
|
||||
cvSetTrainAndEvalNodes.push_back(criterionNodes[0]->NodeName());
|
||||
if (evaluationNodes.size() > 0)
|
||||
cvSetTrainAndEvalNodes.push_back(evaluationNodes[0]->NodeName());
|
||||
|
||||
vector<double> vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i]);
|
||||
fprintf(stderr, "Finished Epoch[%2d of %d]: [Validation Set] TrainLossPerSample = %.8g; EvalErrPerSample = %.8g\n",
|
||||
i + 1, (int) m_maxEpochs, vScore[0], vScore[1]);
|
||||
fprintf(stderr, "Finished Epoch[%2d of %d]: [Validation Set] TrainLossPerSample = %.8g", i + 1, (int) m_maxEpochs, vScore[0]);
|
||||
if (vScore.size() > 1)
|
||||
fprintf(stderr, "; EvalErrPerSample = %.8g", vScore[1]);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
if (m_useCVSetControlLRIfCVExists)
|
||||
{
|
||||
if (m_useEvalCriterionControlLR)
|
||||
if (m_useEvalCriterionControlLR && vScore.size() > 1)
|
||||
lrControlCriterion = vScore[1];
|
||||
else
|
||||
lrControlCriterion = vScore[0]; //the first one is the training criterion.
|
||||
}
|
||||
lrControlCriterion = vScore[0]; // the first one is the training criterion
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// broadcast epochCriterion to make sure each processor will have the same learning rate schedule
|
||||
if ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && (g_mpi->NumNodesInUse() > 1))
|
||||
|
@ -906,8 +917,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else
|
||||
{
|
||||
avgCriterion = ((epochsSinceLastLearnRateAdjust - 1 - epochsNotCountedInAvgCriterion) *
|
||||
avgCriterion + lrControlCriterion) /
|
||||
(epochsSinceLastLearnRateAdjust - epochsNotCountedInAvgCriterion);
|
||||
avgCriterion + lrControlCriterion) /
|
||||
(epochsSinceLastLearnRateAdjust - epochsNotCountedInAvgCriterion);
|
||||
}
|
||||
|
||||
if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch &&
|
||||
|
|
|
@ -24,10 +24,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
class SimpleEvaluator
|
||||
{
|
||||
protected:
|
||||
|
||||
public:
|
||||
|
||||
SimpleEvaluator(ComputationNetworkPtr net, const size_t numMBsToShowResult = 100, const int traceLevel = 0)
|
||||
: m_net(net), m_numMBsToShowResult(numMBsToShowResult), m_traceLevel(traceLevel)
|
||||
{
|
||||
|
@ -68,12 +65,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
//initialize eval results
|
||||
// initialize eval results
|
||||
std::vector<double> evalResults;
|
||||
for (int i = 0; i < evalNodes.size(); i++)
|
||||
evalResults.push_back((double)0);
|
||||
|
||||
//prepare features and labels
|
||||
// prepare features and labels
|
||||
auto & featureNodes = m_net->FeatureNodes();
|
||||
auto & labelNodes = m_net->LabelNodes();
|
||||
|
||||
|
@ -83,7 +80,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (size_t i = 0; i < labelNodes.size(); i++)
|
||||
inputMatrices[labelNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(labelNodes[i])->FunctionValues();
|
||||
|
||||
//evaluate through minibatches
|
||||
// evaluate through minibatches
|
||||
size_t totalEpochSamples = 0;
|
||||
size_t numMBsRun = 0;
|
||||
size_t actualMBSize = 0;
|
||||
|
@ -102,14 +99,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
|
||||
|
||||
//for now since we share the same label masking flag we call this on one node only
|
||||
//Later, when we apply different labels on different nodes
|
||||
//we need to add code to call this function multiple times, one for each criteria node
|
||||
// for now since we share the same label masking flag we call this on one node only
|
||||
// Later, when we apply different labels on different nodes
|
||||
// we need to add code to call this function multiple times, one for each criteria node
|
||||
size_t numSamplesWithLabel = m_net->GetNumSamplesWithLabel(actualMBSize);
|
||||
for (int i = 0; i < evalNodes.size(); i++)
|
||||
{
|
||||
m_net->Evaluate(evalNodes[i]);
|
||||
evalResults[i] += (double)evalNodes[i]->Get00Element(); //criterionNode should be a scalar
|
||||
evalResults[i] += (double)evalNodes[i]->Get00Element(); // criterionNode should be a scalar
|
||||
}
|
||||
|
||||
totalEpochSamples += numSamplesWithLabel;
|
||||
|
@ -132,8 +129,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
/// call DataEnd to check if end of sentence is reached
|
||||
/// datareader will do its necessary/specific process for sentence ending
|
||||
// call DataEnd to check if end of sentence is reached
|
||||
// datareader will do its necessary/specific process for sentence ending
|
||||
dataReader->DataEnd(endDataSentence);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче