fixed SGD logging to not output the same criterion multiple times

Frank Seide 2015-11-30 19:07:41 -08:00
Parent efafd7c795
Commit ea87db0cad
5 changed files with 38 additions and 69 deletions

View file

@@ -422,6 +422,7 @@ void BatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordType & re
const LabelInfo& labelIn = m_labelInfo[labelInfoIn];
const LabelInfo& labelOut = m_labelInfo[labelInfoOut];
fprintf(stderr, "BatchLUSequenceReader: Input file is %ls\n", m_file.c_str());
m_parser.ParseInit(m_file.c_str(), labelIn.dim, labelOut.dim, labelIn.beginSequence, labelIn.endSequence, labelOut.beginSequence, labelOut.endSequence, mUnkStr);
mRequestedNumParallelSequences = readerConfig(L"nbruttsineachrecurrentiter", (size_t)1);

View file

@@ -414,29 +414,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
//void ComputeInputPartialMap(const size_t inputIndex)
//{
// if (inputIndex > 1)
// InvalidArgument("LookupTable operation only takes two inputs.");
//
// //DEVICEID_TYPE input1DeviceId = Inputs(1)->FunctionValues().GetDeviceId();
// //DEVICEID_TYPE input0DeviceId = Inputs(0)->FunctionValues().GetDeviceId();
// //Inputs(1)->FunctionValues().TransferFromDeviceToDevice(input1DeviceId, input0DeviceId);
//
// if (inputIndex == 0) //left derivative
// {
// ComputeInputPartialLeft(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues());
// }
// else //right derivative
// {
// ComputeInputPartialRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues());
// }
// //Inputs(1)->FunctionValues().TransferFromDeviceToDevice(input0DeviceId, input1DeviceId);
//}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & t) override
{
//if (t.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (inputIndex == 0) // left derivative (embedding matrix)
{
// This is a reduction operation, hence we need to mask out gaps.
@@ -501,18 +480,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto input1Reshaped = input1.Reshaped(rows1 / wordsInEachSample, cols1 * wordsInEachSample);
//DEVICEID_TYPE input1DeviceId = input1.GetDeviceId();
//DEVICEID_TYPE input0DeviceId = input0.GetDeviceId();
//input1.TransferFromDeviceToDevice(input1DeviceId, input0DeviceId);
auto functionValuesReshaped = functionValues.Reshaped(input0.GetNumRows(), input1Reshaped.GetNumCols());
functionValuesReshaped.AssignProductOf(input0, false, input1Reshaped, false);
//size_t rows = functionValues.GetNumRows();
//functionValues.Reshape(rows * wordsInEachSample, cols1);
//input1.TransferFromDeviceToDevice(input0DeviceId, input1DeviceId);
//input1.Reshape(rows1, cols1);
}
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override

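Aside on the LookupTableNode hunk above: because the Reshaped calls operate on column-major storage, turning a column of wordsInEachSample stacked one-hot vectors into one column per word costs nothing, and the whole lookup collapses into a single matrix product E * X. A minimal self-contained sketch of that equivalence (illustrative names and values, not CNTK code):

#include <cstdio>
#include <vector>

int main()
{
    const size_t vocab = 4, embedDim = 2, wordsPerSample = 2, samples = 1;

    // embedding matrix E: embedDim x vocab, column-major (column k = embedding of word k)
    std::vector<double> E = { 0.1, 0.2,   0.3, 0.4,   0.5, 0.6,   0.7, 0.8 };

    // one input column stacking two one-hot word vectors (words 2 and 0);
    // reinterpreting it as (vocab x wordsPerSample*samples) is a no-op on the
    // column-major buffer -- that is what the Reshaped calls above exploit
    std::vector<double> X = { 0, 0, 1, 0,   1, 0, 0, 0 };

    // Y = E * X, giving one embedding column per word
    const size_t cols = wordsPerSample * samples;
    std::vector<double> Y(embedDim * cols, 0.0);
    for (size_t c = 0; c < cols; c++)
        for (size_t k = 0; k < vocab; k++)
            for (size_t r = 0; r < embedDim; r++)
                Y[c * embedDim + r] += E[k * embedDim + r] * X[c * vocab + k];

    for (size_t c = 0; c < cols; c++)
        printf("word %zu -> (%g, %g)\n", c, Y[c * embedDim], Y[c * embedDim + 1]);
    return 0; // prints the embedding columns of words 2 and 0
}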
View file

@@ -42,7 +42,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_timeStep = 1;
CreateMatrixIfNull(m_functionValues);
SetDims(row_size, col_size);
//m_delayedActivation.Resize(row_size, col_size); // TODO: relevance of col_size? Why not timeStep?
m_isHistoryCarryOverManagedExternally = false; // used for PairNetworkNode/PastValueNode combination
}
protected:
@@ -61,10 +60,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_timeStep = (int)timeStep;
m_functionValues->SetValue(m_initialActivationValue);
//m_delayedActivation.SetValue(m_initialActivationValue);
//m_gradientValues->Resize(row_size, col_size);
//m_gradientValues->SetValue(0.0f);
}
DelayedValueNodeBase(const ScriptableObjects::IConfigRecordPtr configp) :
DelayedValueNodeBase(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"defaultHiddenActivation"), configp->Get(L"rows"), configp->Get(L"cols"), configp->Get(L"timeStep"))
@@ -303,9 +298,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
out.SetValue(inp);
}
//MaskMissingValuesColumnsToZero(t); // fix gaps if any --TODO: make this take a FrameRange
// TODO: why is masking needed here? We should never carry over data from those into valid regions, right?
}
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
@@ -314,7 +306,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// this function is only used for PairNetworkNode (on PastValueNode)
// BUGBUG: Need to transfer the layout as well. PairNetworkNod will go away.
// BUGBUG: Need to transfer the layout as well. PairNetworkNode will go away.
bool GetHistory(Matrix<ElemType>& hist, bool)
{
DEVICEID_TYPE device = hist.GetDeviceId();
@@ -375,7 +367,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep; \
using Base::m_pShiftedMBLayout; using Base::m_isHistoryCarryOverManagedExternally;
// =======================================================================
// -----------------------------------------------------------------------
// PastValueNode (input) -- delay node
// TODO: Can this just be a typedef?

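For orientation on the DelayedValueNodeBase hunks above: the node implements a time delay, and the masking TODO exists because in a minibatch of parallel sequences the gap columns must not leak into valid frames. A single-sequence sketch of the core semantics (hypothetical helper, not the CNTK implementation):

#include <cstdio>
#include <vector>

// output[t] = input[t - timeStep]; frames that would reach before the
// start of the sequence are filled with the initial activation value
std::vector<double> PastValue(const std::vector<double>& input,
                              int timeStep, double initialActivation)
{
    std::vector<double> output(input.size());
    for (int t = 0; t < (int)input.size(); t++)
        output[t] = (t >= timeStep) ? input[t - timeStep] : initialActivation;
    return output;
}

int main()
{
    auto out = PastValue({ 1, 2, 3, 4 }, /*timeStep=*/1, /*initialActivation=*/0.1);
    for (double v : out)
        printf("%g ", v); // prints: 0.1 1 2 3
    printf("\n");
}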
View file

@@ -827,7 +827,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
timer.Stop();
double epochTime = timer.ElapsedSeconds();
if (m_useEvalCriterionControlLR)
if (m_useEvalCriterionControlLR && epochEvalErrors.size() > 0)
{
lrControlCriterion = epochEvalErrors[0];
}
@@ -840,12 +840,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
"Finished Epoch[%2d of %d]: [Training Set] TrainLossPerSample = %.8g; ",
i + 1, (int) m_maxEpochs, epochCriterion);
if (epochEvalErrors.size() == 1)
if (epochEvalErrors.size() == 0) // no eval criterion, only train criterion itself
{
fprintf(stderr,
"EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.10g; EpochTime=%.8g\n",
"Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
learnRatePerSample, epochTime);
m_lastFinishedEpochEvalErr = epochCriterion;
}
else if (epochEvalErrors.size() == 1)
{
fprintf(stderr,
"EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
epochEvalErrors[0], learnRatePerSample, epochTime);
m_lastFinishedEpochEvalErr = epochEvalErrors[0];
m_lastFinishedEpochEvalErr = epochEvalErrors.back();
}
else
{
@@ -853,13 +860,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (size_t j = 0; j < epochEvalErrors.size(); j++)
{
fprintf(stderr, "[%lu]=%.8g; ", j, epochEvalErrors[j]);
m_lastFinishedEpochEvalErr = epochEvalErrors[j];
}
m_lastFinishedEpochEvalErr = epochEvalErrors.back();
fprintf(stderr, "Ave LearnRatePerSample = %.10g; Epoch Time=%.8g\n",
fprintf(stderr, "Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
learnRatePerSample, epochTime);
// TODO: why these extra log messages here and not for 1 eval criterion?
fprintf(stderr, "Finished Epoch[%2d of %d]: Criterion Node [%ls] Per Sample = %.8g\n",
i + 1, (int) m_maxEpochs, criterionNodes[0]->NodeName().c_str(), epochCriterion);
@@ -876,22 +883,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
SimpleEvaluator<ElemType> evalforvalidation(net);
vector<wstring> cvSetTrainAndEvalNodes;
cvSetTrainAndEvalNodes.push_back(criterionNodes[0]->NodeName());
cvSetTrainAndEvalNodes.push_back(evaluationNodes[0]->NodeName());
if (criterionNodes.size() > 0)
cvSetTrainAndEvalNodes.push_back(criterionNodes[0]->NodeName());
if (evaluationNodes.size() > 0)
cvSetTrainAndEvalNodes.push_back(evaluationNodes[0]->NodeName());
vector<double> vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i]);
fprintf(stderr, "Finished Epoch[%2d of %d]: [Validation Set] TrainLossPerSample = %.8g; EvalErrPerSample = %.8g\n",
i + 1, (int) m_maxEpochs, vScore[0], vScore[1]);
fprintf(stderr, "Finished Epoch[%2d of %d]: [Validation Set] TrainLossPerSample = %.8g", i + 1, (int) m_maxEpochs, vScore[0]);
if (vScore.size() > 1)
fprintf(stderr, "; EvalErrPerSample = %.8g", vScore[1]);
fprintf(stderr, "\n");
if (m_useCVSetControlLRIfCVExists)
{
if (m_useEvalCriterionControlLR)
if (m_useEvalCriterionControlLR && vScore.size() > 1)
lrControlCriterion = vScore[1];
else
lrControlCriterion = vScore[0]; //the first one is the training criterion.
}
lrControlCriterion = vScore[0]; // the first one is the training criterion
}
}
}
// broadcast epochCriterion to make sure each processor will have the same learning rate schedule
if ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && (g_mpi->NumNodesInUse() > 1))
@@ -906,8 +917,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else
{
avgCriterion = ((epochsSinceLastLearnRateAdjust - 1 - epochsNotCountedInAvgCriterion) *
avgCriterion + lrControlCriterion) /
(epochsSinceLastLearnRateAdjust - epochsNotCountedInAvgCriterion);
avgCriterion + lrControlCriterion) /
(epochsSinceLastLearnRateAdjust - epochsNotCountedInAvgCriterion);
}
if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch &&

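The substance of this SGD.cpp change, condensed into a free function for readability (a sketch, not the actual member code, which lives inline in the epoch loop): the epoch summary now branches on the number of eval criteria, prints each criterion exactly once, and takes m_lastFinishedEpochEvalErr from the last eval error only.

#include <cstdio>
#include <vector>

void LogEpochSummary(int epoch, int maxEpochs, double trainLoss,
                     const std::vector<double>& evalErrors,
                     double learnRate, double epochTime,
                     double& lastFinishedEpochEvalErr)
{
    fprintf(stderr, "Finished Epoch[%2d of %d]: [Training Set] TrainLossPerSample = %.8g; ",
            epoch, maxEpochs, trainLoss);
    if (evalErrors.empty()) // no eval criterion: fall back to the train criterion
        lastFinishedEpochEvalErr = trainLoss;
    else if (evalErrors.size() == 1)
    {
        fprintf(stderr, "EvalErrPerSample = %.8g; ", evalErrors[0]);
        lastFinishedEpochEvalErr = evalErrors.back();
    }
    else // several eval criteria: print each one once, indexed
    {
        for (size_t j = 0; j < evalErrors.size(); j++)
            fprintf(stderr, "[%zu]=%.8g; ", j, evalErrors[j]);
        lastFinishedEpochEvalErr = evalErrors.back();
    }
    fprintf(stderr, "Ave LearnRatePerSample = %.6g; Epoch Time=%.6g\n",
            learnRate, epochTime);
}

int main()
{
    double last = 0;
    LogEpochSummary(1, 10, 0.9, { 0.25, 0.31 }, 0.008, 12.5, last); // last == 0.31
}

Separately, the re-indented avgCriterion update at the end of this file is a running mean of the LR-control criterion: with n = epochsSinceLastLearnRateAdjust and k = epochsNotCountedInAvgCriterion, it computes

    avgCriterion_n = ((n - 1 - k) * avgCriterion_(n-1) + c_n) / (n - k)

which unrolls to the plain average of the criteria from epoch k+1 through n.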
View file

@@ -24,10 +24,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class SimpleEvaluator
{
protected:
public:
SimpleEvaluator(ComputationNetworkPtr net, const size_t numMBsToShowResult = 100, const int traceLevel = 0)
: m_net(net), m_numMBsToShowResult(numMBsToShowResult), m_traceLevel(traceLevel)
{
@@ -68,12 +65,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
//initialize eval results
// initialize eval results
std::vector<double> evalResults;
for (int i = 0; i < evalNodes.size(); i++)
evalResults.push_back((double)0);
//prepare features and labels
// prepare features and labels
auto & featureNodes = m_net->FeatureNodes();
auto & labelNodes = m_net->LabelNodes();
@@ -83,7 +80,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (size_t i = 0; i < labelNodes.size(); i++)
inputMatrices[labelNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(labelNodes[i])->FunctionValues();
//evaluate through minibatches
// evaluate through minibatches
size_t totalEpochSamples = 0;
size_t numMBsRun = 0;
size_t actualMBSize = 0;
@@ -102,14 +99,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
//for now since we share the same label masking flag we call this on one node only
//Later, when we apply different labels on different nodes
//we need to add code to call this function multiple times, one for each criteria node
// for now, since we share the same label masking flag, we call this on one node only;
// later, when we apply different labels on different nodes,
// we need to add code to call this function multiple times, once for each criterion node
size_t numSamplesWithLabel = m_net->GetNumSamplesWithLabel(actualMBSize);
for (int i = 0; i < evalNodes.size(); i++)
{
m_net->Evaluate(evalNodes[i]);
evalResults[i] += (double)evalNodes[i]->Get00Element(); //criterionNode should be a scalar
evalResults[i] += (double)evalNodes[i]->Get00Element(); // criterionNode should be a scalar
}
totalEpochSamples += numSamplesWithLabel;
@@ -132,8 +129,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
/// call DataEnd to check if end of sentence is reached
/// datareader will do its necessary/specific process for sentence ending
// call DataEnd to check whether the end of a sentence has been reached;
// the data reader will do its necessary/reader-specific processing for the sentence ending
dataReader->DataEnd(endDataSentence);
}
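The evaluation loop in this file reduces to the following accumulation pattern (numbers made up for illustration; the real code pulls minibatches from the reader and calls m_net->Evaluate on each node): every eval node yields a scalar criterion summed over the minibatch, and dividing the running sums by the number of labeled samples gives the per-sample errors reported at the end.

#include <cstdio>
#include <vector>

int main()
{
    // hypothetical per-minibatch scalar criteria for two eval nodes
    std::vector<std::vector<double>> mbCriteria = { { 12.0, 3.0 },
                                                    { 10.0, 2.5 } };
    std::vector<size_t> mbSamples = { 64, 64 }; // labeled samples per minibatch

    std::vector<double> evalResults(2, 0.0);
    size_t totalEpochSamples = 0;
    for (size_t mb = 0; mb < mbCriteria.size(); mb++)
    {
        for (size_t i = 0; i < evalResults.size(); i++)
            evalResults[i] += mbCriteria[mb][i]; // Get00Element(): criterion is a scalar
        totalEpochSamples += mbSamples[mb];
    }
    for (size_t i = 0; i < evalResults.size(); i++)
        printf("EvalErrPerSample[%zu] = %g\n", i, evalResults[i] / totalEpochSamples);
}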