cleaned up some dead code related to logging
This commit is contained in:
Родитель
39038a4138
Коммит
16b606c711
|
@ -36,6 +36,7 @@ public:
|
|||
if (freeMatrix == nullptr || freeMatrix->GetMatrixType() == SPARSE)
|
||||
LogicError("MatrixPool::Release: freeMatrix should not be null or sparse.");
|
||||
//#define SUPRESS_MEMSHARING // #define this to disable memory sharing through this structure
|
||||
// TODO: Make this a runtime option.
|
||||
#ifndef SUPRESS_MEMSHARING
|
||||
vector<shared_ptr<Matrix<ElemType>>>& releasedMatrices = GetReleasedMatrices<ElemType>();
|
||||
#ifdef _DEBUG
|
||||
|
|
|
@ -115,7 +115,6 @@ private:
|
|||
size_t beta = reset ? 0 : 1;
|
||||
size_t numSamples = GetNumSamples(nodes[i], legacyNumSamples);
|
||||
|
||||
#if 1
|
||||
// For criterion nodes that emit criteria per frame, we will at this point
|
||||
// do masking and an implicit reduction.
|
||||
|
||||
|
@ -128,8 +127,6 @@ private:
|
|||
shape.NarrowTo(1, i, i + 1); // narrow to the single element that corresponds to the accumulator value
|
||||
auto criterionAccumulator = TensorView<ElemType>(m_aggregateCriterionValues, shape);
|
||||
|
||||
//fprintf(stderr, "Accumulating %ls, legNumSamp = %d, numSamp = %d\n", nodes[i]->NodeName().c_str(), (int)legacyNumSamples, (int)numSamples);
|
||||
//node->As<ComputationNode<ElemType>>()->Value().Print(msra::strfun::strprintf("crit[%d] m_value", (int)i).c_str());
|
||||
if (numSamples > 0) // (if MB is empty, matrix may not have the correct row dimension)
|
||||
{
|
||||
auto criterionValue = node->As<ComputationNode<ElemType>>()->ValueTensorFor(SIZE_MAX, fr);
|
||||
|
@ -138,23 +135,7 @@ private:
|
|||
// If count is zero, we lazily consider the numerator as zero as well.
|
||||
criterionAccumulator.DoCopyOf(m_aggregateSampleCounts[i] ? (float)beta : 0, criterionValue, 1);
|
||||
}
|
||||
//m_aggregateCriterionValues->Print(msra::strfun::strprintf("crit[%d] m_aggregateCriterionValues", (int)i).c_str());
|
||||
m_aggregateSampleCounts[i] = m_aggregateSampleCounts[i] * beta + numSamples;
|
||||
#else
|
||||
// temp solution until we add TensorView reduction
|
||||
if (beta == 0)
|
||||
{
|
||||
Matrix<ElemType>::AssignElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(),
|
||||
0, 0, *m_aggregateCriterionValues, 0, i);
|
||||
m_aggregateSampleCounts[i] = numSamples;
|
||||
}
|
||||
else if (numSamples > 0) // avoid unnecessary GPU access
|
||||
{
|
||||
Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(),
|
||||
0, 0, *m_aggregateCriterionValues, 0, i);
|
||||
m_aggregateSampleCounts[i] += numSamples;
|
||||
}
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
// get the number of samples
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "DataReaderHelpers.h"
|
||||
#include "MatrixQuantizerImpl.h"
|
||||
#ifdef CNTK_PARALLEL_TRAINING_SUPPORT
|
||||
static inline bool operator==(const std::pair<double,size_t>& a, double b) { assert(b==0); return a.first == b; }
|
||||
//static inline bool operator==(const std::pair<double,size_t>& a, double b) { assert(b==0); return a.first == b; }
|
||||
// ^^ workaround until this line in AggregateGradientsImpl() gets updated: assert(headerCPU->evalErrors[i] == 0);
|
||||
#include "AllReduceDistGradAggregator.h"
|
||||
#include "BlockMomentumSGD.h"
|
||||
|
@ -483,14 +483,9 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
|
|||
else
|
||||
lrControlCriterion = epochCriterion.Average();
|
||||
|
||||
#if 1
|
||||
LOGPRINTF(stderr, "Finished Epoch[%2d of %d]: [Training] ", i + 1, (int)m_maxEpochs);
|
||||
epochCriterion.LogCriterion(criterionNodes[0]->NodeName());
|
||||
#else
|
||||
LOGPRINTF(stderr,
|
||||
"Finished Epoch[%2d of %d]: [Training Set] TrainLossPerSample = %.8g; TotalSamplesSeen = %d; ",
|
||||
i + 1, (int)m_maxEpochs, epochCriterion.Average(), (int)totalTrainingSamplesSeen);
|
||||
#endif
|
||||
|
||||
m_lastFinishedEpochTrainLoss = epochCriterion.Average();
|
||||
for (size_t j = 0; j < epochEvalErrors.size(); j++)
|
||||
epochEvalErrors[j].LogCriterion(evaluationNodes[j]->NodeName());
|
||||
|
@ -1078,8 +1073,6 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
}
|
||||
|
||||
// epochCriterion aggregates over entire epoch, but we only show difference to last time we logged
|
||||
// BUGBUG: How does this work if useGradientAggregation? epochCriterion has not been set.
|
||||
//double trainLossSinceLastLogged = (trainSamplesSinceLastLogged != 0) ? ((epochCriterion - epochCriterionLastLogged) / trainSamplesSinceLastLogged) : 0.0;
|
||||
EpochCriterion epochCriterionSinceLastLogged = epochCriterion - epochCriterionLastLogged;
|
||||
let trainLossSinceLastLogged = epochCriterionSinceLastLogged.Average(); // TODO: Check whether old trainSamplesSinceLastLogged matches this ^^ difference
|
||||
let trainSamplesSinceLastLogged = (int)epochCriterionSinceLastLogged.second;
|
||||
|
@ -1113,10 +1106,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
if (epochNumber > 0 || (int)epochSize > 0) // got anything? --TODO: why cast epochSize to (int) for this comparison?
|
||||
fprintf(stderr, (", %2." + to_string(mbProgNumPrecision) + "f%%").c_str(), mbProg * 100); // --TODO: use a * format?
|
||||
fprintf(stderr, "]: ");
|
||||
//fprintf(stderr, "SamplesSeen = %d; ", (int)trainSamplesSinceLastLogged);
|
||||
epochCriterionSinceLastLogged.LogCriterion(criterionNodes[0]->NodeName());
|
||||
//fprintf(stderr, ("%ls = " + GeneratePaddedFloatOrExpFormat(11, 8, trainLossSinceLastLogged) + " * %d").c_str(),
|
||||
// criterionNodes[0]->NodeName().c_str(), trainLossSinceLastLogged, trainSamplesSinceLastLogged);
|
||||
for (size_t i = 0; i < epochEvalErrors.size(); i++)
|
||||
(epochEvalErrors[i] - epochEvalErrorsLastLogged[i]).LogCriterion(evaluationNodes[i]->NodeName());
|
||||
|
||||
|
@ -1280,7 +1270,7 @@ bool SGD<ElemType>::PreCompute(ComputationNetworkPtr net,
|
|||
|
||||
// trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0 , requestDataSize);
|
||||
// trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0 , m_epochSize); // only based on one epoch
|
||||
// [1/12/2015 erw] to support large dataset, we usually partition whole dataset into several epoch's,
|
||||
// To support large datasets, we usually partition the whole dataset into several epochs,
|
||||
// so we need to use all the data to do precomputing
|
||||
if (m_useAllDataForPreComputedNode) // using all the data
|
||||
trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0);
|
||||
|
@ -1757,27 +1747,6 @@ void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
template <class ElemType>
|
||||
int SGD<ElemType>::SGDTrace(FILE* __restrict __stream, bool isPrependTimestamp, const char* __restrict __format, ...)
|
||||
{
|
||||
int result = 0;
|
||||
if (m_traceLevel > 0)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, __format);
|
||||
if (isPrependTimestamp)
|
||||
{
|
||||
PREPENDTS(__stream);
|
||||
}
|
||||
|
||||
result = vfprintf(__stream, __format, args);
|
||||
va_end(args);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ElemType>
|
||||
void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int traceLevel)
|
||||
{
|
||||
|
|
|
@ -310,7 +310,6 @@ public:
|
|||
// TODO: The next few do not belong in SGD any more than the network or reader we operate on. Either move network and reader in here, or move these out.
|
||||
m_modelPath((const wstring&) configSGD(L"modelPath")),
|
||||
m_keepCheckPointFiles(configSGD(L"keepCheckPointFiles", false)),
|
||||
// m_validateAfterModelReloading(configSGD(L"validateAfterModelReloading", true)),
|
||||
m_trainCriterionNodeName((const wstring&) configSGD(L"trainCriterionNodeName", L"")),
|
||||
m_evalCriterionNodeName ((const wstring&) configSGD(L"evalCriterionNodeName", L"")),
|
||||
m_traceNodeNamesReal (configSGD(L"traceNodeNamesReal", ConfigRecordType::Array(stringargvector()))),
|
||||
|
@ -546,7 +545,6 @@ public:
|
|||
protected:
|
||||
std::wstring m_modelPath;
|
||||
bool m_keepCheckPointFiles;
|
||||
// bool m_validateAfterModelReloading; // TODO: remove this. Why would one not validate a model?
|
||||
|
||||
std::wstring m_trainCriterionNodeName;
|
||||
std::wstring m_evalCriterionNodeName;
|
||||
|
|
Загрузка…
Ссылка в новой задаче