From 018a812f348a4dbad66020870a39185010a60599 Mon Sep 17 00:00:00 2001
From: Qiwei Ye
Date: Mon, 10 Oct 2016 20:50:54 +0800
Subject: [PATCH] reverting simple evaluator for compatibility, initializing
 simple evaluator without m_mpi when using ASGD

---
 Source/SGDLib/SGD.cpp           | 10 +++++++---
 Source/SGDLib/SimpleEvaluator.h |  4 ++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp
index 568e94154..c7684516f 100644
--- a/Source/SGDLib/SGD.cpp
+++ b/Source/SGDLib/SGD.cpp
@@ -589,7 +589,11 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
         if (validationSetDataReader != trainSetDataReader && validationSetDataReader != nullptr)
         {
-            SimpleEvaluator<ElemType> evalforvalidation(net, m_mpi, m_enableDistributedMBReading);
+            // TODO(dataASGD): make the evaluator nondistributed when using asynchronous data parallelism (ASGD).
+            // NOTE(review): the MPI handle is chosen with a conditional expression in a single declaration;
+            // declaring the evaluator inside if/else branches would destroy it before Evaluate() is called below.
+            SimpleEvaluator<ElemType> evalforvalidation(
+                net, UsingAsyncGradientAggregation(i + 1) ? nullptr : m_mpi, m_enableDistributedMBReading);
             vector<wstring> cvSetTrainAndEvalNodes;
             if (criterionNodes.size() > 0)
             {
@@ -601,8 +605,8 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
             }
             // BUGBUG: We should not use the training MB size. The training MB size is constrained by both convergence and memory. Eval is only constrained by memory.
-            // TODO(dataASGD): cross-validation should use a nondistributed reader when using asynchronous data parallelism
-            let vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i], UsingAsyncGradientAggregation(i));
+            // TODO(dataASGD): add an option so that cross-validation uses a nondistributed reader when using asynchronous data parallelism
+            let vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i]);
             LOGPRINTF(stderr, "Finished Epoch[%2d of %d]: [Validate] ", i + 1, (int)m_maxEpochs);
             for (size_t k = 0; k < vScore.size() /*&& k < 2*/; k++)
                 vScore[k].LogCriterion(cvSetTrainAndEvalNodes[k], /*addSemicolon=*/k + 1 < vScore.size());
diff --git a/Source/SGDLib/SimpleEvaluator.h b/Source/SGDLib/SimpleEvaluator.h
index 4e0589847..c6d50f891 100644
--- a/Source/SGDLib/SimpleEvaluator.h
+++ b/Source/SGDLib/SimpleEvaluator.h
@@ -48,7 +48,7 @@ public:
     }

     // returns evaluation node values per sample determined by evalNodeNames (which can include both training and eval criterion nodes)
-    vector<EpochCriterion> Evaluate(IDataReader* dataReader, const vector<wstring>& evalNodeNames, const size_t mbSize, const size_t testSize = requestDataSize, const bool useDataParallelASGD = false)
+    vector<EpochCriterion> Evaluate(IDataReader* dataReader, const vector<wstring>& evalNodeNames, const size_t mbSize, const size_t testSize = requestDataSize)
     {
         ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring);
@@ -107,7 +107,7 @@ public:

         std::vector<EpochCriterion> evalResultsLastLogged(evalResults.size(), EpochCriterion(0));

-        bool useParallelTrain = (m_mpi != nullptr) && !useDataParallelASGD;
+        bool useParallelTrain = (m_mpi != nullptr);
         bool useDistributedMBReading = useParallelTrain && m_enableDistributedMBReading && dataReader->SupportsDistributedMBRead();
         if (useDistributedMBReading)
             dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), inputMatrices.GetStreamDescriptions(), testSize);