From 8d78ad03f0fd556d5a2f83a3b21a85624ea5cf33 Mon Sep 17 00:00:00 2001
From: Clemens Marschner
Date: Thu, 22 Sep 2016 10:51:36 +0200
Subject: [PATCH 1/7] Add Dump output to SEQ nodes.

---
 .../ComputationNetworkEvaluation.cpp           | 13 +++++++++++--
 Source/ComputationNetworkLib/ComputationNode.h |  2 ++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
index fdcf8a89b..616a64f69 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
@@ -160,7 +160,7 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con
         }

         // more extreme tracing for the ultimate debugging experience. Make space on your disk.
-        if (node->GetEnvironmentPtr() && node->Environment().traceLevel >= 1000000) // very high number, since this spews like hell
+        if (node->HasEnvironmentPtr() && node->Environment().traceLevel >= 1000000) // very high number, since this spews like hell
             DumpNode<float>(node, /*dumpGradient=*/false) || DumpNode<double>(node, false);
     }
 }
@@ -178,7 +178,7 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con
         node->EndBackprop();

         // more extreme tracing for the ultimate debugging experience. Make space on your disk.
-        if (node->GetEnvironmentPtr() && node->Environment().traceLevel >= 1000000 && node->NeedsGradient()) // very high number, since this spews like hell
+        if (node->HasEnvironmentPtr() && node->Environment().traceLevel >= 1000000 && node->NeedsGradient()) // very high number, since this spews like hell
             DumpNode<float>(node, /*dumpGradient=*/true) || DumpNode<double>(node, true);
     }
 }
@@ -292,6 +292,15 @@ static bool DumpNode(ComputationNodeBasePtr nodep, bool dumpGradient)
             node->BumpEvalTimeStamp();
         }
     }
+
+    // more extreme tracing for the ultimate debugging experience. Make space on your disk.
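+    // (Note on the DumpNode<float>(...) || DumpNode<double>(...) idiom used here and above:
+    // DumpNode<ElemType> returns false when the node's element type does not match, so the
+    // short-circuiting '||' ensures exactly one of the two calls performs the actual dump.)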
+    for (auto& node : m_nestedNodes)
+    {
+        if (node->HasEnvironmentPtr() && node->Environment().traceLevel >= 1000000) // very high number, since this spews like hell
+        {
+            DumpNode<float>(node, /*dumpGradient=*/false) || DumpNode<double>(node, false);
+        }
+    }
 }

 /*virtual*/ void ComputationNetwork::SEQTraversalFlowControlNode::EndForwardProp() /*override*/
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index 64b439219..03a2e3974 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -646,6 +646,8 @@ public:
             LogicError("Environment: No environment has been set.");
         return *m_environment;
     }
+
+    bool HasEnvironmentPtr() const { return m_environment.get() != nullptr; }
     ComputationEnvironmentPtr GetEnvironmentPtr() const { return m_environment; }
     void SetEnvironment(ComputationEnvironmentPtr environment) { m_environment = environment; }

From 9cb329a0daa5a1d3543bbb36c94e33dbfa22ad9d Mon Sep 17 00:00:00 2001
From: "yuxiao.guo"
Date: Fri, 9 Sep 2016 16:41:14 +0800
Subject: [PATCH 2/7] Optimize the flow of post batch normalization statistics,
 and allow disabling regularization terms in batch normalization

---
 .../ResNet/ResNet_50_ndl_deprecated.cntk       |   4 +-
 Makefile                                       |   3 +-
 Source/ActionsLib/Actions.h                    |   4 +-
 Source/ActionsLib/EvalActions.cpp              |  58 ------
 Source/ActionsLib/TrainActions.cpp             |  44 +++++
 Source/CNTK/CNTK.cpp                           |   6 +-
 .../ComputationNetwork.h                       |  46 ++++-
 .../ComputationNetworkEvaluation.cpp           |  41 ----
 Source/ComputationNetworkLib/ComputationNode.h |   4 -
 .../InputAndParamNodes.h                       |  12 ++
 Source/ComputationNetworkLib/TrainingNodes.h   |  64 +++----
 Source/SGDLib/PostComputingActions.cpp         | 161 ++++++++++++++++
 Source/SGDLib/PostComputingActions.h           |  65 +++++++
 Source/SGDLib/SGD.cpp                          |  31 ++-
 Source/SGDLib/SGD.h                            |   5 +-
 Source/SGDLib/SGDLib.vcxproj                   |   4 +-
 Source/SGDLib/SGDLib.vcxproj.filters           |  27 +--
 Source/SGDLib/SimpleEvaluator.h                | 180 +----------------
 18 files changed, 384 insertions(+), 375 deletions(-)
 create mode 100644 Source/SGDLib/PostComputingActions.cpp
 create mode 100644 Source/SGDLib/PostComputingActions.h

diff --git a/Examples/Image/Miscellaneous/ImageNet/ResNet/ResNet_50_ndl_deprecated.cntk b/Examples/Image/Miscellaneous/ImageNet/ResNet/ResNet_50_ndl_deprecated.cntk
index c6478384d..6482e2afb 100644
--- a/Examples/Image/Miscellaneous/ImageNet/ResNet/ResNet_50_ndl_deprecated.cntk
+++ b/Examples/Image/Miscellaneous/ImageNet/ResNet/ResNet_50_ndl_deprecated.cntk
@@ -88,11 +88,11 @@ Train=[
 ]

 PBN=[
-    action="pbn"
+    action="bnstat"
     modelPath="$ModelDir$/ResNet_50"
     # Set minibatch size for testing.
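    # (itersPerNode below sets how many forward passes are used to estimate the statistics
    # of each BatchNormalization node; see DoBatchNormalizationStat in TrainActions.cpp.)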
     minibatchSize=256
-    iters=30
+    itersPerNode=30

     reader=[
         readerType="ImageReader"
diff --git a/Makefile b/Makefile
index 895d6be6c..6ea6062e4 100644
--- a/Makefile
+++ b/Makefile
@@ -439,7 +439,8 @@ EVAL:=eval

 SGDLIB_SRC=\
 	$(SOURCEDIR)/SGDLib/Profiler.cpp \
-	$(SOURCEDIR)/SGDLib/SGD.cpp
+	$(SOURCEDIR)/SGDLib/SGD.cpp \
+	$(SOURCEDIR)/SGDLib/PostComputingActions.cpp \

 EVAL_SRC=\
 	$(SOURCEDIR)/EvalDll/CNTKEval.cpp \
diff --git a/Source/ActionsLib/Actions.h b/Source/ActionsLib/Actions.h
index 2a1c83e9a..b991e1b95 100644
--- a/Source/ActionsLib/Actions.h
+++ b/Source/ActionsLib/Actions.h
@@ -42,11 +42,11 @@ template <class ElemType>
 void DoDumpNodes(const ConfigParameters& config);
 template <class ElemType>
 void DoEdit(const ConfigParameters& config);
+template <class ElemType>
+void DoBatchNormalizationStat(const ConfigParameters& config);

 // evaluation (EvalActions.cpp)
 template <class ElemType>
-void DoEvalBN(const ConfigParameters& config);
-template <class ElemType>
 void DoEval(const ConfigParameters& config);
 template <class ElemType>
 void DoCrossValidate(const ConfigParameters& config);
diff --git a/Source/ActionsLib/EvalActions.cpp b/Source/ActionsLib/EvalActions.cpp
index cbba55207..b2c8c1f1b 100644
--- a/Source/ActionsLib/EvalActions.cpp
+++ b/Source/ActionsLib/EvalActions.cpp
@@ -78,62 +78,6 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
     eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize);
 }

-// ===========================================================================
-// DoEvalBNBase() - implements CNTK "pbn" command
-// ===========================================================================
-
-template <class ElemType>
-static void DoEvalBNBase(const ConfigParameters& config, IDataReader& reader)
-{
-    // DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    wstring exportPath = modelPath + L".PBN";
-    intargvector mbSize = minibatchSize;
-
-    int iters = config(L"iters", 240);
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-    size_t firstMBsToShowResult = config(L"firstMBsToShowResult", "0");
-    size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
-    size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
-
-    bool enableDistributedMBReading = config(L"distributedMBReading", false);
-
-    vector<wstring> evalNodeNamesVector;
-
-    let net = GetModelFromConfig<ConfigParameters, ElemType>(config, L"evalNodeNames", evalNodeNamesVector);
-
-    // set tracing flags
-    net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())),
-                           config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())),
-                           config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector())));
-
-    SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), enableDistributedMBReading, numMBsToShowResult,
-                                   firstMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
-    eval.EvaluateBN(&reader, evalNodeNamesVector, exportPath, mbSize[0], iters, epochSize);
-}
-
-template <class ElemType>
-void DoEvalBN(const ConfigParameters& config)
-{
-    // evaluate batch normalization mean and various
-    ConfigParameters readerConfig(config(L"reader"));
-
-    // Should trace level to zero in Post BN?
-    //readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader evaBNDataReader(readerConfig);
-
-    DoEvalBNBase<ElemType>(config, evaBNDataReader);
-}
-
 template <class ElemType>
 void DoEval(const ConfigParameters& config)
 {
@@ -146,8 +90,6 @@ void DoEval(const ConfigParameters& config)
     DoEvalBase<ElemType>(config, testDataReader);
 }

-template void DoEvalBN<double>(const ConfigParameters& config);
-template void DoEvalBN<float>(const ConfigParameters& config);
 template void DoEval<double>(const ConfigParameters& config);
 template void DoEval<float>(const ConfigParameters& config);
diff --git a/Source/ActionsLib/TrainActions.cpp b/Source/ActionsLib/TrainActions.cpp
index eaaec303e..97b818c67 100644
--- a/Source/ActionsLib/TrainActions.cpp
+++ b/Source/ActionsLib/TrainActions.cpp
@@ -26,6 +26,7 @@
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
 #include "BrainScriptParser.h"
+#include "PostComputingActions.h"

 #include
 #include
@@ -235,3 +236,46 @@ void DoEdit(const ConfigParameters& config)

 template void DoEdit<double>(const ConfigParameters& config);
 template void DoEdit<float>(const ConfigParameters& config);
+
+// ===========================================================================
+// DoBatchNormalizationStat() - implements CNTK "bnstat" command
+// ===========================================================================
+
+template <class ElemType>
+void DoBatchNormalizationStat(const ConfigParameters& config)
+{
+    ConfigParameters readerConfig(config(L"reader"));
+    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
+
+    auto dataReader = make_shared<DataReader>(readerConfig);
+
+    int traceLevel = config(L"traceLevel", "0");
+    int itersPerNode = config(L"itersPerNode", 30);
+
+    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
+    intargvector mbSize = minibatchSize;
+
+    bool enableDistributedMBReading = config(L"enableDistributedMBReading", false);
+
+    wstring curModelPath = config(L"modelPath", L"");
+    wstring newModelPath = config(L"newModelPath", L"");
+    if (newModelPath == L"")
+    {
+        newModelPath = curModelPath + L".PBN";
+    }
+
+    std::vector<std::wstring> evalNodeNames;
+    let net = GetModelFromConfig<ConfigParameters, ElemType>(config, L"evalNodeNames", evalNodeNames);
+    // set tracing flags
+    net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())),
+                           config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())),
+                           config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector())));
+
+    PostComputingActions<ElemType> postComputingActions(net, MPIWrapper::GetInstance(), enableDistributedMBReading, traceLevel);
+
+    postComputingActions.BatchNormalizationStatistics(dataReader.get(), evalNodeNames, newModelPath, mbSize[0], itersPerNode);
+}
+
+template void DoBatchNormalizationStat<double>(const ConfigParameters& config);
+template void DoBatchNormalizationStat<float>(const ConfigParameters& config);
diff --git a/Source/CNTK/CNTK.cpp b/Source/CNTK/CNTK.cpp
index 9d07bb5f7..175713156 100644
--- a/Source/CNTK/CNTK.cpp
+++ b/Source/CNTK/CNTK.cpp
@@ -154,7 +154,7 @@ static void DisableLegacyUsage(const ConfigParameters& TopLevelConfig, const Con

 // When running in parallel with MPI, only commands in 'commandstoRunOnAllRanks' should
 // be run in parallel across multiple ranks.
// Others should only run on rank 0.
-const std::set<std::string> commandstoRunOnAllRanks = { "train", "trainRNN", "adapt", "test", "eval", "cv", "devtest", "pbn" };
+const std::set<std::string> commandstoRunOnAllRanks = { "train", "trainRNN", "adapt", "test", "eval", "cv", "devtest", "bnstat" };

 // process the command
 template <typename ElemType>
@@ -243,9 +243,9 @@ void DoCommands(const ConfigParameters& config, const shared_ptr<MPIWrapper>& mpi)
             LOGPRINTF(stderr, "CNTKCommandTrainEnd: %s\n", command[i].c_str());
             fullEpochsOffset += GetMaxEpochs(commandParams);
         }
-        else if (thisAction == "pbn")
+        else if (thisAction == "bnstat")
         {
-            DoEvalBN<ElemType>(commandParams);
+            DoBatchNormalizationStat<ElemType>(commandParams);
         }
         else if (thisAction == "adapt")
         {
diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h
index 9b072e6a6..7c934aef2 100644
--- a/Source/ComputationNetworkLib/ComputationNetwork.h
+++ b/Source/ComputationNetworkLib/ComputationNetwork.h
@@ -136,10 +136,6 @@ public:
     // main entry point for backprop
     void Backprop(const ComputationNodeBasePtr rootNode);

-    // partial forward entry
-    void ForwardProp(const ComputationNodeBasePtr rootNode, const ComputationNodeBasePtr startNode,
-                     const ComputationNodeBasePtr endNode);
-
     template <class NODESET> // version that takes multiple nodes
     void ForwardProp(const NODESET& nodes)
     {
@@ -678,6 +674,44 @@ public:
         return nodesWithType;
     }

+    // Get the eval nodes with the given names.
+    // If evalNodeNames is not specified, return all the default eval nodes and training criterion nodes.
+    std::vector<ComputationNodeBasePtr> GetEvalNodesWithName(const std::vector<wstring> evalNodeNames)
+    {
+        // determine nodes to evaluate
+        std::vector<ComputationNodeBasePtr> evalNodes;
+
+        set<ComputationNodeBasePtr> criteriaLogged; // (keeps track of duplicates so we don't double-log criteria)
+        if (evalNodeNames.size() == 0)
+        {
+            fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n");
+            if (EvaluationNodes().empty() && FinalCriterionNodes().empty())
+                InvalidArgument("There is no default evaluation node or training criterion specified in the network.");
+
+            for (const auto& node : EvaluationNodes())
+                if (criteriaLogged.insert(node).second)
+                    evalNodes.push_back(node);
+
+            for (const auto& node : FinalCriterionNodes())
+                if (criteriaLogged.insert(node).second)
+                    evalNodes.push_back(node);
+        }
+        else
+        {
+            for (int i = 0; i < evalNodeNames.size(); i++)
+            {
+                const auto& node = GetNodeFromName(evalNodeNames[i]);
+                if (!criteriaLogged.insert(node).second)
+                    continue;
+                if (node->GetSampleLayout().GetNumElements() != 1)
+                    InvalidArgument("Criterion nodes to evaluate must have dimension 1x1.");
+                evalNodes.push_back(node);
+            }
+        }
+
+        return evalNodes;
+    }
+
 public:
     // return list of nodes that require precomputation and not precomputed yet
     std::list<ComputationNodeBasePtr> GetNodesRequiringPreComputation(const ComputationNodeBasePtr& rootNode = nullptr, bool checkComputed = true);
@@ -1014,7 +1048,7 @@ protected:
         virtual const std::wstring OperationName() const override
         {
             return L"PARTraversalFlowControlNode";
-        } 
+        }
         virtual void BeginForwardProp() override
         {
         }
@@ -1038,8 +1072,6 @@ protected:
         virtual void AllocateGradientMatricesForInputs(MatrixPool& matrixPool);
         virtual void RequestMatricesBeforeBackprop(MatrixPool& matrixPool);
         virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool);
-
-        virtual void ForwardProp(const FrameRange&, const ComputationNodeBasePtr, const ComputationNodeBasePtr) override;

     public:
         // this special constructor constructs the top-level network node
diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
index 6501f948b..e830a35f5 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
@@ -79,17 +79,6 @@ void ComputationNetwork::Backprop(const ComputationNodeBasePtr rootNode) // trai
     GetNestedNetwork(rootNode)->Backprop(FrameRange(nullptr), true, true);
 }

-void ComputationNetwork::ForwardProp(const ComputationNodeBasePtr rootNode, const ComputationNodeBasePtr startNode, const ComputationNodeBasePtr endNode)
-{
-    VerifyIsCompiled("ForwardProp");
-
-    // traverse partial nodes as inputs
-    shared_ptr<FlowControlNode> network = dynamic_pointer_cast<FlowControlNode>(GetNestedNetwork(rootNode));
-    assert(network);
-
-    network->ForwardProp(FrameRange(nullptr), startNode, endNode);
-}
-
 void ComputationNetwork::FormNestedNetwork(const ComputationNodeBasePtr& rootNode)
 {
     if (m_nestedNetworks.find(rootNode) != m_nestedNetworks.end())
@@ -158,7 +147,6 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con
         }
     }
 }
-
 /*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::Backprop(const FrameRange& fr, bool childrenInThisLoop, bool childrenInOuterLoop) /*override*/
 {
     childrenInThisLoop, childrenInOuterLoop; // TODO: think through what these mean when coming from PAR mode
@@ -187,36 +175,7 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con
 /*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::ReleaseMatricesAfterBackprop(MatrixPool& matrixPool) /*override*/
 {
 }
-/*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::ForwardProp(const FrameRange & fr, ComputationNodeBasePtr startNode, ComputationNodeBasePtr endNode)
-{
-    // if start node is nullptr, forward will be enabled
-    bool enableForward = startNode ? false : true;
-    for (auto& node : m_nestedNodes)
-    {
-#if 0
-        if (dynamic_pointer_cast<ComputationNode<float>>(node))
-            dynamic_pointer_cast<ComputationNode<float>>(node)->DebugLogMinibatch();
-#endif
-        if (node->IsOutOfDateWrtInputs() && enableForward)
-        {
-            node->BeginForwardProp();
-            node->ForwardProp(fr.WithLayout(node->GetMBLayout()));
-            node->EndForwardProp();
-
-            node->BumpEvalTimeStamp();
-        }
-
-        if (node == startNode)
-        {
-            enableForward = true;
-        }
-        else if (node == endNode)
-        {
-            break;
-        }
-    }
-}

 // -----------------------------------------------------------------------
 // SEQTraversalFlowControlNode methods -- implements SEQ traversal (loop unrolling)
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index e441adfb2..b0444dece 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -1878,10 +1878,6 @@ public:
     virtual void DumpNodeInfo(const bool /*printValues*/, const bool /*printMetadata*/, File& fstream) const override {}
     virtual std::set<std::pair<const MatrixBase*, std::wstring>> GetMatrixInfo() const override { NOT_IMPLEMENTED; }

-    virtual void ForwardProp(const FrameRange&, const ComputationNodeBasePtr, const ComputationNodeBasePtr) { NOT_IMPLEMENTED; }
-
-    std::vector<ComputationNodeBasePtr> GetNestedNodes() { return m_nestedNodes; }
-
 protected: public: // needed in ComputationNetwork::FindInRecurrentLoops(), which really should be part of SEQTraversalFlowControlNode
     std::vector<ComputationNodeBasePtr> m_nestedNodes; // nodes tucked away in this node, in evaluation order
 };
diff --git a/Source/ComputationNetworkLib/InputAndParamNodes.h b/Source/ComputationNetworkLib/InputAndParamNodes.h
index 76496ccd6..95119d60e 100644
--- a/Source/ComputationNetworkLib/InputAndParamNodes.h
+++ b/Source/ComputationNetworkLib/InputAndParamNodes.h
@@ -37,6 +37,7 @@ public:
         MarkValueNonSharable();
         m_initString = L"fromValue"; // default init is with 0; typically overwritten
         m_initValue = 0;
+        m_regMultiplier = 1.0f; // enable reg in update by default
     }
     LearnableParameter(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& shape) :
         LearnableParameter(deviceId, name)
@@ -101,6 +102,14 @@ public:
     // called from CloneFunction(..., parameters="constant")
     virtual void FreezeParameters() override; // from IFreezable

+    // Sets the reg multiplier for a learnable node, affecting both L1Reg and L2Reg.
+    void SetRegMultiplier(float regMultiplier)
+    {
+        m_regMultiplier = regMultiplier;
+    }
+    // called from SGD UpdateWeights, to adjust the reg for each node
+    float GetRegMultiplier() const { return m_regMultiplier; }
+
 private:
     // init parameters for deferred initialization (which happens in Validate())
     std::wstring m_initString; // if non-empty then deferred initialization is needed. Gets cleared upon completion of deferred init.
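The multiplier added above is consumed in SGD::UpdateWeights (see the SGD.cpp hunk later in
this patch). A minimal sketch of the intended effect; the struct and names below are
illustrative stand-ins, not CNTK API:

    // A node's regMultiplier scales the global L1/L2 regularization weights:
    // it is 1.0f by default, and DisableRegInBatchNormalization() sets it to
    // 0.0f for the BN scale and bias parameters.
    struct ParamSketch
    {
        float m_regMultiplier = 1.0f;
        float GetRegMultiplier() const { return m_regMultiplier; }
    };

    double EffectiveRegWeight(double globalRegWeight, const ParamSketch& p)
    {
        // zero for BN scale/bias after DisableRegInBatchNormalization(), unchanged otherwise
        return globalRegWeight * p.GetRegMultiplier();
    }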
@@ -109,6 +118,9 @@ private:
     int m_initOutputRank;
     bool m_initOnCPUOnly;
     ElemType m_initValue;
+
+    // flags related to gradient update
+    float m_regMultiplier; // the multiplier used to adjust L1Reg and L2Reg for this learnable node
 };

 // -----------------------------------------------------------------------
diff --git a/Source/ComputationNetworkLib/TrainingNodes.h b/Source/ComputationNetworkLib/TrainingNodes.h
index b687d88f8..31bc26913 100644
--- a/Source/ComputationNetworkLib/TrainingNodes.h
+++ b/Source/ComputationNetworkLib/TrainingNodes.h
@@ -8,6 +8,7 @@
 #include "ComputationNode.h"
 #include "BatchNormalizationEngine.h"
 #include "RNGHandle.h"
+#include "InputAndParamNodes.h"

 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
@@ -1587,15 +1588,15 @@ class BatchNormalizationNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<5>, public IFreezable
 public:
     BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring& name) :
         Base(deviceId, name), m_spatial(false), m_normTimeConst(0), m_blendTimeConst(0), m_epsilon(0), m_useCntkEngine(true),
-        m_samplesSeen(0), m_imageLayoutKind(ImageLayoutKind::CHW), m_postBatchNormalization(false), m_swapNormTimeConst(0),
-        m_swapBlendTimeConst(0), m_convertRunningVariancePending(false)
+        m_samplesSeen(0), m_imageLayoutKind(ImageLayoutKind::CHW),
+        m_convertRunningVariancePending(false)
     {
     }
     BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring& name, bool spatial, double normalizationTimeConstant, double blendTimeConstant,
                            double epsilon, bool useCntkEngine, ImageLayoutKind imageLayoutKind) :
         Base(deviceId, name), m_spatial(spatial), m_normTimeConst(normalizationTimeConstant), m_blendTimeConst(blendTimeConstant),
-        m_epsilon(epsilon), m_useCntkEngine(useCntkEngine), m_imageLayoutKind(imageLayoutKind), m_samplesSeen(0), m_postBatchNormalization(false),
-        m_swapNormTimeConst(0), m_swapBlendTimeConst(0), m_convertRunningVariancePending(false)
+        m_epsilon(epsilon), m_useCntkEngine(useCntkEngine), m_imageLayoutKind(imageLayoutKind), m_samplesSeen(0),
+        m_convertRunningVariancePending(false)
     {
     }
     BatchNormalizationNode(const ScriptableObjects::IConfigRecordPtr configp) :
@@ -1605,9 +1606,6 @@ public:
                                ImageLayoutKindFrom(configp->Get(L"imageLayout")))
     {
         AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
-        m_postBatchNormalization = false;
-        m_swapNormTimeConst = 0;
-        m_swapBlendTimeConst = 0;
     }

     void Save(File& fstream) const override
@@ -1724,7 +1722,7 @@ private: // time-constant conversions
     double ComputeExpAvgFactor() const
     {
         // in inference mode, only use long-term mean and do not update running estimates
-        if (!Environment().IsTraining() && !m_postBatchNormalization)
+        if (!Environment().IsTraining())
         {
             if (m_samplesSeen == 0)
                 RuntimeError("%ls: inference mode is used, but nothing has been trained.", NodeName().c_str());
@@ -1756,7 +1754,7 @@ private: // time-constant conversions
     double ComputeBlendFactor() const
     {
         // in inference mode, only use long-term mean and do not update running estimates
-        if (!Environment().IsTraining() && !m_postBatchNormalization)
+        if (!Environment().IsTraining())
         {
             if (m_samplesSeen == 0)
                 RuntimeError("%ls: inference mode is used, but nothing has been trained.", NodeName().c_str());
@@ -1805,7 +1803,7 @@ public:
     // In inference-only mode, m_savedMean and m_saveInvStdDev will not be
     // produced and BackpropToNonLooping() may not be called. In
     // non-inference (training) mode, saved statistics must be produced.
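    // (The m_postBatchNormalization special case is removed below: statistics collection now
    // runs through PostComputingActions in training mode, so Environment().IsTraining() alone
    // decides whether the running estimates are updated.)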
-    bool inferenceOnly = !Environment().IsTraining() && !m_postBatchNormalization;
+    bool inferenceOnly = !Environment().IsTraining();
     m_bnEng->Forward(/*in=*/ sliceInputValue, scale, bias, // (in)
                      inferenceOnly, expAvgFactor, blendFactor,
                      runMean, runVariance, // (in/out) running estimates, updated from the current MB mean/variance
@@ -1870,14 +1868,6 @@ public:
     }

     virtual void EndForwardProp() override
-    {
-        if(m_postBatchNormalization)
-            m_samplesSeen += GetMBLayout()->GetActualNumSamples();
-
-        Base::EndForwardProp();
-    }
-
-    virtual void EndBackprop() override
     {
         // Update samples if not locked.
         double expAvgFactor = ComputeExpAvgFactor(); // weight for the new MB statistics in the running estimate. The previous value of the running statistics is kept with weight (1-this)
@@ -2019,28 +2009,29 @@ public:
         m_blendTimeConst = std::numeric_limits<double>::infinity();
     }

+    // ResetStatisticsState resets the batch normalization statistics to their initial state.
+    // It is used when re-estimating the mean and variance of BN nodes; any other use may
+    // lead to unreliable results, so be careful.
+    void ResetStatisticsState()
+    {
+        m_samplesSeen = 0;
+        m_normTimeConst = 0;
+        m_blendTimeConst = 0;
+    }
+    // Turn off the L1 and L2 regularization
+    void DisableRegInBatchNormalization()
+    {
+        let scaleNode = dynamic_pointer_cast<LearnableParameter<ElemType>>(Input(1));
+        let biasNode = dynamic_pointer_cast<LearnableParameter<ElemType>>(Input(2));
+        scaleNode->SetRegMultiplier(0.f);
+        biasNode->SetRegMultiplier(0.f);
+    }
     double NormalizationTimeConstant() const { return m_normTimeConst; }
     double BlendTimeConstant() const { return m_blendTimeConst; }
     bool Spatial() const { return m_spatial; }
     double Epsilon() const { return m_epsilon; }
     bool UseCNTKEngine() const { return m_useCntkEngine; }

-    void SetPostBatchNormalizationBegin()
-    {
-        m_postBatchNormalization = true;
-        m_samplesSeen = 0;
-        m_swapNormTimeConst = m_normTimeConst;
-        m_swapBlendTimeConst = m_blendTimeConst;
-        m_normTimeConst = -1;
-        m_blendTimeConst = 0;
-    }
-    void SetPostBatchNormalizationEnd()
-    {
-        m_postBatchNormalization = false;
-        m_normTimeConst = m_swapNormTimeConst;
-        m_blendTimeConst = m_swapBlendTimeConst;
-    }
-
 private:
     // Old versioning - do not use. Do not remove until we're sure there are no old models around.
     struct VersionInfo
@@ -2104,11 +2095,6 @@ private:

     std::unique_ptr<BatchNormEngine<ElemType>> m_bnEng;

-    // post batch normalization process mark
-    bool m_postBatchNormalization;
-
-    double m_swapNormTimeConst;
-    double m_swapBlendTimeConst;
     bool m_convertRunningVariancePending;
 };
diff --git a/Source/SGDLib/PostComputingActions.cpp b/Source/SGDLib/PostComputingActions.cpp
new file mode 100644
index 000000000..03b2513a7
--- /dev/null
+++ b/Source/SGDLib/PostComputingActions.cpp
@@ -0,0 +1,161 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// PostComputingActions.cpp -- CNTK post-statistics related actions
+//
+
+#include "PostComputingActions.h"
+
+#include "TrainingNodes.h"
+#include "ProgressTracing.h"
+#include "DataReaderHelpers.h"
+#include "SimpleDistGradAggregator.h"
+
+#include
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+template <class ElemType>
+void PostComputingActions<ElemType>::BatchNormalizationStatistics(IDataReader* dataReader, const vector<wstring>& evalNodeNames,
+    const wstring newModelPath, const size_t mbSize, const int iters)
+{
+    // Since the mean and variance of BN nodes are modified while the statistics are collected,
+    // the network must run in training mode. There is no backprop, so all other parameters
+    // stay fixed during the computation.
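+    // (ScopedNetworkOperationMode is an RAII guard: it switches the network into the given
+    // mode and restores the previous mode when it goes out of scope at the end of this function.)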
+    ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::training);
+
+    // BN nodes need to be computed from bottom to top in evaluation order
+    let evalNodes = m_net->GetEvalNodesWithName(evalNodeNames);
+
+    // find all the BN nodes in evaluation order
+    std::vector<ComputationNodeBasePtr> bnNodes;
+    std::set<ComputationNodeBasePtr> bnNodesLogged; // (avoids recording a batch normalization node twice)
+    for (auto& evalNode : evalNodes)
+    {
+        for (auto& node : m_net->GetEvalOrder(evalNode))
+        {
+            let bnNode = dynamic_pointer_cast<BatchNormalizationNode<ElemType>>(node);
+            if (bnNode)
+            {
+                if (bnNodesLogged.insert(node).second)
+                {
+                    // reset the statistics state of the BN nodes
+                    bnNode->ResetStatisticsState();
+                    bnNode->SetNormalizationTimeConstants(-1, bnNode->NormalizationTimeConstant(),
+                                                          0, bnNode->BlendTimeConstant());
+                    bnNodes.push_back(node);
+                    // add BN nodes to the evaluation group, so that they become root nodes when
+                    // the network is recompiled
+                    m_net->AddToNodeGroup(L"evaluation", bnNode);
+                }
+            }
+        }
+    }
+
+    // recompile the network to add the BN nodes as root nodes
+    m_net->CompileNetwork();
+
+    // allocate memory for the evaluation order of all bnNodes
+    m_net->AllocateAllMatrices(bnNodes, std::vector<ComputationNodeBasePtr>(), nullptr);
+
+    // prepare features
+    auto& featureNodes = m_net->FeatureNodes();
+
+    StreamMinibatchInputs inputMatrices;
+    for (auto& node : featureNodes)
+        inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout());
+
+    bool useParallelTrain = (m_mpi != nullptr);
+    bool useDistributedMBReading = useParallelTrain && m_enableDistributedMBReading && dataReader->SupportsDistributedMBRead();
+    size_t totalEpochSize = bnNodes.size() * mbSize * iters;
+
+    m_net->StartEvaluateMinibatchLoop(bnNodes);
+
+    if (useDistributedMBReading)
+        dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), totalEpochSize);
+    else
+        dataReader->StartMinibatchLoop(mbSize, 0, totalEpochSize);
+
+    for (auto& node : bnNodes)
+    {
+        let bnNode = static_pointer_cast<BatchNormalizationNode<ElemType>>(node);
+        size_t actualMBSize = 0;
+
+        LOGPRINTF(stderr, "Estimating Statistics --> %ls\n", bnNode->GetName().c_str());
+
+        // For each BN node, the estimated statistics are the mean and variance averaged over
+        // several forward passes; each forward pass runs from the features up to the current BN node.
+        for (int iter = 0; iter < iters; iter++)
+        {
+            // while collecting BN statistics, every minibatch read must succeed
+            bool wasDataRead = DataReaderHelpers::GetMinibatchIntoNetwork<ElemType>(*dataReader, m_net,
+                nullptr, useDistributedMBReading, useParallelTrain, inputMatrices, actualMBSize, m_mpi);

+            if (!wasDataRead) LogicError("DataRead Failure in batch normalization statistics");
+
+            ComputationNetwork::BumpEvalTimeStamp(featureNodes);
+
+            // forward prop until reaching the current BN node
+            m_net->ForwardProp(node);
+        }
+
+        // After the statistics are collected, the mean and variance of the BN node should be frozen.
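+        // (FreezeParameters() locks in the estimated statistics: as the TrainingNodes.h hunk
+        // above shows, it drives the time constants to infinity so that only the running
+        // mean/variance are used from this point on.)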
+        bnNode->FreezeParameters();
+
+        // Syncing during or after all iterations of a BN node is equivalent
+        if (useParallelTrain)
+        {
+            if (m_gradHeader == nullptr)
+            {
+                m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr)
+                {
+                    DistGradHeader::Destroy(ptr);
+                });
+            }
+
+            // push the mean and variance statistics of the BN nodes into the MPI update vector
+            std::vector<Matrix<ElemType>*> learnParamsValues(2, nullptr);
+
+            SimpleDistGradAggregator<ElemType> distGradAgg(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
+
+            auto runMeanParameterPtr = node->Input(3);
+            auto runStdParameterPtr = node->Input(4);
+
+            shared_ptr<ComputationNode<ElemType>> runMeanNode = static_pointer_cast<ComputationNode<ElemType>>(runMeanParameterPtr);
+            shared_ptr<ComputationNode<ElemType>> runStdNode = static_pointer_cast<ComputationNode<ElemType>>(runStdParameterPtr);
+
+            learnParamsValues[0] = &(runMeanNode->Value());
+            learnParamsValues[1] = &(runStdNode->Value());
+
+            m_gradHeader->numSamples = actualMBSize ? 1 : actualMBSize;
+            distGradAgg.AggregateGradients(learnParamsValues, m_gradHeader.get(), 0);
+
+            // get the average mean and variance across all the workers
+            for (auto& parameter : learnParamsValues)
+            {
+                (*parameter) /= (ElemType)m_mpi->NumNodesInUse();
+            }
+        }
+    }
+
+    dataReader->DataEnd();
+
+    // remove all the added BN nodes from the evaluation group
+    for (auto& bnNode : bnNodes)
+    {
+        m_net->RemoveFromNodeGroup(L"evaluation", bnNode);
+    }
+
+    // save the model
+    if (!useParallelTrain || m_mpi->CurrentNodeRank() == m_mpi->MainNodeRank())
+        m_net->Save(newModelPath);
+
+    return;
+}
+
+template class PostComputingActions<float>;
+template class PostComputingActions<double>;
+
+}}}
diff --git a/Source/SGDLib/PostComputingActions.h b/Source/SGDLib/PostComputingActions.h
new file mode 100644
index 000000000..9dfc95bb6
--- /dev/null
+++ b/Source/SGDLib/PostComputingActions.h
@@ -0,0 +1,65 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// PostComputingActions.h -- CNTK post-statistics related actions
+//
+
+#pragma once
+#include "ComputationNode.h"
+#include "ComputationNetwork.h"
+#include "MPIWrapper.h"
+#include "DataReader.h"
+#include "IDistGradAggregator.h"
+#include "DistGradHeader.h"
+
+using namespace std;
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+template <class ElemType>
+class IDistGradAggregator;
+
+// Post-computing actions normally run between training and evaluation, to generate the
+// statistics needed for evaluation.
+// For now, the only application is computing the mean and variance statistics of Batch
+// Normalization nodes after training.
+template <class ElemType>
+class PostComputingActions
+{
+public:
+    PostComputingActions(ComputationNetworkPtr net, const MPIWrapperPtr& mpi, bool enableDistributedMBReading = false, const int traceLevel = 0) :
+        m_net(net),
+        m_traceLevel(traceLevel),
+        m_mpi(mpi),
+        m_distGradAgg(nullptr),
+        m_gradHeader(nullptr),
+        m_enableDistributedMBReading(enableDistributedMBReading)
+    {
+    }
+
+    // This function evaluates the mean and variance of all batch normalization nodes after training.
+    // Details: https://github.com/Microsoft/CNTK/wiki/Post-Batch-Normalization-Statistics
+    // It is grouped with the evaluation-style actions because it runs after training and involves
+    // no backprop, which makes it a kind of evaluation pass.
+    // In this function:
+    // 1. Since all weights are fixed except the statistics of the not-yet-processed BN nodes,
+    //    the network operation mode is set accordingly.
+    // 2. The network model and data source are then loaded as in Evaluate(), omitting parts
+    //    that are not needed here, such as error statistics.
+    // 3. The BN nodes in the network are collected into a vector in evaluation order (this
+    //    replaces the nested-node vector previously obtained from the control-flow network).
+    // 4. The BN vector is walked node by node to generate the mean and variance (this relies on
+    //    the changes to BatchNormalizationNode in TrainingNodes.h, since the nodes must "learn"
+    //    mean and variance without backprop).
+    // 5. For multi-GPU runs, the BN results are synchronized across all workers and averaged.
+    void BatchNormalizationStatistics(IDataReader* dataReader, const vector<wstring>& evalNodeNames, const wstring newModelPath,
+        const size_t mbSize, const int iters = 30);
+
+private:
+    ComputationNetworkPtr m_net;
+    MPIWrapperPtr m_mpi;
+    bool m_enableDistributedMBReading;
+
+    int m_traceLevel;
+
+    std::shared_ptr<IDistGradAggregator<ElemType>> m_distGradAgg;
+    std::shared_ptr<DistGradHeader> m_gradHeader;
+};
+}}}
diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp
index b5a590d8a..61cb07091 100644
--- a/Source/SGDLib/SGD.cpp
+++ b/Source/SGDLib/SGD.cpp
@@ -8,6 +8,7 @@
 #include "SpecialPurposeNodes.h" // for SequenceWithSoftmaxNode
 #include "DataReaderHelpers.h"
 #include "MatrixQuantizerImpl.h"
+#include "InputAndParamNodes.h"

 #ifdef CNTK_PARALLEL_TRAINING_SUPPORT
 //static inline bool operator==(const std::pair<double, size_t>& a, double b) { assert(b==0); return a.first == b; }
@@ -875,23 +876,13 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
     EpochCriterion epochCriterionLastLogged = epochCriterion;
     vector<EpochCriterion> epochEvalErrorsLastLogged = epochEvalErrors;

-    // Now, we need to use a switch to enable/disable wk in BatchNormalization.
-    // If we can determine whether wk added or not for each node, then, discard this
-    std::unordered_set<ComputationNodeBasePtr> batchNormalizationWeights;
-    if (m_disableWkInBatchNormal) {
-        for (auto& evalNode : evaluationNodes)
+    // NOTE: For ResNet, the regularization in BatchNormalization should be disabled.
+    if (m_disableRegInBatchNormalization) {
+        let bnNodes = net->GetNodesWithType(L"BatchNormalization");
+        for (auto &node : bnNodes)
         {
-            shared_ptr<FlowControlNode> nestedNetwork = static_pointer_cast<FlowControlNode>(net->GetNestedNetwork(evalNode));
-            for (auto& node : nestedNetwork->GetNestedNodes())
-            {
-                shared_ptr<BatchNormalizationNode<ElemType>> castNode =
-                    dynamic_pointer_cast<BatchNormalizationNode<ElemType>>(node);
-                if (castNode)
-                {
-                    batchNormalizationWeights.insert(castNode->GetInputs()[1]);
-                    batchNormalizationWeights.insert(castNode->GetInputs()[2]);
-                }
-            }
+            let bnNode = dynamic_pointer_cast<BatchNormalizationNode<ElemType>>(node);
+            bnNode->DisableRegInBatchNormalization();
         }
     }

@@ -1110,11 +1101,10 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
                     if (smoothedGradient.HasNan("TrainOneEpoch/UpdateWeights(): "))
                         LogicError("%ls %ls operation has NaNs in smoothedGradient.", node->NodeName().c_str(), node->OperationName().c_str());
 #endif
-                    double l2Factor = batchNormalizationWeights.find(node) == batchNormalizationWeights.end() ? 1.0 : 0.0;
                     // BUGBUG (Issue #95): Access to net MBLayout can no longer be done if we have multiple input layouts
                     UpdateWeights(node, smoothedGradient, learnRatePerSample,
                                   GetMomentumPerSample(epochNumber /*BUGBUG workaround:*/, net->GetMBLayoutPtrOfNetwork()->GetNumParallelSequences()), numSamplesInMinibatch,
-                                  m_L2RegWeight * l2Factor, m_L1RegWeight,
+                                  m_L2RegWeight, m_L1RegWeight,
                                   m_needAveMultiplier, m_useNesterovMomentum);
 #ifdef _DEBUG
                     if (dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value().HasNan("TrainOneEpoch/UpdateWeights(): "))
@@ -2017,9 +2007,10 @@ void SGD<ElemType>::UpdateWeights(const ComputationNodeBasePtr& node,
         LogicError("UpdateWeights() called for a learnable ComputationNode which has m_learningRateMultiplier == 0!");

     double nodeDependentLearningRatePerSample = learnRatePerSample * node->GetLearningRateMultiplier();
+    double nodeDependentRegMultiplier = dynamic_pointer_cast<LearnableParameter<ElemType>>(node)->GetRegMultiplier();
     UpdateWeightsS(this, dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(), dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Gradient(),
                    smoothedGradient, nodeDependentLearningRatePerSample, momentumPerSample,
-                   actualMBSize, L2RegWeight, L1RegWeight,
+                   actualMBSize, L2RegWeight * nodeDependentRegMultiplier, L1RegWeight * nodeDependentRegMultiplier,
                    needAveMultiplier, m_useNesterovMomentum);
     node->BumpEvalTimeStamp();
 }
@@ -2475,7 +2466,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
     m_seqGammarCalcbMMIFactor = configSGD(L"seqGammarBMMIFactor", 0.0);
     m_seqGammarCalcWP = configSGD(L"seqGammarWordPen", 0.0);

-    m_disableWkInBatchNormal = configSGD(L"disableWkInBatchNormal", false);
+    m_disableRegInBatchNormalization = configSGD(L"disableRegInBatchNormalization", false);

     m_dropoutRates = configSGD(L"dropoutRate", ConfigRecordType::Array(doubleargvector(vector<double>{0.0})));
     m_batchNormalizationTimeConstant = configSGD(L"batchNormalizationTimeConstant", ConfigRecordType::Array(doubleargvector(vector<double>{0})));
diff --git a/Source/SGDLib/SGD.h b/Source/SGDLib/SGD.h
index 9e6bc4474..064338ff8 100644
--- a/Source/SGDLib/SGD.h
+++ b/Source/SGDLib/SGD.h
@@ -291,7 +291,10 @@ protected:
     double m_seqGammarCalcbMMIFactor;
     bool m_seqGammarCalcUsesMBR;

-    bool m_disableWkInBatchNormal;
+    // decides whether L2 regularization should be applied to BatchNormalizationNode
+    // true: disable L2 regularization
+    // false: enable L2 regularization (default)
+    bool m_disableRegInBatchNormalization;
 };
diff --git a/Source/SGDLib/SGDLib.vcxproj b/Source/SGDLib/SGDLib.vcxproj
index 03d11b4db..4b6e0dada 100644
--- a/Source/SGDLib/SGDLib.vcxproj
+++ b/Source/SGDLib/SGDLib.vcxproj
@@ -124,6 +124,7 @@ + @@ -132,10 +133,11 @@ + - + \ No newline at end of file
diff --git a/Source/SGDLib/SGDLib.vcxproj.filters b/Source/SGDLib/SGDLib.vcxproj.filters
index 5d0cb116e..fc4f156a2 100644
--- a/Source/SGDLib/SGDLib.vcxproj.filters
+++ b/Source/SGDLib/SGDLib.vcxproj.filters
@@ -1,32 +1,17 @@  - - Common - - - Common - - - Common - - - Common - Misc - - Common - GPU Interfacing SGD - - Common + + Stat @@ -144,6 +129,9 @@ SGD + + Stat + @@ -182,5 +170,8 @@ {b866d513-7bd0-497c-98c2-f62dbcd4cde4} + + {f406217f-5a11-44ca-bb34-52254dbee8af} + - + \ No newline at end of file
diff --git a/Source/SGDLib/SimpleEvaluator.h b/Source/SGDLib/SimpleEvaluator.h
index 813fb382a..a2dda2927 100644
--- a/Source/SGDLib/SimpleEvaluator.h
+++ b/Source/SGDLib/SimpleEvaluator.h
@@ -52,36 +52,7 @@ public:
     {
         ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring);

-        // determine nodes to evaluate
-        std::vector<ComputationNodeBasePtr> evalNodes;
-
-        set<ComputationNodeBasePtr> criteriaLogged; //
(keeps track ot duplicates to avoid we don't double-log critera) - if (evalNodeNames.size() == 0) - { - fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n"); - if (m_net->EvaluationNodes().empty() && m_net->FinalCriterionNodes().empty()) - InvalidArgument("There is no default evaluation node or training criterion specified in the network."); - - for (const auto& node : m_net->EvaluationNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - - for (const auto& node : m_net->FinalCriterionNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - } - else - { - for (int i = 0; i < evalNodeNames.size(); i++) - { - const auto& node = m_net->GetNodeFromName(evalNodeNames[i]); - if (!criteriaLogged.insert(node).second) - continue; - if (node->GetSampleLayout().GetNumElements() != 1) - InvalidArgument("Criterion nodes to evaluate must have dimension 1x1."); - evalNodes.push_back(node); - } - } + let evalNodes = m_net->GetEvalNodesWithName(evalNodeNames); // initialize eval results std::vector evalResults(evalNodes.size(), EpochCriterion(0)); @@ -112,7 +83,7 @@ public: if (useDistributedMBReading) dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), testSize); else - dataReader->StartMinibatchLoop(mbSize, 0, testSize); + dataReader->StartMinibatchLoop(mbSize, 0, testSize); m_net->StartEvaluateMinibatchLoop(evalNodes); @@ -257,153 +228,6 @@ public: return evalResults; } - void EvaluateBN(IDataReader* dataReader, const vector& evalNodeNames, const wstring exportPath, const size_t mbSize, const int iters = 240, const size_t testSize = requestDataSize) - { - ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring); - - // determine nodes to evaluate - std::vector evalNodes; - - set criteriaLogged; // (keeps track ot duplicates to avoid we don't double-log critera) - if (evalNodeNames.size() == 0) - { - fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n"); - if (m_net->EvaluationNodes().empty() && m_net->FinalCriterionNodes().empty()) - InvalidArgument("There is no default evaluation node or training criterion specified in the network."); - - for (const auto& node : m_net->EvaluationNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - - for (const auto& node : m_net->FinalCriterionNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - } - else - { - for (int i = 0; i < evalNodeNames.size(); i++) - { - const auto& node = m_net->GetNodeFromName(evalNodeNames[i]); - if (!criteriaLogged.insert(node).second) - continue; - if (node->GetSampleLayout().GetNumElements() != 1) - InvalidArgument("Criterion nodes to evaluate must have dimension 1x1."); - evalNodes.push_back(node); - } - } - - // allocate memory for forward computation - m_net->AllocateAllMatrices(evalNodes, {}, nullptr); - - // prepare features and labels - auto& featureNodes = m_net->FeatureNodes(); - auto& labelNodes = m_net->LabelNodes(); - - StreamMinibatchInputs inputMatrices; - for (auto& node : featureNodes) - inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout()); - for (auto& node : labelNodes) - inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout()); - - bool useParallelTrain = (m_mpi != nullptr); - bool useDistributedMBReading = 
useParallelTrain && m_enableDistributedMBReading && dataReader->SupportsDistributedMBRead(); - if (useDistributedMBReading) - dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), testSize); - else - dataReader->StartMinibatchLoop(mbSize, 0, testSize); - - m_net->StartEvaluateMinibatchLoop(evalNodes); - - // Passing in two empty node lists so the dispatcher can work for the evalNodes. - std::list learnableNodes; - std::vector criterionNodes; - - // First, all batch normalization nodes should be marked. - std::vector batchNormalNodes; - shared_ptr nestedNetwork = static_pointer_cast(m_net->GetNestedNetwork(evalNodes[0])); - for (auto& node : nestedNetwork->GetNestedNodes()) - { - shared_ptr> castNode = - dynamic_pointer_cast>(node); - if (castNode) - { - batchNormalNodes.push_back(node); - } - } - - // Push all batch normalization mean and std into learn params values for mpi update - std::vector*> learnParamsValues(2, nullptr); - - bool noMoreSamplesToProcess = false; - for (auto& node : batchNormalNodes) - { - shared_ptr> batchNode = - static_pointer_cast>(node); - batchNode->SetPostBatchNormalizationBegin(); - size_t actualMBSize = 0; - - LOGPRINTF(stderr, "Start evaluating: %ls\n", batchNode->GetName().c_str()); - - // Post batch normal iters - for (int iter = 0; iter < iters; iter++) - { - bool wasDataRead = DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, - nullptr, useDistributedMBReading, useParallelTrain, inputMatrices, actualMBSize, m_mpi); - - if (!wasDataRead && (!useDistributedMBReading || noMoreSamplesToProcess)) - break; - - // TODO should handle it, since post BN exist no samples in iters - if (!wasDataRead) - actualMBSize = 0; - - // Batch Normalization Evaluate don't need to support subMinibatches - ComputationNetwork::BumpEvalTimeStamp(featureNodes); - ComputationNetwork::BumpEvalTimeStamp(labelNodes); - - m_net->ForwardProp(evalNodes[0], nullptr, node); - dataReader->DataEnd(); - } - batchNode->SetPostBatchNormalizationEnd(); - - // Sync during or after all iters of a BN node are equivalent - if (useParallelTrain) - { - if (m_gradHeader == nullptr) - { - m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr) - { - DistGradHeader::Destroy(ptr); - }); - } - SimpleDistGradAggregator distGradAgg(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/); - - auto runMeanParameterPtr = node->GetInputs()[3]; - auto runStdParameterPtr = node->GetInputs()[4]; - - shared_ptr> runMeanNode = static_pointer_cast>(runMeanParameterPtr); - shared_ptr> runStdNode = static_pointer_cast>(runStdParameterPtr); - - learnParamsValues[0] = &(runMeanNode->Value()); - learnParamsValues[1] = &(runStdNode->Value()); - - m_gradHeader->numSamples = actualMBSize ? 
1 : actualMBSize; - distGradAgg.AggregateGradients(learnParamsValues, m_gradHeader.get(), 0); - - for (auto& parameter : learnParamsValues) - { - (*parameter) /= (ElemType)m_mpi->NumNodesInUse(); - } - } - } - - // Save Model - if (!useParallelTrain || m_mpi->CurrentNodeRank() == m_mpi->MainNodeRank()) - m_net->Save(exportPath); - - return; - } - protected: void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastLogged, const vector& evalNodes, From b69d236625159d738de8f88a11d5b0e665dc92a6 Mon Sep 17 00:00:00 2001 From: "yuxiao.guo" Date: Tue, 18 Oct 2016 15:18:00 +0800 Subject: [PATCH 3/7] fix some details after merging from master --- .../Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk | 3 ++- Source/ComputationNetworkLib/ComputationNetwork.h | 2 +- Source/ComputationNetworkLib/TrainingNodes.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk b/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk index 6482e2afb..73d4932e3 100644 --- a/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk +++ b/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk @@ -46,7 +46,7 @@ Train=[ L2RegWeight=0.0001 dropoutRate=0 - disableWkInBatchNormal=true + disableRegInBatchNormalization=true ParallelTrain=[ parallelizationMethod="DataParallelSGD" @@ -93,6 +93,7 @@ PBN=[ # Set minibatch size for testing. minibatchSize=256 itersPerNode=30 + enableDistributedMBReading=true reader=[ readerType="ImageReader" diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h index 7ede685eb..e831f274a 100644 --- a/Source/ComputationNetworkLib/ComputationNetwork.h +++ b/Source/ComputationNetworkLib/ComputationNetwork.h @@ -1065,7 +1065,7 @@ protected: virtual const std::wstring OperationName() const override { return L"PARTraversalFlowControlNode"; - } + } virtual void BeginForwardProp() override { } diff --git a/Source/ComputationNetworkLib/TrainingNodes.h b/Source/ComputationNetworkLib/TrainingNodes.h index 837f81af6..d967a21cd 100644 --- a/Source/ComputationNetworkLib/TrainingNodes.h +++ b/Source/ComputationNetworkLib/TrainingNodes.h @@ -11,6 +11,7 @@ #include "InputAndParamNodes.h" #include "CPURNGHandle.h" + #define __STDC_FORMAT_MACROS #include #include From 63de1654436070cadbb91c2abad1e679cc754c12 Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Tue, 18 Oct 2016 07:45:17 +0200 Subject: [PATCH 4/7] CNTK.sln: add missing project dependencies for PythonBindings.vcxproj --- CNTK.sln | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CNTK.sln b/CNTK.sln index dd0454f16..c80227888 100644 --- a/CNTK.sln +++ b/CNTK.sln @@ -1275,13 +1275,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PythonBindings", "bindings\ {9BD0A711-0BBD-45B6-B81C-053F03C26CFB} = {9BD0A711-0BBD-45B6-B81C-053F03C26CFB} {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5} {D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {D667AF32-028A-4A5D-BE19-F46776F0F6B2} + {7B7A563D-AA8E-4660-A805-D50235A02120} = {7B7A563D-AA8E-4660-A805-D50235A02120} {9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {9A2F2441-5972-4EA8-9215-4119FCE0FB68} {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} {91973E60-A7BE-4C86-8FDB-59C88A0B3715} = {91973E60-A7BE-4C86-8FDB-59C88A0B3715} {014DA766-B37B-4581-BC26-963EA5507931} = {014DA766-B37B-4581-BC26-963EA5507931} 
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {CE429AA2-3778-4619-8FD1-49BA3B81197B} + {EF766CAE-9CB1-494C-9153-0030631A6340} = {EF766CAE-9CB1-494C-9153-0030631A6340} {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} {E5606ECE-48CA-4464-BB12-09D81D02B9EF} = {E5606ECE-48CA-4464-BB12-09D81D02B9EF} + {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {482999D1-B7E2-466E-9F8D-2119F93EAFD9} {1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {1D5787D4-52E4-45DB-951B-82F220EE0C6A} {7B7A51ED-AA8E-4660-A805-D50235A02120} = {7B7A51ED-AA8E-4660-A805-D50235A02120} {E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1} From c28ac3327e14590553c36ada9dc85031259a665f Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Tue, 18 Oct 2016 07:56:13 +0200 Subject: [PATCH 5/7] .vcxproj: remove and This is a partial revert of 18fa8097bf9abe1befe8439c91a8740257bb5134. --- Source/ActionsLib/ActionsLib.vcxproj | 6 ------ Source/CNTK/CNTK.vcxproj | 6 ------ Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj | 6 ------ Source/Common/Common.vcxproj | 6 ------ Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj | 6 ------ Source/EvalDll/EvalDll.vcxproj | 6 ------ Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj | 6 ------ Source/Math/Math.vcxproj | 6 ------ Source/Math/MathCUDA.vcxproj | 6 ------ Source/Readers/BinaryReader/BinaryReader.vcxproj | 6 ------ .../CNTKTextFormatReader/CNTKTextFormatReader.vcxproj | 6 ------ .../Readers/CompositeDataReader/CompositeDataReader.vcxproj | 6 ------ Source/Readers/DSSMReader/DSSMReader.vcxproj | 6 ------ Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj | 6 ------ Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj | 6 ------ Source/Readers/ImageReader/ImageReader.vcxproj | 6 ------ Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj | 6 ------ Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj | 6 ------ .../Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj | 6 ------ Source/Readers/ReaderLib/ReaderLib.vcxproj | 6 ------ Source/Readers/SparsePCReader/SparsePCReader.vcxproj | 6 ------ Source/Readers/UCIFastReader/UCIFastReader.vcxproj | 6 ------ Source/SGDLib/SGDLib.vcxproj | 6 ------ Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj | 6 ------ .../CPPEvalClientTest/CPPEvalClientTest.vcxproj | 6 ------ Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj | 6 ------ Tests/UnitTests/CommandEval/CommandEval.vcxproj | 6 ------ Tests/UnitTests/EvalTests/EvalTests.vcxproj | 6 ------ .../MathPerformanceTests/MathPerformanceTests.vcxproj | 6 ------ Tests/UnitTests/MathTests/MathTests.vcxproj | 6 ------ Tests/UnitTests/NetworkTests/NetworkTests.vcxproj | 6 ------ Tests/UnitTests/ReaderTests/ReaderTests.vcxproj | 6 ------ Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj | 6 ------ 33 files changed, 198 deletions(-) diff --git a/Source/ActionsLib/ActionsLib.vcxproj b/Source/ActionsLib/ActionsLib.vcxproj index f7fea7479..72df22085 100644 --- a/Source/ActionsLib/ActionsLib.vcxproj +++ b/Source/ActionsLib/ActionsLib.vcxproj @@ -80,8 +80,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -91,10 +89,6 @@ if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying NVidia GDK extension DLL to target folder - - true - true - diff --git a/Source/CNTK/CNTK.vcxproj b/Source/CNTK/CNTK.vcxproj index d0936702c..b92e3674e 100644 --- a/Source/CNTK/CNTK.vcxproj +++ b/Source/CNTK/CNTK.vcxproj 
@@ -131,8 +131,6 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
@@ -141,10 +139,6 @@
       xcopy /I /D /Y "$(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs" "$(TargetDir)" && if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"
       Copying dependencies
-
-      true
-      true
-
diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
index cefc1bb69..83c257b94 100644
--- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
+++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
@@ -119,8 +119,6 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
@@ -129,10 +127,6 @@
       if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"
       Copying NVidia GDK extension DLL to target folder
-
-      true
-      true
-
diff --git a/Source/Common/Common.vcxproj b/Source/Common/Common.vcxproj
index 1797cc86a..7ab928a35 100644
--- a/Source/Common/Common.vcxproj
+++ b/Source/Common/Common.vcxproj
@@ -54,13 +54,7 @@
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
-
-      true
-      true
-
diff --git a/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj b/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj
index 48a9d94d5..ba672d5c9 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj
+++ b/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj
@@ -72,8 +72,6 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
@@ -83,10 +81,6 @@
       if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"
       Copying NVidia GDK extension DLL to target folder
-
-      true
-      true
-
diff --git a/Source/EvalDll/EvalDll.vcxproj b/Source/EvalDll/EvalDll.vcxproj
index c2dde9ebd..329791c63 100644
--- a/Source/EvalDll/EvalDll.vcxproj
+++ b/Source/EvalDll/EvalDll.vcxproj
@@ -120,8 +120,6 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
@@ -130,10 +128,6 @@
       if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"
       Copying NVidia GDK extension DLL to target folder
-
-      true
-      true
-
diff --git a/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj b/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj
index c0a86b00e..d6d8f1f79 100644
--- a/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj
+++ b/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj
@@ -78,16 +78,10 @@
       Level3
       WIN32;NDEBUG;%(PreprocessorDefinitions)
-      true
-      true
       true
-
-      true
-      true
-
diff --git a/Source/Math/Math.vcxproj b/Source/Math/Math.vcxproj
index cd463fa24..0d223472f 100644
--- a/Source/Math/Math.vcxproj
+++ b/Source/Math/Math.vcxproj
@@ -147,16 +147,10 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
-
-      true
-      true
-
diff --git a/Source/Math/MathCUDA.vcxproj b/Source/Math/MathCUDA.vcxproj
index 8748a171a..6bfe9c5f6 100644
--- a/Source/Math/MathCUDA.vcxproj
+++ b/Source/Math/MathCUDA.vcxproj
@@ -105,16 +105,10 @@
       if exist "$(CuDnnDll)" xcopy /D /Y "$(CuDnnDll)" "$(OutputPath)"
       true
       false
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       false
-
-      true
-      true
-
diff --git a/Source/Readers/BinaryReader/BinaryReader.vcxproj b/Source/Readers/BinaryReader/BinaryReader.vcxproj
index e64a6cbfd..d5dfe337a 100644
--- a/Source/Readers/BinaryReader/BinaryReader.vcxproj
+++ b/Source/Readers/BinaryReader/BinaryReader.vcxproj
@@ -90,8 +90,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -101,10 +99,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj
index f78ac53ec..4af732971 100644
--- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj
+++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj
@@ -83,18 +83,12 @@
       true
       NDEBUG;%(PreprocessorDefinitions)
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       true
       true
       true
-
-      true
-      true
-
diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj
index c03f34ad0..442c34310 100644
--- a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj
+++ b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj
@@ -75,18 +75,12 @@
       true
       NDEBUG;%(PreprocessorDefinitions)
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       true
       true
       true
-
-      true
-      true
-
diff --git a/Source/Readers/DSSMReader/DSSMReader.vcxproj b/Source/Readers/DSSMReader/DSSMReader.vcxproj
index 838cc1dad..d06080f58 100644
--- a/Source/Readers/DSSMReader/DSSMReader.vcxproj
+++ b/Source/Readers/DSSMReader/DSSMReader.vcxproj
@@ -88,8 +88,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -99,10 +97,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj b/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj
index 1dd2f3024..c2685240a 100644
--- a/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj
+++ b/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj
@@ -75,18 +75,12 @@
       true
       NDEBUG;%(PreprocessorDefinitions)
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       true
       true
       true
-
-      true
-      true
-
diff --git a/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj b/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj
index 8635a4055..56453f16e 100644
--- a/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj
+++ b/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj
@@ -93,8 +93,6 @@
       ..\..\common\include;..\..\Math
       ..\..\common\include;..\..\Math
       ..\..\common\include;..\..\Math
-      true
-      true
       Console
@@ -107,10 +105,6 @@
       $(SolutionDir)$(Platform)\$(Configuration)\
       $(SolutionDir)$(Platform)\$(Configuration)\
-
-      true
-      true
-
diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj b/Source/Readers/ImageReader/ImageReader.vcxproj
index 13899b05d..376a9b8c3 100644
--- a/Source/Readers/ImageReader/ImageReader.vcxproj
+++ b/Source/Readers/ImageReader/ImageReader.vcxproj
@@ -97,18 +97,12 @@
       true
       NDEBUG;%(PreprocessorDefinitions)
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       true
       true
       true
-
-      true
-      true
-
diff --git a/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj b/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj
index 2b6cb08c0..59aed758d 100644
--- a/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj
+++ b/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj
@@ -88,8 +88,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -99,10 +97,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj b/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj
index afe788402..e39b67b67 100644
--- a/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj
+++ b/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj
@@ -91,8 +91,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -102,10 +100,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj b/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj
index 179bf0fb8..afd3f48b8 100644
--- a/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj
+++ b/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj
@@ -88,8 +88,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -99,10 +97,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj b/Source/Readers/ReaderLib/ReaderLib.vcxproj
index 1a5c3226e..6b54ca0cc 100644
--- a/Source/Readers/ReaderLib/ReaderLib.vcxproj
+++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj
@@ -41,13 +41,7 @@
       $(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math
-      true
-      true
-
-      true
-      true
-
diff --git a/Source/Readers/SparsePCReader/SparsePCReader.vcxproj b/Source/Readers/SparsePCReader/SparsePCReader.vcxproj
index 9557eaf63..a3200c0b3 100644
--- a/Source/Readers/SparsePCReader/SparsePCReader.vcxproj
+++ b/Source/Readers/SparsePCReader/SparsePCReader.vcxproj
@@ -91,8 +91,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -102,10 +100,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/Readers/UCIFastReader/UCIFastReader.vcxproj b/Source/Readers/UCIFastReader/UCIFastReader.vcxproj
index 6cc2fdcea..fb79bf8b0 100644
--- a/Source/Readers/UCIFastReader/UCIFastReader.vcxproj
+++ b/Source/Readers/UCIFastReader/UCIFastReader.vcxproj
@@ -90,8 +90,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       true
-      true
-      true
       Console
@@ -101,10 +99,6 @@
       Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
       true
-
-      true
-      true
-
diff --git a/Source/SGDLib/SGDLib.vcxproj b/Source/SGDLib/SGDLib.vcxproj
index 9147bbe48..dc55bb472 100644
--- a/Source/SGDLib/SGDLib.vcxproj
+++ b/Source/SGDLib/SGDLib.vcxproj
@@ -85,8 +85,6 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
@@ -96,10 +94,6 @@
       if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"
       Copying NVidia GDK extension DLL to target folder
-
-      true
-      true
-
diff --git a/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj b/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj
index 85b579c31..9725d9eb3 100644
--- a/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj
+++ b/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj
@@ -41,13 +41,7 @@
       WIN32;_LIB;%(PreprocessorDefinitions)
       $(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math
-      true
-      true
-
-      true
-      true
-
diff --git a/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj b/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj
index 68eae9a6d..e7a22b173 100644
--- a/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj
+++ b/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj
@@ -100,8 +100,6 @@
       false
      /d2Zi+ %(AdditionalOptions)
       MultiThreadedDLL
-      true
-      true
       true
@@ -110,10 +108,6 @@
       true
-
-      true
-      true
-
diff --git a/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj b/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj
index edcda5b11..884519c98 100644
--- a/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj
+++ b/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj
@@ -89,16 +89,10 @@
       %(AdditionalIncludeDirectories)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
-
-      true
-      true
-
diff --git a/Tests/UnitTests/CommandEval/CommandEval.vcxproj b/Tests/UnitTests/CommandEval/CommandEval.vcxproj
index 86736bbf0..dfde0f912 100644
--- a/Tests/UnitTests/CommandEval/CommandEval.vcxproj
+++ b/Tests/UnitTests/CommandEval/CommandEval.vcxproj
@@ -87,8 +87,6 @@
       WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
       true
       true
-      true
-      true
       Console
@@ -97,10 +95,6 @@
       true
       Math.lib; Common.lib; %(AdditionalDependencies)
-
-      true
-      true
-
diff --git a/Tests/UnitTests/EvalTests/EvalTests.vcxproj b/Tests/UnitTests/EvalTests/EvalTests.vcxproj
index 1be3a8311..3529f8e32 100644
--- a/Tests/UnitTests/EvalTests/EvalTests.vcxproj
+++ b/Tests/UnitTests/EvalTests/EvalTests.vcxproj
@@ -109,17 +109,11 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
       %(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)
-
-      true
-      true
-
diff --git a/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj b/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
index fff9ea353..c814dc274 100644
--- a/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
+++ b/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
@@ -105,13 +105,7 @@
       $(CudaToolkitIncludeDir);%(AdditionalIncludeDirectories)
-      true
-      true
-
-      true
-      true
-
diff --git a/Tests/UnitTests/MathTests/MathTests.vcxproj b/Tests/UnitTests/MathTests/MathTests.vcxproj
index 34acdc8d1..75916baa3 100644
--- a/Tests/UnitTests/MathTests/MathTests.vcxproj
+++ b/Tests/UnitTests/MathTests/MathTests.vcxproj
@@ -109,17 +109,11 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
       %(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)
-
-      true
-      true
-
diff --git a/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj b/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj
index d2ea4652d..1c84efc02 100644
--- a/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj
+++ b/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj
@@ -90,17 +90,11 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
       %(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)
-
-      true
-      true
-
diff --git a/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj b/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
index e2970c678..2df7d7605 100644
--- a/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
+++ b/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
@@ -93,16 +93,10 @@
      NDEBUG;%(PreprocessorDefinitions)
       true
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       true
-
-      true
-      true
-
diff --git a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
index 437100ad2..5ca968f61 100644
--- a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
+++ b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
@@ -97,8 +97,6 @@
       true
       MultiThreaded
       MultiThreaded
-      true
-      true
       Console
@@ -107,10 +105,6 @@
       true
       CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-      true
-      true
-

From 3a66ecab7f3db515b4d4fc15735e958450a149b9 Mon Sep 17 00:00:00 2001
From: yuxiaoguo
Date: Tue, 18 Oct 2016 16:43:59 +0800
Subject: [PATCH 6/7] Fix a data reading bug in post batch normalization
 statistics

---
 Source/SGDLib/PostComputingActions.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/SGDLib/PostComputingActions.cpp b/Source/SGDLib/PostComputingActions.cpp
index 03b2513a7..f08faaa43 100644
--- a/Source/SGDLib/PostComputingActions.cpp
+++ b/Source/SGDLib/PostComputingActions.cpp
@@ -73,9 +73,9 @@ void PostComputingActions<ElemType>::BatchNormalizationStatistics(IDataReader *
     m_net->StartEvaluateMinibatchLoop(bnNodes);
 
     if (useDistributedMBReading)
-        dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), totalEpochSize);
+        dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), inputMatrices.GetStreamDescriptions(), totalEpochSize);
     else
-        dataReader->StartMinibatchLoop(mbSize, 0, totalEpochSize);
+        dataReader->StartMinibatchLoop(mbSize, 0, inputMatrices.GetStreamDescriptions(), totalEpochSize);
 
     for (auto& node : bnNodes)
    {
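
A note on the one-line fix above: the corrected overloads of StartMinibatchLoop and StartDistributedMinibatchLoop also receive the stream descriptions of the network's input matrices, which tells the reader which streams it must populate; the earlier calls omitted them, which is the data-reading bug this patch fixes. Below is a minimal sketch of the corrected call pattern, assuming the variables visible in the hunk (dataReader, mbSize, inputMatrices, totalEpochSize, useDistributedMBReading, m_mpi); the per-argument comments are an interpretation of the call sites, not documented CNTK semantics.

    // Sketch only: the corrected minibatch-loop setup from the hunk above, annotated.
    if (useDistributedMBReading)
        // distributed read: each MPI rank reads its own share of the epoch
        dataReader->StartDistributedMinibatchLoop(
            mbSize,                                // minibatch size
            0,                                     // epoch number (a single statistics pass)
            m_mpi->CurrentNodeRank(),              // this worker's rank
            m_mpi->NumNodesInUse(),                // total number of workers
            inputMatrices.GetStreamDescriptions(), // streams the reader must fill
            totalEpochSize);                       // total samples to read
    else
        dataReader->StartMinibatchLoop(
            mbSize, 0,
            inputMatrices.GetStreamDescriptions(),
            totalEpochSize);
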
From a61725c9c9ef5fb6013b32ed390f75c626b35158 Mon Sep 17 00:00:00 2001
From: Clemens Marschner
Date: Tue, 18 Oct 2016 11:44:16 +0200
Subject: [PATCH 7/7] Address review comments; fix SEQ backward tracing as
 well.

---
 .../ComputationEnvironment.h            |  4 ++++
 .../ComputationNetworkEvaluation.cpp    | 21 +++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/Source/ComputationNetworkLib/ComputationEnvironment.h b/Source/ComputationNetworkLib/ComputationEnvironment.h
index cce8252f0..8d6eb19c0 100644
--- a/Source/ComputationNetworkLib/ComputationEnvironment.h
+++ b/Source/ComputationNetworkLib/ComputationEnvironment.h
@@ -44,6 +44,10 @@ struct ComputationEnvironment
     // traceLevel
     int traceLevel = 0;
+
+    // Extreme tracing of node outputs. Make space on your disk.
+    bool IsLogLevelNodeTrace() const { return traceLevel >= 1000000; }
+
     // more properties should be added here as needed
 };
 typedef std::shared_ptr<ComputationEnvironment> ComputationEnvironmentPtr;
diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
index dd3015036..9fb38e79f 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
@@ -159,8 +159,8 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con
                 node->BumpEvalTimeStamp();
             }
 
-            // more extreme tracing for the ultimate debugging experience. Make space on your disk.
-            if (node->HasEnvironmentPtr() && node->Environment().traceLevel >= 1000000) // very high number, since this spews like hell
+            // Extreme Tracing, part 1/4
+            if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace())
                 DumpNode<float>(node, /*dumpGradient=*/false) || DumpNode<double>(node, false);
         }
     }
@@ -177,8 +177,8 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con
             node->Backprop(fr.WithLayout(node->GetMBLayout()), true /*childrenInThisLoop*/, true /*childrenInOuterLoop*/);
             node->EndBackprop();
 
-            // more extreme tracing for the ultimate debugging experience. Make space on your disk.
-            if (node->HasEnvironmentPtr() && node->Environment().traceLevel >= 1000000 && node->NeedsGradient()) // very high number, since this spews like hell
+            // Extreme Tracing, part 2/4
+            if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace() && node->NeedsGradient())
                 DumpNode<float>(node, /*dumpGradient=*/true) || DumpNode<double>(node, true);
         }
     }
@@ -295,10 +295,10 @@ static bool DumpNode(ComputationNodeBasePtr nodep, bool dumpGradient)
         }
     }
 
-    // more extreme tracing for the ultimate debugging experience. Make space on your disk.
+    // Extreme Tracing, part 3/4
     for (auto& node : m_nestedNodes)
     {
-        if (node->HasEnvironmentPtr() && node->Environment().traceLevel >= 1000000) // very high number, since this spews like hell
+        if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace())
         {
             DumpNode<float>(node, /*dumpGradient=*/false) || DumpNode<double>(node, false);
         }
@@ -335,6 +335,15 @@
         // a node that is outside the loop, which is done later in EndBackprop() in PAR mode.
         }
     }
+
+    // Extreme Tracing, part 4/4
+    for (auto& node : m_nestedNodes)
+    {
+        if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace() && node->NeedsGradient())
+        {
+            DumpNode<float>(node, /*dumpGradient=*/true) || DumpNode<double>(node, true);
+        }
+    }
 }
 
 // called after last iteration step of ComputeGradient()
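
One idiom in the hunks above is worth spelling out: DumpNode<float>(node, ...) || DumpNode<double>(node, ...) uses short-circuit evaluation as a lightweight runtime type dispatch. Each instantiation returns false when the node's element type is not its own, so the double-precision attempt runs only if the float one declined, and at most one dump happens per node. A self-contained sketch of the idiom follows; the types and names here are hypothetical stand-ins, not CNTK's.

    #include <iostream>
    #include <memory>

    struct NodeBase { virtual ~NodeBase() = default; };
    template <class ElemType>
    struct TypedNode : NodeBase { ElemType value{}; };

    // Returns false if `nodep` is not a TypedNode<ElemType>, so callers can
    // chain instantiations with || and let short-circuiting pick the match.
    template <class ElemType>
    static bool TryDump(const std::shared_ptr<NodeBase>& nodep)
    {
        auto node = std::dynamic_pointer_cast<TypedNode<ElemType>>(nodep);
        if (!node)
            return false; // wrong element type; let the next attempt handle it
        std::cout << "dumped a node with " << sizeof(ElemType) << "-byte elements\n";
        return true;
    }

    int main()
    {
        std::shared_ptr<NodeBase> n = std::make_shared<TypedNode<double>>();
        TryDump<float>(n) || TryDump<double>(n); // only the double handler fires
    }
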