diff --git a/CNTK.sln b/CNTK.sln index 49effde03..e01230a4c 100644 --- a/CNTK.sln +++ b/CNTK.sln @@ -1278,13 +1278,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PythonBindings", "bindings\ {9BD0A711-0BBD-45B6-B81C-053F03C26CFB} = {9BD0A711-0BBD-45B6-B81C-053F03C26CFB} {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5} {D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {D667AF32-028A-4A5D-BE19-F46776F0F6B2} + {7B7A563D-AA8E-4660-A805-D50235A02120} = {7B7A563D-AA8E-4660-A805-D50235A02120} {9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {9A2F2441-5972-4EA8-9215-4119FCE0FB68} {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} {91973E60-A7BE-4C86-8FDB-59C88A0B3715} = {91973E60-A7BE-4C86-8FDB-59C88A0B3715} {014DA766-B37B-4581-BC26-963EA5507931} = {014DA766-B37B-4581-BC26-963EA5507931} {CE429AA2-3778-4619-8FD1-49BA3B81197B} = {CE429AA2-3778-4619-8FD1-49BA3B81197B} + {EF766CAE-9CB1-494C-9153-0030631A6340} = {EF766CAE-9CB1-494C-9153-0030631A6340} {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} {E5606ECE-48CA-4464-BB12-09D81D02B9EF} = {E5606ECE-48CA-4464-BB12-09D81D02B9EF} + {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {482999D1-B7E2-466E-9F8D-2119F93EAFD9} {1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {1D5787D4-52E4-45DB-951B-82F220EE0C6A} {7B7A51ED-AA8E-4660-A805-D50235A02120} = {7B7A51ED-AA8E-4660-A805-D50235A02120} {E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1} diff --git a/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk b/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk index c6478384d..73d4932e3 100644 --- a/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk +++ b/Examples/Image/Classification/ResNet/ResNet_50_ndl_deprecated.cntk @@ -46,7 +46,7 @@ Train=[ L2RegWeight=0.0001 dropoutRate=0 - disableWkInBatchNormal=true + disableRegInBatchNormalization=true ParallelTrain=[ parallelizationMethod="DataParallelSGD" @@ -88,11 +88,12 @@ Train=[ ] PBN=[ - action="pbn" + action="bnstat" modelPath="$ModelDir$/ResNet_50" # Set minibatch size for testing. 
minibatchSize=256 - iters=30 + itersPerNode=30 + enableDistributedMBReading=true reader=[ readerType="ImageReader" diff --git a/Makefile b/Makefile index e75c81222..f6468b72a 100644 --- a/Makefile +++ b/Makefile @@ -467,7 +467,8 @@ EVAL:=eval SGDLIB_SRC=\ $(SOURCEDIR)/SGDLib/Profiler.cpp \ - $(SOURCEDIR)/SGDLib/SGD.cpp + $(SOURCEDIR)/SGDLib/SGD.cpp \ + $(SOURCEDIR)/SGDLib/PostComputingActions.cpp \ EVAL_SRC=\ $(SOURCEDIR)/EvalDll/CNTKEval.cpp \ diff --git a/Source/ActionsLib/Actions.h b/Source/ActionsLib/Actions.h index 2a1c83e9a..b991e1b95 100644 --- a/Source/ActionsLib/Actions.h +++ b/Source/ActionsLib/Actions.h @@ -42,11 +42,11 @@ template void DoDumpNodes(const ConfigParameters& config); template void DoEdit(const ConfigParameters& config); +template +void DoBatchNormalizationStat(const ConfigParameters& config); // evaluation (EvalActions.cpp) template -void DoEvalBN(const ConfigParameters& config); -template void DoEval(const ConfigParameters& config); template void DoCrossValidate(const ConfigParameters& config); diff --git a/Source/ActionsLib/ActionsLib.vcxproj b/Source/ActionsLib/ActionsLib.vcxproj index f7fea7479..72df22085 100644 --- a/Source/ActionsLib/ActionsLib.vcxproj +++ b/Source/ActionsLib/ActionsLib.vcxproj @@ -80,8 +80,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -91,10 +89,6 @@ if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying NVidia GDK extension DLL to target folder - - true - true - diff --git a/Source/ActionsLib/EvalActions.cpp b/Source/ActionsLib/EvalActions.cpp index e9853c3bf..909aeca8e 100644 --- a/Source/ActionsLib/EvalActions.cpp +++ b/Source/ActionsLib/EvalActions.cpp @@ -84,65 +84,6 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader) eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize); } -// =========================================================================== -// DoEvalBNBase() - implements CNTK "pbn" command -// =========================================================================== - -template -static void DoEvalBNBase(const ConfigParameters& config, IDataReader& reader) -{ - // DEVICEID_TYPE deviceId = DeviceFromConfig(config); - ConfigArray minibatchSize = config(L"minibatchSize", "40960"); - size_t epochSize = config(L"epochSize", "0"); - if (epochSize == 0) - { - epochSize = requestDataSize; - } - wstring modelPath = config(L"modelPath"); - wstring exportPath = modelPath + L".PBN"; - intargvector mbSize = minibatchSize; - - int iters = config(L"iters", 240); - - int traceLevel = config(L"traceLevel", "0"); - size_t numMBsToShowResult = config(L"numMBsToShowResult", "100"); - size_t firstMBsToShowResult = config(L"firstMBsToShowResult", "0"); - size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX); - size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1); - - bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, reader)); - - vector evalNodeNamesVector; - - let net = GetModelFromConfig(config, L"evalNodeNames", evalNodeNamesVector); - - // set tracing flags - net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())), - config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())), - config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector()))); - - SimpleEvaluator eval(net, 
MPIWrapper::GetInstance(), enableDistributedMBReading, numMBsToShowResult, - firstMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches); - eval.EvaluateBN(&reader, evalNodeNamesVector, exportPath, mbSize[0], iters, epochSize); -} - -template -void DoEvalBN(const ConfigParameters& config) -{ - // This is actually used for re-estimating the BN node. It *should* actually randomize. - // TODO: rename to DoEstimateBN. - - // evaluate batch normalization mean and various - ConfigParameters readerConfig(config(L"reader")); - - // Should trace level to zero in Post BN? - //readerConfig.Insert("traceLevel", config(L"traceLevel", "0")); - - DataReader evaBNDataReader(readerConfig); - - DoEvalBNBase(config, evaBNDataReader); -} - template void DoEval(const ConfigParameters& config) { @@ -158,8 +99,6 @@ void DoEval(const ConfigParameters& config) DoEvalBase(config, testDataReader); } -template void DoEvalBN(const ConfigParameters& config); -template void DoEvalBN(const ConfigParameters& config); template void DoEval(const ConfigParameters& config); template void DoEval(const ConfigParameters& config); diff --git a/Source/ActionsLib/TrainActions.cpp b/Source/ActionsLib/TrainActions.cpp index 19cc4a4e4..961fc28df 100644 --- a/Source/ActionsLib/TrainActions.cpp +++ b/Source/ActionsLib/TrainActions.cpp @@ -26,6 +26,7 @@ #include "ScriptableObjects.h" #include "BrainScriptEvaluator.h" #include "BrainScriptParser.h" +#include "PostComputingActions.h" #include #include @@ -259,3 +260,46 @@ void DoEdit(const ConfigParameters& config) template void DoEdit(const ConfigParameters& config); template void DoEdit(const ConfigParameters& config); + +// =========================================================================== +// DoBatchNormalizationStat() - implements CNTK "bnstat" command +// =========================================================================== + +template +void DoBatchNormalizationStat(const ConfigParameters& config) +{ + ConfigParameters readerConfig(config(L"reader")); + readerConfig.Insert("traceLevel", config(L"traceLevel", "0")); + + auto dataReader = make_shared(readerConfig); + + int traceLevel = config(L"traceLevel", "0"); + int itersPerNode = config(L"itersPerNode", 30); + + ConfigArray minibatchSize = config(L"minibatchSize", "40960"); + intargvector mbSize = minibatchSize; + + bool enableDistributedMBReading = config(L"enableDistributedMBReading", false); + + wstring curModelPath = config(L"modelPath", L""); + wstring newModelPath = config(L"newModelPath", L""); + if (newModelPath == L"") + { + newModelPath = curModelPath + L".PBN"; + } + + std::vector evalNodeNames; + let net = GetModelFromConfig(config, L"evalNodeNames", evalNodeNames); + // set tracing flags + net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())), + config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())), + config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector()))); + + PostComputingActions postComputingActions(net, MPIWrapper::GetInstance(), enableDistributedMBReading, traceLevel); + + postComputingActions.BatchNormalizationStatistics(dataReader.get(), evalNodeNames, newModelPath, mbSize[0], itersPerNode); +} + +template void DoBatchNormalizationStat(const ConfigParameters& config); +template void DoBatchNormalizationStat(const ConfigParameters& config); + diff --git a/Source/CNTK/CNTK.cpp b/Source/CNTK/CNTK.cpp index 5014c589f..43dd00ae0 100644 --- a/Source/CNTK/CNTK.cpp +++ b/Source/CNTK/CNTK.cpp @@ -165,7 +165,7 
@@ static void DisableLegacyUsage(const ConfigParameters& TopLevelConfig, const Con // When running in parallel with MPI, only commands in 'commandstoRunOnAllRanks' should // be run in parallel across multiple ranks. Others should only run on rank 0 -const std::set commandstoRunOnAllRanks = { "train", "trainRNN", "adapt", "test", "eval", "cv", "devtest", "pbn" }; +const std::set commandstoRunOnAllRanks = { "train", "trainRNN", "adapt", "test", "eval", "cv", "devtest", "bnstat" }; // process the command template @@ -273,10 +273,9 @@ void DoCommands(const ConfigParameters& config, const shared_ptr& mp } fullEpochsOffset += GetMaxEpochs(commandParams); } - // TODO: Choose a clearer name. - else if (thisAction == "pbn") + else if (thisAction == "bnstat") { - DoEvalBN(commandParams); + DoBatchNormalizationStat(commandParams); } else if (thisAction == "adapt") { diff --git a/Source/CNTK/CNTK.vcxproj b/Source/CNTK/CNTK.vcxproj index 32b9a25f2..816f8bd95 100644 --- a/Source/CNTK/CNTK.vcxproj +++ b/Source/CNTK/CNTK.vcxproj @@ -131,8 +131,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -141,10 +139,6 @@ xcopy /I /D /Y "$(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs" "$(TargetDir)" && if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying dependencies - - true - true - diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj index cefc1bb69..83c257b94 100644 --- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj +++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj @@ -119,8 +119,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -129,10 +127,6 @@ if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying NVidia GDK extension DLL to target folder - - true - true - diff --git a/Source/Common/Common.vcxproj b/Source/Common/Common.vcxproj index 1797cc86a..7ab928a35 100644 --- a/Source/Common/Common.vcxproj +++ b/Source/Common/Common.vcxproj @@ -54,13 +54,7 @@ /d2Zi+ %(AdditionalOptions) - true - true - - true - true - diff --git a/Source/ComputationNetworkLib/ComputationEnvironment.h b/Source/ComputationNetworkLib/ComputationEnvironment.h index cce8252f0..8d6eb19c0 100644 --- a/Source/ComputationNetworkLib/ComputationEnvironment.h +++ b/Source/ComputationNetworkLib/ComputationEnvironment.h @@ -44,6 +44,10 @@ struct ComputationEnvironment // traceLevel int traceLevel = 0; + + // Extreme tracing of node outputs. Make space on your disk. 
+ bool IsLogLevelNodeTrace() const { return traceLevel >= 1000000; } + // more properties should be added here as needed }; typedef std::shared_ptr ComputationEnvironmentPtr; diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h index 1bc4c4693..e831f274a 100644 --- a/Source/ComputationNetworkLib/ComputationNetwork.h +++ b/Source/ComputationNetworkLib/ComputationNetwork.h @@ -136,10 +136,6 @@ public: // main entry point for backprop void Backprop(const ComputationNodeBasePtr rootNode); - // partial forward entry - void ForwardProp(const ComputationNodeBasePtr rootNode, const ComputationNodeBasePtr startNode, - const ComputationNodeBasePtr endNode); - template // version that takes multiple nodes void ForwardProp(const NODESET& nodes) { @@ -689,6 +685,44 @@ public: return GetNodesWhere(predicate, rootNode); } + // Get the eval nodes by name. + // If evalNodeNames is empty, return all the default eval nodes and training criterion nodes. + std::vector GetEvalNodesWithName(const std::vector evalNodeNames) + { + // determine nodes to evaluate + std::vector evalNodes; + + set criteriaLogged; // (keeps track of duplicates so we don't double-log criteria) + if (evalNodeNames.size() == 0) + { + fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n"); + if (EvaluationNodes().empty() && FinalCriterionNodes().empty()) + InvalidArgument("There is no default evaluation node or training criterion specified in the network."); + + for (const auto& node : EvaluationNodes()) + if (criteriaLogged.insert(node).second) + evalNodes.push_back(node); + + for (const auto& node : FinalCriterionNodes()) + if (criteriaLogged.insert(node).second) + evalNodes.push_back(node); + } + else + { + for (int i = 0; i < evalNodeNames.size(); i++) + { + const auto& node = GetNodeFromName(evalNodeNames[i]); + if (!criteriaLogged.insert(node).second) + continue; + if (node->GetSampleLayout().GetNumElements() != 1) + InvalidArgument("Criterion nodes to evaluate must have dimension 1x1."); + evalNodes.push_back(node); + } + } + + return evalNodes; + } + public: // return list of nodes that require precomputation and not precomputed yet std::list GetNodesRequiringPreComputation(const ComputationNodeBasePtr& rootNode = nullptr, bool checkComputed = true); @@ -1056,9 +1090,6 @@ protected: virtual void RequestMatricesBeforeBackprop(MatrixPool& matrixPool); virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool); - // TODO: Why is this virtual? - virtual void ForwardProp(const FrameRange&, const ComputationNodeBasePtr, const ComputationNodeBasePtr) override; - public: // this special constructor constructs the top-level network node // There is currently no other constructor for inner nested PAR-traversed sub-networks, but there will be.
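The new GetEvalNodesWithName() helper centralizes the node-resolution logic that was previously duplicated; later hunks in this patch call it from both SimpleEvaluator::Evaluate() and PostComputingActions::BatchNormalizationStatistics(). A minimal usage sketch, assuming `net` is a compiled ComputationNetworkPtr (template arguments written out here for clarity):

    // Resolve the nodes to evaluate: explicit names if given, otherwise the
    // network's default evaluation nodes plus the training criterion nodes.
    std::vector<std::wstring> evalNodeNames;  // empty => fall back to the defaults
    std::vector<ComputationNodeBasePtr> evalNodes = net->GetEvalNodesWithName(evalNodeNames);
    for (const auto& node : evalNodes)
        fprintf(stderr, "will evaluate: %ls\n", node->NodeName().c_str());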
diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp index 529e66f61..6e4aae767 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp +++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp @@ -79,17 +79,6 @@ void ComputationNetwork::Backprop(const ComputationNodeBasePtr rootNode) // trai GetNestedNetwork(rootNode)->Backprop(FrameRange(nullptr), true, true); } -void ComputationNetwork::ForwardProp(const ComputationNodeBasePtr rootNode, const ComputationNodeBasePtr startNode, const ComputationNodeBasePtr endNode) -{ - VerifyIsCompiled("ForwardProp"); - - // traverse partial nodes as inputs - shared_ptr network = dynamic_pointer_cast(GetNestedNetwork(rootNode)); - assert(network); - - network->ForwardProp(FrameRange(nullptr), startNode, endNode); -} - void ComputationNetwork::FormNestedNetwork(const ComputationNodeBasePtr& rootNode) { if (m_nestedNetworks.find(rootNode) != m_nestedNetworks.end()) @@ -159,12 +148,11 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con node->BumpEvalTimeStamp(); } - // more extreme tracing for the ultimate debugging experience. Make space on your disk. - if (node->GetEnvironmentPtr() && node->Environment().traceLevel >= 1000000) // very high number, since this spews like hell + // Extreme Tracing, part 1/4 + if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace()) DumpNode(node, /*dumpGradient=*/false) || DumpNode(node, false); } } - /*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::Backprop(const FrameRange& fr, bool childrenInThisLoop, bool childrenInOuterLoop) /*override*/ { childrenInThisLoop, childrenInOuterLoop; // TODO: think through what these mean when coming from PAR mode @@ -177,8 +165,8 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con node->Backprop(fr.WithLayout(node->GetMBLayout()), true /*childrenInThisLoop*/, true /*childrenInOuterLoop*/); node->EndBackprop(); - // more extreme tracing for the ultimate debugging experience. Make space on your disk. - if (node->GetEnvironmentPtr() && node->Environment().traceLevel >= 1000000 && node->NeedsGradient()) // very high number, since this spews like hell + // Extreme Tracing, part 2/4 + if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace() && node->NeedsGradient()) DumpNode(node, /*dumpGradient=*/true) || DumpNode(node, true); } } @@ -197,37 +185,7 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con /*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::ReleaseMatricesAfterBackprop(MatrixPool& matrixPool) /*override*/ { } -// TODO: merge with the main ForwardProp() function. -/*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::ForwardProp(const FrameRange & fr, ComputationNodeBasePtr startNode, ComputationNodeBasePtr endNode) -{ - // if start node is nullptr, forward will be enable - bool enableForward = startNode ? false : true; - for (auto& node : m_nestedNodes) - { -#if 0 - if (dynamic_pointer_cast>(node)) - dynamic_pointer_cast>(node)->DebugLogMinibatch(); -#endif - if (node->IsOutOfDateWrtInputs() && enableForward) - { - node->BeginForwardProp(); - node->ForwardProp(fr.WithLayout(node->GetMBLayout())); - node->EndForwardProp(); - - node->BumpEvalTimeStamp(); - } - - if (node == startNode) - { - enableForward = true; - } - else if (node == endNode) - { - break; - } - } -} // helper for logging. 
Returns false if it was not able to dynamic-cast nodep to ComputationNode template static bool DumpNode(ComputationNodeBasePtr nodep, bool dumpGradient) @@ -294,6 +252,15 @@ static bool DumpNode(ComputationNodeBasePtr nodep, bool dumpGradient) node->BumpEvalTimeStamp(); } } + + // Extreme Tracing, part 3/4 + for (auto& node : m_nestedNodes) + { + if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace()) + { + DumpNode(node, /*dumpGradient=*/false) || DumpNode(node, false); + } + } } /*virtual*/ void ComputationNetwork::SEQTraversalFlowControlNode::EndForwardProp() /*override*/ @@ -326,6 +293,15 @@ static bool DumpNode(ComputationNodeBasePtr nodep, bool dumpGradient) // a node that is outside the loop, which is done later in EndBackprop() in PAR mode. } } + + // Extreme Tracing, part 4/4 + for (auto& node : m_nestedNodes) + { + if (node->HasEnvironmentPtr() && node->Environment().IsLogLevelNodeTrace() && node->NeedsGradient()) + { + DumpNode(node, /*dumpGradient=*/true) || DumpNode(node, true); + } + } } // called after last iteration step of ComputeGradient() diff --git a/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj b/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj index 48a9d94d5..ba672d5c9 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj +++ b/Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj @@ -72,8 +72,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -83,10 +81,6 @@ if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying NVidia GDK extension DLL to target folder - - true - true - diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h index e4ea431d1..592a27655 100644 --- a/Source/ComputationNetworkLib/ComputationNode.h +++ b/Source/ComputationNetworkLib/ComputationNode.h @@ -647,6 +647,8 @@ public: LogicError("Environment: No environment has been set."); return *m_environment; } + + bool HasEnvironmentPtr() const { return m_environment.get() != nullptr; } ComputationEnvironmentPtr GetEnvironmentPtr() const { return m_environment; } void SetEnvironment(ComputationEnvironmentPtr environment) { m_environment = environment; } @@ -1886,10 +1888,6 @@ public: virtual void DumpNodeInfo(const bool /*printValues*/, const bool /*printMetadata*/, File& fstream) const override {} virtual std::set> GetMatrixInfo() const override { NOT_IMPLEMENTED; } - virtual void ForwardProp(const FrameRange&, const ComputationNodeBasePtr, const ComputationNodeBasePtr) { NOT_IMPLEMENTED; } - - std::vector GetNestedNodes() { return m_nestedNodes; } - protected: public: // needed in ComputationNetwork::FindInRecurrentLoops(), which really should be part of SEQTraversalFlowControlNode std::vector m_nestedNodes; // nodes tucked away in this node, in evaluation order }; diff --git a/Source/ComputationNetworkLib/InputAndParamNodes.h b/Source/ComputationNetworkLib/InputAndParamNodes.h index 54b3e73e5..3ced4c275 100644 --- a/Source/ComputationNetworkLib/InputAndParamNodes.h +++ b/Source/ComputationNetworkLib/InputAndParamNodes.h @@ -47,6 +47,7 @@ public: MarkValueNonSharable(); m_initString = L"fromValue"; // default init is with 0; typically overwritten m_initValue = 0; + m_regMultiplier = 1.0f; // enable reg in update by default } LearnableParameter(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& shape) :
LearnableParameter(deviceId, name) @@ -142,6 +143,14 @@ public: // called from CloneFunction(..., parameters="constant") virtual void FreezeParameters() override; // from IFreezable + // Set the reg multiplier for a learnable node; it affects both L1Reg and L2Reg. + void SetRegMultiplier(float regMultiplier) + { + m_regMultiplier = regMultiplier; + } + // called from SGD::UpdateWeights() to adjust the reg for each node + float GetRegMultiplier() const { return m_regMultiplier; } + private: // init parameters for deferred initialization (which happens in Validate()) std::wstring m_initString; // if non-empty then deferred initialization is needed. Gets cleared upon completion of deferred init. @@ -151,6 +160,9 @@ private: int m_initOutputRank; bool m_initOnCPUOnly; ElemType m_initValue; + + // flags related to gradient update + float m_regMultiplier; // the multiplier that adjusts L1Reg and L2Reg for this learnable node }; // ----------------------------------------------------------------------- diff --git a/Source/ComputationNetworkLib/TrainingNodes.h b/Source/ComputationNetworkLib/TrainingNodes.h index a4406bd97..d967a21cd 100644 --- a/Source/ComputationNetworkLib/TrainingNodes.h +++ b/Source/ComputationNetworkLib/TrainingNodes.h @@ -8,6 +8,7 @@ #include "ComputationNode.h" #include "BatchNormalizationEngine.h" #include "RNGHandle.h" +#include "InputAndParamNodes.h" #include "CPURNGHandle.h" @@ -2223,15 +2224,15 @@ class BatchNormalizationNode : public ComputationNodeNonLooping, publi public: BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring& name) : Base(deviceId, name), m_spatial(false), m_normTimeConst(0), m_blendTimeConst(0), m_epsilon(0), m_useCntkEngine(true), - m_samplesSeen(0), m_imageLayoutKind(ImageLayoutKind::CHW), m_postBatchNormalization(false), m_swapNormTimeConst(0), - m_swapBlendTimeConst(0), m_convertRunningVariancePending(false) + m_samplesSeen(0), m_imageLayoutKind(ImageLayoutKind::CHW), + m_convertRunningVariancePending(false) { } BatchNormalizationNode(DEVICEID_TYPE deviceId, const wstring& name, bool spatial, double normalizationTimeConstant, double blendTimeConstant, double epsilon, bool useCntkEngine, ImageLayoutKind imageLayoutKind) : Base(deviceId, name), m_spatial(spatial), m_normTimeConst(normalizationTimeConstant), m_blendTimeConst(blendTimeConstant), - m_epsilon(epsilon), m_useCntkEngine(useCntkEngine), m_imageLayoutKind(imageLayoutKind), m_samplesSeen(0), m_postBatchNormalization(false), - m_swapNormTimeConst(0), m_swapBlendTimeConst(0), m_convertRunningVariancePending(false) + m_epsilon(epsilon), m_useCntkEngine(useCntkEngine), m_imageLayoutKind(imageLayoutKind), m_samplesSeen(0), + m_convertRunningVariancePending(false) { } BatchNormalizationNode(const ScriptableObjects::IConfigRecordPtr configp) : @@ -2241,9 +2242,6 @@ public: ImageLayoutKindFrom(configp->Get(L"imageLayout"))) { AttachInputsFromConfig(configp, this->GetExpectedNumInputs()); - m_postBatchNormalization = false; - m_swapNormTimeConst = 0; - m_swapBlendTimeConst = 0; } void Save(File& fstream) const override @@ -2360,7 +2358,7 @@ private: // time-constant conversions double ComputeExpAvgFactor() const { // in inference mode, only use long-term mean and do not update running estimates - if (!Environment().IsTraining() && !m_postBatchNormalization) + if (!Environment().IsTraining()) { if (m_samplesSeen == 0) RuntimeError("%ls: inference mode is used, but nothing has been trained.", NodeName().c_str()); @@ -2392,7 +2390,7 @@ private: // time-constant conversions double ComputeBlendFactor() const { // in inference mode, only use long-term mean and do not update running estimates - if (!Environment().IsTraining() && !m_postBatchNormalization) + if (!Environment().IsTraining()) { if (m_samplesSeen == 0) RuntimeError("%ls: inference mode is used, but nothing has been trained.", NodeName().c_str()); @@ -2441,7 +2439,7 @@ public: // In inference-only mode, m_savedMean and m_saveInvStdDev will not be // produced and BackpropToNonLooping() may not be called. In // non-inference (training) mode, saved statistics must be produced. - bool inferenceOnly = !Environment().IsTraining() && !m_postBatchNormalization; + bool inferenceOnly = !Environment().IsTraining(); m_bnEng->Forward(/*in=*/ sliceInputValue, scale, bias, // (in) inferenceOnly, expAvgFactor, blendFactor, runMean, runVariance, // (in/out) running estimates, updated from the current MB mean/variance @@ -2506,14 +2504,6 @@ public: } virtual void EndForwardProp() override - { - if(m_postBatchNormalization) - m_samplesSeen += GetMBLayout()->GetActualNumSamples(); - - Base::EndForwardProp(); - } - - virtual void EndBackprop() override { // Update samples if not locked. double expAvgFactor = ComputeExpAvgFactor(); // weight for the new MB statistics in the running estimate. The previous value of the running statistics is kept with weight (1-this) @@ -2655,28 +2645,29 @@ public: m_blendTimeConst = std::numeric_limits::infinity(); } + // ResetStatisticsState resets the batch-normalization statistics to their initial state. + // It is meant for re-estimating the mean and variance of BN nodes; + // any other use may lead to unreliable results, so be careful. + void ResetStatisticsState() + { + m_samplesSeen = 0; + m_normTimeConst = 0; + m_blendTimeConst = 0; + } + // Turn off the L1 and L2 regularization + void DisableRegInBatchNormalization() + { + let scaleNode = dynamic_pointer_cast>(Input(1)); + let biasNode = dynamic_pointer_cast>(Input(2)); + scaleNode->SetRegMultiplier(0.f); + biasNode->SetRegMultiplier(0.f); + } double NormalizationTimeConstant() const { return m_normTimeConst; } double BlendTimeConstant() const { return m_blendTimeConst; } bool Spatial() const { return m_spatial; } double Epsilon() const { return m_epsilon; } bool UseCNTKEngine() const { return m_useCntkEngine; } - void SetPostBatchNormalizationBegin() - { - m_postBatchNormalization = true; - m_samplesSeen = 0; - m_swapNormTimeConst = m_normTimeConst; - m_swapBlendTimeConst = m_blendTimeConst; - m_normTimeConst = -1; - m_blendTimeConst = 0; - } - void SetPostBatchNormalizationEnd() - { - m_postBatchNormalization = false; - m_normTimeConst = m_swapNormTimeConst; - m_blendTimeConst = m_swapBlendTimeConst; - } - private: // Old versioning - do not use. Do not remove until we're sure there are no old models around.
struct VersionInfo @@ -2740,11 +2731,6 @@ private: std::unique_ptr> m_bnEng; - // post batch normalization process mark - bool m_postBatchNormalization; - - double m_swapNormTimeConst; - double m_swapBlendTimeConst; bool m_convertRunningVariancePending; }; diff --git a/Source/EvalDll/EvalDll.vcxproj b/Source/EvalDll/EvalDll.vcxproj index c2dde9ebd..329791c63 100644 --- a/Source/EvalDll/EvalDll.vcxproj +++ b/Source/EvalDll/EvalDll.vcxproj @@ -120,8 +120,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -130,10 +128,6 @@ if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying NVidia GDK extension DLL to target folder - - true - true - diff --git a/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj b/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj index c0a86b00e..d6d8f1f79 100644 --- a/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj +++ b/Source/Extensibility/EvalWrapper/EvalWrapper.vcxproj @@ -78,16 +78,10 @@ Level3 WIN32;NDEBUG;%(PreprocessorDefinitions) - true - true true - - true - true - diff --git a/Source/Math/Math.vcxproj b/Source/Math/Math.vcxproj index cd463fa24..0d223472f 100644 --- a/Source/Math/Math.vcxproj +++ b/Source/Math/Math.vcxproj @@ -147,16 +147,10 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) - - true - true - diff --git a/Source/Math/MathCUDA.vcxproj b/Source/Math/MathCUDA.vcxproj index 8748a171a..6bfe9c5f6 100644 --- a/Source/Math/MathCUDA.vcxproj +++ b/Source/Math/MathCUDA.vcxproj @@ -105,16 +105,10 @@ if exist "$(CuDnnDll)" xcopy /D /Y "$(CuDnnDll)" "$(OutputPath)" true false /d2Zi+ %(AdditionalOptions) - true - true false - - true - true - diff --git a/Source/Readers/BinaryReader/BinaryReader.vcxproj b/Source/Readers/BinaryReader/BinaryReader.vcxproj index e64a6cbfd..d5dfe337a 100644 --- a/Source/Readers/BinaryReader/BinaryReader.vcxproj +++ b/Source/Readers/BinaryReader/BinaryReader.vcxproj @@ -90,8 +90,6 @@ false /d2Zi+ %(AdditionalOptions) true - true - true Console @@ -101,10 +99,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj index f78ac53ec..4af732971 100644 --- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj +++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.vcxproj @@ -83,18 +83,12 @@ true NDEBUG;%(PreprocessorDefinitions) /d2Zi+ %(AdditionalOptions) - true - true true true true - - true - true - diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj index c03f34ad0..442c34310 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj @@ -75,18 +75,12 @@ true NDEBUG;%(PreprocessorDefinitions) /d2Zi+ %(AdditionalOptions) - true - true true true true - - true - true - diff --git a/Source/Readers/DSSMReader/DSSMReader.vcxproj b/Source/Readers/DSSMReader/DSSMReader.vcxproj index 838cc1dad..d06080f58 100644 --- a/Source/Readers/DSSMReader/DSSMReader.vcxproj +++ b/Source/Readers/DSSMReader/DSSMReader.vcxproj @@ -88,8 +88,6 @@ false 
/d2Zi+ %(AdditionalOptions) true - true - true Console @@ -99,10 +97,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj b/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj index 1dd2f3024..c2685240a 100644 --- a/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj +++ b/Source/Readers/HTKDeserializers/HTKDeserializers.vcxproj @@ -75,18 +75,12 @@ true NDEBUG;%(PreprocessorDefinitions) /d2Zi+ %(AdditionalOptions) - true - true true true true - - true - true - diff --git a/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj b/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj index 8635a4055..56453f16e 100644 --- a/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj +++ b/Source/Readers/HTKMLFReader/HTKMLFReader.vcxproj @@ -93,8 +93,6 @@ ..\..\common\include;..\..\Math ..\..\common\include;..\..\Math ..\..\common\include;..\..\Math - true - true Console @@ -107,10 +105,6 @@ $(SolutionDir)$(Platform)\$(Configuration)\ $(SolutionDir)$(Platform)\$(Configuration)\ - - true - true - diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj b/Source/Readers/ImageReader/ImageReader.vcxproj index 13899b05d..376a9b8c3 100644 --- a/Source/Readers/ImageReader/ImageReader.vcxproj +++ b/Source/Readers/ImageReader/ImageReader.vcxproj @@ -97,18 +97,12 @@ true NDEBUG;%(PreprocessorDefinitions) /d2Zi+ %(AdditionalOptions) - true - true true true true - - true - true - diff --git a/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj b/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj index 2b6cb08c0..59aed758d 100644 --- a/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj +++ b/Source/Readers/LMSequenceReader/LMSequenceReader.vcxproj @@ -88,8 +88,6 @@ false /d2Zi+ %(AdditionalOptions) true - true - true Console @@ -99,10 +97,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj b/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj index afe788402..e39b67b67 100644 --- a/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj +++ b/Source/Readers/LUSequenceReader/LUSequenceReader.vcxproj @@ -91,8 +91,6 @@ false /d2Zi+ %(AdditionalOptions) true - true - true Console @@ -102,10 +100,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj b/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj index 179bf0fb8..afd3f48b8 100644 --- a/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj +++ b/Source/Readers/LibSVMBinaryReader/LibSVMBinaryReader.vcxproj @@ -88,8 +88,6 @@ false /d2Zi+ %(AdditionalOptions) true - true - true Console @@ -99,10 +97,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj b/Source/Readers/ReaderLib/ReaderLib.vcxproj index 1a5c3226e..6b54ca0cc 100644 
--- a/Source/Readers/ReaderLib/ReaderLib.vcxproj +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj @@ -41,13 +41,7 @@ $(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math - true - true - - true - true - diff --git a/Source/Readers/SparsePCReader/SparsePCReader.vcxproj b/Source/Readers/SparsePCReader/SparsePCReader.vcxproj index 9557eaf63..a3200c0b3 100644 --- a/Source/Readers/SparsePCReader/SparsePCReader.vcxproj +++ b/Source/Readers/SparsePCReader/SparsePCReader.vcxproj @@ -91,8 +91,6 @@ false /d2Zi+ %(AdditionalOptions) true - true - true Console @@ -102,10 +100,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/Readers/UCIFastReader/UCIFastReader.vcxproj b/Source/Readers/UCIFastReader/UCIFastReader.vcxproj index 6cc2fdcea..fb79bf8b0 100644 --- a/Source/Readers/UCIFastReader/UCIFastReader.vcxproj +++ b/Source/Readers/UCIFastReader/UCIFastReader.vcxproj @@ -90,8 +90,6 @@ false /d2Zi+ %(AdditionalOptions) true - true - true Console @@ -101,10 +99,6 @@ Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true - - true - true - diff --git a/Source/SGDLib/PostComputingActions.cpp b/Source/SGDLib/PostComputingActions.cpp new file mode 100644 index 000000000..f08faaa43 --- /dev/null +++ b/Source/SGDLib/PostComputingActions.cpp @@ -0,0 +1,161 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// +// PostComputingActions.cpp -- CNTK post-statistics related actions +// + +#include "PostComputingActions.h" + +#include "TrainingNodes.h" +#include "ProgressTracing.h" +#include "DataReaderHelpers.h" +#include "SimpleDistGradAggregator.h" + +#include + +namespace Microsoft { namespace MSR { namespace CNTK { + +template +void PostComputingActions::BatchNormalizationStatistics(IDataReader * dataReader, const vector& evalNodeNames, + const wstring newModelPath, const size_t mbSize, const int iters) +{ + // The mean and variance of the BN nodes are updated while the statistics are computed, + // so the network must run in training mode; there is no backprop, and all other + // parameters stay fixed during the computation. + ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::training); + + // BN nodes need to be computed bottom-up, in evaluation order + let evalNodes = m_net->GetEvalNodesWithName(evalNodeNames); + + // find all the BN nodes, in evaluation order + std::vector bnNodes; + std::set bnNodesLogged; // (avoid recording a batch-normalization node twice) + for (auto& evalNode : evalNodes) + { + for (auto& node : m_net->GetEvalOrder(evalNode)) + { + let bnNode = dynamic_pointer_cast>(node); + if (bnNode) + { + if (bnNodesLogged.insert(node).second) + { + // reset the statistics state of the BN node + bnNode->ResetStatisticsState(); + bnNode->SetNormalizationTimeConstants(-1, bnNode->NormalizationTimeConstant(), + 0, bnNode->BlendTimeConstant()); + bnNodes.push_back(node); + // add the BN node to the evaluation group so that it becomes a root node when + // the network is re-compiled + m_net->AddToNodeGroup(L"evaluation", bnNode); + } + } + } + } + + // re-compile the network to add the BN nodes as root nodes + m_net->CompileNetwork(); + + // allocate memory for evaluating all BN nodes + m_net->AllocateAllMatrices(bnNodes, std::vector(), nullptr); + + // prepare features + auto& featureNodes = m_net->FeatureNodes(); + + StreamMinibatchInputs inputMatrices; + for (auto& node : featureNodes) + inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout()); + + bool useParallelTrain = (m_mpi != nullptr); + bool useDistributedMBReading = useParallelTrain && m_enableDistributedMBReading && dataReader->SupportsDistributedMBRead(); + size_t totalEpochSize = bnNodes.size() * mbSize * iters; + + m_net->StartEvaluateMinibatchLoop(bnNodes); + + if (useDistributedMBReading) + dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), inputMatrices.GetStreamDescriptions(), totalEpochSize); + else + dataReader->StartMinibatchLoop(mbSize, 0, inputMatrices.GetStreamDescriptions(), totalEpochSize); + + for (auto& node : bnNodes) + { + let bnNode = static_pointer_cast>(node); + size_t actualMBSize = 0; + + LOGPRINTF(stderr, "Estimating Statistics --> %ls\n", bnNode->GetName().c_str()); + + // For each BN node, the statistics are the mean and variance averaged over several forward passes; + // each forward pass runs from the features up to the current BN node. + for (int iter = 0; iter < iters; iter++) + { + // while computing BN statistics, every data read must succeed + bool wasDataRead = DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, + nullptr, useDistributedMBReading, useParallelTrain, inputMatrices, actualMBSize, m_mpi); + + if (!wasDataRead) LogicError("Data read failure in batch normalization statistics"); + + ComputationNetwork::BumpEvalTimeStamp(featureNodes); + + // forward prop up to the current BN node + m_net->ForwardProp(node); + } + + // after the statistics are computed, freeze the mean and variance of the BN node + bnNode->FreezeParameters(); + + // Syncing during or after all iterations of a BN node is equivalent + if (useParallelTrain) + { + if (m_gradHeader == nullptr) + { + m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr) + { + DistGradHeader::Destroy(ptr); + }); + } + + // push the mean and variance statistics of the BN node into the MPI update vector + std::vector*> learnParamsValues(2, nullptr); + + SimpleDistGradAggregator distGradAgg(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/); + + auto runMeanParameterPtr = node->Input(3); + auto runStdParameterPtr = node->Input(4); + + shared_ptr> runMeanNode = static_pointer_cast>(runMeanParameterPtr); + shared_ptr> runStdNode = static_pointer_cast>(runStdParameterPtr); + + learnParamsValues[0] = &(runMeanNode->Value()); + learnParamsValues[1] = &(runStdNode->Value()); + + m_gradHeader->numSamples = actualMBSize ? 1 : actualMBSize; + distGradAgg.AggregateGradients(learnParamsValues, m_gradHeader.get(), 0); + + // get the average mean and variance across all the workers + for (auto& parameter : learnParamsValues) + { + (*parameter) /= (ElemType)m_mpi->NumNodesInUse(); + } + } + } + + dataReader->DataEnd(); + + // remove all the added BN nodes from the evaluation group + for (auto& bnNode : bnNodes) + { + m_net->RemoveFromNodeGroup(L"evaluation", bnNode); + } + + // save model + if (!useParallelTrain || m_mpi->CurrentNodeRank() == m_mpi->MainNodeRank()) + m_net->Save(newModelPath); + + return; +} + +template class PostComputingActions; +template class PostComputingActions; + +}}} diff --git a/Source/SGDLib/PostComputingActions.h b/Source/SGDLib/PostComputingActions.h new file mode 100644 index 000000000..9dfc95bb6 --- /dev/null +++ b/Source/SGDLib/PostComputingActions.h @@ -0,0 +1,65 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// +// PostComputingActions.h -- CNTK post-statistics related actions +// + +#pragma once +#include "ComputationNode.h" +#include "ComputationNetwork.h" +#include "MPIWrapper.h" +#include "DataReader.h" +#include "IDistGradAggregator.h" +#include "DistGradHeader.h" + +using namespace std; + +namespace Microsoft { namespace MSR { namespace CNTK { + +template +class IDistGradAggregator; + +// Post-computing statistics are normally generated between training and evaluation, for use during evaluation. +// For now, the only application is computing the mean and variance of batch-normalization nodes after training. +template +class PostComputingActions +{ +public: + PostComputingActions(ComputationNetworkPtr net, const MPIWrapperPtr& mpi, bool enableDistributedMBReading = false, const int traceLevel = 0) : + m_net(net), + m_traceLevel(traceLevel), + m_mpi(mpi), + m_distGradAgg(nullptr), + m_gradHeader(nullptr), + m_enableDistributedMBReading(enableDistributedMBReading) + { + } + + // This function evaluates the mean and variance of all batch-normalization nodes after training. + // Details: https://github.com/Microsoft/CNTK/wiki/Post-Batch-Normalization-Statistics + // It belongs next to evaluation because it runs after training and involves no backprop, which makes it + // a kind of evaluation function. + // In this function, + // 1. since all weights are fixed except the BN statistics, the network runs in training mode so the + // BN statistics can update, but no parameters are updated. + // 2. the network model and data source are loaded as in Evaluate(), minus pieces that are not needed here, + // such as error statistics. + // 3. the BN nodes in the network are collected into a vector in evaluation order. + // 4. the vector is walked node by node to estimate the mean and variance (this links to the changes of + // BatchNormalizationNode in TrainingNodes.h, which let the nodes "learn" mean and variance here). + // 5. for multi-GPU runs, the BN results are synced across all workers and averaged. + void BatchNormalizationStatistics(IDataReader* dataReader, const vector& evalNodeNames, const wstring newModelPath, + const size_t mbSize, const int iters = 30); + +private: + ComputationNetworkPtr m_net; + MPIWrapperPtr m_mpi; + bool m_enableDistributedMBReading; + + int m_traceLevel; + + std::shared_ptr> m_distGradAgg; + std::shared_ptr m_gradHeader; +}; +}}} diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp index dc5a9d109..59962212a 100644 --- a/Source/SGDLib/SGD.cpp +++ b/Source/SGDLib/SGD.cpp @@ -8,6 +8,7 @@ #include "SpecialPurposeNodes.h" // for SequenceWithSoftmaxNode #include "DataReaderHelpers.h" #include "MatrixQuantizerImpl.h" +#include "InputAndParamNodes.h" #ifdef CNTK_PARALLEL_TRAINING_SUPPORT //static inline bool operator==(const std::pair& a, double b) { assert(b==0); return a.first == b; } @@ -147,15 +148,15 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, else { LOGPRINTF(stderr, "Training criteria:\n"); - for (const auto& node : criterionNodes) - { - LOGPRINTF(stderr, "\t%ls = %ls\n", node->NodeName().c_str(), node->OperationName().c_str()); - } - if (criterionNodes.empty()) - { - LOGPRINTF(stderr, "\t(none)\n"); - InvalidArgument("TrainOrAdaptModel: No criterion node was specified."); - } + for (const auto& node : criterionNodes) + { + LOGPRINTF(stderr, "\t%ls = %ls\n", node->NodeName().c_str(), node->OperationName().c_str()); + } + if (criterionNodes.empty()) + { + LOGPRINTF(stderr, "\t(none)\n"); + InvalidArgument("TrainOrAdaptModel: No criterion node was specified."); + } } // This code is only relevant for the new (V2) readers. It exist because of @@ -296,10 +297,10 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, { fprintf(stderr, "out of %d parameter tensors and %d nodes with gradient:\n\n", (int)learnableNodes.size(), (int)numNeedsGradient); - for (let nodeDescription : nodesToUpdateDescriptions) - { - LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str()); - } + for (let nodeDescription : nodesToUpdateDescriptions) + { + LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str()); + } } // one blank line before training progress log @@ -334,7 +335,7 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, { InitModelAggregationHandler(m_syncStatsTrace, net->GetDeviceId()); } - + // precompute mean and invStdDev nodes and save initial model // When no precompute, only save if we did not load the model from a // checkpoint but instead built it from a network description @@ -534,7 +535,7 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, ? 0.0 : momentumPerSample >= 1.0 ? 0.0 - : -1.0 / log(momentumPerSample); + : -1.0 / log(momentumPerSample); if (m_traceLevel > 0) { fprintf(stderr, "\n"); @@ -962,24 +963,13 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, EpochCriterion epochCriterionLastLogged = epochCriterion; vector epochEvalErrorsLastLogged = epochEvalErrors; - // Now, we need to use a switch to enable/disable wk in BatchNormalization. - // If we can determine whether wk added or not for each node, then, discard this - // TODO: Define "wk" and say what this is for and in which context it is used. - std::unordered_set batchNormalizationWeights; - if (m_disableWkInBatchNormal) { - for (auto& evalNode : evaluationNodes) + // NOTE: For ResNet, the regularization in BatchNormalization should be disabled.
+ if (m_disableRegInBatchNormalization) { + let bnNodes = net->GetNodesWithType(L"BatchNormalization"); + for (auto &node : bnNodes) { - shared_ptr nestedNetwork = static_pointer_cast(net->GetNestedNetwork(evalNode)); - for (auto& node : nestedNetwork->GetNestedNodes()) - { - shared_ptr> castNode = - dynamic_pointer_cast>(node); - if (castNode) - { - batchNormalizationWeights.insert(castNode->GetInputs()[1]); - batchNormalizationWeights.insert(castNode->GetInputs()[2]); - } - } + let bnNode = dynamic_pointer_cast>(node); + bnNode->DisableRegInBatchNormalization(); } } @@ -1124,11 +1114,11 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, { // accumulate criterion values (objective, eval) assert(wasDataRead || numSamplesWithLabelOfNetwork == 0); - // criteria are in Value()(0,0), we accumulate into another 1x1 Matrix (to avoid having to pull the values off the GPU) - localEpochCriterion.Add(criterionNodes, 0, numSamplesWithLabelOfNetwork); - for (size_t i = 0; i < evaluationNodes.size(); i++) - localEpochEvalErrors.Add(evaluationNodes, i, numSamplesWithLabelOfNetwork); - } + // criteria are in Value()(0,0), we accumulate into another 1x1 Matrix (to avoid having to pull the values off the GPU) + localEpochCriterion.Add(criterionNodes, 0, numSamplesWithLabelOfNetwork); + for (size_t i = 0; i < evaluationNodes.size(); i++) + localEpochEvalErrors.Add(evaluationNodes, i, numSamplesWithLabelOfNetwork); + } else { // distributed gradient aggregation @@ -1207,8 +1197,8 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, LogicError("%ls %ls operation has NaNs in smoothedGradient.", node->NodeName().c_str(), node->OperationName().c_str()); #endif double nodeDependentLearningRatePerSample = learnRatePerSample * node->GetLearningRateMultiplier(); + double nodeDependentRegMultiplier = dynamic_pointer_cast>(node)->GetRegMultiplier(); double momentumPerSample = GetMomentumPerSample(epochNumber /*BUGBUG workaround:*/, net->GetMBLayoutPtrOfNetwork()->GetNumParallelSequences()); - double l2Factor = batchNormalizationWeights.find(node) == batchNormalizationWeights.end() ? 1.0 : 0.0; // TODO: Check why l2Factor is not applied to L1. Bug? 
// BUGBUG (Issue #95): Access to net MBLayout can no longer be done if we have multiple input layouts UpdateWeights(dynamic_pointer_cast>(node)->Value(), @@ -1216,7 +1206,7 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, *smoothedGradientIter, *smoothedCountIter, nodeDependentLearningRatePerSample, momentumPerSample, numSamplesInMinibatch, - m_L2RegWeight * l2Factor, m_L1RegWeight, + m_L2RegWeight * nodeDependentRegMultiplier, m_L1RegWeight * nodeDependentRegMultiplier, m_needAveMultiplier, m_useNesterovMomentum); node->BumpEvalTimeStamp(); #ifdef _DEBUG @@ -1229,7 +1219,7 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, if (m_perfTraceLevel > 0) - { + { std::unique_ptr mainStreamSyncEvent(MatrixComputeStreamEvent::Create(net->GetDeviceId())); mainStreamSyncEvent->SynchronizeEvent(); fineGrainedPerfMeasurementTimer.Stop(); @@ -1268,7 +1258,7 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, { m_pMultiversoHelper->PushAndPullModel(learnableNodes, nSamplesSinceLastModelSync); nSamplesSinceLastModelSync = 0; - } + } } @@ -1884,7 +1874,7 @@ size_t SGD::SearchForBestMinibatchSize(ComputationNetworkPtr net, { LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Computed baseCriterion %.8f for minibatchSize=%d\n", (int)epochNumber + 1, baseCriterion.Average(), (int)trialMinibatchSize); - } + } } else if (!epochCriterion.IsNan() && epochCriterion.Average() > (baseCriterion.Average() * (1.0 + (m_minibatchSearchCriterionErrorMargin / 100.0)))) @@ -1908,7 +1898,7 @@ size_t SGD::SearchForBestMinibatchSize(ComputationNetworkPtr net, } if (m_traceLevel > 0) { - LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Search successful. New minibatchSize is %d. epochCriterion = %.8f vs baseCriterion = %.8f\n", + LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Search successful. New minibatchSize is %d. 
epochCriterion = %.8f vs baseCriterion = %.8f\n", (int)epochNumber + 1, (int)lastGoodMinibatchSize, lastGoodEpochCriterion.Average(), baseCriterion.Average()); } return lastGoodMinibatchSize; @@ -1999,7 +1989,7 @@ void SGD::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net template void SGD::InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel) - { +{ assert(GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD); if (traceLevel > 0) fprintf(stderr, "Initializing dataParallelSGD for %d-bit quantization.\n", numGradientBits); @@ -2008,11 +1998,11 @@ void SGD::InitDistGradAgg(int numEvalNodes, int numGradientBits, int t m_distGradAgg = std::make_shared>(m_mpi, numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace); #else if (numGradientBits != (8 * sizeof(ElemType))) - { - RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!"); - } + { + RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!"); + } - m_distGradAgg = std::make_shared>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace); + m_distGradAgg = std::make_shared>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace); #endif // !CNTK_PARALLEL_TRAINING_SUPPORT m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) { DistGradHeader::Destroy(ptr); }); @@ -2141,7 +2131,6 @@ void SGD::UpdateWeights(Matrix& functionValues, Matrix void SGD::ClipGradient(Matrix& gradient, const size_t actualMBSize) const { @@ -2621,8 +2610,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType) m_seqGammarCalcLMF = configSGD(L"seqGammarLMF", 14.0); m_seqGammarCalcbMMIFactor = configSGD(L"seqGammarBMMIFactor", 0.0); m_seqGammarCalcWP = configSGD(L"seqGammarWordPen", 0.0); - - m_disableWkInBatchNormal = configSGD(L"disableWkInBatchNormal", false); + m_disableRegInBatchNormalization = configSGD(L"disableRegInBatchNormalization", false); m_dropoutRates = configSGD(L"dropoutRate", ConfigRecordType::Array(doubleargvector(vector{0.0}))); m_batchNormalizationTimeConstant = configSGD(L"batchNormalizationTimeConstant", ConfigRecordType::Array(doubleargvector(vector{0}))); diff --git a/Source/SGDLib/SGD.h b/Source/SGDLib/SGD.h index 6d6c82965..ef63ebc9e 100644 --- a/Source/SGDLib/SGD.h +++ b/Source/SGDLib/SGD.h @@ -307,7 +307,10 @@ protected: double m_seqGammarCalcbMMIFactor; bool m_seqGammarCalcUsesMBR; - bool m_disableWkInBatchNormal; // TODO: comment? 
+ // Decide whether regularization should be applied to BatchNormalizationNode. + // true: disable regularization + // false: enable regularization (default) + bool m_disableRegInBatchNormalization; }; template diff --git a/Source/SGDLib/SGDLib.vcxproj b/Source/SGDLib/SGDLib.vcxproj index aca16d8e9..a30976f7d 100644 --- a/Source/SGDLib/SGDLib.vcxproj +++ b/Source/SGDLib/SGDLib.vcxproj @@ -87,8 +87,6 @@ %(AdditionalIncludeDirectories);$(CudaInclude) - true - true %(AdditionalLibraryDirectories);$(CudaLibPath) @@ -98,10 +96,6 @@ if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)" Copying NVidia GDK extension DLL to target folder - - true - true - @@ -139,6 +133,7 @@ + @@ -147,6 +142,7 @@ + diff --git a/Source/SGDLib/SGDLib.vcxproj.filters b/Source/SGDLib/SGDLib.vcxproj.filters index 0d50e4db1..29b714a4e 100644 --- a/Source/SGDLib/SGDLib.vcxproj.filters +++ b/Source/SGDLib/SGDLib.vcxproj.filters @@ -10,6 +10,9 @@ SGD + + Stat + @@ -132,7 +135,10 @@ SGD - + + Stat + + Parallelization @@ -173,5 +179,8 @@ {b866d513-7bd0-497c-98c2-f62dbcd4cde4} + + {f406217f-5a11-44ca-bb34-52254dbee8af} + \ No newline at end of file diff --git a/Source/SGDLib/SimpleEvaluator.h b/Source/SGDLib/SimpleEvaluator.h index c6d50f891..48b6897c3 100644 --- a/Source/SGDLib/SimpleEvaluator.h +++ b/Source/SGDLib/SimpleEvaluator.h @@ -52,36 +52,7 @@ public: { ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring); - // determine nodes to evaluate - std::vector evalNodes; - - set criteriaLogged; // (keeps track ot duplicates to avoid we don't double-log critera) - if (evalNodeNames.size() == 0) - { - fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n"); - if (m_net->EvaluationNodes().empty() && m_net->FinalCriterionNodes().empty()) - InvalidArgument("There is no default evaluation node or training criterion specified in the network."); - - for (const auto& node : m_net->EvaluationNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - - for (const auto& node : m_net->FinalCriterionNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - } - else - { - for (int i = 0; i < evalNodeNames.size(); i++) - { - const auto& node = m_net->GetNodeFromName(evalNodeNames[i]); - if (!criteriaLogged.insert(node).second) - continue; - if (node->GetSampleLayout().GetNumElements() != 1) - InvalidArgument("Criterion nodes to evaluate must have dimension 1x1."); - evalNodes.push_back(node); - } - } + let evalNodes = m_net->GetEvalNodesWithName(evalNodeNames); // initialize eval results std::vector evalResults(evalNodes.size(), EpochCriterion(0)); @@ -257,154 +228,6 @@ public: return evalResults; } - // TODO: remove code dup w.r.t.
Evaluate() - void EvaluateBN(IDataReader* dataReader, const vector& evalNodeNames, const wstring exportPath, const size_t mbSize, const int iters = 240, const size_t testSize = requestDataSize) - { - ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring); - - // determine nodes to evaluate - std::vector evalNodes; - - set criteriaLogged; // (keeps track ot duplicates to avoid we don't double-log critera) - if (evalNodeNames.size() == 0) - { - fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n"); - if (m_net->EvaluationNodes().empty() && m_net->FinalCriterionNodes().empty()) - InvalidArgument("There is no default evaluation node or training criterion specified in the network."); - - for (const auto& node : m_net->EvaluationNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - - for (const auto& node : m_net->FinalCriterionNodes()) - if (criteriaLogged.insert(node).second) - evalNodes.push_back(node); - } - else - { - for (int i = 0; i < evalNodeNames.size(); i++) - { - const auto& node = m_net->GetNodeFromName(evalNodeNames[i]); - if (!criteriaLogged.insert(node).second) - continue; - if (node->GetSampleLayout().GetNumElements() != 1) - InvalidArgument("Criterion nodes to evaluate must have dimension 1x1."); - evalNodes.push_back(node); - } - } - - // allocate memory for forward computation - m_net->AllocateAllMatrices(evalNodes, {}, nullptr); - - // prepare features and labels - auto& featureNodes = m_net->FeatureNodes(); - auto& labelNodes = m_net->LabelNodes(); - - StreamMinibatchInputs inputMatrices; - for (auto& node : featureNodes) - inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout()); - for (auto& node : labelNodes) - inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout()); - - bool useParallelTrain = (m_mpi != nullptr); - bool useDistributedMBReading = useParallelTrain && m_enableDistributedMBReading && dataReader->SupportsDistributedMBRead(); - if (useDistributedMBReading) - dataReader->StartDistributedMinibatchLoop(mbSize, 0, m_mpi->CurrentNodeRank(), m_mpi->NumNodesInUse(), testSize); - else - dataReader->StartMinibatchLoop(mbSize, 0, testSize); - - m_net->StartEvaluateMinibatchLoop(evalNodes); - - // Passing in two empty node lists so the dispatcher can work for the evalNodes. - std::list learnableNodes; - std::vector criterionNodes; - - // First, all batch normalization nodes should be marked. 
-        std::vector<ComputationNodeBasePtr> batchNormalNodes;
-        shared_ptr<FlowControlNode> nestedNetwork = static_pointer_cast<FlowControlNode>(m_net->GetNestedNetwork(evalNodes[0]));
-        for (auto& node : nestedNetwork->GetNestedNodes())
-        {
-            shared_ptr<BatchNormalizationNode<ElemType>> castNode =
-                dynamic_pointer_cast<BatchNormalizationNode<ElemType>>(node);
-            if (castNode)
-            {
-                batchNormalNodes.push_back(node);
-            }
-        }
-
-        // Push all batch normalization mean and std into learn params values for mpi update
-        std::vector<Matrix<ElemType>*> learnParamsValues(2, nullptr);
-
-        bool noMoreSamplesToProcess = false;
-        for (auto& node : batchNormalNodes)
-        {
-            shared_ptr<BatchNormalizationNode<ElemType>> batchNode =
-                static_pointer_cast<BatchNormalizationNode<ElemType>>(node);
-            batchNode->SetPostBatchNormalizationBegin();
-            size_t actualMBSize = 0;
-
-            LOGPRINTF(stderr, "Start evaluating: %ls\n", batchNode->GetName().c_str());
-
-            // Post batch normal iters
-            for (int iter = 0; iter < iters; iter++)
-            {
-                bool wasDataRead = DataReaderHelpers::GetMinibatchIntoNetwork<ElemType>(*dataReader, m_net,
-                    nullptr, useDistributedMBReading, useParallelTrain, inputMatrices, actualMBSize, m_mpi);
-
-                if (!wasDataRead && (!useDistributedMBReading || noMoreSamplesToProcess))
-                    break;
-
-                // TODO should handle it, since post BN exist no samples in iters
-                if (!wasDataRead)
-                    actualMBSize = 0;
-
-                // Batch Normalization Evaluate don't need to support subMinibatches
-                ComputationNetwork::BumpEvalTimeStamp(featureNodes);
-                ComputationNetwork::BumpEvalTimeStamp(labelNodes);
-
-                m_net->ForwardProp(evalNodes[0], nullptr, node);
-                dataReader->DataEnd();
-            }
-            batchNode->SetPostBatchNormalizationEnd();
-
-            // Sync during or after all iters of a BN node are equivalent
-            if (useParallelTrain)
-            {
-                if (m_gradHeader == nullptr)
-                {
-                    m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr)
-                    {
-                        DistGradHeader::Destroy(ptr);
-                    });
-                }
-
-                SimpleDistGradAggregator<ElemType> distGradAgg(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
-
-                auto runMeanParameterPtr = node->GetInputs()[3];
-                auto runStdParameterPtr = node->GetInputs()[4];
-
-                shared_ptr<LearnableParameter<ElemType>> runMeanNode = static_pointer_cast<LearnableParameter<ElemType>>(runMeanParameterPtr);
-                shared_ptr<LearnableParameter<ElemType>> runStdNode = static_pointer_cast<LearnableParameter<ElemType>>(runStdParameterPtr);
-
-                learnParamsValues[0] = &(runMeanNode->Value());
-                learnParamsValues[1] = &(runStdNode->Value());
-
-                m_gradHeader->numSamples = actualMBSize ? 1 : actualMBSize;
-                distGradAgg.AggregateGradients(learnParamsValues, m_gradHeader.get(), /*resetState =*/ false);
-
-                for (auto& parameter : learnParamsValues)
-                {
-                    (*parameter) /= (ElemType)m_mpi->NumNodesInUse();
-                }
-            }
-        }
-
-        // Save Model
-        if (!useParallelTrain || m_mpi->CurrentNodeRank() == m_mpi->MainNodeRank())
-            m_net->Save(exportPath);
-
-        return;
-    }
-
 protected:
     void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastLogged,
                                const vector<ComputationNodeBasePtr>& evalNodes,
diff --git a/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj b/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj
index 85b579c31..9725d9eb3 100644
--- a/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj
+++ b/Source/SequenceTrainingLib/SequenceTrainingLib.vcxproj
@@ -41,13 +41,7 @@
       WIN32;_LIB;%(PreprocessorDefinitions)
       $(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math
-      true
-      true
-
-    true
-    true
-
diff --git a/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj b/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj
index 68eae9a6d..e7a22b173 100644
--- a/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj
+++ b/Tests/EndToEndTests/EvalClientTests/CPPEvalClientTest/CPPEvalClientTest.vcxproj
@@ -100,8 +100,6 @@
       false
       /d2Zi+ %(AdditionalOptions)
       MultiThreadedDLL
-      true
-      true
       true
@@ -110,10 +108,6 @@
       true
-
-    true
-    true
-
diff --git a/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj b/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj
index edcda5b11..884519c98 100644
--- a/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj
+++ b/Tests/UnitTests/BrainScriptTests/BrainScriptTests.vcxproj
@@ -89,16 +89,10 @@
       %(AdditionalIncludeDirectories)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
-
-    true
-    true
-
diff --git a/Tests/UnitTests/CommandEval/CommandEval.vcxproj b/Tests/UnitTests/CommandEval/CommandEval.vcxproj
index 86736bbf0..dfde0f912 100644
--- a/Tests/UnitTests/CommandEval/CommandEval.vcxproj
+++ b/Tests/UnitTests/CommandEval/CommandEval.vcxproj
@@ -87,8 +87,6 @@
       WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
       true
       true
-      true
-      true
       Console
@@ -97,10 +95,6 @@
       true
       Math.lib; Common.lib; %(AdditionalDependencies)
-
-    true
-    true
-
diff --git a/Tests/UnitTests/EvalTests/EvalTests.vcxproj b/Tests/UnitTests/EvalTests/EvalTests.vcxproj
index 1be3a8311..3529f8e32 100644
--- a/Tests/UnitTests/EvalTests/EvalTests.vcxproj
+++ b/Tests/UnitTests/EvalTests/EvalTests.vcxproj
@@ -109,17 +109,11 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
       %(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)
-
-    true
-    true
-
diff --git a/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj b/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
index fff9ea353..c814dc274 100644
--- a/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
+++ b/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
@@ -105,13 +105,7 @@
       $(CudaToolkitIncludeDir);%(AdditionalIncludeDirectories)
-      true
-      true
-
-    true
-    true
-
diff --git a/Tests/UnitTests/MathTests/MathTests.vcxproj b/Tests/UnitTests/MathTests/MathTests.vcxproj
index 34acdc8d1..75916baa3 100644
--- a/Tests/UnitTests/MathTests/MathTests.vcxproj
+++ b/Tests/UnitTests/MathTests/MathTests.vcxproj
@@ -109,17 +109,11 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
       %(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)
-
-    true
-    true
-
diff --git a/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj b/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj
index d2ea4652d..1c84efc02 100644
--- a/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj
+++ b/Tests/UnitTests/NetworkTests/NetworkTests.vcxproj
@@ -90,17 +90,11 @@
       %(AdditionalIncludeDirectories);$(CudaInclude)
-      true
-      true
       %(AdditionalLibraryDirectories);$(CudaLibPath)
       %(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)
-
-    true
-    true
-
diff --git a/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj b/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
index e2970c678..2df7d7605 100644
--- a/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
+++ b/Tests/UnitTests/ReaderTests/ReaderTests.vcxproj
@@ -93,16 +93,10 @@
       NDEBUG;%(PreprocessorDefinitions)
       true
       /d2Zi+ %(AdditionalOptions)
-      true
-      true
       true
-
-    true
-    true
-
diff --git a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
index 437100ad2..5ca968f61 100644
--- a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
+++ b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
@@ -97,8 +97,6 @@
       true
       MultiThreaded
       MultiThreaded
-      true
-      true
       Console
@@ -107,10 +105,6 @@
       true
       CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-    true
-    true
-
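
Note on the SimpleEvaluator.h change above: Evaluate() and the removed EvaluateBN() carried an identical block for resolving evalNodeNames into concrete nodes, and the patch replaces it with a single call to m_net->GetEvalNodesWithName(evalNodeNames). The following is a minimal sketch of what that helper presumably contains, reconstructed from the duplicated block deleted above; the placement in ComputationNetwork and the exact signature are assumptions, not taken from this patch.

    // Sketch only: reconstructed from the node-selection logic removed from
    // Evaluate()/EvaluateBN(); the real ComputationNetwork helper may differ.
    std::vector<ComputationNodeBasePtr> ComputationNetwork::GetEvalNodesWithName(const std::vector<std::wstring>& evalNodeNames)
    {
        std::vector<ComputationNodeBasePtr> evalNodes;
        std::set<ComputationNodeBasePtr> criteriaLogged; // dedup: never evaluate/log a criterion twice
        if (evalNodeNames.empty())
        {
            // no names given: fall back to all default evaluation and training-criterion nodes
            fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n");
            if (EvaluationNodes().empty() && FinalCriterionNodes().empty())
                InvalidArgument("There is no default evaluation node or training criterion specified in the network.");
            for (const auto& node : EvaluationNodes())
                if (criteriaLogged.insert(node).second)
                    evalNodes.push_back(node);
            for (const auto& node : FinalCriterionNodes())
                if (criteriaLogged.insert(node).second)
                    evalNodes.push_back(node);
        }
        else
        {
            // explicit names: resolve each one, skip duplicates, require scalar (1x1) criteria
            for (const auto& name : evalNodeNames)
            {
                const auto& node = GetNodeFromName(name);
                if (!criteriaLogged.insert(node).second)
                    continue;
                if (node->GetSampleLayout().GetNumElements() != 1)
                    InvalidArgument("Criterion nodes to evaluate must have dimension 1x1.");
                evalNodes.push_back(node);
            }
        }
        return evalNodes;
    }

Centralizing the lookup means every caller rejects non-scalar criterion nodes and duplicate names the same way. The batch-normalization statistics pass deleted from SimpleEvaluator.h, including its MPI averaging of the run-mean/variance parameters, is expected to live on in the new Stat sources registered in SGDLib.vcxproj.filters above.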