diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index 0cca9c821..0dc34dcf9 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -616,12 +616,12 @@ public: // for every time step run through all nodes in this particular loop (treat the loop like a little ComputationNetwork) #if 1 + (*nodeIter)->UpdateFunctionAndGradientMBSize(); // TODO: for sequence-to-sequence models we will need to be able to grow this step by step since size is unknown upfront FrameRangeIteration range(pMBLayout, recInfo->m_isForwardLoop ? -1 : +1); for (auto t = range.begin(); t != range.end(); t++) { for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) { - (*nodeIter)->UpdateFunctionAndGradientMBSize(); (*nodeIter)->EvaluateThisNode(t); if (IsNodeReqMultiSeqHandling(*nodeIter)) (*nodeIter)->MaskMissingValuesColumnsToZero(t.t()); // TODO: This should take a FrameRange as well diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 885cb5b7b..a8be27376 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -876,8 +876,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FunctionValues().Resize(rows, cols); #ifdef _DEBUG - fprintf(stderr, "Resize: Invalidating %ls %ls operation.\n", NodeName().c_str(), OperationName().c_str()); - FunctionValues().Invalidate(); + fprintf(stderr, "Resize: Destructive resize to (%d x %d) in %ls %ls operation.\n", (int)rows, (int)cols, NodeName().c_str(), OperationName().c_str()); #endif } /*implement*/double Get00Element() const { return FunctionValues().Get00Element(); } @@ -946,6 +945,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // - InputValue, which verifies instead of resizing (since Resize() is specified to be destructive, it should not call it). // - LearnableParameters // - GMMLogLikelihoodNode (which allocates some internal temp memory). + // Important: Unless overridden, this function is destructive. Nodes cannot carry over minibatch-size dependent state across minibatches through m_functionValues because of this. virtual size_t UpdateFunctionAndGradientMBSize(size_t numCols) { if (!m_pMBLayout) // if no layout, this node contains parameters independent of MB size, don't resize @@ -1342,7 +1342,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { s_constOnes[rows].find(cols) == s_constOnes[rows].end()) //not found { Matrix* matrix = new Matrix(rows, cols, (DEVICEID_TYPE)deviceId); - matrix->SetValue(ElemType(1.000)); + matrix->SetValue(1); s_constOnes[rows][cols] = matrix; } diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index 84a3b294c..b6d68a995 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -240,7 +240,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t t = frameRange.t(); - Resize(Inputs(0)); + VerifySize(Inputs(0)); size_t T = GetNumTimeSteps(); size_t T_delayedActivation = m_delayedActivationMBLayout ? m_delayedActivationMBLayout->GetNumTimeSteps() : 0; // (note: should never happen in full-sequence mode) diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp index 9964f2a89..1fd978960 100644 --- a/MachineLearning/CNTKSGDLib/SGD.cpp +++ b/MachineLearning/CNTKSGDLib/SGD.cpp @@ -1153,7 +1153,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { return false; } - fprintf(stderr, "Found %lu PreCompute nodes\n", nodes.size()); + fprintf(stderr, "\nPrecomputing --> %lu PreCompute nodes found.\n\n", nodes.size()); for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) { auto node = static_pointer_cast>(*nodeIter); @@ -1192,6 +1192,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { auto node = static_pointer_cast>(*nodeIter); node->MarkComputed(true/*done accumulating*/); } + fprintf(stderr, "\nPrecomputing --> Completed.\n\n"); return true; } diff --git a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h index 142e0a327..3de6b17f1 100644 --- a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h +++ b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h @@ -759,7 +759,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } //return true if precomputation is executed. - bool PreCompute(ComputationNetwork& net, + bool EvaluateBatchModeNodes(ComputationNetwork& net, const std::vector& featureNodes) { batchComputeNodes = net.GetNodesRequiringBatchMode(); @@ -923,7 +923,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t maxSize = min(maxMbSize, mbSize); ResetPreCompute(); - PreCompute(*evalnet, featureNodes); + EvaluateBatchModeNodes(*evalnet, featureNodes); /// need to set the minibatch size to 1, and initialize evalnet's sentence start information to let it know that this /// is the begining of sentence @@ -1094,7 +1094,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t itdx = 0; ResetPreCompute(); - PreCompute(*evalnet, featureNodes); + EvaluateBatchModeNodes(*evalnet, featureNodes); /// need to set the minibatch size to 1, and initialize evalnet's sentence start information to let it know that this /// is the begining of sentence diff --git a/Makefile b/Makefile index 6023f84da..d6378fe97 100644 --- a/Makefile +++ b/Makefile @@ -135,6 +135,7 @@ ifeq ("$(BUILDTYPE)","debug") endif CXXFLAGS += -g + CPPFLAGS += -D_DEBUG CUFLAGS += -O0 -G -lineinfo $(GENCODE_FLAGS) endif diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 8c3d083a3..e1907a014 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -1367,6 +1367,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { NOT_IMPLEMENTED ); } +#ifdef _DEBUG + //Invalidate(); +#endif } // Note: Resize() will leave the matrix content undefined.