From 7b6708cb9c75495386d24c820327d7a6c71097a9 Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Fri, 22 Jan 2016 09:57:10 -0800
Subject: [PATCH] removed conditional compilations for tensor view

---
 Source/CNTK/SimpleNetworkBuilder.cpp             |   7 --
 .../ComputationNetworkBuilder.cpp                |  53 ++-------
 .../ComputationNetworkBuilder.h                  |   7 --
 Source/ComputationNetworkLib/ComputationNode.h   |   4 -
 Source/ComputationNetworkLib/EsotericNodes.h     |   6 +-
 .../LinearAlgebraNodes.h                         |  11 --
 Source/ComputationNetworkLib/NonlinearityNodes.h | 106 +++++++++---------
 7 files changed, 62 insertions(+), 132 deletions(-)

diff --git a/Source/CNTK/SimpleNetworkBuilder.cpp b/Source/CNTK/SimpleNetworkBuilder.cpp
index c913a1c92..5cb20d295 100644
--- a/Source/CNTK/SimpleNetworkBuilder.cpp
+++ b/Source/CNTK/SimpleNetworkBuilder.cpp
@@ -15,9 +15,6 @@
 #include "ConvolutionalNodes.h"
 #include "RecurrentNodes.h"
 #include "CompositeComputationNodes.h"
-#ifndef ENABLE_TENSORVIEW
-#include "EsotericNodes.h" // non-tensor versions have been moved here
-#endif
 
 #pragma warning(disable : 4189) // (we have lots of unused variables to show how variables can be set up)
 
@@ -771,11 +768,7 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilde
 
             ComputationNodePtr scalar = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"SV%d", i), 1, 1);
             scalar->Value().SetValue((ElemType) 0.01);
-#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
-            ComputationNodePtr scaled = builder.Scale(scalar, directOutput, msra::strfun::wstrprintf(L"S%d", i));
-#else
             ComputationNodePtr scaled = builder.ElementTimes(scalar, directOutput, msra::strfun::wstrprintf(L"S%d", i));
-#endif
 
             mergedNode = builder.Plus(toNode, scaled);
         }
diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
index 0a946030b..d1f7f443c 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
@@ -37,13 +37,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
         return New<CRFNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))
         return New<ClassBasedCrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
-#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
-    else if (nodeType == L"ColumnElementTimes")
-        return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
-#else
-    else if (nodeType == OperationNameOf(ColumnElementTimesNode))
-        return New<ColumnElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
-#endif
     else if (nodeType == OperationNameOf(CosDistanceNode))
         return New<CosDistanceNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(CosDistanceWithNegativeSamplesNode))
@@ -116,26 +109,12 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
         return New<RectifiedLinearNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ReshapeNode))
         return New<ReshapeNode<ElemType>>(forward<_Types>(_Args)...);
-#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
-    else if (nodeType == L"RowElementTimes")
-        return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
-#else
-    else if (nodeType == OperationNameOf(RowElementTimesNode))
-        return New<RowElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
-#endif
     else if (nodeType == OperationNameOf(RowRepeatNode))
         return New<RowRepeatNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(RowSliceNode))
         return New<RowSliceNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(RowStackNode))
         return New<RowStackNode<ElemType>>(forward<_Types>(_Args)...);
-#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
-    else if (nodeType == L"Scale")
-        return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
-#else
-    else if (nodeType == OperationNameOf(ScaleNode))
-        return New<ScaleNode<ElemType>>(forward<_Types>(_Args)...);
-#endif
     else if (nodeType == OperationNameOf(SequenceDecoderNode))
         return New<SequenceDecoderNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ShiftNode))
@@ -165,12 +144,18 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
     else if (nodeType == OperationNameOf(TransposeTimesNode))
        return New<TransposeTimesNode<ElemType>>(forward<_Types>(_Args)...);
     // old names we also support
+    else if (nodeType == L"ColumnElementTimes")
+        return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"Delay")
         return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"PerDimMeanVarNormalizationNode")
         return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
-    else if (nodeType == L"PerDimMeanVarNormalizationNode")
-        return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == L"PerDimMeanVarDeNormalizationNode")
+        return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == L"RowElementTimes")
+        return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == L"Scale")
+        return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
 #if 1
     else if (nodeType == OperationNameOf(DeprecatedReshapeNode))
         return New<DeprecatedReshapeNode<ElemType>>(forward<_Types>(_Args)...);
@@ -562,14 +547,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Sum(c
     return net.AddNodeToNetAndAttachInputs(New<SumElementsNode<ElemType>>(net.GetDeviceId(), nodeName), a);
 }
 
-#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
-template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Scale(const ComputationNodePtr scalar, const ComputationNodePtr matrix, const std::wstring nodeName)
-{
-    return net.AddNodeToNetAndAttachInputs(New<ScaleNode<ElemType>>(net.GetDeviceId(), nodeName), scalar, matrix);
-}
-#endif
-
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Transpose(const ComputationNodePtr matrix, const std::wstring nodeName)
 {
@@ -594,20 +571,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Eleme
     return net.AddNodeToNetAndAttachInputs(New<ElementTimesNode<ElemType>>(net.GetDeviceId(), nodeName), a, b);
 }
 
-#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
-template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::RowElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
-{
-    return net.AddNodeToNetAndAttachInputs(New<RowElementTimesNode<ElemType>>(net.GetDeviceId(), nodeName), a, b);
-}
-
-template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ColumnElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
-{
-    return net.AddNodeToNetAndAttachInputs(New<ColumnElementTimesNode<ElemType>>(net.GetDeviceId(), nodeName), a, b);
-}
-#endif
-
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName)
 {
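Note: two details in the CreateStandardNode hunks above are easy to miss. First, the legacy operation names ColumnElementTimes, RowElementTimes, and Scale now survive only as entries in the "old names we also support" table, and all three construct an ElementTimesNode: with tensor broadcasting, a scalar, row, or column factor is just an element-wise product whose second operand has length-1 axes. Second, the same hunk fixes a latent copy-paste bug: the table previously tested L"PerDimMeanVarNormalizationNode" twice, which made the second branch (intended to create the de-normalization node) unreachable; it now tests L"PerDimMeanVarDeNormalizationNode".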
diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h
index cd7e823e9..f0b8ce2f4 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h
+++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h
@@ -101,17 +101,10 @@ public:
     ComputationNodePtr Hardmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
     ComputationNodePtr LogSoftmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
     ComputationNodePtr Sum(const ComputationNodePtr a, const std::wstring nodeName = L"");
-#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
-    ComputationNodePtr Scale(const ComputationNodePtr scalar, const ComputationNodePtr matrix, const std::wstring nodeName = L"");
-#endif
     ComputationNodePtr Transpose(const ComputationNodePtr matrix, const std::wstring nodeName = L"");
     ComputationNodePtr Times(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
     ComputationNodePtr TransposeTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
     ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
-#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
-    ComputationNodePtr RowElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
-    ComputationNodePtr ColumnElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
-#endif
     ComputationNodePtr StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"");
     ComputationNodePtr DiagTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
     ComputationNodePtr CosDistance(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index 26dca071a..7d7b6e095 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -25,10 +25,6 @@
 #include
 #include
 
-// remove these following two #defines once the tensor lib works
-#define ENABLE_TENSORVIEW // if set then tensor lib is used instead of old Matrix implementations, wherever such an implementation exists
-#define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes
-
 #define DEFAULT_HIDDEN_ACTIVATION 0.1
 
 #pragma warning(disable : 4267) // conversion from size_t to int or other types
diff --git a/Source/ComputationNetworkLib/EsotericNodes.h b/Source/ComputationNetworkLib/EsotericNodes.h
index 597b04a50..a2dd43252 100644
--- a/Source/ComputationNetworkLib/EsotericNodes.h
+++ b/Source/ComputationNetworkLib/EsotericNodes.h
@@ -17,7 +17,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // This header collects special-purpose nodes.
 // It is likely that these are no longer functional.
 
-#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
+#if 0 //def ENABLE_TENSORVIEW
 // -----------------------------------------------------------------------
 // PlusNode (summand1, summand2)
 // -----------------------------------------------------------------------
@@ -345,7 +345,7 @@ public:
 
     virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
     {
-#ifdef ENABLE_TENSORVIEW // This takes a big perf hit since our reduction uses only a single thread in this case. Needs to be fixed.
+#if 1 //def ENABLE_TENSORVIEW // This takes a big perf hit since our reduction uses only a single thread in this case. Needs to be fixed.
         size_t rank = DetermineElementwiseTensorRank();
         auto gradient = GradientTensorFor(rank, fr);
         auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@@ -381,7 +381,7 @@ public:
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
-#ifdef ENABLE_TENSORVIEW
+#if 1 //def ENABLE_TENSORVIEW
         static int c = 0;
         if (c++ == 0)
         {
diff --git a/Source/ComputationNetworkLib/LinearAlgebraNodes.h b/Source/ComputationNetworkLib/LinearAlgebraNodes.h
index 421bde273..9718e18a5 100644
--- a/Source/ComputationNetworkLib/LinearAlgebraNodes.h
+++ b/Source/ComputationNetworkLib/LinearAlgebraNodes.h
@@ -26,8 +26,6 @@
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
-#ifdef ENABLE_TENSORVIEW
-
 // -----------------------------------------------------------------------
 // PlusNode (summand1, summand2)
 // -----------------------------------------------------------------------
@@ -51,7 +49,6 @@ public:
 
     virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
     {
-        //static int c = 0; if (c++ == 0) { fprintf(stderr, "#PLUSBP#\n"); }
         size_t rank = DetermineElementwiseTensorRank();
         auto gradient = GradientTensorFor(rank, fr);
         auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@@ -114,7 +111,6 @@ public:
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
-        //static int c = 0; if (c++ == 0) { fprintf(stderr,"#MINUS#\n"); }
         size_t rank = DetermineElementwiseTensorRank();
         auto result = ValueTensorFor(rank, fr);
         auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
@@ -126,8 +122,6 @@ public:
 template class MinusNode<float>;
 template class MinusNode<double>;
 
-#endif // ENABLE_TENSORVIEW
-
 // -----------------------------------------------------------------------
 // NegateNode (input)
 // computes the negative of its input
@@ -364,8 +358,6 @@ public:
 template class TransposeTimesNode<float>;
 template class TransposeTimesNode<double>;
 
-#ifdef ENABLE_TENSORVIEW
-
 // -----------------------------------------------------------------------
 // ElementTimesNode (factor1, factor2)
 //
@@ -412,7 +404,6 @@ public:
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
-        //static int c = 0; if (c++ == 0) { fprintf(stderr,"#ETIMES#\n"); }
         size_t rank = DetermineElementwiseTensorRank();
         auto result = ValueTensorFor(rank, fr);
         auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
@@ -424,8 +415,6 @@ public:
 template class ElementTimesNode<float>;
 template class ElementTimesNode<double>;
 
-#endif // ENABLE_TENSORVIEW
-
 // -----------------------------------------------------------------------
 // DiagTimesNode (vector representing the diagonal of a square matrix, data)
 // -----------------------------------------------------------------------
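Note: PlusNode, MinusNode, and ElementTimesNode above all follow the same tensor-view idiom: determine a common element-wise tensor rank, obtain a view of each operand at that rank (AllowBroadcast permits length-1 axes to stretch), and apply a single element-wise operator. Below is a minimal, self-contained plain-C++ sketch of those broadcasting semantics — not the CNTK TensorView API; Mat, BroadcastAdd, and BackpropToInput are names invented for the illustration. The backward pass shows why the gradient of a broadcast input is a reduction over the broadcast axes, which is exactly the single-threaded reduction the comment in EsotericNodes.h complains about.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Column-major matrix (columns are samples, as in CNTK); illustrative only.
    struct Mat
    {
        size_t rows, cols;
        std::vector<float> v;
        Mat(size_t r, size_t c) : rows(r), cols(c), v(r * c, 0.0f) {}
        float& at(size_t r, size_t c) { return v[c * rows + r]; }
        float at(size_t r, size_t c) const { return v[c * rows + r]; }
    };

    // Forward: out = a + b, where each axis of a/b either matches out or is 1.
    // A length-1 axis is broadcast; the modulo makes its single entry repeat.
    Mat BroadcastAdd(const Mat& a, const Mat& b)
    {
        Mat out(a.rows > b.rows ? a.rows : b.rows, a.cols > b.cols ? a.cols : b.cols);
        for (size_t c = 0; c < out.cols; c++)
            for (size_t r = 0; r < out.rows; r++)
                out.at(r, c) = a.at(r % a.rows, c % a.cols) + b.at(r % b.rows, c % b.cols);
        return out;
    }

    // Backward: the gradient for an input of shape (rows, cols) accumulates the
    // output gradient; where the input was broadcast, this becomes a reduction.
    Mat BackpropToInput(const Mat& outGrad, size_t rows, size_t cols)
    {
        Mat g(rows, cols);
        for (size_t c = 0; c < outGrad.cols; c++)
            for (size_t r = 0; r < outGrad.rows; r++)
                g.at(r % rows, c % cols) += outGrad.at(r, c);
        return g;
    }

    int main()
    {
        Mat x(3, 2), bias(3, 1); // bias has one column; it broadcasts across x's columns
        for (size_t c = 0; c < x.cols; c++)
            for (size_t r = 0; r < x.rows; r++)
                x.at(r, c) = float(r + c);
        for (size_t r = 0; r < bias.rows; r++)
            bias.at(r, 0) = 10.0f * float(r);

        Mat y = BroadcastAdd(x, bias);         // like Plus(x, bias)
        Mat dy(3, 2);
        for (float& e : dy.v) e = 1.0f;        // pretend the upstream gradient is all ones
        Mat dbias = BackpropToInput(dy, 3, 1); // each bias element sums 2 contributions

        std::printf("y(2,1)=%g dbias(2,0)=%g\n", y.at(2, 1), dbias.at(2, 0)); // 23 and 2
        return 0;
    }

Any C++11 compiler will build this; it prints y(2,1)=23 dbias(2,0)=2, the 2 being the reduction over the two broadcast columns.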
diff --git a/Source/ComputationNetworkLib/NonlinearityNodes.h b/Source/ComputationNetworkLib/NonlinearityNodes.h
index 8e6bc55aa..647e65b5b 100644
--- a/Source/ComputationNetworkLib/NonlinearityNodes.h
+++ b/Source/ComputationNetworkLib/NonlinearityNodes.h
@@ -24,8 +24,6 @@
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
-#ifdef ENABLE_TENSORVIEW
-
 // -----------------------------------------------------------------------
 // UnaryElementWiseWithOpCodeNodeBase (input) -- base for elementwise unary op
 // where forward // and backward are single ElementWiseOperator opcodes and
@@ -46,8 +44,6 @@ public:
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
-        //static int c = 0; if (c++ == 0) { fprintf(stderr, "#NLop%d#\n", (int)opForward); }
-
         size_t rank = DetermineElementwiseTensorRank();
         auto result = ValueTensorFor(rank, fr);
         auto input = Input(0)->ValueTensorFor(rank, fr);
@@ -126,7 +122,6 @@ DeclareUnaryElementWiseWithOpCodeNode(Exp, Exp, ElementwiseProduct, true);
 DeclareUnaryElementWiseWithOpCodeNode(Cosine, Cosine, ElementwiseProductWithCosDerivative, false);
 
 #pragma pop_macro("DeclareUnaryTensorOp")
-#endif
 
 // -----------------------------------------------------------------------
 // SoftmaxNodeBase (input) -- shared base of Softmax and LogSoftmax
@@ -363,6 +358,57 @@ private:
 template class LogSoftmaxNode<float>;
 template class LogSoftmaxNode<double>;
 
+// -----------------------------------------------------------------------
+// Hardmax(prediction)
+// -----------------------------------------------------------------------
+// the result is a 1 of n coding in which the (r, c) = 1 if row r has max value in column c
+// this node is not differentiable and so cannot be used in the backpropagation
+// TODO: make function value sparse?
+template <class ElemType>
+class HardmaxNode : public SoftmaxNodeBase /*ComputationNode*/<ElemType>
+{
+    typedef SoftmaxNodeBase<ElemType> Base;
+    UsingSoftmaxNodeBaseMembers;
+    static const std::wstring TypeName()
+    {
+        return L"Hardmax";
+    }
+
+public:
+    DeclareConstructorFromConfigWithNumInputs(HardmaxNode);
+    HardmaxNode(DEVICEID_TYPE deviceId, const wstring& name)
+        : Base(deviceId, name)
+    {
+    }
+
+    /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues) override
+    {
+        gradient;
+        inputFunctionValues;
+        inputGradientValues;
+        gradientValues;
+        LogicError("Hardmax is not differentiable and is used for evaluation only.");
+    }
+
+    virtual bool OutputUsedInComputingInputNodesGradients() const override
+    {
+        return false;
+    }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
+    {
+        return false;
+    }
+
+    /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
+    {
+        //TODO: temp solution, we need to write a math function specifically for this
+        functionValues.AssignHardmaxOf(inputFunctionValues, true);
+    }
+};
+
+template class HardmaxNode<float>;
+template class HardmaxNode<double>;
+
 // -----------------------------------------------------------------------
 // GMMLogLikelihoodNode (unnormedPrior, means, logStdDevs, features) -- GMM log LL over input vector(s)
 // calculates the log likelihood of a feature given parameters of a Gaussian mixture model (GMM) with shared diagonal variance
@@ -873,54 +919,4 @@ private:
 template class DropoutNode<float>;
 template class DropoutNode<double>;
 
-// -----------------------------------------------------------------------
-// Hardmax(prediction)
-// -----------------------------------------------------------------------
-// the result is a 1 of n coding in which the (r, c) = 1 if row r has max value in column c
-// this node is not differentiable and so cannot be used in the backpropagation
-// TODO: make function value sparse?
-template <class ElemType>
-class HardmaxNode : public SoftmaxNodeBase /*ComputationNode*/<ElemType>
-{
-    typedef SoftmaxNodeBase<ElemType> Base;
-    UsingSoftmaxNodeBaseMembers;
-    static const std::wstring TypeName()
-    {
-        return L"Hardmax";
-    }
-
-public:
-    DeclareConstructorFromConfigWithNumInputs(HardmaxNode);
-    HardmaxNode(DEVICEID_TYPE deviceId, const wstring& name)
-        : Base(deviceId, name)
-    {
-    }
-
-    /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues) override
-    {
-        gradient;
-        inputFunctionValues;
-        inputGradientValues;
-        gradientValues;
-        LogicError("Hardmax is not differentiable and is used for evaluation only.");
-    }
-
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        return false;
-    }
-    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
-    {
-        return false;
-    }
-
-    /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
-    {
-        //TODO: temp solution, we need to write a math function specifically for this
-        functionValues.AssignHardmaxOf(inputFunctionValues, true);
-    }
-};
-
-template class HardmaxNode<float>;
-template class HardmaxNode<double>;
 } } }
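Note: with ENABLE_BROADCASTING_ELEMENTTIMES now baked in, ScaleNode, RowElementTimesNode, and ColumnElementTimesNode cease to exist as distinct node types; the factory maps their names to ElementTimesNode, whose broadcasting subsumes all three. Reusing the illustrative Mat type from the sketch after the LinearAlgebraNodes.h diff (again a hedged sketch, not CNTK's API; the shape conventions in the comments are assumptions inferred from the old nodes' names and the builder signatures in this patch):

    // Broadcast element-wise product: same loop as BroadcastAdd, with * instead of +.
    Mat BroadcastMul(const Mat& a, const Mat& b)
    {
        Mat out(a.rows > b.rows ? a.rows : b.rows, a.cols > b.cols ? a.cols : b.cols);
        for (size_t c = 0; c < out.cols; c++)
            for (size_t r = 0; r < out.rows; r++)
                out.at(r, c) = a.at(r % a.rows, c % a.cols) * b.at(r % b.rows, c % b.cols);
        return out;
    }

    // Scale(s, M):              BroadcastMul(s, M) with s of shape 1 x 1 -- scales every element
    // RowElementTimes(M, v):    BroadcastMul(M, v) with v of shape 1 x n -- scales column c by v(0, c)
    // ColumnElementTimes(M, v): BroadcastMul(M, v) with v of shape m x 1 -- scales row r by v(r, 0)

A single broadcast multiply covering all three shapes is why one ElementTimesNode could replace three node classes, at the cost of the gradient-side reductions discussed above.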