removed conditional compilations for tensor view

Parent: 6d31cda88a
Commit: 7b6708cb9c
@@ -15,9 +15,6 @@
#include "ConvolutionalNodes.h"
#include "RecurrentNodes.h"
#include "CompositeComputationNodes.h"
#ifndef ENABLE_TENSORVIEW
#include "EsotericNodes.h" // non-tensor versions have been moved here
#endif

#pragma warning(disable : 4189) // (we have lots of unused variables to show how variables can be set up)
@@ -771,11 +768,7 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilde

    ComputationNodePtr scalar = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"SV%d", i), 1, 1);
    scalar->Value().SetValue((ElemType) 0.01);
#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
    ComputationNodePtr scaled = builder.Scale(scalar, directOutput, msra::strfun::wstrprintf(L"S%d", i));
#else
    ComputationNodePtr scaled = builder.ElementTimes(scalar, directOutput, msra::strfun::wstrprintf(L"S%d", i));
#endif

    mergedNode = builder.Plus(toNode, scaled);
}
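Note on the hunk above: with broadcasting semantics, an elementwise product against a 1x1 tensor degenerates to a uniform scale, so the ElementTimes branch computes exactly what the old Scale call did. A minimal standalone sketch of that equivalence (plain C++ illustration, not the CNTK builder API):

#include <cstdio>
#include <vector>

int main()
{
    const double scalar = 0.01;                // plays the role of the 1x1 "SV%d" parameter
    std::vector<double> matrix = {1, 2, 3, 4}; // stands in for directOutput
    for (double& v : matrix)
        v *= scalar;                           // broadcast: the single value multiplies every element
    for (double v : matrix)
        printf("%g ", v);                      // prints: 0.01 0.02 0.03 0.04
    printf("\n");
}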
@@ -37,13 +37,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
    return New<CRFNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))
    return New<ClassBasedCrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
else if (nodeType == L"ColumnElementTimes")
    return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#else
else if (nodeType == OperationNameOf(ColumnElementTimesNode))
    return New<ColumnElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(CosDistanceNode))
    return New<CosDistanceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosDistanceWithNegativeSamplesNode))
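For context, OperationNameOf resolves a node class to its canonical operation-name string. A minimal sketch of the factory pattern used in this chain, assuming OperationNameOf is a thin macro over the class's static TypeName() (the mini-class below is hypothetical, for illustration only):

#include <string>

template <class ElemType>
struct CosDistanceNode
{
    static const std::wstring TypeName() { return L"CosDistance"; }
};

// Assumption: mirrors how OperationNameOf is defined; the template argument
// is irrelevant because every instantiation reports the same name.
#define OperationNameOf(T) (T<float>::TypeName())

bool isCosDistance(const std::wstring& nodeType)
{
    return nodeType == OperationNameOf(CosDistanceNode);
}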
|
@@ -116,26 +109,12 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
    return New<RectifiedLinearNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReshapeNode))
    return New<ReshapeNode<ElemType>>(forward<_Types>(_Args)...);
#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
else if (nodeType == L"RowElementTimes")
    return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#else
else if (nodeType == OperationNameOf(RowElementTimesNode))
    return New<RowElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(RowRepeatNode))
    return New<RowRepeatNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RowSliceNode))
    return New<RowSliceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RowStackNode))
    return New<RowStackNode<ElemType>>(forward<_Types>(_Args)...);
#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
else if (nodeType == L"Scale")
    return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#else
else if (nodeType == OperationNameOf(ScaleNode))
    return New<ScaleNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(SequenceDecoderNode))
    return New<SequenceDecoderNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ShiftNode))
@@ -165,12 +144,18 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(TransposeTimesNode))
    return New<TransposeTimesNode<ElemType>>(forward<_Types>(_Args)...);
// old names we also support
else if (nodeType == L"ColumnElementTimes")
    return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"Delay")
    return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"PerDimMeanVarNormalizationNode")
    return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"PerDimMeanVarNormalizationNode")
    return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"PerDimMeanVarDeNormalizationNode")
    return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"RowElementTimes")
    return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"Scale")
    return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#if 1
else if (nodeType == OperationNameOf(DeprecatedReshapeNode))
    return New<DeprecatedReshapeNode<ElemType>>(forward<_Types>(_Args)...);
|
@@ -562,14 +547,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Sum(c
    return net.AddNodeToNetAndAttachInputs(New<SumElementsNode<ElemType>>(net.GetDeviceId(), nodeName), a);
}

#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Scale(const ComputationNodePtr scalar, const ComputationNodePtr matrix, const std::wstring nodeName)
{
    return net.AddNodeToNetAndAttachInputs(New<ScaleNode<ElemType>>(net.GetDeviceId(), nodeName), scalar, matrix);
}
#endif

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Transpose(const ComputationNodePtr matrix, const std::wstring nodeName)
{
|
@@ -594,20 +571,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Eleme
    return net.AddNodeToNetAndAttachInputs(New<ElementTimesNode<ElemType>>(net.GetDeviceId(), nodeName), a, b);
}

#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::RowElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
{
    return net.AddNodeToNetAndAttachInputs(New<RowElementTimesNode<ElemType>>(net.GetDeviceId(), nodeName), a, b);
}

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ColumnElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
{
    return net.AddNodeToNetAndAttachInputs(New<ColumnElementTimesNode<ElemType>>(net.GetDeviceId(), nodeName), a, b);
}
#endif

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName)
{
@@ -101,17 +101,10 @@ public:
    ComputationNodePtr Hardmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
    ComputationNodePtr LogSoftmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
    ComputationNodePtr Sum(const ComputationNodePtr a, const std::wstring nodeName = L"");
#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
    ComputationNodePtr Scale(const ComputationNodePtr scalar, const ComputationNodePtr matrix, const std::wstring nodeName = L"");
#endif
    ComputationNodePtr Transpose(const ComputationNodePtr matrix, const std::wstring nodeName = L"");
    ComputationNodePtr Times(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
    ComputationNodePtr TransposeTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
    ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
    ComputationNodePtr RowElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
    ComputationNodePtr ColumnElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
#endif
    ComputationNodePtr StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"");
    ComputationNodePtr DiagTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
    ComputationNodePtr CosDistance(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
@@ -25,10 +25,6 @@
#include <sstream>
#include <iostream>

// remove these following two #defines once the tensor lib works
#define ENABLE_TENSORVIEW // if set then tensor lib is used instead of old Matrix implementations, wherever such an implementation exists
#define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes

#define DEFAULT_HIDDEN_ACTIVATION 0.1

#pragma warning(disable : 4267) // conversion from size_t to int or other types
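These two flags are plain compile-time switches: call sites test them with #ifdef/#ifndef, so flipping a single #define swaps implementations across the code base with no runtime cost. A generic sketch of the pattern (illustration only, not CNTK code):

#include <cstdio>

#define ENABLE_TENSORVIEW // comment out to fall back to the legacy path

void forwardProp()
{
#ifdef ENABLE_TENSORVIEW
    printf("tensor-lib implementation\n");    // unified tensor path
#else
    printf("legacy Matrix implementation\n"); // old per-node Matrix code
#endif
}

int main()
{
    forwardProp();
}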
@@ -17,7 +17,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// This header collects special-purpose nodes.
// It is likely that these are no longer functional.

#ifndef ENABLE_BROADCASTING_ELEMENTTIMES
#if 0 //def ENABLE_TENSORVIEW
// -----------------------------------------------------------------------
// PlusNode (summand1, summand2)
// -----------------------------------------------------------------------
@@ -345,7 +345,7 @@ public:

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
    {
#ifdef ENABLE_TENSORVIEW // This takes a big perf hit since our reduction uses only a single thread in this case. Needs to be fixed.
#if 1 //def ENABLE_TENSORVIEW // This takes a big perf hit since our reduction uses only a single thread in this case. Needs to be fixed.
        size_t rank = DetermineElementwiseTensorRank();
        auto gradient = GradientTensorFor(rank, fr);
        auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@@ -381,7 +381,7 @@ public:

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
#ifdef ENABLE_TENSORVIEW
#if 1 //def ENABLE_TENSORVIEW
        static int c = 0;
        if (c++ == 0)
        {
@@ -26,8 +26,6 @@

namespace Microsoft { namespace MSR { namespace CNTK {

#ifdef ENABLE_TENSORVIEW

// -----------------------------------------------------------------------
// PlusNode (summand1, summand2)
// -----------------------------------------------------------------------
@@ -51,7 +49,6 @@ public:

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
    {
        //static int c = 0; if (c++ == 0) { fprintf(stderr, "#PLUSBP#\n"); }
        size_t rank = DetermineElementwiseTensorRank();
        auto gradient = GradientTensorFor(rank, fr);
        auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@@ -114,7 +111,6 @@ public:

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        //static int c = 0; if (c++ == 0) { fprintf(stderr,"#MINUS#\n"); }
        size_t rank = DetermineElementwiseTensorRank();
        auto result = ValueTensorFor(rank, fr);
        auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
@@ -126,8 +122,6 @@
template class MinusNode<float>;
template class MinusNode<double>;

#endif // ENABLE_TENSORVIEW

// -----------------------------------------------------------------------
// NegateNode (input)
// computes the negative of its input
@@ -364,8 +358,6 @@ public:
template class TransposeTimesNode<float>;
template class TransposeTimesNode<double>;

#ifdef ENABLE_TENSORVIEW

// -----------------------------------------------------------------------
// ElementTimesNode (factor1, factor2)
//
@@ -412,7 +404,6 @@ public:

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        //static int c = 0; if (c++ == 0) { fprintf(stderr,"#ETIMES#\n"); }
        size_t rank = DetermineElementwiseTensorRank();
        auto result = ValueTensorFor(rank, fr);
        auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
@@ -424,8 +415,6 @@ public:
template class ElementTimesNode<float>;
template class ElementTimesNode<double>;

#endif // ENABLE_TENSORVIEW

// -----------------------------------------------------------------------
// DiagTimesNode (vector representing the diagonal of a square matrix, data)
// -----------------------------------------------------------------------
@@ -24,8 +24,6 @@

namespace Microsoft { namespace MSR { namespace CNTK {

#ifdef ENABLE_TENSORVIEW

// -----------------------------------------------------------------------
// UnaryElementWiseWithOpCodeNodeBase (input) -- base for elementwise unary op
// where forward and backward are single ElementWiseOperator opcodes and
@@ -46,8 +44,6 @@ public:

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        //static int c = 0; if (c++ == 0) { fprintf(stderr, "#NLop%d#\n", (int)opForward); }

        size_t rank = DetermineElementwiseTensorRank();
        auto result = ValueTensorFor(rank, fr);
        auto input = Input(0)->ValueTensorFor(rank, fr);
@@ -126,7 +122,6 @@ DeclareUnaryElementWiseWithOpCodeNode(Exp, Exp, ElementwiseProduct, true);
DeclareUnaryElementWiseWithOpCodeNode(Cosine, Cosine, ElementwiseProductWithCosDerivative, false);

#pragma pop_macro("DeclareUnaryTensorOp")
#endif

// -----------------------------------------------------------------------
// SoftmaxNodeBase (input) -- shared base of Softmax and LogSoftmax
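The Declare... macros above lean on the tensor lib's opcode-driven design: one generic unary node is parameterized by ElementWiseOperator opcodes for forward and backward, so Exp, Cosine, and friends need no hand-written kernels. A much-reduced sketch of the idea (hypothetical types, not the CNTK ones):

#include <cmath>
#include <cstdio>

enum class ElementWiseOperator { Exp, Cosine };

// One generic kernel dispatches on the opcode; each "node" is then just a
// (forward opcode, backward opcode) pair instead of a dedicated class body.
double apply(ElementWiseOperator op, double x)
{
    switch (op)
    {
    case ElementWiseOperator::Exp:    return std::exp(x);
    case ElementWiseOperator::Cosine: return std::cos(x);
    }
    return 0; // unreachable
}

int main()
{
    printf("%f\n", apply(ElementWiseOperator::Exp, 1.0)); // 2.718282
}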
|
@@ -363,6 +358,57 @@ private:
template class LogSoftmaxNode<float>;
template class LogSoftmaxNode<double>;

// -----------------------------------------------------------------------
// Hardmax(prediction)
// -----------------------------------------------------------------------
// the result is a 1 of n coding in which the (r, c) = 1 if row r has max value in column c
// this node is not differentiable and so cannot be used in the backpropagation
// TODO: make function value sparse?
template <class ElemType>
class HardmaxNode : public SoftmaxNodeBase /*ComputationNode*/<ElemType>
{
    typedef SoftmaxNodeBase<ElemType> Base;
    UsingSoftmaxNodeBaseMembers;
    static const std::wstring TypeName()
    {
        return L"Hardmax";
    }

public:
    DeclareConstructorFromConfigWithNumInputs(HardmaxNode);
    HardmaxNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues) override
    {
        gradient;
        inputFunctionValues;
        inputGradientValues;
        gradientValues;
        LogicError("Hardmax is not differentiable and is used for evaluation only.");
    }

    virtual bool OutputUsedInComputingInputNodesGradients() const override
    {
        return false;
    }
    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
    {
        return false;
    }

    /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
    {
        //TODO: temp solution, we need to write a math function specifically for this
        functionValues.AssignHardmaxOf(inputFunctionValues, true);
    }
};

template class HardmaxNode<float>;
template class HardmaxNode<double>;

// -----------------------------------------------------------------------
// GMMLogLikelihoodNode (unnormedPrior, means, logStdDevs, features) -- GMM log LL over input vector(s)
// calculates the log likelihood of a feature given parameters of a Gaussian mixture model (GMM) with shared diagonal variance
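Worked example of the rule in the comment above: in each column, the row holding the maximum becomes 1 and every other row 0, so the column {0.2, 0.9, 0.1} maps to {0, 1, 0}. A standalone sketch of that column-wise rule (illustration only, not Matrix::AssignHardmaxOf):

#include <algorithm>
#include <cstdio>
#include <vector>

// Column-wise hardmax over a column-major rows x cols matrix:
// (r, c) = 1 iff row r holds the maximum of column c.
void hardmax(std::vector<double>& m, size_t rows, size_t cols)
{
    for (size_t j = 0; j < cols; j++)
    {
        auto col = m.begin() + j * rows;
        auto maxIt = std::max_element(col, col + rows);
        std::fill(col, col + rows, 0.0);
        *maxIt = 1.0; // ties go to the first maximum here -- an assumption, not necessarily CNTK's rule
    }
}

int main()
{
    std::vector<double> m = {0.2, 0.9, 0.1}; // one column, three rows
    hardmax(m, 3, 1);
    for (double v : m)
        printf("%g ", v); // prints: 0 1 0
    printf("\n");
}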
|
@@ -873,54 +919,4 @@ private:
template class DropoutNode<float>;
template class DropoutNode<double>;

// -----------------------------------------------------------------------
// Hardmax(prediction)
// -----------------------------------------------------------------------
// the result is a 1 of n coding in which the (r, c) = 1 if row r has max value in column c
// this node is not differentiable and so cannot be used in the backpropagation
// TODO: make function value sparse?
template <class ElemType>
class HardmaxNode : public SoftmaxNodeBase /*ComputationNode*/<ElemType>
{
    typedef SoftmaxNodeBase<ElemType> Base;
    UsingSoftmaxNodeBaseMembers;
    static const std::wstring TypeName()
    {
        return L"Hardmax";
    }

public:
    DeclareConstructorFromConfigWithNumInputs(HardmaxNode);
    HardmaxNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues) override
    {
        gradient;
        inputFunctionValues;
        inputGradientValues;
        gradientValues;
        LogicError("Hardmax is not differentiable and is used for evaluation only.");
    }

    virtual bool OutputUsedInComputingInputNodesGradients() const override
    {
        return false;
    }
    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
    {
        return false;
    }

    /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
    {
        //TODO: temp solution, we need to write a math function specifically for this
        functionValues.AssignHardmaxOf(inputFunctionValues, true);
    }
};

template class HardmaxNode<float>;
template class HardmaxNode<double>;
} } }