marked the following nodes as COMING_SOON: TransposeNode, GMMLogLikelihoodNode, SequenceDecoderNode. These will be brought back once we have defined tests for them and made the necessary updates;
moved DiagonalNode to ReshapingNodes.h; renamed DeprecatedReshapeNode to LegacyReshapeNode
This commit is contained in:
Parent
7f07161ccd
Commit
55b3ae946f
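For context, the COMING_SOON fencing referenced above is an ordinary preprocessor guard that is left undefined, so the guarded node registrations, builder entry points, and class definitions are compiled out until tests exist for them. A minimal self-contained sketch of the pattern follows; the stub types are hypothetical stand-ins, not code from this commit.

// COMING_SOON is deliberately left undefined; everything inside the guard drops out of the build.
//#define COMING_SOON   // flip this on once TransposeNode, GMMLogLikelihoodNode, SequenceDecoderNode have tests

class ComputationNodeStub { };                            // stand-in for the real ComputationNode base

#ifdef COMING_SOON
class TransposeNodeStub : public ComputationNodeStub { }; // fenced-off node, currently compiled out
#endif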
@ -49,7 +49,7 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
|
|||
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
|
||||
L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
|
||||
L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"
|
||||
L"Reshape(input, numRows, imageWidth = 0, imageHeight = 0, imageChannels = 0, tag='') = new ComputationNode [ operation = 'DeprecatedReshape' ; inputs = input /*plus the function args*/ ]\n"
|
||||
L"Reshape(input, numRows, imageWidth = 0, imageHeight = 0, imageChannels = 0, tag='') = new ComputationNode [ operation = 'LegacyReshape' ; inputs = input /*plus the function args*/ ]\n"
|
||||
L"NewReshape(input, dims, beginDim=0, endDim=0, tag='') = new ComputationNode [ operation = 'Reshape' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
|
||||
L"ReshapeDimension(x, dim, tensorShape) = NewReshape(x, tensorShape, beginDim=dim, endDim=dim + 1) \n"
|
||||
L"FlattenDimensions(x, dim, num) = NewReshape(x, 0, beginDim=dim, endDim=dim + num) \n"
|
||||
|
@ -71,7 +71,9 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
|
|||
#define BinaryStandardNode(Op, a, b) L## #Op L"(" L## #a L", " L## #b L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L") /*plus the function args*/ ]\n"
|
||||
#define TernaryStandardNode(Op, a, b, c) L## #Op L"(" L## #a L", " L## #b L", " L## #c L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L" : " L## #c L") /*plus the function args*/ ]\n"
|
||||
#define QuaternaryStandardNode(Op, a, b, c, d) L## #Op L"(" L## #a L", " L## #b L", " L## #c L", " L## #d L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L" : " L## #c L" : " L## #d L") /*plus the function args*/ ]\n"
|
||||
#ifdef COMING_SOON
|
||||
TernaryStandardNode(CRF, labelVectorSequence, positionDependenScoreVectorSequence, transitionScores) // TODO: better names
|
||||
#endif
|
||||
QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
|
||||
// BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
|
||||
BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
|
||||
|
@ -108,7 +110,9 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
|
|||
UnaryStandardNode(RectifiedLinear, z)
|
||||
//BinaryStandardNode(RowElementTimesNode)
|
||||
BinaryStandardNode(Scale, scalarScalingFactor, matrix)
|
||||
#ifdef COMING_SOON
|
||||
//BinaryStandardNode(SequenceDecoderNode)
|
||||
#endif
|
||||
UnaryStandardNode(Sigmoid, z)
|
||||
UnaryStandardNode(Softmax, z)
|
||||
UnaryStandardNode(Hardmax, z)
|
||||
|
@ -119,6 +123,8 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
|
|||
UnaryStandardNode(Tanh, z)
|
||||
UnaryStandardNode(TimeReverse, vectorSequence)
|
||||
BinaryStandardNode(Times, leftMatrix, rightMatrix)
|
||||
#ifdef COMING_SOON
|
||||
UnaryStandardNode(Transpose, matrix)
|
||||
//BinaryStandardNode(TransposeTimesNode)
|
||||
#endif
|
||||
BinaryStandardNode(TransposeTimes, leftMatrix, rightMatrix)
|
||||
;
@ -20,6 +20,8 @@
|
|||
#include "PreComputeNodes.h"
|
||||
#include "EvaluationNodes.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// DuplicateNode - Duplicate a node in a macro as needed (it might already exist)
|
||||
|
@ -146,11 +148,12 @@ NDLPass& operator++(NDLPass& ndlPass)
|
|||
template <typename ElemType>
|
||||
bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
||||
{
|
||||
std::wstring nodeType = msra::strfun::utf16(p_nodeType);
|
||||
bool ret = false;
|
||||
if (allowUndeterminedVariable)
|
||||
*allowUndeterminedVariable = true; // by default we allow undetermined variables
|
||||
if (EqualInsensitive(nodeType, OperationNameOf(AveragePoolingNode))) ret = true;
|
||||
|
||||
wstring nodeType = msra::strfun::utf16(p_nodeType);
|
||||
bool ret = false;
|
||||
if (EqualInsensitive(nodeType, OperationNameOf(AveragePoolingNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(CRFNode), L"CRF")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode), L"CBCEWithSM")) ret = true;
|
||||
|
@ -168,7 +171,9 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
else if (EqualInsensitive(nodeType, OperationNameOf(ErrorPredictionNode), L"ClassificationError")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ExpNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(FutureValueNode))) ret = true;
|
||||
#ifdef COMING_SOON
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(GMMLogLikelihoodNode), L"GMMLL")) ret = true;
|
||||
#endif
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(HardmaxNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(InputValue), L"Input")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(InvStdDevNode))) ret = true;
|
||||
|
@ -193,7 +198,9 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
else if (EqualInsensitive(nodeType, OperationNameOf(RowRepeatNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(RowSliceNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(RowStackNode))) ret = true;
|
||||
#ifdef COMING_SOON
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(SequenceDecoderNode), L"SEWithSM")) ret = true;
|
||||
#endif
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(SequenceWithSoftmaxNode), L"SEWithSM")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(SigmoidNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(SoftmaxNode))) ret = true;
|
||||
|
@ -203,7 +210,9 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
else if (EqualInsensitive(nodeType, OperationNameOf(SumElementsNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(TanhNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(TimesNode))) ret = true;
|
||||
#ifdef COMING_SOON
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(TransposeNode))) ret = true;
|
||||
#endif
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(TransposeTimesNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"ColumnElementTimes")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"Constant", L"Const")) ret = true;
@ -27,22 +27,22 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
|
|||
switch (m_rnnType)
|
||||
{
|
||||
case SIMPLENET:
|
||||
net = BuildSimpleDNN();
|
||||
net = BuildSimpleDNNFromDescription();
|
||||
break;
|
||||
case SIMPLERNN:
|
||||
net = BuildSimpleRNN();
|
||||
net = BuildSimpleRNNFromDescription();
|
||||
break;
|
||||
case LSTM:
|
||||
net = BuildLSTMNetworkFromDescription();
|
||||
break;
|
||||
case CLASSLSTM:
|
||||
net = BuildCLASSLSTMNetworkFromDescription();
|
||||
net = BuildClassLSTMNetworkFromDescription();
|
||||
break;
|
||||
case NCELSTM:
|
||||
net = BuildNCELSTMNetworkFromDescription();
|
||||
break;
|
||||
case CLASSLM:
|
||||
net = BuildClassEntropyNetwork();
|
||||
net = BuildClassEntropyNetworkFromDescription();
|
||||
break;
|
||||
case LBLM:
|
||||
net = BuildLogBilinearNetworkFromDescription();
|
||||
|
@ -53,9 +53,11 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
|
|||
case CLSTM:
|
||||
net = BuildConditionalLSTMNetworkFromDescription();
|
||||
break;
|
||||
#ifdef COMING_SOON
|
||||
case RCRF:
|
||||
net = BuildSeqTrnLSTMNetworkFromDescription();
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
|
||||
}
|
||||
|
@ -67,7 +69,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNNFromDescription()
|
||||
{
|
||||
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
@ -166,7 +168,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
|
|||
|
||||
// Note: while ComputationNode and ComputationNetwork are (supposed to be) independent of ElemType, it is OK to keep this class dependent.
|
||||
template <class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN()
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNNFromDescription()
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) // not built yet
|
||||
|
@ -274,7 +276,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN()
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetworkFromDescription()
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
||||
|
@ -290,7 +292,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
|
|||
ComputationNodePtr wrd2cls, cls2idx, clslogpostprob, clsweight;
|
||||
|
||||
if (m_vocabSize != m_layerSizes[numHiddenLayers + 1])
|
||||
RuntimeError("BuildClassEntropyNetwork : vocabulary size should be the same as the output layer size");
|
||||
RuntimeError("BuildClassEntropyNetworkFromDescription : vocabulary size should be the same as the output layer size");
|
||||
|
||||
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
|
||||
m_net->FeatureNodes().push_back(input);
|
||||
|
@ -435,7 +437,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
|||
}
|
||||
else
|
||||
{
|
||||
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
||||
LogicError("BuildClassLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
||||
}
|
||||
|
||||
int recur_idx = 0;
|
||||
|
@ -484,7 +486,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
|||
output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
|
||||
clslogpostprob);
|
||||
|
||||
output = builder.Times(builder.Transpose(w), input, L"outputs");
|
||||
output = builder.TransposeTimes(w, input, L"outputs");
|
||||
|
||||
m_net->OutputNodes().push_back(output);
|
||||
|
||||
|
@ -947,6 +949,8 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilde
|
|||
return output;
|
||||
}
|
||||
|
||||
#ifdef COMING_SOON
|
||||
|
||||
template <class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription()
|
||||
{
|
||||
|
@ -1046,8 +1050,10 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFrom
|
|||
return m_net;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template <class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription()
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassLSTMNetworkFromDescription()
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) // not built yet
|
||||
|
@ -1088,7 +1094,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromD
|
|||
}
|
||||
else
|
||||
{
|
||||
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
||||
LogicError("BuildClassLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
||||
}
|
||||
|
||||
int recur_idx = 0;
|
||||
|
@ -1127,7 +1133,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromD
|
|||
output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
|
||||
clslogpostprob);
|
||||
|
||||
output = builder.Times(builder.Transpose(w), input, L"outputs");
|
||||
output = builder.TransposeTimes(w, input, L"outputs");
|
||||
|
||||
m_net->OutputNodes().push_back(output);
|
||||
|
||||
|
@ -1310,196 +1316,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
|
|||
return m_net;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilder<ElemType>::BuildLSTMComponentWithMultiInputs(ULONG& randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
||||
size_t numHiddenLayers = m_layerSizes.size() - 2;
|
||||
|
||||
ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior;
|
||||
ComputationNodePtr Wxo, Who, Wco, bo, Wxi, Whi, Wci, bi;
|
||||
ComputationNodePtr Wxf, Whf, Wcf, bf, Wxc, Whc, bc;
|
||||
ComputationNodePtr ot, it, ft, ct, ht;
|
||||
ComputationNodePtr pastValueHI, pastValueCI, pastValueHO, pastValueHF, pastValueHC, pastValueCF, pastValueCC;
|
||||
ComputationNodePtr directWIO, directInput, directOutput;
|
||||
ComputationNodePtr bit, bft, bct;
|
||||
ComputationNodePtr streamsxi, streamsxo, streamsxf, streamsxc;
|
||||
|
||||
for (size_t sidx = 0; sidx < inputObs.size(); sidx++)
|
||||
{
|
||||
input = inputObs[sidx];
|
||||
#if 0
|
||||
if (inputWeightSparse)
|
||||
{
|
||||
Wxo = builder.CreateSparseLearnableParameter(msra::strfun::wstrprintf(L"WXO%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
Wxi = builder.CreateSparseLearnableParameter(msra::strfun::wstrprintf(L"WXI%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
Wxf = builder.CreateSparseLearnableParameter(msra::strfun::wstrprintf(L"WXF%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
Wxc = builder.CreateSparseLearnableParameter(msra::strfun::wstrprintf(L"WXC%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
Wxo = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WXO%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
Wxi = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WXI%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
Wxf = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WXF%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
Wxc = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WXC%dI%d", iLayer, sidx), outputDim, inputDim[sidx]);
|
||||
}
|
||||
m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
m_net->InitLearnableParameters(Wxf, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
m_net->InitLearnableParameters(Wxc, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
|
||||
streamsxi = (streamsxi == nullptr) ? builder.Times(Wxi, input) : builder.Plus(streamsxi, builder.Times(Wxi, input));
|
||||
streamsxf = (streamsxf == nullptr) ? builder.Times(Wxf, input) : builder.Plus(streamsxf, builder.Times(Wxf, input));
|
||||
streamsxc = (streamsxc == nullptr) ? builder.Times(Wxc, input) : builder.Plus(streamsxc, builder.Times(Wxc, input));
|
||||
streamsxo = (streamsxo == nullptr) ? builder.Times(Wxo, input) : builder.Plus(streamsxo, builder.Times(Wxo, input));
|
||||
}
|
||||
|
||||
bo = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"bo%d", iLayer), outputDim, 1);
|
||||
bc = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"bc%d", iLayer), outputDim, 1);
|
||||
bi = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"bi%d", iLayer), outputDim, 1);
|
||||
bf = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"bf%d", iLayer), outputDim, 1);
|
||||
// if (m_forgetGateInitVal > 0)
|
||||
bf->Value().SetValue(m_forgetGateInitVal);
|
||||
// if (m_inputGateInitVal > 0)
|
||||
bi->Value().SetValue(m_inputGateInitVal);
|
||||
// if (m_outputGateInitVal > 0)
|
||||
bo->Value().SetValue(m_outputGateInitVal);
|
||||
|
||||
Whi = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WHI%d", iLayer), outputDim, outputDim);
|
||||
m_net->InitLearnableParameters(Whi, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
Wci = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WCI%d", iLayer), outputDim, 1);
|
||||
m_net->InitLearnableParameters(Wci, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
|
||||
Whf = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WHF%d", iLayer), outputDim, outputDim);
|
||||
m_net->InitLearnableParameters(Whf, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
Wcf = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WCF%d", iLayer), outputDim, 1);
|
||||
m_net->InitLearnableParameters(Wcf, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
|
||||
Who = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WHO%d", iLayer), outputDim, outputDim);
|
||||
m_net->InitLearnableParameters(Who, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
Wco = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WCO%d", iLayer), outputDim, 1);
|
||||
m_net->InitLearnableParameters(Wco, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
|
||||
Whc = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WHC%d", iLayer), outputDim, outputDim);
|
||||
m_net->InitLearnableParameters(Whc, m_uniformInit, randomSeed++, m_initValueScale);
|
||||
|
||||
size_t layer1 = outputDim;
|
||||
|
||||
pastValueHI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
pastValueHF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
pastValueHO = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
pastValueHC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
pastValueCI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
pastValueCF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
pastValueCC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1);
|
||||
|
||||
if (m_constInputGateValue)
|
||||
{
|
||||
// it = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"CONSTIT%d", iLayer), outputDim);
|
||||
// it->SetParameterUpdateRequired(false);
|
||||
// it->Value().SetValue(m_constInputGateValue);
|
||||
it = nullptr;
|
||||
}
|
||||
else
|
||||
it = ApplyNonlinearFunction(
|
||||
builder.Plus(
|
||||
builder.Plus(
|
||||
builder.Plus(
|
||||
streamsxi,
|
||||
bi),
|
||||
builder.Times(Whi, pastValueHI)),
|
||||
builder.DiagTimes(Wci, pastValueCI)),
|
||||
0);
|
||||
|
||||
if (it == nullptr)
|
||||
{
|
||||
bit = builder.Tanh(
|
||||
builder.Plus(
|
||||
streamsxc,
|
||||
builder.Plus(
|
||||
builder.Times(Whc, pastValueHC),
|
||||
bc)));
|
||||
}
|
||||
else
|
||||
{
|
||||
bit = builder.ElementTimes(it,
|
||||
builder.Tanh(
|
||||
builder.Plus(
|
||||
streamsxc,
|
||||
builder.Plus(
|
||||
builder.Times(Whc, pastValueHC),
|
||||
bc))));
|
||||
}
|
||||
|
||||
if (m_constForgetGateValue)
|
||||
{
|
||||
ft = nullptr;
|
||||
}
|
||||
else
|
||||
ft = ApplyNonlinearFunction(
|
||||
builder.Plus(
|
||||
builder.Plus(
|
||||
builder.Plus(
|
||||
streamsxf,
|
||||
bf),
|
||||
builder.Times(Whf, pastValueHF)),
|
||||
builder.DiagTimes(Wcf, pastValueCF)),
|
||||
0);
|
||||
|
||||
if (ft == nullptr)
|
||||
{
|
||||
bft = pastValueCC;
|
||||
}
|
||||
else
|
||||
{
|
||||
bft = builder.ElementTimes(ft, pastValueCC);
|
||||
}
|
||||
|
||||
ct = builder.Plus(bft, bit);
|
||||
|
||||
if (m_constOutputGateValue)
|
||||
{
|
||||
ot = nullptr;
|
||||
}
|
||||
else
|
||||
ot = ApplyNonlinearFunction(
|
||||
builder.Plus(
|
||||
builder.Plus(
|
||||
builder.Plus(
|
||||
streamsxo,
|
||||
bo),
|
||||
builder.Times(Who, pastValueHO)),
|
||||
builder.DiagTimes(Wco, ct)),
|
||||
0);
|
||||
|
||||
if (ot == nullptr)
|
||||
{
|
||||
output = builder.Tanh(ct);
|
||||
}
|
||||
else
|
||||
{
|
||||
output = builder.ElementTimes(ot, builder.Tanh(ct));
|
||||
}
|
||||
|
||||
pastValueHO->AttachInputs(output);
|
||||
pastValueHI->AttachInputs(output);
|
||||
pastValueHF->AttachInputs(output);
|
||||
pastValueHC->AttachInputs(output);
|
||||
pastValueCI->AttachInputs(ct);
|
||||
pastValueCF->AttachInputs(ct);
|
||||
pastValueCC->AttachInputs(ct);
|
||||
|
||||
if (m_addDropoutNodes)
|
||||
input = builder.Dropout(output);
|
||||
else
|
||||
input = output;
|
||||
output = input;
|
||||
|
||||
return output;
|
||||
}
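// For reference, the builder calls above wire up a standard peephole LSTM over the summed input
// streams. A sketch of the intended computation, assuming ApplyNonlinearFunction(..., 0) applies a
// sigmoid here (x_i, x_f, x_c, x_o denote the streamsxi/xf/xc/xo accumulators built in the loop above):
//   i_t = sigmoid(x_i + Whi * h_{t-1} + Wci .* c_{t-1} + bi)        // input gate, peephole on c_{t-1}
//   f_t = sigmoid(x_f + Whf * h_{t-1} + Wcf .* c_{t-1} + bf)        // forget gate
//   c_t = f_t .* c_{t-1} + i_t .* tanh(x_c + Whc * h_{t-1} + bc)    // cell state
//   o_t = sigmoid(x_o + Who * h_{t-1} + Wco .* c_t + bo)            // output gate, peephole on c_t
//   h_t = o_t .* tanh(c_t)                                          // returned as 'output'
// When m_constInputGateValue / m_constForgetGateValue / m_constOutputGateValue are set, the
// corresponding gate is omitted (treated as 1), matching the nullptr branches above; h_{t-1} and
// c_{t-1} are the pastValue* nodes whose inputs are attached at the end of the function.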
|
||||
|
||||
template <class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription()
|
||||
{
@ -247,34 +247,25 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
ComputationNetworkPtr BuildSimpleDNN();
|
||||
|
||||
ComputationNetworkPtr BuildSimpleRNN();
|
||||
|
||||
ComputationNetworkPtr BuildClassEntropyNetwork();
|
||||
|
||||
ComputationNodePtr BuildLSTMComponent(unsigned long& randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
|
||||
|
||||
ComputationNodePtr BuildLSTMNodeComponent(ULONG& randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
|
||||
|
||||
ComputationNodePtr BuildLSTMComponentWithMultiInputs(ULONG& randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse = false);
|
||||
|
||||
ComputationNodePtr BuildDirectConnect(unsigned long& randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
|
||||
|
||||
ComputationNetworkPtr BuildSimpleDNNFromDescription();
|
||||
ComputationNetworkPtr BuildSimpleRNNFromDescription();
|
||||
ComputationNetworkPtr BuildClassEntropyNetworkFromDescription();
|
||||
ComputationNetworkPtr BuildLogBilinearNetworkFromDescription();
|
||||
|
||||
ComputationNetworkPtr BuildNeuralProbNetworkFromDescription();
|
||||
|
||||
ComputationNetworkPtr BuildLSTMNetworkFromDescription();
|
||||
|
||||
#ifdef COMING_SOON
|
||||
ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
|
||||
|
||||
ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription();
|
||||
|
||||
#endif
|
||||
ComputationNetworkPtr BuildClassLSTMNetworkFromDescription();
|
||||
ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription();
|
||||
|
||||
ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
|
||||
|
||||
// multiply-used components
|
||||
ComputationNodePtr BuildLSTMComponent(unsigned long& randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
|
||||
ComputationNodePtr BuildLSTMNodeComponent(ULONG& randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
|
||||
ComputationNodePtr BuildDirectConnect(unsigned long& randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
|
||||
|
||||
// layer is 0 based
|
||||
ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr AddTrainAndEvalCriterionNodes(ComputationNodePtr input, ComputationNodePtr label, ComputationNodePtr matrix = nullptr, const std::wstring trainNodeName = L"", const std::wstring evalNodeName = L"", ComputationNodePtr clspostprob = nullptr, ComputationNodePtr trans = nullptr);
@ -305,7 +305,7 @@ void SynchronousNodeEvaluator<ElemType>::Evaluate(NDLNode<ElemType>* node, const
|
|||
size_t img_channels = node->GetOptionalParameter("imageChannels", "0");
|
||||
|
||||
bool needGradient = node->GetOptionalParameter("needGradient", "false");
|
||||
nodePtr = builder.DeprecatedReshape(NULL, num_rows, ImageDimensions::AsTensorShape(img_width, img_height, img_channels, ImageLayoutKind::HWC /*legacy*/), name); // BUGBUG: use a tensor descriptor instead
|
||||
nodePtr = builder.LegacyReshape(NULL, num_rows, ImageDimensions::AsTensorShape(img_width, img_height, img_channels, ImageLayoutKind::HWC /*legacy*/), name); // BUGBUG: use a tensor descriptor instead
|
||||
nodePtr->SetParameterUpdateRequired(needGradient);
|
||||
}
|
||||
}
@ -48,7 +48,9 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(ErrorPredictionNode)) return New<ErrorPredictionNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ExpNode)) return New<ExpNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(FutureValueNode)) return New<FutureValueNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#ifdef COMING_SOON
|
||||
else if (nodeType == OperationNameOf(GMMLogLikelihoodNode)) return New<GMMLogLikelihoodNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else if (nodeType == OperationNameOf(HardmaxNode)) return New<HardmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(InvStdDevNode)) return New<InvStdDevNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(KhatriRaoProductNode)) return New<KhatriRaoProductNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -71,8 +73,12 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(RowRepeatNode)) return New<RowRepeatNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(RowSliceNode)) return New<RowSliceNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(RowStackNode)) return New<RowStackNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#ifdef COMING_SOON
|
||||
else if (nodeType == OperationNameOf(SequenceDecoderNode)) return New<SequenceDecoderNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
#ifdef COMING_SOON
|
||||
else if (nodeType == OperationNameOf(ShiftNode)) return New<ShiftNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else if (nodeType == OperationNameOf(SigmoidNode)) return New<SigmoidNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(SoftmaxNode)) return New<SoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(SquareErrorNode)) return New<SquareErrorNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -81,7 +87,9 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(SumElementsNode)) return New<SumElementsNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(TanhNode)) return New<TanhNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(TimesNode)) return New<TimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#ifdef COMING_SOON
|
||||
else if (nodeType == OperationNameOf(TransposeNode)) return New<TransposeNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else if (nodeType == OperationNameOf(TransposeTimesNode)) return New<TransposeTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
// old names we also support
|
||||
else if (nodeType == L"ColumnElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -91,7 +99,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == L"RowElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == L"Scale") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#if 1
|
||||
else if (nodeType == OperationNameOf(DeprecatedReshapeNode)) return New<DeprecatedReshapeNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LegacyReshapeNode)) return New<LegacyReshapeNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else InvalidArgument("Attempted to instantiate undefined operation %ls.", nodeType.c_str());
|
||||
}
|
||||
|
@ -300,11 +308,13 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Logis
|
|||
return net.AddNodeToNetAndAttachInputs(New<LogisticNode<ElemType>>(net.GetDeviceId(), nodeName), a, b, c);
|
||||
}
|
||||
|
||||
#ifdef COMING_SOON
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::SequenceDecoder(const ComputationNodePtr label, const ComputationNodePtr prediction, const ComputationNodePtr pairscore, const std::wstring nodeName)
|
||||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<SequenceDecoderNode<ElemType>>(net.GetDeviceId(), nodeName), label, prediction, pairscore);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CrossEntropyWithSoftmax(const ComputationNodePtr label, const ComputationNodePtr prediction, const std::wstring nodeName)
|
||||
|
@ -447,11 +457,13 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Sum(c
|
|||
return net.AddNodeToNetAndAttachInputs(New<SumElementsNode<ElemType>>(net.GetDeviceId(), nodeName), a);
|
||||
}
|
||||
|
||||
#ifdef COMING_SOON
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Transpose(const ComputationNodePtr matrix, const std::wstring nodeName)
|
||||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<TransposeNode<ElemType>>(net.GetDeviceId(), nodeName), matrix);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Times(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
|
||||
|
@ -516,12 +528,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Resha
|
|||
}
|
||||
#if 1
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::DeprecatedReshape(const ComputationNodePtr a,
|
||||
const size_t numRows,
|
||||
const TensorShape& imageLayout,
|
||||
const std::wstring nodeName)
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::LegacyReshape(const ComputationNodePtr a,
|
||||
const size_t numRows,
|
||||
const TensorShape& imageLayout,
|
||||
const std::wstring nodeName)
|
||||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<DeprecatedReshapeNode<ElemType>>(net.GetDeviceId(), nodeName, numRows, imageLayout), a);
|
||||
return net.AddNodeToNetAndAttachInputs(New<LegacyReshapeNode<ElemType>>(net.GetDeviceId(), nodeName, numRows, imageLayout), a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -564,6 +576,7 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::RowSt
|
|||
return net.AddNodeToNetAndAttachInputs(New<RowStackNode<ElemType>>(net.GetDeviceId(), nodeName), inputs);
|
||||
}
|
||||
|
||||
#ifdef COMING_SOON
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::GMMLogLikelihood(const ComputationNodePtr unnormedPrior,
|
||||
const ComputationNodePtr mean,
|
||||
|
@ -573,6 +586,7 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::GMMLo
|
|||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<GMMLogLikelihoodNode<ElemType>>(net.GetDeviceId(), nodeName), unnormedPrior, mean, logStddev, feature);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::LookupTable(const ComputationNodePtr dictionary, const ComputationNodePtr input, const std::wstring nodeName)
|
||||
|
|
|
@ -87,7 +87,9 @@ public:
|
|||
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Exp(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L"");
|
||||
#ifdef COMING_SOON
|
||||
ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"");
|
||||
#endif
|
||||
ComputationNodePtr Hardmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr InvStdDev(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr KhatriRaoProduct(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
|
@ -111,7 +113,9 @@ public:
|
|||
ComputationNodePtr RowRepeat(const ComputationNodePtr a, const size_t num_repeat, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr RowSlice(const ComputationNodePtr a, const size_t start_index, const size_t num_rows, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr RowStack(const std::vector<ComputationNodePtr> pinputs, const std::wstring nodeName = L"");
|
||||
#ifdef COMING_SOON
|
||||
ComputationNodePtr SequenceDecoder(const ComputationNodePtr label, const ComputationNodePtr prediction, const ComputationNodePtr pairscore, const std::wstring nodeName = L"");
|
||||
#endif
|
||||
ComputationNodePtr SequenceWithSoftmax(const ComputationNodePtr label, const ComputationNodePtr prediction, const ComputationNodePtr loglikelihood, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Sigmoid(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Softmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
|
@ -119,10 +123,12 @@ public:
|
|||
ComputationNodePtr Sum(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Tanh(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Times(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
#ifdef COMING_SOON
|
||||
ComputationNodePtr Transpose(const ComputationNodePtr matrix, const std::wstring nodeName = L"");
|
||||
#endif
|
||||
ComputationNodePtr TransposeTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
#if 1 // legacy
|
||||
ComputationNodePtr DeprecatedReshape(const ComputationNodePtr a, const size_t num_rows, const TensorShape& imageLayout, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr LegacyReshape(const ComputationNodePtr a, const size_t num_rows, const TensorShape& imageLayout, const std::wstring nodeName = L"");
|
||||
#endif
|
||||
};
|
|
@ -133,6 +133,8 @@ private:
|
|||
template class ErrorPredictionNode<float>;
|
||||
template class ErrorPredictionNode<double>;
|
||||
|
||||
#ifdef COMING_SOON
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// SequenceDecoderNode (label, position_dependent_score, transition_score)
|
||||
// Decoder that matches CRF training.
|
||||
|
@ -333,4 +335,6 @@ public:
|
|||
template class SequenceDecoderNode<float>;
|
||||
template class SequenceDecoderNode<double>;
|
||||
|
||||
#endif
|
||||
|
||||
} } }
|
@ -331,7 +331,6 @@ template class TimesNode<double>;
|
|||
// -----------------------------------------------------------------------
|
||||
// TransposeTimesNode (A', B)
|
||||
// right operand and output can have MB layout, while left operand cannot
|
||||
// TODO: merge with TimesNode
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -357,7 +356,6 @@ template class TransposeTimesNode<double>;
|
|||
|
||||
// -----------------------------------------------------------------------
|
||||
// ElementTimesNode (factor1, factor2)
|
||||
//
|
||||
// This allows broadcasting, and can thus also scale with a row, a column, or a scalar.
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
|
@ -664,6 +662,8 @@ public:
|
|||
template class SumColumnElementsNode<float>;
|
||||
template class SumColumnElementsNode<double>;
|
||||
|
||||
#ifdef COMING_SOON // known bug in backprop; generalize to tensor
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// TransposeNode (input matrix)
|
||||
// TODO: extend towards tensor transpose (swap 2 dimensions, incl. time)
|
||||
|
@ -752,87 +752,7 @@ public:
|
|||
template class TransposeNode<float>;
|
||||
template class TransposeNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DiagonalNode -- extract diagonal elements of a square matrix into a row vector
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
class DiagonalNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1>
|
||||
{
|
||||
typedef ComputationNodeNonLooping<ElemType> Base;
|
||||
UsingComputationNodeMembersBoilerplate;
|
||||
static const std::wstring TypeName()
|
||||
{
|
||||
return L"Diagonal";
|
||||
}
|
||||
|
||||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(DiagonalNode);
|
||||
DiagonalNode(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
Base::Validate(isFinalValidationPass);
|
||||
m_pMBLayout = nullptr;
|
||||
|
||||
if (isFinalValidationPass && Input(0)->HasMBLayout())
|
||||
InvalidArgument("%ls %ls operation cannot operate on minibatch data (which have a layout)", NodeName().c_str(), OperationName().c_str());
|
||||
|
||||
size_t dim = Input(0)->GetAsMatrixNumCols();
|
||||
if (isFinalValidationPass && dim != Input(0)->GetAsMatrixNumRows())
|
||||
InvalidArgument("%ls %ls operation requires a square matrix as its input.", NodeName().c_str(), OperationName().c_str());
|
||||
|
||||
if (Input(0)->HasSampleLayout())
|
||||
fprintf(stderr, "WARNING: Diagonal operation cannot inherit image size information from its child. Image size info is lost.\n");
|
||||
|
||||
SetDims(TensorShape(1, dim), false);
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
|
||||
{
|
||||
Input(0)->ValueAsMatrix().AssignDiagonalValuesTo(ValueAsMatrix()); // TODO: use tensor lib; this is a stride operation
|
||||
#if NANCHECK
|
||||
Value().HasNan("Diagonal");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeNonLooping::*/ BackpropToNonLooping(size_t /*inputIndex*/) override
|
||||
{
|
||||
auto& inputGradientValues = Input(0)->GradientAsMatrix();
|
||||
auto& gradientValues = GradientAsMatrix();
|
||||
|
||||
// BUGBUG: This should use the memshare mechanism.
|
||||
// TODO: use tensor lib, then this will be easy, no memsharing needed
|
||||
Matrix<ElemType> diag(gradientValues.GetNumRows(), gradientValues.GetNumCols(), gradientValues.GetDeviceId());
|
||||
diag = gradientValues;
|
||||
diag.Resize(gradientValues.GetNumCols(), 1);
|
||||
|
||||
inputGradientValues.SetValue(0);
|
||||
// BUGBUG: Must *add* to gradient!
|
||||
inputGradientValues.SetDiagonalValue(diag);
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override
|
||||
{
|
||||
// The DiagonalNode does not require its output value for computing
|
||||
// the gradients of its input nodes
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
|
||||
{
|
||||
// The DiagonalNode does not require any of its input's values for computing
|
||||
// the gradients of its input nodes
|
||||
UNREFERENCED_PARAMETER(childIndex);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template class DiagonalNode<float>;
|
||||
template class DiagonalNode<double>;
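// Quick worked example of the semantics carried over with this node (hypothetical values, not from this commit):
//   forward:  a square input  [ 1 2 ; 3 4 ]  yields the row vector  [ 1 4 ]  (its main diagonal);
//   backward: an output gradient  [ g1 g2 ]  is written back onto the diagonal of the input gradient,
//             giving  [ g1 0 ; 0 g2 ]  (off-diagonal entries receive zero; see the BUGBUG above about adding rather than setting).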
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// CosDistanceNode (left, right)
The diff for this file is not shown because of its large size.
@ -23,448 +23,6 @@
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ReinterpretNodeBase (input) -- base class for nodes that reinterpret
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
class ReinterpretNodeBase : public ComputationNode<ElemType>, public NumInputs<1>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base;
|
||||
UsingComputationNodeMembers;
|
||||
|
||||
public:
|
||||
// DeclareConstructorFromConfigWithNumInputs(ReinterpretNodeBase);
|
||||
ReinterpretNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name)
|
||||
{
|
||||
}
|
||||
|
||||
// stack K consecutive frames into a single frame that is K times taller
|
||||
// FrameRange and MBLayout refer to the 'to' (reduced) timeline.
|
||||
// BUGBUG: THIS IS UNTESTED!!
|
||||
static void Stack(const FrameRange& fr, const shared_ptr<MBLayout>& pMBLayout, /*const*/ Matrix<ElemType>& from, Matrix<ElemType>& to, size_t K, bool addTo)
|
||||
{
|
||||
// example
|
||||
// input: T=2, D=2, K=3, S=2 (abcdef and uvwxyz)
|
||||
// abc def
|
||||
// ABC DEF
|
||||
//
|
||||
// uvw xyz
|
||||
// UVW XYZ
|
||||
// target:
|
||||
// a d
|
||||
// A D
|
||||
// b e
|
||||
// B E
|
||||
// c f
|
||||
// C F
|
||||
//
|
||||
// u x
|
||||
// U X
|
||||
// v y
|
||||
// V Y
|
||||
// w z
|
||||
// W Z
|
||||
// underlying matrix storage is actually this:
|
||||
// input:
|
||||
// aubvcw dxeyfz
|
||||
// AUBVCW DXEYFZ
|
||||
// target:
|
||||
// abcuvw defxyz
|
||||
// ABCUVW DEFXYZ
|
||||
|
||||
// I.e. this operation swaps index dimensions of a tensor:
|
||||
// The input is a tensor of the form (D, S, M, K, T).
|
||||
// The output is of the form (D, K, M, S, T).
|
||||
// K = stacking factor
|
||||
// T = target steps
|
||||
// S = #sequences
|
||||
// D = featDim
|
||||
// M = 1, thrown in for generality of underlying Matrix function
|
||||
|
||||
// We operate on the 'to' layout, fr refers to result, not the input.
|
||||
// The input layout is different, but reshaping the input to output dimensions will allow us to pull out the right values anyway.
|
||||
auto from0 = from.Reshaped(to.GetNumRows(), to.GetNumCols()); // we operate on 'to' layout
|
||||
auto fromSlice0 = DataWithMBLayoutFor(from0, fr, pMBLayout);
|
||||
auto toSlice0 = DataWithMBLayoutFor(to, fr, pMBLayout);
|
||||
// now we got views on the right ranges of values, but with weird dimensions
|
||||
|
||||
// reshape them into a unified view with D being the row dimension, and (S,M,K,T) the column dimension
|
||||
size_t D = from.GetNumRows();
|
||||
size_t SMKT = from.GetNumCols();
|
||||
auto fromSlice = fromSlice0.Reshaped(D, SMKT);
|
||||
auto toSlice = toSlice0.Reshaped(D, SMKT);
|
||||
|
||||
// now to the shuffle dance
|
||||
size_t S = pMBLayout->GetNumParallelSequences();
|
||||
size_t T = pMBLayout->GetNumTimeSteps();
|
||||
size_t M = 1;
|
||||
Matrix<ElemType>::TensorShuffleScaleAndAdd(addTo ? 1.0f : 0, fromSlice, D, S, M, K, T, 1.0f, toSlice, toSlice);
|
||||
}
|
||||
|
||||
// split frames of D*K elements into K consecutive frames of dimension D.
|
||||
// FrameRange and MBLayout refer to the 'from' (reduced) timeline.
|
||||
// This function is the inverse of Stack(). See comments there and exchange from and to.
|
||||
static void Unstack(const FrameRange& fr, const shared_ptr<MBLayout>& pMBLayout, /*const*/ Matrix<ElemType>& from, Matrix<ElemType>& to, size_t K, bool addTo)
|
||||
{
|
||||
auto fromSlice0 = DataWithMBLayoutFor(from, fr, pMBLayout);
|
||||
auto to0 = to.Reshaped(from.GetNumRows(), from.GetNumCols());
|
||||
auto toSlice0 = DataWithMBLayoutFor(to0, fr, pMBLayout);
|
||||
|
||||
size_t D = to.GetNumRows();
|
||||
size_t SMKT = to.GetNumCols();
|
||||
auto fromSlice = fromSlice0.Reshaped(D, SMKT);
|
||||
auto toSlice = toSlice0.Reshaped(D, SMKT);
|
||||
|
||||
size_t S = pMBLayout->GetNumParallelSequences();
|
||||
size_t T = pMBLayout->GetNumTimeSteps();
|
||||
size_t M = 1;
|
||||
Matrix<ElemType>::TensorShuffleScaleAndAdd(addTo ? 1.0f : 0, fromSlice, D, K, M, S, T, 1.0f, toSlice, toSlice);
|
||||
}
|
||||
};
|
||||
|
||||
#define UsingReinterpretNodeBaseMembers UsingComputationNodeMembersBoilerplate
|
||||
|
||||
// TODO: This ReshapeNode is currently not used. Its function will be taken over by Transpose and the Reshape that follows this one below.
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DeprecatedReshapeNode (input) -- reinterpret input matrix as having different dimensions
|
||||
// where the new row dimension is given, and the column dimension is inferred.
|
||||
// Also optionally associate a different TensorShape with the data.
|
||||
//
|
||||
// If input has no layout, then this reshapes the input matrix
|
||||
// from (rows x cols) to (newRows x (cols / newRows * rows)).
|
||||
//
|
||||
// If input has a layout, then it adds or removes a nested time dimension.
|
||||
// - If newRows > rows, then we remove a time dimension by stacking all frames from the dimension into one:
|
||||
// (rows x (newRows/rows nested time steps) x T time steps)
|
||||
// -> (newRows x T time steps).
|
||||
// - If newRows < rows, then we add a time dimension, going
|
||||
// (rows x T time steps)
|
||||
// -> (newRows x (rows/newRows nested time steps) x T time steps).
|
||||
// which requires the nested time sequence to have the correct number of steps.
|
||||
// E.g. going from rows=20 to newRows=40 assumes a nested time sequence of 2 steps, which are grouped into one step, with the two vectors stacked.
|
||||
// Multiple parallel sequences are treated independently.
|
||||
// TODO: This definition is poor; we should use a different node name, and specify the factor directly.
|
||||
// We may hide that in BrainScript, but better use different node types.
|
||||
// E.g. ReinterpretRowStackAsSequence and ReinterpretSequenceAsRowStack.
|
||||
// BUGBUG: This is not actually implemented yet. Instead, it goes from 1 to K steps or from K to 1 step. This is temporary/experimental, until the plumbing for nesting is there.
|
||||
//
|
||||
// Thirdly, DeprecatedReshapeNode can also be used to update only the TensorShape. In that case, the MBLayout is kept as is.
|
||||
//
|
||||
// Note: The new row dimension must be a straight multiple or divisor of the current row dimension.
|
||||
// To reshape to a non-multiple go to row dim 1 first.
|
||||
//
|
||||
// Unlike most other nodes, this node has intimate inside knowledge of MBLayouts and frameRanges.
|
||||
// TODO: Changing the TensorShape does not seem to belong here.
|
||||
// -----------------------------------------------------------------------
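// Worked example of the no-layout case described above (hypothetical numbers): a 4 x 6 input reshaped
// with newRows = 8 yields an 8 x 3 output, since newCols = cols * rows / newRows = 6 * 4 / 8 = 3; this
// satisfies the Validate() check below because 8 is an integer multiple of the input row dimension 4.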
|
||||
|
||||
template <class ElemType>
|
||||
class DeprecatedReshapeNode : public ReinterpretNodeBase<ElemType>
|
||||
{
|
||||
typedef ReinterpretNodeBase<ElemType> Base;
|
||||
UsingReinterpretNodeBaseMembers;
|
||||
static const std::wstring TypeName()
|
||||
{
|
||||
return L"DeprecatedReshape";
|
||||
}
|
||||
|
||||
public:
|
||||
DeprecatedReshapeNode(DEVICEID_TYPE deviceId, const wstring& name, size_t numRows = 0, const TensorShape& imageLayout = TensorShape())
|
||||
: Base(deviceId, name),
|
||||
m_numTargetRows(numRows),
|
||||
m_targetImageLayout(imageLayout)
|
||||
{
|
||||
}
|
||||
DeprecatedReshapeNode(const ScriptableObjects::IConfigRecordPtr configp)
|
||||
: DeprecatedReshapeNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"numRows"), ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKind::HWC /*legacy*/))
|
||||
{
|
||||
// BUGBUG: We should not operate on image layouts here, but on a proper tensor layout.
|
||||
AttachInputs(configp, this->GetExpectedNumInputs());
|
||||
}
|
||||
|
||||
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
{
|
||||
Base::CopyTo(nodeP, newName, flags);
|
||||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
{
|
||||
auto node = dynamic_pointer_cast<DeprecatedReshapeNode<ElemType>>(nodeP);
|
||||
node->m_numTargetRows = m_numTargetRows;
|
||||
node->m_targetImageLayout = m_targetImageLayout;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Save(File& fstream) const override
|
||||
{
|
||||
Base::Save(fstream);
|
||||
fstream << m_numTargetRows;
|
||||
m_targetImageLayout.Save(fstream);
|
||||
}
|
||||
|
||||
virtual void Load(File& fstream, size_t modelVersion) override
|
||||
{
|
||||
Base::Load(fstream, modelVersion);
|
||||
fstream >> m_numTargetRows;
|
||||
m_targetImageLayout.Load(fstream, /*acceptLegacyFormat=*/true);
|
||||
}
|
||||
|
||||
virtual void /*IComputationNode::*/ PrintSelfBeforeValidation() const override
|
||||
{
|
||||
fprintf(stderr, "\nValidating --> %ls = %ls", NodeName().c_str(), OperationName().c_str());
|
||||
fprintf(stderr, "(");
|
||||
for (size_t i = 0; i < GetNumInputs(); i++)
|
||||
{
|
||||
ComputationNodePtr child = Input(i);
|
||||
if (i > 0)
|
||||
fprintf(stderr, ", ");
|
||||
if (!child)
|
||||
fprintf(stderr, "NULL");
|
||||
else
|
||||
fprintf(stderr, "%ls[%s%s]", child->NodeName().c_str(), string(child->GetSampleLayout()).c_str(), child->HasMBLayout() ? " x *" : "");
|
||||
}
|
||||
fprintf(stderr, ", NumOfRows=%lu, imageWidth=%lu, imageHeight=%lu, imageChannels=%lu)", m_numTargetRows, m_targetImageLayout[1], m_targetImageLayout[2], m_targetImageLayout[0]);
|
||||
// BUGBUG: This interpretation as image dims is only correct for the 'legacy' format, not for cudnn.
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
Base::Validate(isFinalValidationPass);
|
||||
if (factor() == 1) // canonical case: keeps the MBLayout(e.g. only changing the TensorShape)
|
||||
m_pMBLayout = Input(0)->GetMBLayout();
|
||||
else if (Input(0)->HasMBLayout())
|
||||
{
|
||||
if (!m_pMBLayout)
|
||||
m_pMBLayout = make_shared<MBLayout>(); // mini-batch data: this generates a new layout
|
||||
}
|
||||
else
|
||||
assert(!m_pMBLayout); // reshaping non-mini-batch data
|
||||
|
||||
size_t newCols = 1; // dummy
|
||||
if (!m_pMBLayout)
|
||||
{
|
||||
size_t rows = Input(0)->GetAsMatrixNumRows(), cols = Input(0)->GetAsMatrixNumCols();
|
||||
newCols = cols * rows / m_numTargetRows;
|
||||
if (isFinalValidationPass)
|
||||
{
|
||||
if ((m_numTargetRows > rows && m_numTargetRows % rows != 0) || // grouping columns
|
||||
(m_numTargetRows < rows && rows % m_numTargetRows != 0)) // splitting columns
|
||||
InvalidArgument("%ls %ls operation: output row dimension %d is not an integer multiple or divisor of input dimension %d", NodeName().c_str(), OperationName().c_str(), (int) m_numTargetRows, (int) rows);
|
||||
if (rows * cols != m_numTargetRows * newCols)
|
||||
LogicError("%ls %ls operation: unexpected dimension mismatch", NodeName().c_str(), OperationName().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// patch up m_targetImageLayout, which was originally a construction parameter
|
||||
InferTargetSampleLayout();
|
||||
|
||||
// setting any dimension to 0 means lose the tensor, flatten to vector
|
||||
if (m_targetImageLayout.GetNumElements() == 0)
|
||||
{
|
||||
if (Input(0)->HasSampleLayout())
|
||||
fprintf(stderr, "WARNING: Reshape operation cannot inherit image size information from its child. Image size info is lost.\n");
|
||||
// TODO: We need to decide what reshaping means in presence of a tensor.
|
||||
if (HasMBLayout())
|
||||
SetDims(TensorShape(m_numTargetRows), true);
|
||||
else
|
||||
SetDims(TensorShape(m_numTargetRows, newCols), false);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_numTargetRows != m_targetImageLayout.GetNumElements())
|
||||
LogicError("DeprecatedReshapeNode: InferTargetSampleLayout() computed a sample layout [%s] that mismatches m_numTargetRows %d.", string(m_targetImageLayout).c_str(), (int) m_numTargetRows);
|
||||
SetDims(m_targetImageLayout, HasMBLayout());
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
virtual void UpdateFunctionMBSize() override
|
||||
{
|
||||
size_t rows = Input(0)->GetNumRows(), cols = Input(0)->GetNumCols();
|
||||
size_t newCols = cols * rows / m_numTargetRows;
|
||||
if (!m_pMBLayout)
|
||||
{
|
||||
#if 0
|
||||
VerifyDims(m_numTargetRows, newCols);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
SetNumCols(newCols);
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO: Clarify/resolve the semantic overlap between BeginForwardProp() and UpdateFunctionMBSize().
|
||||
virtual void /*IComputationNode::*/ BeginForwardProp() override
|
||||
{
|
||||
// create the derived layout
|
||||
if (m_pMBLayout && factor() != 1)
|
||||
{
|
||||
// BUGBUG: This assumes that the layout is complete at this point in time (RecurrentNodeBase makes the same assumption).
|
||||
// This assumption is correct at present, but will become invalid once we go sequence-to-sequence.
|
||||
if (weStack())
|
||||
{
|
||||
// going from many samples to one: layout entry will get no flags
|
||||
if (Input(0)->GetMBLayout()->GetNumTimeSteps() * Input(0)->GetSampleMatrixNumRows() / m_numTargetRows != 1)
|
||||
LogicError("DeprecatedReshapeNode::BeginForwardProp() faking to remove a nested time dimension only works when going back to a single frame per sequence.");
|
||||
// we are in frame mode now
|
||||
m_pMBLayout->InitAsFrameMode(Input(0)->GetNumParallelSequences());
|
||||
}
|
||||
else
|
||||
{
|
||||
// going from one sample to many: layout will get SentenceStart/SentenceEnd flags for the sequence we expand into
|
||||
if (Input(0)->GetMBLayout()->GetNumTimeSteps() != 1)
|
||||
LogicError("DeprecatedReshapeNode::BeginForwardProp() faking to add a nested time dimension only works when coming from a single frame per sequence.");
|
||||
m_pMBLayout->Init(Input(0)->GetNumParallelSequences(), Input(0)->GetMBLayout()->GetNumTimeSteps() * Input(0)->GetSampleMatrixNumRows() / m_numTargetRows);
|
||||
for (size_t s = 0; s < m_pMBLayout->GetNumParallelSequences(); s++)
|
||||
m_pMBLayout->AddSequence(NEW_SEQUENCE_ID, s, 0, GetMBLayout()->GetNumTimeSteps());
|
||||
// BUGBUG: In the future, NEW_SEQUENCE_ID will be incorrect here; need an iterator over sequences in there.
|
||||
}
|
||||
}
|
||||
// Call this at the end because this will resize Value(), but that requires the updated MBLayout. TODO: Clarify the sequence of events. Should we update the MBLayout in UpdateFunctionMBSize()?
|
||||
Base::BeginForwardProp();
|
||||
}
|
||||
|
||||
// notes:
|
||||
// - input and output have a different time base and different layouts (except in the canonical case of factor() == 1)
|
||||
// - fr refers to *functionValues*, not the inputs
|
||||
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
|
||||
{
|
||||
size_t rows = Input(0)->Value().GetNumRows(), cols = Input(0)->Value().GetNumCols();
|
||||
size_t newCols = cols * rows / m_numTargetRows;
|
||||
assert(newCols * m_numTargetRows == cols * rows); // follows from above check
|
||||
Value().VerifySize(m_numTargetRows, newCols);
|
||||
|
||||
// no layout case: this is indeed just a reshape. Same for canonical case
|
||||
// (We still need to copy the values since there is currently no way to point to an input function value while reshaping at the same time.)
|
||||
if (!m_pMBLayout || factor() == 1)
|
||||
{
|
||||
Value().Reshaped(newCols * m_numTargetRows, 1).SetValue(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
|
||||
}
|
||||
// layout case: reshape semantics happen across parallel sequences, i.e. data shuffling is required
|
||||
else
|
||||
{
|
||||
// TODO: It does not make sense to run DeprecatedReshapeNode frame-by-frame inside a loop, because it changes the time base.
|
||||
// However, in the future, we should be able to run inside an outer loop.
|
||||
if (!fr.IsAllFrames())
|
||||
InvalidArgument("%ls %ls operation cannot be run from inside a loop since it changes the time base.", NodeName().c_str(), OperationName().c_str());
|
||||
if (weStack())
|
||||
Base::Stack(fr, m_pMBLayout, Input(0)->Value(), Value(), factor(), false /*addTo*/);
|
||||
else
|
||||
Base::Unstack(fr.WithLayout(Input(0)->GetMBLayout()), Input(0)->GetMBLayout(), Input(0)->Value(), Value(), factor(), false /*addTo*/);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange& fr) override
|
||||
{
|
||||
size_t rows = Input(0)->Value().GetNumRows(), cols = Input(0)->Value().GetNumCols();
|
||||
size_t newCols = cols * rows / m_numTargetRows;
|
||||
|
||||
// no layout case: this is indeed just a reshape. Same for canonical case
|
||||
if (!m_pMBLayout || factor() == 1)
|
||||
{
|
||||
Input(0)->Gradient().Reshaped(cols * rows, 1) += Gradient().Reshaped(newCols * m_numTargetRows, 1); // treat the values as one long vector
|
||||
}
|
||||
// layout case: reshape semantics happen across parallel sequences, i.e. data shuffling is required
|
||||
else
|
||||
{
|
||||
if (weStack())
|
||||
Base::Unstack(fr, m_pMBLayout, Gradient(), Input(0)->Gradient(), factor(), true /*addTo*/);
|
||||
else
|
||||
Base::Stack(fr.WithLayout(Input(0)->GetMBLayout()), Input(0)->GetMBLayout(), Gradient(), Input(0)->Gradient(), factor(), true /*addTo*/);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override
|
||||
{
|
||||
// The DeprecatedReshapeNode does not require its output value for computing
|
||||
// the gradients of its input nodes
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
|
||||
{
|
||||
// The DeprecatedReshapeNode does not require any of its inputs' values for computing
|
||||
// the gradients of its input nodes
|
||||
UNREFERENCED_PARAMETER(childIndex);
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t m_numTargetRows;
|
||||
bool weStack() const
|
||||
{
|
||||
return m_numTargetRows > Input(0)->GetSampleMatrixNumRows();
|
||||
} // do we stack (multiple frames into one)
|
||||
size_t factor() const
|
||||
{
|
||||
return m_numTargetRows > Input(0)->GetSampleMatrixNumRows() ? m_numTargetRows / Input(0)->GetSampleMatrixNumRows() : Input(0)->GetSampleMatrixNumRows() / m_numTargetRows;
|
||||
} // factor by which we stack or unstack
|
||||
TensorShape m_targetImageLayout;
|
||||
|
||||
// This infers dimensions in m_targetImageLayout.
|
||||
// Users are allowed to provide 2 (out of 3) image dimensions.
|
||||
// One missing dimension can be inferred. If two dimensions are
|
||||
// unspecified, it throws a runtime error.
|
||||
void InferTargetSampleLayout()
|
||||
{
|
||||
// BUGBUG: Below is the result of refactoring and only works for rank-3 tensors. Generalize.
|
||||
if (m_targetImageLayout[1] > 0)
|
||||
{
|
||||
if (m_targetImageLayout[2] > 0)
|
||||
{
|
||||
if (m_targetImageLayout[0] > 0)
|
||||
{
|
||||
if (m_targetImageLayout.GetNumElements() != m_numTargetRows)
|
||||
RuntimeError("Image dimensions do not match row size.");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_numTargetRows % (m_targetImageLayout[1] * m_targetImageLayout[2]) > 0)
|
||||
RuntimeError("Image row size is not a multiple of specified image dimensions.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(m_numTargetRows / (m_targetImageLayout[1] * m_targetImageLayout[2]), m_targetImageLayout[1], m_targetImageLayout[2]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_targetImageLayout[0] > 0)
|
||||
{
|
||||
if (m_numTargetRows % (m_targetImageLayout[1] * m_targetImageLayout[0]) > 0)
|
||||
RuntimeError("Image row size is not a multiple of specified image dimensions.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(m_targetImageLayout[0], m_targetImageLayout[1], m_numTargetRows / (m_targetImageLayout[1] * m_targetImageLayout[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
RuntimeError("At least two image dimensions must be specified.");
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_targetImageLayout[2] > 0)
|
||||
{
|
||||
if (m_targetImageLayout[0] > 0)
|
||||
{
|
||||
if (m_numTargetRows % (m_targetImageLayout[2] * m_targetImageLayout[0]) > 0)
|
||||
RuntimeError("Image row size is not a multiple of specified image dimensions.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(m_targetImageLayout[0], m_numTargetRows / (m_targetImageLayout[2] * m_targetImageLayout[0]), m_targetImageLayout[2]);
|
||||
}
|
||||
else
|
||||
RuntimeError("At least two image dimensions must be specified.");
|
||||
}
|
||||
else if (m_targetImageLayout[0] > 0)
|
||||
RuntimeError("At least two image dimensions must be specified.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(1, m_numTargetRows, 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template class DeprecatedReshapeNode<float>;
|
||||
template class DeprecatedReshapeNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Reshape(x, tensorShape, beginDim=0, endDim=0) -- reinterpret input samples as having different tensor dimensions
|
||||
// - just replaces metadata m_sampleLayout, does not change data values
|
||||
|
@ -1002,6 +560,532 @@ private:
|
|||
template class RowRepeatNode<float>;
|
||||
template class RowRepeatNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DiagonalNode -- extract diagonal elements of a square matrix into a row vector
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
class DiagonalNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1>
|
||||
{
|
||||
typedef ComputationNodeNonLooping<ElemType> Base;
|
||||
UsingComputationNodeMembersBoilerplate;
|
||||
static const std::wstring TypeName()
|
||||
{
|
||||
return L"Diagonal";
|
||||
}
|
||||
|
||||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(DiagonalNode);
|
||||
DiagonalNode(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
Base::Validate(isFinalValidationPass);
|
||||
m_pMBLayout = nullptr;
|
||||
|
||||
if (isFinalValidationPass && Input(0)->HasMBLayout())
|
||||
InvalidArgument("%ls %ls operation cannot operate on minibatch data (which have a layout)", NodeName().c_str(), OperationName().c_str());
|
||||
|
||||
size_t dim = Input(0)->GetAsMatrixNumCols();
|
||||
if (isFinalValidationPass && dim != Input(0)->GetAsMatrixNumRows())
|
||||
InvalidArgument("%ls %ls operation requires a square matrix as its input.", NodeName().c_str(), OperationName().c_str());
|
||||
|
||||
if (Input(0)->HasSampleLayout())
|
||||
fprintf(stderr, "WARNING: Diagonal operation cannot inherit image size information from its child. Image size info is lost.\n");
|
||||
|
||||
SetDims(TensorShape(1, dim), false);
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
|
||||
{
|
||||
Input(0)->ValueAsMatrix().AssignDiagonalValuesTo(ValueAsMatrix()); // TODO: use tensor lib; this is a stride operation
|
||||
#if NANCHECK
|
||||
Value().HasNan("Diagonal");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeNonLooping::*/ BackpropToNonLooping(size_t /*inputIndex*/) override
|
||||
{
|
||||
auto& inputGradientValues = Input(0)->GradientAsMatrix();
|
||||
auto& gradientValues = GradientAsMatrix();
|
||||
|
||||
// BUGBUG: This should use the memshare mechanism.
|
||||
// TODO: use tensor lib, then this will be easy, no memsharing needed
|
||||
Matrix<ElemType> diag(gradientValues.GetNumRows(), gradientValues.GetNumCols(), gradientValues.GetDeviceId());
|
||||
diag = gradientValues;
|
||||
diag.Resize(gradientValues.GetNumCols(), 1);
|
||||
|
||||
inputGradientValues.SetValue(0);
|
||||
// BUGBUG: Must *add* to gradient!
|
||||
inputGradientValues.SetDiagonalValue(diag);
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override
|
||||
{
|
||||
// The DiagonalNode does not require its output value for computing
|
||||
// the gradients of its input nodes
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
|
||||
{
|
||||
// The DiagonalNode does not require any of its inputs' values for computing
|
||||
// the gradients of its input nodes
|
||||
UNREFERENCED_PARAMETER(childIndex);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template class DiagonalNode<float>;
|
||||
template class DiagonalNode<double>;
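// Illustrative note (not part of the original source): what ForwardPropNonLooping() above
// computes, written as a plain loop over raw buffers instead of the Matrix class.
// For a square n x n input and a 1 x n output row vector (variable names are made up):
//     for (size_t j = 0; j < n; j++)
//         out[j] = in[j * n + j]; // element (j, j) of the square input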
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ReinterpretNodeBase (input) -- base class for nodes that reinterpret
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
class ReinterpretNodeBase : public ComputationNode<ElemType>, public NumInputs<1>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base;
|
||||
UsingComputationNodeMembers;
|
||||
|
||||
public:
|
||||
// DeclareConstructorFromConfigWithNumInputs(ReinterpretNodeBase);
|
||||
ReinterpretNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name)
|
||||
{
|
||||
}
|
||||
|
||||
// stack K consecutive frames into a single frame that is K times taller
|
||||
// FrameRange and MBLayout refer to the 'to' (reduced) timeline.
|
||||
// BUGBUG: THIS IS UNTESTED!!
|
||||
static void Stack(const FrameRange& fr, const shared_ptr<MBLayout>& pMBLayout, /*const*/ Matrix<ElemType>& from, Matrix<ElemType>& to, size_t K, bool addTo)
|
||||
{
|
||||
// example
|
||||
// input: T=2, D=2, K=3, S=2 (abcdef and uvwxyz)
|
||||
// abc def
|
||||
// ABC DEF
|
||||
//
|
||||
// uvw xyz
|
||||
// UVW XYZ
|
||||
// target:
|
||||
// a d
|
||||
// A D
|
||||
// b e
|
||||
// B E
|
||||
// c f
|
||||
// C F
|
||||
//
|
||||
// u x
|
||||
// U X
|
||||
// v y
|
||||
// V Y
|
||||
// w z
|
||||
// W Z
|
||||
// underlying matrix storage is actually this:
|
||||
// input:
|
||||
// aubvcw dxeyfz
|
||||
// AUBVCW DXEYFZ
|
||||
// target:
|
||||
// abcuvw defxyz
|
||||
// ABCUVW DEFXYZ
|
||||
|
||||
// I.e. this operation swaps index dimensions of a tensor:
|
||||
// The input is a tensor of the form (D, S, M, K, T).
|
||||
// The output is of the form (D, K, M, S, T).
|
||||
// K = stacking factor
|
||||
// T = target steps
|
||||
// S = #sequences
|
||||
// D = featDim
|
||||
// M = 1, thrown in for generality of underlying Matrix function
|
||||
|
||||
// We operate on the 'to' layout, fr refers to result, not the input.
|
||||
// The input layout is different, but reshaping the input to output dimensions will allow us to pull out the right values anyway.
|
||||
auto from0 = from.Reshaped(to.GetNumRows(), to.GetNumCols()); // we operate on 'to' layout
|
||||
auto fromSlice0 = DataWithMBLayoutFor(from0, fr, pMBLayout);
|
||||
auto toSlice0 = DataWithMBLayoutFor(to, fr, pMBLayout);
|
||||
// now we got views on the right ranges of values, but with weird dimensions
|
||||
|
||||
// reshape them into a unified view with D being the row dimension, and (S,M,K,T) the column dimension
|
||||
size_t D = from.GetNumRows();
|
||||
size_t SMKT = from.GetNumCols();
|
||||
auto fromSlice = fromSlice0.Reshaped(D, SMKT);
|
||||
auto toSlice = toSlice0.Reshaped(D, SMKT);
|
||||
|
||||
// now to the shuffle dance
|
||||
size_t S = pMBLayout->GetNumParallelSequences();
|
||||
size_t T = pMBLayout->GetNumTimeSteps();
|
||||
size_t M = 1;
|
||||
Matrix<ElemType>::TensorShuffleScaleAndAdd(addTo ? 1.0f : 0, fromSlice, D, S, M, K, T, 1.0f, toSlice, toSlice);
|
||||
}
|
||||
|
||||
// split frames of D*K elements into K consecutive frames of dimension D.
|
||||
// FrameRange and MBLayout refer to the 'from' (reduced) timeline.
|
||||
// This function is the inverse of Stack(). See comments there and exchange from and to.
|
||||
static void Unstack(const FrameRange& fr, const shared_ptr<MBLayout>& pMBLayout, /*const*/ Matrix<ElemType>& from, Matrix<ElemType>& to, size_t K, bool addTo)
|
||||
{
|
||||
auto fromSlice0 = DataWithMBLayoutFor(from, fr, pMBLayout);
|
||||
auto to0 = to.Reshaped(from.GetNumRows(), from.GetNumCols());
|
||||
auto toSlice0 = DataWithMBLayoutFor(to0, fr, pMBLayout);
|
||||
|
||||
size_t D = to.GetNumRows();
|
||||
size_t SMKT = to.GetNumCols();
|
||||
auto fromSlice = fromSlice0.Reshaped(D, SMKT);
|
||||
auto toSlice = toSlice0.Reshaped(D, SMKT);
|
||||
|
||||
size_t S = pMBLayout->GetNumParallelSequences();
|
||||
size_t T = pMBLayout->GetNumTimeSteps();
|
||||
size_t M = 1;
|
||||
Matrix<ElemType>::TensorShuffleScaleAndAdd(addTo ? 1.0f : 0, fromSlice, D, K, M, S, T, 1.0f, toSlice, toSlice);
|
||||
}
|
||||
};
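// Illustrative reference (not part of the original source): the index permutation that
// Stack()/Unstack() above delegate to Matrix<ElemType>::TensorShuffleScaleAndAdd.
// The input is read as a column-major (D, S, M, K, T) tensor and written out as
// (D, K, M, S, T); variable names follow the comments in Stack(). A plain-loop sketch,
// assuming both vectors hold D*S*M*K*T values (requires <vector>):
template <class ElemType>
void TensorShuffleReference(const std::vector<ElemType>& from, std::vector<ElemType>& to,
                            size_t D, size_t S, size_t M, size_t K, size_t T)
{
    for (size_t t = 0; t < T; t++)
        for (size_t k = 0; k < K; k++)
            for (size_t m = 0; m < M; m++)
                for (size_t s = 0; s < S; s++)
                    for (size_t d = 0; d < D; d++)
                    {
                        size_t src = d + D * (s + S * (m + M * (k + K * t))); // index in (D, S, M, K, T)
                        size_t dst = d + D * (k + K * (m + M * (s + S * t))); // index in (D, K, M, S, T)
                        to[dst] = from[src];
                    }
}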
|
||||
|
||||
#define UsingReinterpretNodeBaseMembers UsingComputationNodeMembersBoilerplate
|
||||
|
||||
// TODO: This ReshapeNode is currently not used. Its function will be taken over by Transpose and the Reshape that follows this one below.
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// LegacyReshapeNode (input) -- reinterpret input matrix as having different dimensions
|
||||
// where the new row dimension is given, and the column dimension is inferred.
|
||||
// Also optionally associate a different TensorShape with the data.
|
||||
//
|
||||
// DEPRECATED, do not use anymore.
|
||||
//
|
||||
// If input has no layout, then this reshapes the input matrix
|
||||
// from (rows x cols) to (newRows x (cols / newRows * rows)).
|
||||
//
|
||||
// If input has a layout, then it adds or removes a nested time dimension.
|
||||
// - If newRows > rows, then we remove a time dimension by stacking all frames from the dimension into one:
|
||||
// (rows x (newRows/rows nested time steps) x T time steps)
|
||||
// -> (newRows x T time steps).
|
||||
// - If newRows < rows, then we add a time dimension, going
|
||||
// (rows x T time steps)
|
||||
// -> (newRows x (rows/newRows nested time steps) x T time steps).
|
||||
// which requires the nested time sequence to have the correct number of steps.
|
||||
// E.g. going from rows=20 to newRows=40 assumes a nested time sequence of 2 steps, which are grouped into one step, with the two vectors stacked.
|
||||
// Multiple parallel sequences are treated independently.
|
||||
// TODO: This definition is poor; we should use a different node name, and specify the factor directly.
|
||||
// We may hide that in BrainScript, but better use different node types.
|
||||
// E.g. ReinterpretRowStackAsSequence and ReinterpretSequenceAsRowStack.
|
||||
// BUGBUG: This is not actually implemented yet. Instead, it goes from 1 to K steps or from K to 1 step. This is temporary/experimental, until the plumbing for nesting is there.
|
||||
//
|
||||
// Thirdly, LegacyReshapeNode can also be used to update only the TensorShape. In that case, the MBLayout is kept as is.
|
||||
//
|
||||
// Note: The new row dimension must be a straight multiple or divisor of the current row dimension.
|
||||
// To reshape to a non-multiple go to row dim 1 first.
|
||||
//
|
||||
// Unlike most other nodes, this node has intimate inside knowledge of MBLayouts and frameRanges.
|
||||
// TODO: Changing the TensorShape does not seem to belong here.
|
||||
// -----------------------------------------------------------------------
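// Worked example for the comment block above (illustration only, sizes are hypothetical):
// - no layout: a 20 x 6 input with numRows = 40 is reinterpreted as 40 x 3,
//   since newCols = cols * rows / numRows = 6 * 20 / 40 = 3 (120 values either way).
// - with layout: rows = 20, numRows = 40 stacks pairs of consecutive frames (factor() == 2),
//   halving the number of time steps; rows = 40, numRows = 20 does the inverse and doubles them.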
|
||||
|
||||
template <class ElemType>
|
||||
class LegacyReshapeNode : public ReinterpretNodeBase<ElemType>
|
||||
{
|
||||
typedef ReinterpretNodeBase<ElemType> Base;
|
||||
UsingReinterpretNodeBaseMembers;
|
||||
static const std::wstring TypeName()
|
||||
{
|
||||
return L"LegacyReshape";
|
||||
}
|
||||
|
||||
public:
|
||||
LegacyReshapeNode(DEVICEID_TYPE deviceId, const wstring& name, size_t numRows = 0, const TensorShape& imageLayout = TensorShape())
|
||||
: Base(deviceId, name),
|
||||
m_numTargetRows(numRows),
|
||||
m_targetImageLayout(imageLayout)
|
||||
{
|
||||
}
|
||||
LegacyReshapeNode(const ScriptableObjects::IConfigRecordPtr configp)
|
||||
: LegacyReshapeNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"numRows"), ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKind::HWC /*legacy*/))
|
||||
{
|
||||
// BUGBUG: We should not operate on image layouts here, but on a proper tensor layout.
|
||||
AttachInputs(configp, this->GetExpectedNumInputs());
|
||||
}
|
||||
|
||||
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
{
|
||||
Base::CopyTo(nodeP, newName, flags);
|
||||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
{
|
||||
auto node = dynamic_pointer_cast<LegacyReshapeNode<ElemType>>(nodeP);
|
||||
node->m_numTargetRows = m_numTargetRows;
|
||||
node->m_targetImageLayout = m_targetImageLayout;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Save(File& fstream) const override
|
||||
{
|
||||
Base::Save(fstream);
|
||||
fstream << m_numTargetRows;
|
||||
m_targetImageLayout.Save(fstream);
|
||||
}
|
||||
|
||||
virtual void Load(File& fstream, size_t modelVersion) override
|
||||
{
|
||||
Base::Load(fstream, modelVersion);
|
||||
fstream >> m_numTargetRows;
|
||||
m_targetImageLayout.Load(fstream, /*acceptLegacyFormat=*/true);
|
||||
}
|
||||
|
||||
virtual void /*IComputationNode::*/ PrintSelfBeforeValidation() const override
|
||||
{
|
||||
fprintf(stderr, "\nValidating --> %ls = %ls", NodeName().c_str(), OperationName().c_str());
|
||||
fprintf(stderr, "(");
|
||||
for (size_t i = 0; i < GetNumInputs(); i++)
|
||||
{
|
||||
ComputationNodePtr child = Input(i);
|
||||
if (i > 0)
|
||||
fprintf(stderr, ", ");
|
||||
if (!child)
|
||||
fprintf(stderr, "NULL");
|
||||
else
|
||||
fprintf(stderr, "%ls[%s%s]", child->NodeName().c_str(), string(child->GetSampleLayout()).c_str(), child->HasMBLayout() ? " x *" : "");
|
||||
}
|
||||
fprintf(stderr, ", NumOfRows=%lu, imageWidth=%lu, imageHeight=%lu, imageChannels=%lu)", m_numTargetRows, m_targetImageLayout[1], m_targetImageLayout[2], m_targetImageLayout[0]);
|
||||
// BUGBUG: This interpretation as image dims is only correct for the legacy format, not for cudnn.
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
Base::Validate(isFinalValidationPass);
|
||||
if (factor() == 1) // canonical case: keeps the MBLayout(e.g. only changing the TensorShape)
|
||||
m_pMBLayout = Input(0)->GetMBLayout();
|
||||
else if (Input(0)->HasMBLayout())
|
||||
{
|
||||
if (!m_pMBLayout)
|
||||
m_pMBLayout = make_shared<MBLayout>(); // mini-batch data: this generates a new layout
|
||||
}
|
||||
else
|
||||
assert(!m_pMBLayout); // reshaping non-mini-batch data
|
||||
|
||||
size_t newCols = 1; // dummy
|
||||
if (!m_pMBLayout)
|
||||
{
|
||||
size_t rows = Input(0)->GetAsMatrixNumRows(), cols = Input(0)->GetAsMatrixNumCols();
|
||||
newCols = cols * rows / m_numTargetRows;
|
||||
if (isFinalValidationPass)
|
||||
{
|
||||
if ((m_numTargetRows > rows && m_numTargetRows % rows != 0) || // grouping columns
|
||||
(m_numTargetRows < rows && rows % m_numTargetRows != 0)) // splitting columns
|
||||
InvalidArgument("%ls %ls operation: output row dimension %d is not an integer multiple or divisor of input dimension %d", NodeName().c_str(), OperationName().c_str(), (int) m_numTargetRows, (int) rows);
|
||||
if (rows * cols != m_numTargetRows * newCols)
|
||||
LogicError("%ls %ls operation: unexpected dimension mismatch", NodeName().c_str(), OperationName().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// patch up m_targetImageLayout, which was originally a construction parameter
|
||||
InferTargetSampleLayout();
|
||||
|
||||
// setting any dimension to 0 means losing the tensor and flattening to a vector
|
||||
if (m_targetImageLayout.GetNumElements() == 0)
|
||||
{
|
||||
if (Input(0)->HasSampleLayout())
|
||||
fprintf(stderr, "WARNING: Reshape operation cannot inherit image size information from its child. Image size info is lost.\n");
|
||||
// TODO: We need to decide what reshaping means in presence of a tensor.
|
||||
if (HasMBLayout())
|
||||
SetDims(TensorShape(m_numTargetRows), true);
|
||||
else
|
||||
SetDims(TensorShape(m_numTargetRows, newCols), false);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_numTargetRows != m_targetImageLayout.GetNumElements())
|
||||
LogicError("LegacyReshapeNode: InferTargetSampleLayout() computed a sample layout [%s] that mismatches m_numTargetRows %d.", string(m_targetImageLayout).c_str(), (int) m_numTargetRows);
|
||||
SetDims(m_targetImageLayout, HasMBLayout());
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
virtual void UpdateFunctionMBSize() override
|
||||
{
|
||||
size_t rows = Input(0)->GetNumRows(), cols = Input(0)->GetNumCols();
|
||||
size_t newCols = cols * rows / m_numTargetRows;
|
||||
if (!m_pMBLayout)
|
||||
{
|
||||
#if 0
|
||||
VerifyDims(m_numTargetRows, newCols);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
SetNumCols(newCols);
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO: Clarify/resolve the semantic overlap between BeginForwardProp() and UpdateFunctionMBSize().
|
||||
virtual void /*IComputationNode::*/ BeginForwardProp() override
|
||||
{
|
||||
// create the derived layout
|
||||
if (m_pMBLayout && factor() != 1)
|
||||
{
|
||||
// BUGBUG: This assumes that the layout is complete at this point in time (RecurrentNodeBase makes the same assumption).
|
||||
// This assumption is correct at present, but will become invalid once we go sequence-to-sequence.
|
||||
if (weStack())
|
||||
{
|
||||
// going from many samples to one: layout entry will get no flags
|
||||
if (Input(0)->GetMBLayout()->GetNumTimeSteps() * Input(0)->GetSampleMatrixNumRows() / m_numTargetRows != 1)
|
||||
LogicError("LegacyReshapeNode::BeginForwardProp() faking to remove a nested time dimension only works when going back to a single frame per sequence.");
|
||||
// we are in frame mode now
|
||||
m_pMBLayout->InitAsFrameMode(Input(0)->GetNumParallelSequences());
|
||||
}
|
||||
else
|
||||
{
|
||||
// going from one sample to many: layout will get SentenceStart/SentenceEnd flags for the sequence we expand into
|
||||
if (Input(0)->GetMBLayout()->GetNumTimeSteps() != 1)
|
||||
LogicError("LegacyReshapeNode::BeginForwardProp() faking to add a nested time dimension only works when coming from a single frame per sequence.");
|
||||
m_pMBLayout->Init(Input(0)->GetNumParallelSequences(), Input(0)->GetMBLayout()->GetNumTimeSteps() * Input(0)->GetSampleMatrixNumRows() / m_numTargetRows);
|
||||
for (size_t s = 0; s < m_pMBLayout->GetNumParallelSequences(); s++)
|
||||
m_pMBLayout->AddSequence(NEW_SEQUENCE_ID, s, 0, GetMBLayout()->GetNumTimeSteps());
|
||||
// BUGBUG: In the future, NEW_SEQUENCE_ID will be incorrect here; need an iterator over sequences in there.
|
||||
}
|
||||
}
|
||||
// Call this at the end because this will resize Value(), but that requires the updated MBLayout. TODO: Clarify the sequence of events. Should we update the MBLayout in UpdateFunctionMBSize()?
|
||||
Base::BeginForwardProp();
|
||||
}
|
||||
|
||||
// notes:
|
||||
// - input and output have a different time base and different layouts (except in the canonical case of factor() == 1)
|
||||
// - fr refers to *functionValues*, not the inputs
|
||||
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
|
||||
{
|
||||
size_t rows = Input(0)->Value().GetNumRows(), cols = Input(0)->Value().GetNumCols();
|
||||
size_t newCols = cols * rows / m_numTargetRows;
|
||||
assert(newCols * m_numTargetRows == cols * rows); // follows from above check
|
||||
Value().VerifySize(m_numTargetRows, newCols);
|
||||
|
||||
// no layout case: this is indeed just a reshape. Same for canonical case
|
||||
// (We still need to copy the values since there is currently no way to point to an input function value while reshaping at the same time.)
|
||||
if (!m_pMBLayout || factor() == 1)
|
||||
{
|
||||
Value().Reshaped(newCols * m_numTargetRows, 1).SetValue(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
|
||||
}
|
||||
// layout case: reshape semantics happen across parallel sequences, i.e. data shuffling is required
|
||||
else
|
||||
{
|
||||
// TODO: It does not make sense to run LegacyReshapeNode frame-by-frame inside a loop, because it changes the time base.
|
||||
// However, in the future, we should be able to run inside an outer loop.
|
||||
if (!fr.IsAllFrames())
|
||||
InvalidArgument("%ls %ls operation cannot be run from inside a loop since it changes the time base.", NodeName().c_str(), OperationName().c_str());
|
||||
if (weStack())
|
||||
Base::Stack(fr, m_pMBLayout, Input(0)->Value(), Value(), factor(), false /*addTo*/);
|
||||
else
|
||||
Base::Unstack(fr.WithLayout(Input(0)->GetMBLayout()), Input(0)->GetMBLayout(), Input(0)->Value(), Value(), factor(), false /*addTo*/);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange& fr) override
|
||||
{
|
||||
size_t rows = Input(0)->Value().GetNumRows(), cols = Input(0)->Value().GetNumCols();
|
||||
size_t newCols = cols * rows / m_numTargetRows;
|
||||
|
||||
// no layout case: this is indeed just a reshape. Same for canonical case
|
||||
if (!m_pMBLayout || factor() == 1)
|
||||
{
|
||||
Input(0)->Gradient().Reshaped(cols * rows, 1) += Gradient().Reshaped(newCols * m_numTargetRows, 1); // treat the values as one long vector
|
||||
}
|
||||
// layout case: reshape semantics happen across parallel sequences, i.e. data shuffling is required
|
||||
else
|
||||
{
|
||||
if (weStack())
|
||||
Base::Unstack(fr, m_pMBLayout, Gradient(), Input(0)->Gradient(), factor(), true /*addTo*/);
|
||||
else
|
||||
Base::Stack(fr.WithLayout(Input(0)->GetMBLayout()), Input(0)->GetMBLayout(), Gradient(), Input(0)->Gradient(), factor(), true /*addTo*/);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override
|
||||
{
|
||||
// The LegacyReshapeNode does not require its output value for computing
|
||||
// the gradients of its input nodes
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
|
||||
{
|
||||
// The LegacyReshapeNode does not require any of its inputs' values for computing
|
||||
// the gradients of its input nodes
|
||||
UNREFERENCED_PARAMETER(childIndex);
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t m_numTargetRows;
|
||||
bool weStack() const
|
||||
{
|
||||
return m_numTargetRows > Input(0)->GetSampleMatrixNumRows();
|
||||
} // do we stack (multiple frames into one)
|
||||
size_t factor() const
|
||||
{
|
||||
return m_numTargetRows > Input(0)->GetSampleMatrixNumRows() ? m_numTargetRows / Input(0)->GetSampleMatrixNumRows() : Input(0)->GetSampleMatrixNumRows() / m_numTargetRows;
|
||||
} // factor by which we stack or unstack
|
||||
TensorShape m_targetImageLayout;
|
||||
|
||||
// This infers dimensions in m_targetImageLayout.
|
||||
// Users are allowed to provide 2 (out of 3) image dimensions.
|
||||
// One missing dimension can be inferred. If two dimensions are
|
||||
// unspecified, it throws a runtime error.
|
||||
void InferTargetSampleLayout()
|
||||
{
|
||||
// BUGBUG: Below is the result of refactoring and only works for rank-3 tensors. Generalize.
|
||||
if (m_targetImageLayout[1] > 0)
|
||||
{
|
||||
if (m_targetImageLayout[2] > 0)
|
||||
{
|
||||
if (m_targetImageLayout[0] > 0)
|
||||
{
|
||||
if (m_targetImageLayout.GetNumElements() != m_numTargetRows)
|
||||
RuntimeError("Image dimensions do not match row size.");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_numTargetRows % (m_targetImageLayout[1] * m_targetImageLayout[2]) > 0)
|
||||
RuntimeError("Image row size is not a multiple of specified image dimensions.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(m_numTargetRows / (m_targetImageLayout[1] * m_targetImageLayout[2]), m_targetImageLayout[1], m_targetImageLayout[2]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_targetImageLayout[0] > 0)
|
||||
{
|
||||
if (m_numTargetRows % (m_targetImageLayout[1] * m_targetImageLayout[0]) > 0)
|
||||
RuntimeError("Image row size is not a multiple of specified image dimensions.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(m_targetImageLayout[0], m_targetImageLayout[1], m_numTargetRows / (m_targetImageLayout[1] * m_targetImageLayout[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
RuntimeError("At least two image dimensions must be specified.");
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_targetImageLayout[2] > 0)
|
||||
{
|
||||
if (m_targetImageLayout[0] > 0)
|
||||
{
|
||||
if (m_numTargetRows % (m_targetImageLayout[2] * m_targetImageLayout[0]) > 0)
|
||||
RuntimeError("Image row size is not a multiple of specified image dimensions.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(m_targetImageLayout[0], m_numTargetRows / (m_targetImageLayout[2] * m_targetImageLayout[0]), m_targetImageLayout[2]);
|
||||
}
|
||||
else
|
||||
RuntimeError("At least two image dimensions must be specified.");
|
||||
}
|
||||
else if (m_targetImageLayout[0] > 0)
|
||||
RuntimeError("At least two image dimensions must be specified.");
|
||||
else
|
||||
m_targetImageLayout = TensorShape(1, m_numTargetRows, 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template class LegacyReshapeNode<float>;
|
||||
template class LegacyReshapeNode<double>;
|
||||
|
||||
/*
|
||||
|
||||
notes on tensor operations
|
||||
|
@ -1020,7 +1104,7 @@ reshaping
|
|||
- Exceptions: training criteria, BatchNormalization, ...WithNegativeSamples (we should not need this)
|
||||
- I don't like that 'dim' refers to the index of the dimension as well as the number of elements in that dimension. Axis (numpy)?
|
||||
|
||||
- Reshaping: --these are all implemented in C++ by DeprecatedReshapeNode
|
||||
- Reshaping: --these are all implemented in C++ by LegacyReshapeNode
|
||||
- Reshape(x, tensorShape, beginDim=0, endDim=0)
|
||||
- just replaces metadata m_sampleLayout
|
||||
- one dimension may be specified as 0 and will be inferred
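- example (illustrative, made-up sizes): reshaping a 600-element sample with one dimension given as 0, e.g. dims 20 and 0, infers the 0 as 600 / 20 = 30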
|
||||
|
|
|
@ -19,6 +19,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// This header collects special-purpose nodes.
|
||||
|
||||
#ifdef COMING_SOON
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// GMMLogLikelihoodNode (unnormedPrior, means, logStdDevs, features) -- GMM log LL over input vector(s)
|
||||
// calculates the log likelihood of a feature given parameters of a Gaussian mixture model (GMM) with shared diagonal variance
|
||||
|
@ -398,9 +400,10 @@ protected:
|
|||
template class GMMLogLikelihoodNode<float>;
|
||||
template class GMMLogLikelihoodNode<double>;
|
||||
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
/// SequenceWithSoftmaxNode (label, prediction, loglikelihood)
|
||||
// SequenceWithSoftmaxNode (label, prediction, loglikelihood)
|
||||
// word-lattice based sequence training criterion, using a Microsoft-proprietary lattice format
|
||||
//
|
||||
// This node is likely not very useful for external use since it uses an MS-proprietary lattice-archive format
|
||||
|
@ -589,42 +592,15 @@ public:
|
|||
}
|
||||
|
||||
// TODO: method names should be CamelCase
|
||||
std::vector<shared_ptr<const msra::dbn::latticepair>>* getLatticePtr()
|
||||
{
|
||||
return &m_lattices;
|
||||
}
|
||||
std::vector<shared_ptr<const msra::dbn::latticepair>>* getLatticePtr() { return &m_lattices; }
|
||||
std::vector<size_t>* getuidprt() { return &m_uids; }
|
||||
std::vector<size_t>* getboundaryprt() { return &m_boundaries; }
|
||||
std::vector<size_t>* getextrauttmap() { return &m_extraUttMap; }
|
||||
msra::asr::simplesenonehmm* gethmm() { return &m_hmm; }
|
||||
|
||||
std::vector<size_t>* getuidprt()
|
||||
{
|
||||
return &m_uids;
|
||||
}
|
||||
|
||||
std::vector<size_t>* getboundaryprt()
|
||||
{
|
||||
return &m_boundaries;
|
||||
}
|
||||
std::vector<size_t>* getextrauttmap()
|
||||
{
|
||||
return &m_extraUttMap;
|
||||
}
|
||||
msra::asr::simplesenonehmm* gethmm()
|
||||
{
|
||||
return &m_hmm;
|
||||
}
|
||||
|
||||
void SetSmoothWeight(double fsSmoothingWeight)
|
||||
{
|
||||
m_fsSmoothingWeight = fsSmoothingWeight;
|
||||
}
|
||||
void SetFrameDropThresh(double frameDropThresh)
|
||||
{
|
||||
m_frameDropThreshold = frameDropThresh;
|
||||
}
|
||||
|
||||
void SetReferenceAlign(const bool doreferencealign)
|
||||
{
|
||||
m_doReferenceAlignment = doreferencealign;
|
||||
}
|
||||
void SetSmoothWeight(double fsSmoothingWeight) { m_fsSmoothingWeight = fsSmoothingWeight; }
|
||||
void SetFrameDropThresh(double frameDropThresh) { m_frameDropThreshold = frameDropThresh; }
|
||||
void SetReferenceAlign(const bool doreferencealign) { m_doReferenceAlignment = doreferencealign; }
|
||||
|
||||
void SetGammarCalculationParam(const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR)
|
||||
{
|
||||
|
|