changed GetRecurrenceDirections() back to operate on a single dimension only (multiple dimensions can be realized with BrainScript);
towards implementing DataTensorFor() using tensor slices, so that we can reuse that for ShiftNode
This commit is contained in:
Parent
c1c818c85b
Commit
ed5d40aae1
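In short: the per-dimension direction vector becomes a single scalar stepping direction, and multi-dimensional recurrences are left to BrainScript composition. A condensed sketch of the interface change (names taken from the diff below; this is not the complete class hierarchy):

// Before: each recurrent node reported a vector with one direction per dimension.
//   struct IRecurrentNode { virtual const std::vector<int> & GetRecurrenceDirections() const = 0; };
// After: a single scalar stepping direction; 0 means "not recurrent".
struct IRecurrentNode { virtual int GetRecurrenceSteppingDirection() const = 0; };

// A delay node under the new interface, condensed from DelayedValueNodeBase in
// the diff: 'direction' is -1 for PastValue-style nodes, +1 for FutureValue-style.
template <int direction>
struct DelayedValueNodeSketch : public IRecurrentNode
{
    int GetRecurrenceSteppingDirection() const override { return -direction; }
};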
@@ -47,7 +47,7 @@ using namespace std;
L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
L"Shift(input, fromOffsets, boundaryValue, dim=-1, offsetRanges=1, multiOffsetDim=0, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) ; fromOffset = new IntVector [ items = fromOffsets ] ; offsetRange = new SizeVector [items= new SizeVector [ items = offsetRanges ] ]/*plus the function args*/ ]\n"
L"Shift(input, fromOffset, boundaryValue, boundaryMode=-1/*context*/, dim=-1, numSteps=1, insertedDim=0, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) /*plus the function args*/ ]\n"
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"
@@ -748,17 +748,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
    if (!pMBLayout)
        LogicError("DataFor: Attempting to retrieve a parallel sequence from data without layout.");
#if 1
    else
        LogicError("DataFor: To retrieve a parallel sequence, implement Matrix::RowSlice() first!");
#else
    // get a reshaped view that stacks all sequences into T long vectors
    auto mat = data.ColumnSlice(0, data.GetNumCols());
    mat.Resize(data.GetNumRows() * pMBLayout->GetNumParallelSequences(), data.GetNumRows() / pMBLayout->GetNumParallelSequences());
    return mat; // .RowSlice(fr.seqIndex * data.GetNumRows());
    // TODO: Why does RowSlice() not exist? Seems simple. Is there a hidden assumption of contiguous memory?#endif
    // TODO: The tensor version of this will support it.
#endif
    LogicError("DataFor: Individual parallel sequences cannot be retrieved in Matrix representation. Use TensorView instead.");
}
}
// FrameRange refers to a time slice -> return that
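An aside on the TODO above: assuming the column-major layout of CNTK's Matrix class, ColumnSlice() can return a contiguous view while a RowSlice() view would need stride support, which is presumably the "hidden assumption of contiguous memory". A standalone sketch (plain C++, not CNTK code):

#include <cstdio>

int main()
{
    const int rows = 4, cols = 3;
    float data[rows * cols];
    for (int j = 0; j < cols; j++)          // column-major fill: (i,j) -> data[i + j*rows]
        for (int i = 0; i < rows; i++)
            data[i + j * rows] = (float)(10 * i + j);

    // ColumnSlice(1, 2): one contiguous block starting at column 1
    float* colSlice = data + 1 * rows;      // length 2*rows, no stride needed
    std::printf("first element of ColumnSlice: %g\n", colSlice[0]);

    // RowSlice(2, 1): NOT contiguous -- consecutive elements are 'rows' apart,
    // so a Matrix::RowSlice() view would need stride support.
    for (int j = 0; j < cols; j++)
        std::printf("row 2, col %d: %g\n", j, data[2 + j * rows]);
    return 0;
}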
@@ -790,6 +781,82 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    return data.ColumnSlice(columnRange.first, columnRange.second);
}

// -----------------------------------------------------------------------
// TensorSliceWithMBLayoutFor() -- Return tensor slice for a FrameRange of a Matrix with specified number of columns with a given MBLayout
// -----------------------------------------------------------------------

template<class DimensionVector> // e.g. std::vector<size_t> or SmallVector<size_t>
static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayoutFor(const DimensionVector & shape/*of data matrix to slice*/,
                                                                                     const FrameRange & fr/*select frame or entire batch*/,
                                                                                     const MBLayoutPtr & pMBLayout/*the MB layout of 'data'*/)
{
    std::pair<DimensionVector, DimensionVector> result;

    // this creates a slice for the entire matrix, which we will then narrow down
    result.first.resize(shape.size(), 0);
    result.second = shape;

    fr; pMBLayout;
#if 0
    // MBLayout of data and of FrameRange must be identical pointers,
    // or in case of broadcasting, respective parent pointers.
    // MBLayouts that are identical in content but not object identity (pointer) are not admissible.
    // For those cases, use a ReconcileMBLayout node.
    if (fr.m_pMBLayout != pMBLayout)
    {
        // if broadcast allowed then it is allowed to broadcast from an outer-loop value
        // Currently, the only 'outer' loop we have is to have no layout.
        if (fr.m_broadcastAllowed && !pMBLayout && numCols == 1)
            return std::pair<size_t, size_t>(0, numCols);
        if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
            LogicError("DataFor: fr's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation?");
        else
            LogicError("DataFor: fr's MBLayout inconsistent with matrix");
    }
    // if FrameRange refers to whole minibatch (map mode)
    // or if we don't even have a layout
    // then return the whole matrix
    // but as a reference (e.g. it cannot be resized)
    if (!pMBLayout || fr.IsAllFrames())
    {
        if (fr.m_timeOffset != 0)
            LogicError("DataFor: Time offset must not be specified for FrameRanges that reference the entire minibatch.");
        // TODO: Can we allow this? Semantics would be different, it would crop frames outside.
        if (fr.seqIndex == SIZE_MAX)
            return std::pair<size_t, size_t>(0, numCols);
        else
        {
            if (!pMBLayout)
                LogicError("DataFor: Attempting to retrieve a parallel sequence from data without layout.");
#if 1
            else
                LogicError("DataFor: To retrieve a parallel sequence, implement Matrix::RowSlice() first!");
#else
            // get a reshaped view that stacks all sequences into T long vectors
            auto mat = data.ColumnSlice(0, data.GetNumCols());
            mat.Resize(data.GetNumRows() * pMBLayout->GetNumParallelSequences(), data.GetNumRows() / pMBLayout->GetNumParallelSequences());
            return mat; // .RowSlice(fr.seqIndex * data.GetNumRows());
            // TODO: Why does RowSlice() not exist? Seems simple. Is there a hidden assumption of contiguous memory?#endif
            // TODO: The tensor version of this will support it.
#endif
        }
    }
    // FrameRange refers to a time slice -> return that
    else
    {
        size_t numParallelSequences = pMBLayout->GetNumParallelSequences();
        size_t startColumn = (fr.timeIdxInSeq + fr.m_timeOffset) * numParallelSequences;
        if (startColumn >= numCols)
            LogicError("DataFor: FrameRange specifies a time index that is out of range.");
        if (fr.seqIndex == SIZE_MAX)
            return std::pair<size_t, size_t>(startColumn, numParallelSequences);
        else
            return std::pair<size_t, size_t>(startColumn + fr.seqIndex, 1);
    }
#endif
    return result;
}

// -----------------------------------------------------------------------
// MaskMissingColumnsTo() -- function to set gaps to zero or NaN
// -----------------------------------------------------------------------
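As committed, the helper only builds a slice covering the whole tensor; the #if 0 block shows the narrowing it is heading towards. A standalone sketch of the returned pair's semantics, using std::vector<size_t> and made-up dimensions, and assuming the pair is read as (begin, end) bounds per dimension (with begin all zero and second = shape, a (begin, extent) reading fits the committed code equally well):

#include <cstdio>
#include <utility>
#include <vector>

int main()
{
    // made-up data shape: [512 features x 4 parallel sequences x 20 time steps]
    std::vector<size_t> shape = { 512, 4, 20 };

    // what TensorSliceWithMBLayoutFor() computes so far: the entire tensor
    std::pair<std::vector<size_t>, std::vector<size_t>> result;
    result.first.assign(shape.size(), 0); // begin index per dimension
    result.second = shape;                // end bound per dimension

    // what the disabled block would add: a FrameRange selecting time step 7
    // (across all parallel sequences) narrows the last dimension to [7, 8)
    result.first.back() = 7;
    result.second.back() = 8;

    for (size_t k = 0; k < shape.size(); k++)
        std::printf("dim %zu: [%zu, %zu)\n", k, result.first[k], result.second[k]);
    return 0;
}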
@@ -24,7 +24,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// The methods below determine evaluation order, which is tricky in presence of recurrent loops.
// TODO: Can this be moved to a separate class?

static const vector<int> & GetRecurrenceDirections(const ComputationNodeBasePtr &);
static int GetRecurrenceSteppingDirection(const ComputationNodeBasePtr &);

// FormRecurrentLoops() -- MAIN ENTRY POINT for network recurrent-loop analysis. All other functions in this CPP are called only from this one.
// This function analysis the networks for recurrent loops present in the computation of 'rootNode.'
@@ -92,7 +92,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const auto & node = iter->m_nestedNodes[j];
for (size_t i = 0; i < node->GetNumInputs(); i++)
{
    if (node->Input(i)->m_loopId == node->m_loopId && GetRecurrenceDirections(node).empty())
    if (node->Input(i)->m_loopId == node->m_loopId && GetRecurrenceSteppingDirection(node) == 0)
    {
        //assert(node->Input(i)->m_indexInLoop == 0); // No. It seems this variable really counts the number of parents.
        node->Input(i)->m_indexInLoop++; // BUGBUG: this is bumping up the m_indexInLoop, but I don't think it is initialized anywhere other than PurgeStateForFormingRecurrentLoops(). i-1?
@@ -168,13 +168,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

// checks whether a node is recurrent, and which direction
static vector<int> emptyVector;
static const vector<int> & GetRecurrenceDirections(const ComputationNodeBasePtr & node)
static int GetRecurrenceSteppingDirection(const ComputationNodeBasePtr & node)
{
    if (node->Is<IRecurrentNode>())
        return node->As<IRecurrentNode>()->GetRecurrenceDirections();
        return node->As<IRecurrentNode>()->GetRecurrenceSteppingDirection();
    else
        return emptyVector;
        return 0;
}

static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr> & nestedNodes);
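For orientation, the sign convention implied by the rest of this commit: +1 for nodes that read past frames (PastValue-like), -1 for nodes that read future frames (FutureValue-like), 0 for non-recurrent nodes. A self-contained sketch with hypothetical stand-in functions (not CNTK code) that mirror the two implementations in the diff:

// DelayedValueNodeBase returns -direction, where direction is -1 for PastValue
// and +1 for FutureValue ...
int SteppingDirectionOfDelayedValue(int direction) { return -direction; }

// ... and ShiftNode derives the sign from its offset window; only
// boundaryMode == reachAcross counts as recurrent.
int SteppingDirectionOfShift(int fromOffset, int numSteps, bool reachAcross)
{
    if (!reachAcross)               return 0;  // duplicate/trim: not recurrent
    if (fromOffset + numSteps <= 0) return +1; // window lies entirely in the past
    if (fromOffset > 0)             return -1; // window lies entirely in the future
    return 0;                                  // window straddles t: not a valid recurrence
}

int main()
{
    // PastValue and Shift(fromOffset=-1, numSteps=1, reachAcross) should agree: both +1
    return SteppingDirectionOfDelayedValue(-1) == SteppingDirectionOfShift(-1, 1, true) ? 0 : 1;
}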
@@ -308,7 +307,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
visited.insert(cur);
recStack.insert(cur);

if (GetRecurrenceDirections(cur).empty()) // recurrence stops at delays
if (GetRecurrenceSteppingDirection(cur) == 0) // recurrence stops at delay nodes
{
    for (size_t i = 0; i < cur->GetNumInputs(); i++)
        if (cur->Input(i)->m_loopId == cur->m_loopId)
@@ -395,25 +394,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// BUGBUG: Need to extend to multi-dimensional loop directions. Use a vector<int>.
static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr> & nestedNodes)
{
    vector<int> recurrenceDirections;
    int steppingDirection = 0;

    for (auto & node : nestedNodes)
    {
        const auto & dirs = GetRecurrenceDirections(node);
        if (dirs.empty()) // not a recurrent node
        int dir = GetRecurrenceSteppingDirection(node);
        if (dir == 0) // not a recurrent node
            continue;
        if (recurrenceDirections.empty())
            recurrenceDirections = dirs;
        else if (recurrenceDirections != dirs)
            InvalidArgument("It is not allowed to have multiple different recurrence directions in the same loop (loop connected to %ls %ls operation).",
        if (steppingDirection == 0)
            steppingDirection = dir;
        else if (steppingDirection != dir)
            InvalidArgument("It is not allowed to have multiple different stepping directions in the same loop (loop connected to %ls %ls operation).",
                nestedNodes.front()->NodeName().c_str(), nestedNodes.front()->OperationName().c_str());
    }

    if (recurrenceDirections.empty())
    if (steppingDirection == 0)
        LogicError("There is no recurrent node in the loop connected to %ls %ls operation.",
            nestedNodes.front()->NodeName().c_str(), nestedNodes.front()->OperationName().c_str());
    // BUGBUG: Multiple recurrence dimensions not yet supported beyond this point.
    return -recurrenceDirections[0];
    return steppingDirection;
}

}}}
@@ -213,30 +213,6 @@ namespace Microsoft {
    return maxRank;
}

// determine the full tensor dimension including padding and multiple samples (MBLayout)
// but without trailing ones (assuming they will be auto-padded by the tensor op)
TensorShape ComputationNodeBase::GetTensorShape(size_t rank, const FrameRange & fr) const
{
    //GetAndValidateSampleLayout(); // no need to validate because rank comes from DetermineElementwiseTensorRank() which validates all
    if (!HasMBLayout())
        return GetSampleLayout().Append(GetSampleLayout().GetRank(), GetNumCols()); // last dim is column dimension
    // TODO: This is not nice! Instead, of no MBLayout then have sample layout explain whole matrix.
    else if (fr.IsAllFrames())
    {
        // we have an MBLayout, and for refers to the entire MB
        return GetSampleLayout().Append(rank, GetMBLayout()->GetNumCols());
    }
    //else if (fr.Sequence != SIZE_MAX) // needs a slice and a two-dim tensor
    //{
    //    return GetAndValidateSampleLayout(); // .Append(rank, 1); // no need to append ones
    //}
    else
    {
        // we have an MBLayout, and fr refers to one frame (across all parallel sequences)
        return GetSampleLayout().Append(rank, GetMBLayout()->GetNumParallelSequences());
    }
}

// -----------------------------------------------------------------------
// others
// -----------------------------------------------------------------------
@@ -353,8 +353,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const TensorShape & GetAndValidateSampleLayout() const; // TODO: Once numRows is consistent with m_sampleLayout, this will go away
size_t DetermineElementwiseTensorRank() const;
public:
TensorShape GetTensorShape(size_t dims, const FrameRange & fr) const;

// access to element(0,0) without having to type-cast
virtual double Get00Element() const = 0;
@@ -1144,15 +1142,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// tensor variants
TensorView<ElemType> DataTensorFor(Matrix<ElemType> & data, size_t rank, const FrameRange & fr)
{
    return TensorView<ElemType>(DataFor(data, fr), GetTensorShape(rank, fr));
    TensorShape tensorShape = GetSampleLayout();
    if (!HasMBLayout())
        tensorShape.AppendInPlace(tensorShape.GetRank(), GetNumCols()); // last dim is column dimension
    // TODO: This is not nice! Instead, of no MBLayout then have sample layout explain whole matrix.
    else if (fr.IsAllFrames()) // we have an MBLayout, and for refers to the entire MB
        tensorShape.AppendInPlace(rank, GetMBLayout()->GetNumCols());
    else // we have an MBLayout, and fr refers to one frame (across all parallel sequences)
        tensorShape.AppendInPlace(rank, GetMBLayout()->GetNumParallelSequences());
    // TODO: determine SmallVector begin, end bounds first, get a narrow full shape, squeeze the dims, then return the tensor
    return TensorView<ElemType>(DataFor(data, fr), tensorShape);
}
TensorView<ElemType> ValueTensorFor(size_t rank, const FrameRange & fr)
{
    return TensorView<ElemType>(ValueFor(fr), GetTensorShape(rank, fr));
    return DataTensorFor(Value(), rank, fr);
}
TensorView<ElemType> GradientTensorFor(size_t rank, const FrameRange & fr)
{
    return TensorView<ElemType>(GradientFor(fr), GetTensorShape(rank, fr));
    return DataTensorFor(Gradient(), rank, fr);
}

// update the actual matrix allocation for m_value based on the node dimension
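A worked example of the shape this inlined logic produces, with made-up dimensions and a toy stand-in for TensorShape::AppendInPlace() (assumed here to pad with ones up to the given rank and then append the new dimension):

#include <cstdio>
#include <vector>

// toy model of TensorShape::AppendInPlace(rank, dim); illustrative only
static std::vector<size_t> Append(std::vector<size_t> shape, size_t rank, size_t dim)
{
    while (shape.size() < rank) shape.push_back(1);
    shape.push_back(dim);
    return shape;
}

int main()
{
    std::vector<size_t> sample = { 512 };  // sample layout
    size_t rank = 1;                       // DetermineElementwiseTensorRank()
    size_t numParallel = 4, numSteps = 20; // MBLayout: 4 sequences x 20 steps = 80 columns

    auto whole = Append(sample, rank, numParallel * numSteps); // fr.IsAllFrames(): [512 x 80]
    auto frame = Append(sample, rank, numParallel);            // one time step:    [512 x 4]

    std::printf("[%zu x %zu] vs. [%zu x %zu]\n", whole[0], whole[1], frame[0], frame[1]);
    return 0;
}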
@@ -1509,7 +1516,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// IRecurrentNode -- helper wrapper class for ComputationNodes that can be recurrent
// =======================================================================

struct IRecurrentNode { virtual const std::vector<int> & GetRecurrenceDirections() const = 0; };
struct IRecurrentNode { virtual int GetRecurrenceSteppingDirection() const = 0; };


// =======================================================================
@@ -26,7 +26,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {

// -----------------------------------------------------------------------
// ShiftNode (input, fromOffset, boundaryValue, dim=-1, offsetRange=1, multiOffsetDim=0) -- delay and rolling window
// ShiftNode (input, fromOffset, boundaryValue, dim=-1, numSteps=1, insertDim=0) -- delay and rolling window
//
// This shifts the input by (-fromOffset) steps. In other words, output(t) will be input(t+fromOffset).
// E.g. for fromOffset=-1, this gives the past value.
@@ -61,9 +61,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// when all involved nodes are implemented using the tensor library. Nodes implemented using
// Matrix slices can only support iterating over time.
//
// The fromOffset can also be a tensor, e.g. (1,1). In that case, iteration will be over multiple
// consecutive dimensions. offsetRange must have the same number of dimensions.
//
// If the boundaryValue has 0 elements, the sequence will be trimmed (frames reaching beyond the boundary
// are dropped). This will initially not be implemented for the time dimension (as it would require
// change of MBLayout).
@@ -75,34 +72,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"Shift"; }
public:
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name, const std::vector<int> & fromOffset, int shiftDimension, const std::vector<size_t> & offsetRange, int expandDimension) :
    Base(deviceId, name), m_fromOffsetBegin(fromOffset),
    m_shiftDimension(shiftDimension), m_expandDimension(expandDimension),
enum BoundaryMode : int // how to fill frames at boundaries
{
    reachAcross = -1, // go across the boundary: use boundaryValue. This is for recurrence.
    duplicate = 0,    // duplicate frame at boundary, e.g. duplicate first frame. Non-recurrent mode only.
    trim = 1          // drop frames. Non-recurrent mode only.
};
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name, int fromOffset, BoundaryMode boundaryMode, int shiftDimension, size_t numSteps, int insertedDimParam) :
    Base(deviceId, name), m_fromOffset(fromOffset), m_numSteps(numSteps),
    m_boundaryMode(boundaryMode),
    m_shiftDimension(shiftDimension), m_insertedDimParam(insertedDimParam),
    m_insertExpandShapeAt(SIZE_MAX/*uninitialized at this point*/)
{
    // determine m_fromOffsetEnd from fromOffset/offsetRange
    bool anyNonRecurrent = false;
    for (size_t k = 0; k < m_fromOffsetBegin.size(); k++)
    {
        m_fromOffsetEnd.push_back(m_fromOffsetBegin[k] + (k < offsetRange.size() ? (int)offsetRange[k] : 1));
        if (m_fromOffsetEnd[k] <= 0)
            m_recurrenceDirections.push_back(-1);
        else if (m_fromOffsetBegin[k] > 0)
            m_recurrenceDirections.push_back(+1);
        else
            m_recurrenceDirections.push_back(0);
        anyNonRecurrent |= m_recurrenceDirections[k] == 0;
    }
    if (anyNonRecurrent)
        m_recurrenceDirections.clear();
    CreateMatrixIfNull(m_value);
    SetDims(TensorShape(), 0); // empty for now
}
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name) :
    ShiftNode(deviceId, name, std::vector<int> { 1 }, -1, std::vector<size_t> { 1 }, 0)
    ShiftNode(deviceId, name, 1, BoundaryMode::reachAcross, -1, 1, 0)
{ }
ShiftNode(const ScriptableObjects::IConfigRecordPtr configp) :
    ShiftNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"fromOffset"), configp->Get(L"dim"), configp->Get(L"offsetRange"), configp->Get(L"multiOffsetDim"))
    ShiftNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"fromOffset"), (BoundaryMode)(int)configp->Get(L"boundaryMode"), configp->Get(L"dim"), configp->Get(L"numSteps"), configp->Get(L"insertedDim"))
{
    // We do NOT attach the inputs, as we cannot resolve the main input without causing a circular reference.
    // Instead, we capture them in a lambda, which will be called by ComputationNetwork during the build process through LateAttachInputs() below.
@@ -122,23 +111,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void Save(File& fstream) const
{
    Base::Save(fstream);

    fstream << m_fromOffsetBegin;
    fstream << m_fromOffsetEnd;
    fstream << m_shiftDimension;
    fstream << m_expandDimension;
    fstream << m_recurrenceDirections;
    fstream << m_fromOffset << m_numSteps << m_boundaryMode << m_shiftDimension << m_insertedDimParam;
}

virtual void Load(File& fstream, size_t modelVersion) override
{
    Base::Load(fstream, modelVersion);

    fstream >> m_fromOffsetBegin;
    fstream >> m_fromOffsetEnd;
    fstream >> m_shiftDimension;
    fstream >> m_expandDimension;
    fstream >> m_recurrenceDirections;
    fstream >> m_fromOffset >> m_numSteps >> m_boundaryMode >> m_shiftDimension >> m_insertedDimParam;
}

virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
@@ -150,19 +129,47 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

virtual void BeginForwardProp() override // called before first iteration step of ForwardProp()
{
    Base::BeginForwardProp();

    // in case of trimming, narrow the layout
    // We actually do not drop content, only reduce the range of sequences.
    // This is meant to optimize for the case where we have multiple sequences concatenated while trimming a small amount only.
}

virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
{
    Base::EndForwardProp();

    // In BPTT, we carry over left-to-right state across minibatches.
    // TODO: package up the state using ExportState(). Then in BeginForwardProp() bring it back. In-between, the packages can be moved around.
    // The necessary frames are stored in m_state->m_delayedValue.

    // Only if layout has anything exceeding the MB.
}

// This function assumes BeginForwardProp/EndForwardProp() to be called before/after the iteration loop.
// TODO: In the future, there may be value for one more way of handling the boundary condition: Fill as 'NoInput'. Then we can use this to implement rolling windows (albeit inefficiently). Would require to unshare the layout.
virtual void ForwardProp(const FrameRange & fr) override
{
    fr;
    // STEP 1: whole-sale copy a shifted version of the input to the output
    //  - consider the saved parts from the last minibatch as part of the input at dimensions beyond the bounds
    //  - ignore boundary conditions for now

    // get the tensors without shift
    size_t rank = DetermineElementwiseTensorRank();
    auto result = ValueTensorFor(rank, fr);
    auto input = Input(0)->ValueTensorFor(rank, fr);

    // shift the dimension in the input

    // STEP 2: fix up the boundary conditions
    //  - fill in xxx

    // turn selected frame and shifted frame into a tensor

    // copy all that's in range

    // fix up all that is not
}

virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
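ForwardProp() above is still a stub; STEP 1 and STEP 2 only sketch the plan. As a plain-array illustration of what STEP 1 means -- output(t) = input(t + fromOffset), with boundary frames deferred to STEP 2 -- here is a hypothetical scalar helper, not CNTK code:

#include <vector>

std::vector<float> ShiftCopy(const std::vector<float>& input, int fromOffset)
{
    std::vector<float> output(input.size(), 0.0f);
    for (size_t t = 0; t < input.size(); t++)
    {
        int s = (int)t + fromOffset;         // output(t) = input(t + fromOffset)
        if (s >= 0 && s < (int)input.size()) // in range: plain copy (STEP 1)
            output[t] = input[(size_t)s];
        // else: boundary frame -- to be filled from boundaryValue or from the
        // delayed state carried over in m_state (STEP 2)
    }
    return output;
}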
@@ -178,40 +185,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (isFinalValidationPass && !m_pMBLayout)
    InvalidArgument("%ls %ls operation must operate on data (must have an MB Layout).", NodeName().c_str(), OperationName().c_str());

// determine expandShape--empty if no multiple offsets; otherwise the 1 or more dimensions that need to be added at m_expandDimension
m_expandShape.clear();
for (size_t k = 0; k < m_fromOffsetBegin.size(); k++)
{
    size_t dim = m_fromOffsetEnd[k] - m_fromOffsetBegin[k];
    if (dim > 1)
    {
        m_expandShape.resize(k, 1);
        m_expandShape.push_back(dim);
    }
}
if (!m_expandShape.empty())
    m_expandShape.resize(m_fromOffsetBegin.size(), 1); // pad ones to end
// now it either matches the dimensions to insert, or is empty if none to append

// determine final sample layout
auto inputSampleLayout = Input(0)->GetSampleLayout();
auto inputDims = inputSampleLayout.GetDims();
if (m_expandDimension < 0)
if (m_insertedDimParam < 0)
    InvalidArgument("%ls %ls operation: Specified insertion location %d refers to a time dimension, but this is not allowed.",
        NodeName().c_str(), OperationName().c_str(), m_expandDimension);
m_insertExpandShapeAt = m_expandShape.empty() ? 0 : (m_expandDimension > 0 ? m_expandDimension - 1 : inputDims.size());
        NodeName().c_str(), OperationName().c_str(), m_insertedDimParam);
m_insertExpandShapeAt = m_numSteps > 1 ? 0 : (m_insertedDimParam > 0 ? m_insertedDimParam - 1 : inputDims.size());
if (m_insertExpandShapeAt > inputDims.size())
    if (isFinalValidationPass)
        InvalidArgument("%ls %ls operation: Specified insertion location %d beyond end of input sample layout [%s].",
            NodeName().c_str(), OperationName().c_str(), m_expandDimension, string(inputSampleLayout).c_str());
            NodeName().c_str(), OperationName().c_str(), m_insertedDimParam, string(inputSampleLayout).c_str());
    else
        m_insertExpandShapeAt = inputDims.size(); // this may be an error, but we want to catch that only in the final pass
SmallVector<size_t> dims;
if (!m_expandShape.empty() && inputDims.size() + m_expandShape.size() > dims.capacity())
if (m_numSteps > 1 && inputDims.size() + 1 > dims.capacity())
    InvalidArgument("%ls %ls operation: Too many dimensions. Did you feed back output of this node without stripping the extra dimensions?",
        NodeName().c_str(), OperationName().c_str());
dims.append(inputDims.begin(), inputDims.begin() + m_insertExpandShapeAt);
dims.append(m_expandShape.begin(), m_expandShape.end());
if (m_numSteps > 1) // insert the new dimension if we expand into more than one step
    dims.push_back(m_numSteps);
dims.append(inputDims.begin() + m_insertExpandShapeAt, inputDims.end());
auto sampleLayout = TensorShape(dims);
@@ -219,9 +212,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

// special interface for use by loop detection
virtual const std::vector<int> & /*IRecurrentNode::*/GetRecurrenceDirections() const override
virtual int /*IRecurrentNode::*/GetRecurrenceSteppingDirection() const override
{
    return m_recurrenceDirections;
    if (m_boundaryMode != BoundaryMode::reachAcross)
        return 0;
    else if (m_fromOffset + (int)m_numSteps <= 0)
        return +1;
    else if (m_fromOffset > 0)
        return -1;
    else
        return 0;
}

virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@@ -230,20 +230,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (flags & CopyNodeFlags::copyNodeValue)
{
    auto node = dynamic_pointer_cast<ShiftNode<ElemType>>(nodeP);
    node->m_fromOffsetBegin = m_fromOffsetBegin;
    node->m_fromOffsetEnd = m_fromOffsetEnd;
    node->m_recurrenceDirections = m_recurrenceDirections;
    node->m_shiftDimension = m_shiftDimension;
    node->m_expandDimension = m_expandDimension;
    node->m_expandShape = m_expandShape;
    node->m_insertExpandShapeAt = m_insertExpandShapeAt;
    node->m_state = m_state;
    node->m_fromOffset = m_fromOffset;
    node->m_numSteps = m_numSteps;
    node->m_boundaryMode = m_boundaryMode;
    node->m_shiftDimension = m_shiftDimension;
    node->m_insertedDimParam = m_insertedDimParam;
    node->m_insertExpandShapeAt = m_insertExpandShapeAt;
    node->m_state = m_state;
}
}

class ShiftNodeState : public INodeState
{
    Matrix<ElemType> m_delayedActivation; // saves the activation of the previous step that this node points to
    Matrix<ElemType> m_delayedValue; // saves the activation of the previous step that this node points to
    vector<MBLayout::SequenceInfo> m_delayedSequences; // and associated sequence info. This is only used for consistency checking (it must match).
    ShiftNodeState(DEVICEID_TYPE deviceId) : m_delayedValue(deviceId) { }
};
typedef std::shared_ptr<ShiftNodeState> ShiftNodeStatePtr;
@@ -260,15 +261,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
protected:
// parameters remembered from construction
std::vector<int> m_fromOffsetBegin; // offset to pull from; first offset in case of offset range
std::vector<int> m_fromOffsetEnd; // end of offset range
int m_fromOffset; // offset to pull from
int m_numSteps; // offset range
BoundaryMode m_boundaryMode; // how to fill at the boundary (reach across, duplicate, or trim)
int m_shiftDimension; // dimension to shift (default: time)
int m_expandDimension; // in case of offset range, this is where a new dimension will be inserted
int m_insertedDimParam; // in case of multiple steps, this is where a new dimension will be inserted

// derived params set up in Validate()
SmallVector<size_t> m_expandShape; // offsetEnd-offsetBegin if >1 offset in any dimension; empty otherwise
size_t m_insertExpandShapeAt; // at which dimension to insert (internal 0-based index)
std::vector<int> m_recurrenceDirections; // for GetRecurrenceDirections()

ShiftNodeStatePtr m_state; // saves the activation of the previous step that this node points to
@@ -350,7 +350,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
    m_initialActivationValue = initialActivationValue;
    m_timeStep = 1;
    m_recurrenceDirections.push_back(direction);
    CreateMatrixIfNull(m_value);
    SetDims(sampleLayout, 0);
    m_value->SetValue(m_initialActivationValue); // is this needed?
@@ -358,13 +357,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
protected:
DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring & name) :
    Base(deviceId, name),
    m_delayedActivation(deviceId)
    m_delayedValue(deviceId)
{
    Init(TensorShape(), (ElemType)DEFAULT_HIDDEN_ACTIVATION);
}
DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, const TensorShape & sampleLayout, size_t timeStep) :
    Base(deviceId, name),
    m_delayedActivation(deviceId)
    m_delayedValue(deviceId)
{
    Init(sampleLayout, initialActivationValue);
    m_timeStep = (int)timeStep; // TODO: pass this to Init() instead as well
@@ -408,7 +407,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

// Note: Do we need load cols for delay node? I just set to zero to see if there is any problem.
SetDims(TensorShape(rows), 0); // tensor shape will be overwritten in Validate() --TODO: We should serialize it here.
m_delayedActivation.Resize(rows, 0); // Note: If we try to access history in first minibatch, we shall crash. It would be a consequence of a missing sentence-begin flag
m_delayedValue.Resize(rows, 0); // Note: If we try to access history in first minibatch, we shall crash. It would be a consequence of a missing sentence-begin flag

if (modelVersion >= CNTK_MODEL_VERSION_2)
    fstream >> m_initialActivationValue;
@@ -488,14 +487,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
{
    // In BPTT, we carry over left-to-right state across minibatches.
    // It is kept in m_delayedActivation, m_delayedActivationMBLayout.
    // It is kept in m_delayedValue, m_delayedActivationMBLayout.
    // This could be optimized as follows:
    //  - only keep the required number of frames (m_timeStep)
    //  - we don't need to keep anything in full-sequence mode
    //  - we don't need to keep anything if all sequences are closed (sentence end)
    //    This condition includes full-sequence mode.
    // TODO: Can we optimize this and only copy if there is a sequence spanning across the end of the MB? And add a check to BeginForwardProp() to make sure we got one if there is a boundary at the start?
    m_delayedActivation = Input(0)->Value();
    m_delayedValue = Input(0)->Value();
    if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
    m_delayedActivationMBLayout->CopyFrom(m_pMBLayout);
@@ -555,9 +554,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
    // inside the sequence: access delayed value
    if (t_delayed < 0)
        inp = DataWithMBLayoutFor(m_delayedActivation, FrameRange(m_delayedActivationMBLayout, t_delayed + T_delayedActivation).Sequence(id), m_delayedActivationMBLayout); // delay reaches in previous minibatch
        inp = DataWithMBLayoutFor(m_delayedValue, FrameRange(m_delayedActivationMBLayout, t_delayed + T_delayedActivation).Sequence(id), m_delayedActivationMBLayout); // delay reaches in previous minibatch
    else if (t_delayed >= T)
        inp = DataWithMBLayoutFor(m_delayedActivation, FrameRange(m_delayedActivationMBLayout, t_delayed - T).Sequence(id), m_delayedActivationMBLayout); // delay reaches in previous minibatch
        inp = DataWithMBLayoutFor(m_delayedValue, FrameRange(m_delayedActivationMBLayout, t_delayed - T).Sequence(id), m_delayedActivationMBLayout); // delay reaches in previous minibatch
    else
        inp = Input(0)->ValueFor(frDelayed.Sequence(id));
        //inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed).Sequence(id));
@@ -571,9 +570,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> out = ValueFor(fr);

if (t_delayed < 0)
    inp = DataWithMBLayoutFor(m_delayedActivation, FrameRange(m_delayedActivationMBLayout, t_delayed + T_delayedActivation), m_delayedActivationMBLayout);
    inp = DataWithMBLayoutFor(m_delayedValue, FrameRange(m_delayedActivationMBLayout, t_delayed + T_delayedActivation), m_delayedActivationMBLayout);
else if (t_delayed >= T)
    inp = DataWithMBLayoutFor(m_delayedActivation, FrameRange(m_delayedActivationMBLayout, t_delayed - T), m_delayedActivationMBLayout);
    inp = DataWithMBLayoutFor(m_delayedValue, FrameRange(m_delayedActivationMBLayout, t_delayed - T), m_delayedActivationMBLayout);
else
    inp = Input(0)->ValueFor(frDelayed);
    //inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed));
@@ -587,10 +586,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    ValidateUnaryMap(isFinalValidationPass);
}

// special interface for use by loop detection
virtual const std::vector<int> & /*IRecurrentNode::*/GetRecurrenceDirections() const override
virtual int /*IRecurrentNode::*/GetRecurrenceSteppingDirection() const override
{
    return m_recurrenceDirections;
    return -direction;
}

virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@@ -601,7 +599,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto node = dynamic_pointer_cast<DelayedValueNodeBase<ElemType, direction/*, SequenceStart_or_End*/>>(nodeP);
node->m_timeStep = m_timeStep;
node->m_initialActivationValue = m_initialActivationValue;
node->m_delayedActivation = m_delayedActivation;
node->m_delayedValue = m_delayedValue;
if (m_delayedActivationMBLayout)
    (node->m_delayedActivationMBLayout = make_shared<MBLayout>())->CopyFrom(m_delayedActivationMBLayout);
else
@@ -652,7 +650,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else
{
    auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
    pState->CacheState(m_delayedActivation.ColumnSlice((nT - 1)*nU, nU));
    pState->CacheState(m_delayedValue.ColumnSlice((nT - 1)*nU, nU));
    pState->CacheDelayedMBLayout(m_delayedActivationMBLayout);
    pExportedState = pState;
}
@@ -686,7 +684,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else
{
    auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
    pState->CacheState(m_delayedActivation.ColumnSlice((nT-1)*nU, nU));
    pState->CacheState(m_delayedValue.ColumnSlice((nT-1)*nU, nU));
    pState->CacheDelayedMBLayout(m_delayedActivationMBLayout);
    pExportedState = pState;
}
@@ -718,12 +716,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
int dir = direction;
if (dir == -1) // looking backward
{
    m_delayedActivation.SetColumnSlice(delayedActivation, (nT - 1)*nU, nU);
    m_delayedValue.SetColumnSlice(delayedActivation, (nT - 1)*nU, nU);
}
if (dir == 1)
{
    //m_delayedActivation.CopyColumnsStrided(delayedActivation, nU, 1, nT);
    m_delayedActivation.SetColumnSlice(delayedActivation, 0, nU);
    //m_delayedValue.CopyColumnsStrided(delayedActivation, nU, 1, nT);
    m_delayedValue.SetColumnSlice(delayedActivation, 0, nU);
}
if (dir != -1 && dir == 1)
{// it is really a compile error ?
@@ -733,15 +731,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
protected:

ElemType m_initialActivationValue; // starting value for hidden activation vector at boundary
Matrix<ElemType> m_delayedActivation; // saves the activation of the previous step that this node points to
MBLayoutPtr m_delayedActivationMBLayout; // layout for m_delayedActivation
Matrix<ElemType> m_delayedValue; // saves the activation of the previous step that this node points to
MBLayoutPtr m_delayedActivationMBLayout; // layout for m_delayedValue
int m_timeStep; // delay in frames (typ. 1)
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
std::vector<int> m_recurrenceDirections; // for GetRecurrenceDirections()
};

#define UsingDelayedValueNodeMembers UsingComputationNodeMembersBoilerplate; \
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep;
using Base::m_initialActivationValue; using Base::m_delayedValue; using Base::m_timeStep;

// -----------------------------------------------------------------------
// PastValueNode (input) -- delay node
@@ -67,7 +67,7 @@ speechTrain = [

// LSTM cell
# TODO: This is temporary test code for the new ShiftNode (until we switch PastValue() itself over)
PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1, offsetRanges=1, multiOffsetDim=2)
PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1, numSteps=1, insertedDim=2)
PastValue1 = PastValue
#PastValue1 = PastValueShift
dh = PastValue1(outputDim, output); // hidden state(t-1)