factored out new function TensorSliceWithMBLayoutFor() from DataTensorFor(), for use by ShiftNode

This commit is contained in:
Frank Seide 2016-01-09 16:30:25 -08:00
Родитель db9de92184
Коммит c886f32d6d
5 изменённых файлов: 75 добавлений и 67 удалений

Просмотреть файл

@ -427,7 +427,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public: // TODO: make private (currently used from masking and DataFor) ; TODO: rename all members with m_ prefix
size_t timeIdxInSeq; // start frame; SIZE_MAX = all frames in MB
ptrdiff_t m_timeOffset; // this is added to timeIdxInSeq wherever it is used
size_t seqIndex; // sequence index; SIZE_MAX = all sequences in MB (most common case)
size_t seqIndex; // parallel-sequence index; SIZE_MAX = all sequences in MB (most common case) --TODO: Bad name, 'sequence' and 'parallel sequence' are two different things
MBLayoutPtr m_pMBLayout; // layout associated with this
bool m_broadcastAllowed; // frame range may be broadcast from outer layout (e.g. a matrix with NULL layout and 1 column is acceptable to this frame range)
const FrameRange *parent; // or NULL: parent range, relative to which this FrameRange is interpreted --TODO: not used yet
@ -657,14 +657,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return m_columnsValidityMask;
}
// class for defining an iteration over a sequence
// Currently supports time sequences, forward and backward.
// TODO: It is meant to some day generalize to multi-dimensional iterations, e.g. across an image:
// - abstract delay direction to be multi-dimensional (let's call it FrameStep)
// - DelayedValueNode::direction gets replaced with a FrameStep
// - recInfo->m_steppingDirection will be replaced by a FrameStep
// - FrameRangeIterator derives from FrameStep, and operator++ adds that to FrameRange
// Longer-term, we will also have nested structures. For those, FrameRangeIterations will be able to be instantiated from FrameRange objects to loop over their nested dimension.
// class for defining an iteration over a sequence, forward and backward
// One day, we may also have nested structures. For those, FrameRangeIterations will be able to be instantiated from FrameRange objects to loop over their nested dimension.
class FrameRangeIteration
{
MBLayoutPtr m_pMBLayout;
@ -675,7 +669,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// - for left-to-right models -> pass step = +1
// - for right-to-left models -> pass step = -1
FrameRangeIteration(MBLayoutPtr pMBLayout, int step) : m_pMBLayout(pMBLayout), m_step(step) { }
// in the future we may consider multi-dimensional iterators such as iterators over images
// This class is returned by begin() and end().
// It is a FrameRange with additions ++ and != operators needed in the for loop.
class FrameRangeIterator : public FrameRange
@ -782,12 +775,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// -----------------------------------------------------------------------
// TensorSliceWithMBLayoutFor() -- Return tensor slice for a FrameRange of a Matrix with specified number of columns with a given MBLayout
// TensorSliceWithMBLayoutFor() -- Return tensor slice for a FrameRange with specified number of columns with a given MBLayout
// This implements the logic of interpreting the FrameRange object.
// Unlike the matrix version above, this supports iteration indices other than time.
// TODO: This ^^. Still missing is a field to identify the index.
// -----------------------------------------------------------------------
template<class DimensionVector> // e.g. std::vector<size_t> or SmallVector<size_t>
static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayoutFor(const DimensionVector & shape/*of data matrix to slice*/,
const FrameRange & fr/*select frame or entire batch*/,
static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayoutFor(const DimensionVector & shape/*actual tensor shape of 'data'*/,
const FrameRange & fr/*select frame or entire batch from 'data'*/,
const MBLayoutPtr & pMBLayout/*the MB layout of 'data'*/)
{
std::pair<DimensionVector, DimensionVector> result;
@ -795,9 +791,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// this creates a slice for the entire matrix, which we will then narrow down
result.first.resize(shape.size(), 0);
result.second = shape;
fr; pMBLayout;
#if 0
// get position of time and sequence index
// These are only valid if we have a layout.
// In the future, the 'timeDim' will be identified by the FrameRange.
size_t sequenceDim = shape.size() - 2; // TODO: In case of multiple time dims, this must be adjusted.
size_t timeDim = sequenceDim + 1; // TODO: Get this from the FrameRange object.
// MBLayout of data and of FrameRange must be identical pointers,
// or in case of broadcasting, respective parent pointers.
// MBLayouts that are identical in content but not object identity (pointer) are not admissible.
@ -806,9 +806,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
// if broadcast allowed then it is allowed to broadcast from an outer-loop value
// Currently, the only 'outer' loop we have is to have no layout.
if (fr.m_broadcastAllowed && !pMBLayout && numCols == 1)
return std::pair<size_t, size_t>(0, numCols);
if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
if (fr.m_pMBLayout/*get data for a loop*/ && !pMBLayout/*'data' is not samples*/ && fr.m_broadcastAllowed/*we're OK with that*/)
; // the time dimension is broadcasting--leave it as is
else if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
LogicError("DataFor: fr's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation?");
else
LogicError("DataFor: fr's MBLayout inconsistent with matrix");
@ -817,43 +817,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// or if we don't even have a layout
// then return the whole matrix
// but as a reference (e.g. it cannot be resized)
if (!pMBLayout || fr.IsAllFrames())
else if (!pMBLayout || fr.IsAllFrames())
{
if (fr.m_timeOffset != 0)
LogicError("DataFor: Time offset must not be specified for FrameRanges that reference the entire minibatch.");
if (fr.m_timeOffset != 0) // entire minibatch with non-zero offset exceeds bounds on at least one side
LogicError("DataFor: Iteration offset must not be specified for FrameRanges that reference the entire minibatch.");
// TODO: Can we allow this? Semantics would be different, it would crop frames outside.
if (fr.seqIndex == SIZE_MAX)
return std::pair<size_t, size_t>(0, numCols);
else
{
if (!pMBLayout)
LogicError("DataFor: Attempting to retrieve a parallel sequence from data without layout.");
#if 1
else
LogicError("DataFor: To retrieve a parallel sequence, implement Matrix::RowSlice() first!");
#else
// get a reshaped view that stacks all sequences into T long vectors
auto mat = data.ColumnSlice(0, data.GetNumCols());
mat.Resize(data.GetNumRows() * pMBLayout->GetNumParallelSequences(), data.GetNumRows() / pMBLayout->GetNumParallelSequences());
return mat; // .RowSlice(fr.seqIndex * data.GetNumRows());
// TODO: Why does RowSlice() not exist? Seems simple. Is there a hidden assumption of contiguous memory?#endif
// TODO: The tensor version of this will support it.
#endif
}
}
// FrameRange refers to a time slice -> return that
else
else if (result.second[timeDim] > 1) // (if time dim is broadcasting then always return that one independent of requested index)
{
size_t numParallelSequences = pMBLayout->GetNumParallelSequences();
size_t startColumn = (fr.timeIdxInSeq + fr.m_timeOffset) * numParallelSequences;
if (startColumn >= numCols)
LogicError("DataFor: FrameRange specifies a time index that is out of range.");
if (fr.seqIndex == SIZE_MAX)
return std::pair<size_t, size_t>(startColumn, numParallelSequences);
else
return std::pair<size_t, size_t>(startColumn + fr.seqIndex, 1);
size_t t = fr.timeIdxInSeq + fr.m_timeOffset;
if (t >= result.second[timeDim])
LogicError("DataFor: FrameRange specifies an iteration index that is out of range.");
result.first[timeDim] = t;
result.second[timeDim] = t + 1;
}
#endif
// sequence index
if (fr.seqIndex != SIZE_MAX/*sequence requested*/ && pMBLayout/*have sequences*/ && result.second[sequenceDim] > 1/*>1 sequence (not broadcasting)*/)
{
size_t s = fr.seqIndex;
if (s >= result.second[sequenceDim])
LogicError("DataFor: FrameRange specifies a paralllel-sequence index that is out of range.");
result.first[sequenceDim] = s;
result.second[sequenceDim] = s + 1;
}
return result;
}

Просмотреть файл

@ -263,7 +263,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t dim = k < size() ? m_dims[k] : 1; // dimensions are bottomless
if (index[k] >= dim)
LogicError("Locate: Tensor index[%d]=%d exceeds bound %d.", (int)k, (int)index[k], (int)dim);
location += (ptrdiff_t)index[k] * m_strides[k]; // strides may be negative
if (k < size())
location += (ptrdiff_t)index[k] * m_strides[k]; // strides may be negative
}
if (location < 0 || (size_t)location >= m_allocation)
LogicError("Locate: Tensor index out of bounds.");
@ -352,13 +353,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
return *this;
}
//TensorShape Concat(const TensorShape & other) const // concatenate
//{
// auto dims = GetDims();
// auto otherDims = other.GetDims();
// dims.append(otherDims.begin(), otherDims.end());
// return TensorShape(std::move(dims));
//}
TensorShape & AppendInPlace(size_t rank, size_t newDim) // concatenate one new dimension at position 'rank'
{
PadInPlace(rank);
@ -374,6 +368,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
result.AppendInPlace(rank, newDim);
return result;
}
template<class DimensionVector>
// Narrow (in place) each dimension k of this tensor to the half-open index
// interval [bounds.first[k], bounds.second[k]).
// The offset is advanced so that element bounds.first becomes the new origin;
// strides are left untouched, so the result is a strided view into the same
// underlying storage (no data is copied).
// 'bounds' must have the same rank as the tensor, and every resulting
// dimension must be at least 1 and lie within the current dimension.
// Returns *this to allow chaining.
TensorShape & NarrowTo(const std::pair<DimensionVector, DimensionVector> & bounds/*begin[], end[]*/)
{
    // validate first, so we never leave the shape half-modified
    if (size() != bounds.first.size() || size() != bounds.second.size())
        LogicError("NarrowTo: Bounds parameter must have same rank as tensor.");
    for (size_t k = 0; k < size(); k++)
        if (bounds.second[k] <= bounds.first[k] || bounds.second[k] > m_dims[k])
            LogicError("NarrowTo: Invalid bounds parameter, dimensions must be at least one.");
    for (size_t k = 0; k < size(); k++)
    {
        m_offset += m_strides[k] * bounds.first[k]; // strides may be negative; offset moves to new origin
        m_dims[k] = bounds.second[k] - bounds.first[k];
    }
    return *this;
}
// pretty-printing. Returns tensor dims in the form "I x J x K".
operator std::string() const

Просмотреть файл

@ -1142,16 +1142,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// tensor variants
TensorView<ElemType> DataTensorFor(Matrix<ElemType> & data, size_t rank, const FrameRange & fr)
{
// form the actual tensor that describes the full object
// If we have an MB layout then add the necessary dimensions. If we have none, then absorb the column dimension.
TensorShape tensorShape = GetSampleLayout(); // TODO: Can this tensor arbitrary strides? In case it came out of a Slice, Reshape, or Transpose op in-place
if (!HasMBLayout())
tensorShape.AppendInPlace(tensorShape.GetRank(), GetNumCols()); // last dim is column dimension
// TODO: This is not nice! Instead, if no MBLayout then have sample layout explain whole matrix.
else if (fr.IsAllFrames()) // we have an MBLayout, and for refers to the entire MB
else
tensorShape.AppendInPlace(rank, GetMBLayout()->GetNumParallelSequences()).AppendInPlace(rank + 1, GetMBLayout()->GetNumTimeSteps());
else // we have an MBLayout, and fr refers to one frame (across all parallel sequences)
tensorShape.AppendInPlace(rank, GetMBLayout()->GetNumParallelSequences()).AppendInPlace(rank + 1, 1);
// TODO: determine SmallVector begin, end bounds first, get a narrow full shape, squeeze the dims, then return the tensor
return TensorView<ElemType>(DataFor(data, fr), tensorShape);
// Now tensorShape fully describes the content of the Matrix object.
// determine the slice dimensions described by the FrameRange
// Note: These are dimensions without strides.
auto slice = TensorSliceWithMBLayoutFor(tensorShape.GetDims(), fr, GetMBLayout());
// narrow the tensor
// Note: Tensor itself may have strides.
tensorShape.NarrowTo(slice);
return TensorView<ElemType>(data, tensorShape);
}
TensorView<ElemType> ValueTensorFor(size_t rank, const FrameRange & fr)
{

Просмотреть файл

@ -44,7 +44,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
{
#ifdef ENABLE_TENSORVIEW
static int c = 0; if (c++ == 0) { fprintf(stderr, "#PLUSBP#\n"); }
//static int c = 0; if (c++ == 0) { fprintf(stderr, "#PLUSBP#\n"); }
size_t rank = DetermineElementwiseTensorRank();
auto gradient = GradientTensorFor(rank, fr);
auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@ -124,7 +124,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
{
#ifdef ENABLE_TENSORVIEW
static int c = 0; if (c++ == 0) { fprintf(stderr, "#PLUS#\n"); }
//static int c = 0; if (c++ == 0) { fprintf(stderr, "#PLUS#\n"); }
size_t rank = DetermineElementwiseTensorRank();
auto result = ValueTensorFor(rank, fr);
auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
@ -267,7 +267,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
{
#ifdef ENABLE_TENSORVIEW
static int c = 0; if (c++ == 0) { fprintf(stderr,"#MINUS#\n"); }
//static int c = 0; if (c++ == 0) { fprintf(stderr,"#MINUS#\n"); }
size_t rank = DetermineElementwiseTensorRank();
auto result = ValueTensorFor(rank, fr);
auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
@ -638,7 +638,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
{
#ifdef ENABLE_TENSORVIEW
static int c = 0; if (c++ == 0) { fprintf(stderr,"#ETIMES#\n"); }
//static int c = 0; if (c++ == 0) { fprintf(stderr,"#ETIMES#\n"); }
size_t rank = DetermineElementwiseTensorRank();
auto result = ValueTensorFor(rank, fr);
auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());

Просмотреть файл

@ -44,6 +44,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
TensorView<ElemType>::TensorView(const TensorView<ElemType> & other, const TensorShape & shape) :
m_sob(other.m_sob.AsReference()), m_shape(shape)
{
#if 0 // disabled since now we use slices, for which this check is no longer correct
// for now we enforce that tensor dimensions match dimensions of the underlying matrix storage object
// This is for sanity checks. In the future, it may appropriate to reduce this check to just checking the total number of elements, to allow abuses.
// TODO: Use the multipliers instead?
@ -57,6 +58,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
colDim *= m_shape[i];
if (rowDim != m_sob.GetNumRows() || colDim != m_sob.GetNumCols())
LogicError("TensorView: Tensor dimensions %s do not match storage-object dims %d x %d", string(m_shape).c_str(), (int)m_sob.GetNumRows(), (int)m_sob.GetNumCols());
#endif
}
// -------------------------------------------------------------------