From c918f3b6b88d5119cb37ebd1122621808a90301f Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Tue, 15 Dec 2015 15:02:43 -0800
Subject: [PATCH] started the TensorView class by adding header and CPP to math lib

---
 Common/Include/DataTensor.h    | 124 ++++++++++++------
 .../ComputationNode.h          |   1 +
 Makefile                       |   1 +
 Math/Math/Math.vcxproj         |   2 +
 Math/Math/Math.vcxproj.filters |   9 ++
 Math/Math/Matrix.cpp           |   3 +-
 6 files changed, 97 insertions(+), 43 deletions(-)

diff --git a/Common/Include/DataTensor.h b/Common/Include/DataTensor.h
index 731e7d14d..7f5a0af1a 100644
--- a/Common/Include/DataTensor.h
+++ b/Common/Include/DataTensor.h
@@ -8,7 +8,9 @@
 #pragma once

 #include "Basics.h"
+#include "File.h"
 #include <vector>
+#include <string>

 namespace Microsoft { namespace MSR { namespace CNTK {
@@ -84,81 +86,121 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     struct TensorShape
     {
     public:
-        // BUGBUG: This initialization is not correct. This must match GetNumRows(). We probably cannot have all three members here.
-        // Idea: We could construct this thing with a ref to the enclosing ComputationNode, and replace 'width' by an expression.
-        TensorShape() : m_tensorDims(3, 1) { }
         template<class VEC>
-        TensorShape(const VEC & dims) { m_tensorDims.reserve(dims.size()); m_tensorDims.assign(dims.begin(), dims.end()); }
-        TensorShape(std::vector<size_t> && dims) : m_tensorDims(std::move(dims)) { }
+        TensorShape(const VEC & dims)
+        {
+            m_dims.reserve(dims.size());
+            m_dims.assign(dims.begin(), dims.end());
+            InitAsNoSlice();
+        }
+        // convenience constructors, e.g. for test code
+        TensorShape(size_t I) : TensorShape(std::vector<size_t> { I }) { }
+        TensorShape(size_t I, size_t J) : TensorShape(std::vector<size_t> { I, J }) { }
+        TensorShape(size_t I, size_t J, size_t K) : TensorShape(std::vector<size_t> { I, J, K }) { }
+        TensorShape(size_t I, size_t J, size_t K, size_t L) : TensorShape(std::vector<size_t> { I, J, K, L }) { }
+        TensorShape(size_t I, size_t J, size_t K, size_t L, size_t M) : TensorShape(std::vector<size_t> { I, J, K, L, M }) { }
+        // BUGBUG: This default initialization is not correct. This must match GetNumRows(). We probably cannot have all three members here.
+        TensorShape() : TensorShape(1, 1, 1) { }
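+        // Example: TensorShape(2, 3, 4) delegates to
+        // TensorShape(std::vector<size_t> { 2, 3, 4 }) and describes a dense
+        // 2 x 3 x 4 tensor with GetNumElements() == 24.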
-        void Invalidate() { m_tensorDims.assign(3, SIZE_MAX); }    // TODO: clean up the valid/invalid situation (this is currently done inconsistently)
+        // boilerplate
+        TensorShape(std::vector<size_t> && dims) : m_dims(std::move(dims)) { InitAsNoSlice(); }    // (also the target of the convenience constructors above)
+        bool operator==(const TensorShape & other) const { return m_dims == other.m_dims; }        // (compares dimensions only, not offset/multipliers)
+
+        void Invalidate() { m_dims.assign(3, SIZE_MAX); }  // TODO: clean up the valid/invalid situation (this is currently done inconsistently). Also note that this object is otherwise meant to be immutable.
         // TODO: need move constructor/assignment?

-        bool operator==(const TensorShape & other) const { return m_tensorDims == other.m_tensorDims; }
-
         void Save(File& fstream) const
         {
-#if 1
+            if (m_offset != 0)
+                LogicError("TensorShape::Save(): Cannot serialize TensorShape for slices.");
             // saving as 32-bit ints. This allows us to continue to support the old format (size_t W, H, C)
-            fstream << (uint32_t)m_tensorDims.size();
-            for (auto dim : m_tensorDims)
+            fstream << (uint32_t)m_dims.size();
+            size_t mul = 1;
+            for (size_t k = 0; k < m_dims.size(); k++)
             {
+                auto dim = m_dims[k];
                 if (dim > UINT32_MAX)
-                    LogicError("TensorShape::Save(): Tensor dimension out of bounds (> 4G).");
+                    LogicError("TensorShape::Save(): Tensor dimensions %s out of bounds (> 4G).", string(*this).c_str());
                 fstream << (uint32_t)dim;
+                if (m_multipliers[k] != mul)    // multipliers must be the dense defaults; a sliced/strided TensorShape cannot be serialized
+                    LogicError("TensorShape::Save(): Cannot serialize TensorShape for slices.");
+                mul *= dim;
             }
-#else
-            // TODO: need to use a generic format
-            assert(m_tensorDims.size() == 3);   // current format does not understand anything else
-            fstream << m_tensorDims[1] << m_tensorDims[2] << m_tensorDims[0]; // currently stored in order W, H, C. TODO: general tensor format will be different
-#endif
         }
         void Load(File& fstream)
         {
-#if 1
             // format: uint32_t n, dim[0], dim[1], ..., dim[n-1]
             // We are also able to read (but not write) an older format, which stores 3-dimensional tensors as size_t W, H, C
             uint32_t n, dim;
             fstream >> n >> dim;
             if (dim)    // heuristic to detect the old format. Old format stores a size_t, i.e. the second uint32_t is 0 (no dimensions are > 4G)
             {
-                m_tensorDims.resize(n);
-                m_tensorDims[0] = dim;
+                m_dims.resize(n);
+                m_dims[0] = dim;
                 for (size_t i = 1; i < n; i++)
                 {
                     fstream >> dim;
-                    m_tensorDims[i] = dim;
+                    m_dims[i] = dim;
                 }
-                assert(n == m_tensorDims.size());
+                assert(n == m_dims.size());
             }
             else        // detected the old size_t W, H, C format
             {
-                m_tensorDims.resize(3);     // current format is hard-coded for 3, for back compat
-                m_tensorDims[1] = n;
-                fstream >> m_tensorDims[2] >> m_tensorDims[0]; // currently stored in order W, H, C. TODO: general tensor format will be different
-            }
-#else
-            // TODO: need to use a generic format
-            m_tensorDims.resize(3);     // current format is hard-coded for 3, for back compat
-            fstream >> m_tensorDims[1] >> m_tensorDims[2] >> m_tensorDims[0]; // currently stored in order W, H, C. TODO: general tensor format will be different
-#endif
+                m_dims.resize(3);           // current format is hard-coded for 3, for back compat
+                m_dims[1] = n;
+                fstream >> m_dims[2] >> m_dims[0]; // currently stored in order W, H, C. TODO: general tensor format will be different
+            }
+            InitAsNoSlice();                // dimensions were just loaded: recompute offset/multipliers (a loaded TensorShape is always dense)
         }
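+        // On-disk format example: a 2 x 3 x 4 tensor is saved as the four
+        // uint32_t values 3, 2, 3, 4 (rank first, then each dimension).
+        // The old format instead wrote three 64-bit size_t values W, H, C.
+        // Since no dimension may exceed 4G, the high word of W reads back as
+        // a zero second uint32_t on little-endian hardware, which is what the
+        // heuristic in Load() tests for.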
         // accessors
-        size_t GetDim(size_t k) const { return m_tensorDims[k]; }
-        size_t GetNumDims() const { return m_tensorDims.size(); }
-        size_t GetNumElements() const { size_t res = 1; for (auto & dim : m_tensorDims) res *= dim; return res; }
+        size_t GetDim(size_t k) const { return m_dims[k]; }
+        size_t GetNumDims() const { return m_dims.size(); }
+        size_t GetNumElements() const { size_t res = 1; for (auto & dim : m_dims) res *= dim; return res; }
+        size_t size() const { return GetNumDims(); }

-        const std::vector<size_t> & GetDims() const { return m_tensorDims; }   // get all, e.g. for logging or for constructing derived tensors with edited dimensions
+        // vector-like accessors
+        size_t operator[](size_t k) const { return GetDim(k); }
+
+        const std::vector<size_t> & GetDims() const { return m_dims; }         // get all, e.g. for logging or for constructing derived tensors with edited dimensions

         // interpretation as an image tensor
-        size_t GetNumChannels() const { return m_tensorDims[0]; }
-        size_t GetWidth() const { return m_tensorDims[1]; }
-        size_t GetHeight() const { return m_tensorDims[2]; }
+        size_t GetNumChannels() const { return m_dims[0]; }
+        size_t GetWidth() const { return m_dims[1]; }
+        size_t GetHeight() const { return m_dims[2]; }
+
+        // pretty-printing. Returns tensor dims in the form "I x J x K".
+        operator std::string() const
+        {
+            std::string s;
+            for (const auto & dim : m_dims)
+            {
+                if (!s.empty())
+                    s.append(" x ");
+                s.append(std::to_string(dim));
+            }
+            return s;
+        }

     private:
-        std::vector<size_t> m_tensorDims;
+        // (re)initialize m_offset, m_multipliers, and m_storageSize for a dense tensor (no slicing, column-major layout)
+        void InitAsNoSlice()
+        {
+            m_offset = 0;
+            m_multipliers.resize(m_dims.size());
+            size_t mul = 1;
+            for (size_t k = 0; k < m_dims.size(); k++)
+            {
+                m_multipliers[k] = mul;     // stride of dimension k = product of all dimensions below it
+                mul *= m_dims[k];
+            }
+            m_storageSize = mul;
+        }
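+        // Worked example: for m_dims = { 2, 3, 4 } this computes
+        // m_multipliers = { 1, 2, 6 } and m_storageSize = 24, so element
+        // (i, j, k) is stored at m_offset + i * 1 + j * 2 + k * 6, i.e. a
+        // column-major layout in which the first dimension varies fastest.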
+
+    private:
+        std::vector<size_t> m_dims;         // dimensions of tensor or tensor slice
+        size_t m_offset;                    // offset to the first element (may be non-0 in case of a slice)
+        std::vector<size_t> m_multipliers;  // the index along dimension k gets multiplied by m_multipliers[k] when computing an element's offset. Note: non-default values here may be used for stride magic.
+        size_t m_storageSize;               // size of the underlying storage object
     };

     // When constructing an image tensor with the usual W, H, C format, use the following function instead.
@@ -168,12 +210,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     // This will get fixed once we get more complete arbitrary tensor support throughout, including better-defined inference rules.
     static inline TensorShape ImageLayoutWHC(size_t width, size_t height, size_t channels)
     {
-        return TensorShape(std::vector<size_t> { channels, width, height });
+        return TensorShape(channels, width, height);
     }
     // and use this one when the data is a plain vector
     static inline TensorShape ImageLayoutVector(size_t n)
     {
-        return TensorShape(std::vector<size_t> { 1, 1, n });   // for now storing it as a 3D object as well --TODO: fix this
+        return TensorShape(1, 1, n);                           // for now storing it as a 3D object as well --TODO: fix this
     }

     // TODO: we need a constructor from config; that will allow us to generalize
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
index ab12cdf87..48b3ae40a 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
@@ -7,6 +7,7 @@

 #include "Basics.h"
 #include "Matrix.h"
+#include "TensorView.h"
 #include "ScriptableObjects.h"
 #include "Sequences.h"
 #include "DataTensor.h"
diff --git a/Makefile b/Makefile
index bdbba9285..6dfefa38e 100644
--- a/Makefile
+++ b/Makefile
@@ -226,6 +226,7 @@ MATH_SRC =\
 	Math/Math/MatrixQuantizerCPU.cpp \
 	Math/Math/QuantizedMatrix.cpp \
 	Math/Math/Matrix.cpp \
+	Math/Math/TensorView.cpp \
 	Math/Math/CUDAPageLockedMemAllocator.cpp \
 	Math/Math/ConvolutionEngine.cpp \

diff --git a/Math/Math/Math.vcxproj b/Math/Math/Math.vcxproj
index e818d68fa..cf30de463 100644
--- a/Math/Math/Math.vcxproj
+++ b/Math/Math/Math.vcxproj
@@ -162,6 +162,7 @@
+    <ClCompile Include="TensorView.cpp" />
@@ -210,6 +211,7 @@
+    <ClInclude Include="TensorView.h" />
diff --git a/Math/Math/Math.vcxproj.filters b/Math/Math/Math.vcxproj.filters
index 624705bbe..a52811bca 100644
--- a/Math/Math/Math.vcxproj.filters
+++ b/Math/Math/Math.vcxproj.filters
@@ -31,6 +31,9 @@
       <Filter>GPU\1bitSGD</Filter>
     </ClCompile>
+    <ClCompile Include="TensorView.cpp">
+      <Filter>Tensors</Filter>
+    </ClCompile>
@@ -64,6 +67,9 @@
+    <ClInclude Include="TensorView.h">
+      <Filter>Tensors</Filter>
+    </ClInclude>
@@ -99,5 +105,8 @@
       <UniqueIdentifier>{af1f6489-f531-4338-a4c5-ebe52b884e5c}</UniqueIdentifier>
     </Filter>
+    <Filter Include="Tensors">
+      <UniqueIdentifier>{70fb07cf-603e-4444-bc10-f0add4920fd2}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp
index 3bee1253b..10e7a2cf7 100755
--- a/Math/Math/Matrix.cpp
+++ b/Math/Math/Matrix.cpp
@@ -1,4 +1,4 @@
-// Matrix.cpp -- main CPP file that contains all functions exported by the CNTKMath.dll
+// Matrix.cpp -- main CPP file that contains all Matrix functions exported by the CNTKMath.dll
 //
 //
 // Copyright (c) Microsoft Corporation. All rights reserved.
@@ -27,7 +27,6 @@
 #define min(a,b) (((a) < (b)) ? (a) : (b))
 #endif

-
 //before calling the following macro the current matrix location and matrix type on MatrixPointerToCheck must have been set correctly
 #define DISPATCH_MATRIX_ON_FLAG(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
     {                                                                                                                   \
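
Note on the new files: Math/Math/TensorView.h and TensorView.cpp are added by this commit (see the Makefile and project-file hunks above), but their contents are not part of this excerpt. As a rough sketch only -- an assumption about the intended direction, not the actual file contents -- a first cut of TensorView could simply pair a storage Matrix with a TensorShape:

    // hypothetical sketch of TensorView.h; the real file is not shown in this patch
    template <class ElemType>
    class TensorView
    {
    public:
        // cast a Matrix (the storage object) as a tensor of the given shape
        TensorView(Matrix<ElemType> & sob, const TensorShape & shape) :
            m_sob(sob), m_shape(shape)
        { }
        const TensorShape & GetShape() const { return m_shape; }
    private:
        Matrix<ElemType> & m_sob;   // underlying storage object
        TensorShape m_shape;        // shape/strides that reinterpret m_sob
    };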