From 112dd21785612a7255ea09cf062e0d121a53442d Mon Sep 17 00:00:00 2001
From: Alexey Kamenev <alexeyk@microsoft.com>
Date: Wed, 4 May 2016 18:46:56 -0700
Subject: [PATCH] Deconvolution support, part 1.

---
 Examples/Image/MNIST/Config/Macros.ndl        |  4 +
 Source/ActionsLib/NDLNetworkBuilder.cpp       |  7 +-
 .../ComputationNetworkBuilder.cpp             |  8 +-
 .../ComputationNetworkBuilder.h               |  4 +-
 .../ComputationNetworkLib/ComputationNode.h   |  3 +-
 .../ConvolutionalNodes.h                      | 97 +++++++++++--------
 6 files changed, 71 insertions(+), 52 deletions(-)
diff --git a/Examples/Image/MNIST/Config/Macros.ndl b/Examples/Image/MNIST/Config/Macros.ndl
index bae1796a4..4f790b01d 100644
--- a/Examples/Image/MNIST/Config/Macros.ndl
+++ b/Examples/Image/MNIST/Config/Macros.ndl
@@ -48,6 +48,10 @@ ConvND(w, inp, kW, kH, inMap, outMap, hStride, vStride) = [
     c = Convolution(w, inp, {kW, kH, inMap}, mapCount=outMap, stride={hStride, vStride, inMap}, sharing={true, true, true}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, imageLayout=$imageLayout$)
 ]
 
+DeConv(w, inp, kW, kH, inMap, outMap, hStride, vStride) = [
+    c = Convolution(w, inp, {kW, kH, inMap}, mapCount=outMap, stride={hStride, vStride, inMap}, sharing={true, true, true}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, transpose=1, imageLayout=$imageLayout$)
+]
+
 Conv2DReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) = [
     w = ConvW(outMap, inWCount, wScale)
     b = ConvB(outMap, bValue)
diff --git a/Source/ActionsLib/NDLNetworkBuilder.cpp b/Source/ActionsLib/NDLNetworkBuilder.cpp
index f2e8c1199..8f18ffe39 100644
--- a/Source/ActionsLib/NDLNetworkBuilder.cpp
+++ b/Source/ActionsLib/NDLNetworkBuilder.cpp
@@ -299,10 +299,10 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
             if (cnNodeType == OperationNameOf(ConvolutionNode))
             {
                 RuntimeError("%ls: unexpected parameter count. %ls supports 2 modes: \n"
-                             "1. 2D convolution which takes 7 fixed parameters [weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample] \n"
+                             "1. 2D convolution which takes 7 fixed parameters [weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample] \n"
                              "and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"HWC\"|\"cudnn\"]. \n"
                              "2. ND convolution which takes 3 fixed parameters [weightNodeName, inputValueNodeName, kernelShape] and \n"
-                             "9 optional parameters [mapCount = [1|yourvalue], stride = [1|yourvalue], sharing = [true|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n"
+                             "9 optional parameters [mapCount = [1|yourvalue], stride = [1|yourvalue], sharing = [true|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], transpose = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n" // keep the stated count equal to the number of entries listed in the brackets
                              "For ND convolution, parameters kernelShape, mapCount, stride, sharing, autoPadding, lowerPad, upperPad can be arrays, e.g. kernelShape={5, 5, 3}",
                              cnNodeType.c_str(), cnNodeType.c_str());
             }
@@ -387,8 +387,9 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
 
                 if (pool == PoolKind::None)
                 {
+                    bool transpose = node->GetOptionalParameter("transpose", "false");
                     nodePtr = builder.Convolution(NULL, NULL, kernelShape, mapCount, stride, sharing, 
-                                                  autoPad, lowerPad, upperPad, imageLayout, maxTempMemSizeInSamples, name);
+                                                  autoPad, lowerPad, upperPad, transpose, imageLayout, maxTempMemSizeInSamples, name);
                 }
                 else
                 {
diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
index 7ed15ec08..e51715913 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
@@ -249,12 +249,12 @@ template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount,
                                                                                                  const TensorShape& strideShape, const std::vector<bool>& sharing,
                                                                                                  const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                                                                                                 ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
+                                                                                                 bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
 {
     return net.AddNodeToNetWithElemType(New<ConvolutionNode<ElemType>>(net.GetDeviceId(), nodeName,
                                                                        kernelShape, mapCount, strideShape,
                                                                        sharing, autoPadding, lowerPad, upperPad,
-                                                                       imageLayout, maxTempMemSizeInSamples));
+                                                                       transpose, imageLayout, maxTempMemSizeInSamples));
 }
 
 template <class ElemType>
@@ -314,13 +314,13 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convo
                                                                                        const TensorShape& kernelShape, const TensorShape& mapCount, 
                                                                                        const TensorShape& strideShape, const std::vector<bool>& sharing,
                                                                                        const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                                                                                       ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
+                                                                                       bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
                                                                                        const std::wstring nodeName)
 {
     return net.AddNodeToNetAndAttachInputs(New<ConvolutionNode<ElemType>>(net.GetDeviceId(), nodeName,
                                                                           kernelShape, mapCount, strideShape,
                                                                           sharing, autoPadding, lowerPad, upperPad,
-                                                                          imageLayout, maxTempMemSizeInSamples),
+                                                                          transpose, imageLayout, maxTempMemSizeInSamples),
                                                                           { weight, inputValues });
 }
 
diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h
index a7ec97bd5..2dd8845af 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h
+++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h
@@ -54,7 +54,7 @@ public:
     ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
     ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
                                              const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                                             ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
+                                             bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
     ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, 
                                              const size_t horizontalSubsample, const size_t verticalSubsample, 
                                              ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
@@ -81,7 +81,7 @@ public:
                                    const ComputationNodePtr inputValues,
                                    const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
                                    const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                                   ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
+                                   bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
                                    const std::wstring nodeName = L"");
     ComputationNodePtr Pooling(const ComputationNodePtr inputValues, 
                                PoolKind poolKind, const TensorShape& kernelShape, const TensorShape& strideShape,
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index 78056391c..71ab9ae37 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -37,7 +37,8 @@
 #define CNTK_MODEL_VERSION_6 6 // Batch norm blending
 #define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
 #define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
-#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_8
+#define CNTK_MODEL_VERSION_9 9 // Deconvolution and unpooling
+#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_9
 
 extern bool g_shareNodeValueMatrices;
 
diff --git a/Source/ComputationNetworkLib/ConvolutionalNodes.h b/Source/ComputationNetworkLib/ConvolutionalNodes.h
index 3ce940f75..180afa8cc 100644
--- a/Source/ComputationNetworkLib/ConvolutionalNodes.h
+++ b/Source/ComputationNetworkLib/ConvolutionalNodes.h
@@ -52,15 +52,15 @@ class ConvolutionNodeBase : public ComputationNode<ElemType>
 
 public:
     ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
-        : Base(deviceId, name), m_poolKind(PoolKind::None), m_maxTempMemSizeInSamples(0)
+        : Base(deviceId, name), m_poolKind(PoolKind::None), m_transpose(false), m_maxTempMemSizeInSamples(0)
     {
     }
     ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
-                    const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                    PoolKind poolKind, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
-                    : Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing),
-                    m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind),
-                    m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
+                        const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
+                        PoolKind poolKind, bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
+                        : Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing),
+                        m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind), m_transpose(transpose),
+                        m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
     {
     }
 
@@ -79,6 +79,7 @@ public:
         fstream << (int32_t)m_poolKind;
         fstream << (int32_t)m_imageLayout;
         fstream << m_maxTempMemSizeInSamples;
+        fstream << m_transpose; // written after m_maxTempMemSizeInSamples; Load reads it back only when modelVersion >= CNTK_MODEL_VERSION_9
     }
 
     void Load(File& fstream, size_t modelVersion) override
@@ -102,7 +103,11 @@ public:
             fstream >> layout;
             m_imageLayout = (ImageLayoutKind)layout;
             fstream >> m_maxTempMemSizeInSamples;
-    }
+        }
+        if (modelVersion >= CNTK_MODEL_VERSION_9) // the transpose flag is read only for version 9+; for older versions the read is skipped and m_transpose is left unchanged
+        {
+            fstream >> m_transpose;
+        }
     }
 
     void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@@ -119,6 +124,7 @@ public:
             node->m_lowerPad = m_lowerPad;
             node->m_upperPad = m_upperPad;
             node->m_poolKind = m_poolKind;
+            node->m_transpose = m_transpose;
             node->m_imageLayout = m_imageLayout;
             node->m_maxTempMemSizeInSamples = m_maxTempMemSizeInSamples;
         }
@@ -130,19 +136,22 @@ public:
 
         if (m_poolKind == PoolKind::None)
         {
-        if (inputIndex == 0) // derivative with respect to the weight matrix
-        {
-            auto& grad = Input(0)->GradientAsMatrix();
+            if (inputIndex == 0) // derivative with respect to the weight matrix
+            {
+                auto& grad = Input(0)->GradientAsMatrix(); // NOTE(review): this weight-gradient path does not check m_transpose - confirm that is intended for the transposed case
                 auto sliceInput1Value = Input(1)->ValueFor(fr);
                 m_convEng->BackwardKernel(sliceOutputGrad, sliceInput1Value, grad, fr.IsAllFrames(), *m_tempMatrix);
+            }
+            else if (inputIndex == 1) // derivative with respect to the input feature
+            {
+                auto& input0 = Input(0)->ValueAsMatrix();
+                auto sliceInput1Grad = Input(1)->GradientFor(fr);
+                if (!m_transpose) // regular convolution: the input gradient comes from the engine's BackwardData
+                    m_convEng->BackwardData(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix);
+                else // transposed convolution: the engine's Forward is used to produce the input gradient instead
+                    m_convEng->Forward(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix); // NOTE(review): confirm whether Forward adds to sliceInput1Grad or overwrites it - TODO verify against the engine
+            }
         }
-        else if (inputIndex == 1) // derivative with respect to the input feature
-        {
-            auto& input0 = Input(0)->ValueAsMatrix();
-            auto sliceInput1Grad = Input(1)->GradientFor(fr);
-                m_convEng->BackwardData(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix);
-        }
-    }
         else
         {
             Matrix<ElemType> sliceInput0Grad = Input(0)->GradientFor(fr);
@@ -166,9 +175,12 @@ public:
 
         if (m_poolKind == PoolKind::None)
         {
-        const Matrix<ElemType>& input0 = Input(0)->ValueAsMatrix();
-        Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
-            m_convEng->Forward(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix);
+            const Matrix<ElemType>& input0 = Input(0)->ValueAsMatrix(); // input 0 is the same operand that is checked as the weight matrix during validation
+            Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr); // input 1 values for the frame range fr
+            if (!m_transpose) // regular convolution: output computed by the engine's Forward
+                m_convEng->Forward(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix);
+            else // transposed convolution: output computed by the engine's BackwardData, mirroring the swap made in the input-gradient path
+                m_convEng->BackwardData(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix);
         }
         else
         {
@@ -195,6 +207,7 @@ protected:
     TensorShape m_lowerPad;
     TensorShape m_upperPad;
     PoolKind m_poolKind;
+    bool m_transpose;
     ImageLayoutKind m_imageLayout;
 
     size_t m_maxTempMemSizeInSamples;
@@ -241,8 +254,8 @@ public:
     }
     ConvolutionNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
                     const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                    ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
-                    : Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, imageLayout, maxTempMemSizeInSamples),
+                    bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
+                    : Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, transpose, imageLayout, maxTempMemSizeInSamples),
                     m_convolution2D(false)
     {
     }
@@ -250,16 +263,16 @@ public:
                     const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayout,
                     bool zeroPadding, size_t maxTempMemSizeInSamples)
                     : ConvolutionNode(deviceId, name, TensorShape(kernelWidth, kernelHeight, 1), TensorShape(1, 1, outputChannels),
-                                      TensorShape(horizontalSubsample, verticalSubsample, 1), vector<bool>{true}, 
+                                      TensorShape(horizontalSubsample, verticalSubsample, 1), vector<bool>{true},
                                       vector<bool>{zeroPadding}, TensorShape(0), TensorShape(0),
-                                      imageLayout, maxTempMemSizeInSamples)
+                                      false, imageLayout, maxTempMemSizeInSamples)
     {
         m_convolution2D = true;
     }
     ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp)
         : ConvolutionNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"kernelShape"), configp->Get(L"mapCount"), configp->Get(L"strideShape"),
                           configp->Get(L"dimSharing"), configp->Get(L"dimPadding"), configp->Get(L"dimPadLower"), configp->Get(L"dimPadUpper"),
-                          ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples"))
+                          configp->Get(L"transpose"), ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples"))
     {
         AttachInputsFromConfig(configp, GetExpectedNumInputs());
     }
@@ -346,12 +359,12 @@ public:
 
             if (isFinalValidationPass && (Input(0)->GetAsMatrixNumCols() != weightCols || Input(0)->GetAsMatrixNumRows() != mapCount))
             {
-                LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", 
+                LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]",
                            Input(0)->NodeName().c_str(), (int)mapCount, (int)weightCols);
             }
 
             auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
-                                                                 m_sharing, m_autoPad, m_lowerPad, m_upperPad);
+                                                                m_sharing, m_autoPad, m_lowerPad, m_upperPad); // NOTE(review): m_transpose is not an input to this shape computation - confirm the output dims are right for the transposed case
             // ConvolveGeometry always uses CHW.
             SetDims(ImageDimensions(outDims, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
         }
@@ -432,9 +445,9 @@ public:
     {
     }
     PoolingNode(DEVICEID_TYPE deviceId, const wstring& name, PoolKind pool, const TensorShape& kernelShape, const TensorShape& strideShape,
-                    const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
-                    ImageLayoutKind imageLayout)
-                    : Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, pool, imageLayout, 0)
+                const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
+                ImageLayoutKind imageLayout)
+                : Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, pool, false, imageLayout, 0)
     {
     }
     PoolingNode(const ScriptableObjects::IConfigRecordPtr configp)
@@ -491,20 +504,20 @@ class PoolingNodeBase : public ComputationNode<ElemType>, public NumInputs<1>
 public:
     PoolingNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
         : Base(deviceId, name),
-          m_windowWidth(SIZE_MAX),
-          m_windowHeight(SIZE_MAX),
-          m_horizontalSubsample(SIZE_MAX),
-          m_verticalSubsample(SIZE_MAX),
-          m_imageLayoutKind(ImageLayoutKind::HWC)
+        m_windowWidth(SIZE_MAX),
+        m_windowHeight(SIZE_MAX),
+        m_horizontalSubsample(SIZE_MAX),
+        m_verticalSubsample(SIZE_MAX),
+        m_imageLayoutKind(ImageLayoutKind::HWC)
     {
     }
     PoolingNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind)
         : Base(deviceId, name),
-          m_windowWidth(windowWidth),
-          m_windowHeight(windowHeight),
-          m_horizontalSubsample(horizontalSubsample),
-          m_verticalSubsample(verticalSubsample),
-          m_imageLayoutKind(imageLayoutKind)
+        m_windowWidth(windowWidth),
+        m_windowHeight(windowHeight),
+        m_horizontalSubsample(horizontalSubsample),
+        m_verticalSubsample(verticalSubsample),
+        m_imageLayoutKind(imageLayoutKind)
     {
     }
     PoolingNodeBase(const ScriptableObjects::IConfigRecordPtr configp)
@@ -517,8 +530,8 @@ public:
     void Save(File& fstream) const override
     {
         Base::Save(fstream);
-        uint32_t imageLayoutKind = (uint32_t) m_imageLayoutKind;
-        uint32_t windowWidth = (uint32_t) m_windowWidth;
+        uint32_t imageLayoutKind = (uint32_t)m_imageLayoutKind;
+        uint32_t windowWidth = (uint32_t)m_windowWidth;
         fstream << windowWidth << imageLayoutKind << m_windowHeight << m_horizontalSubsample << m_verticalSubsample;
     }