Avoid asym pad for MKL, block channel axis padding.

This commit is contained in:
Bowen Bao 2018-07-24 12:49:48 -07:00
Родитель 3d753db8a3
Коммит c78f40d0c1
5 изменённых файлов: 73 добавлений и 49 удалений

Просмотреть файл

@ -244,22 +244,8 @@ protected:
return result;
}
// Warn (once per node) if the convolution configuration pads over the channel axis.
// Normally in convolution the channel dimensions of inputShape and kernelShape have the
// same size, with padding turned off or with stride equal to the channel size.
void CheckPaddingChannelAxis(const TensorShape& inputShape)
{
    if (!ConvolveGeometry::isPaddingOverChannelAxis(inputShape, m_stride, m_autoPad, m_lowerPad, m_upperPad))
        return;
    // Padding over the channel axis is almost always unintended; emit the warning at most once.
    std::call_once(m_padChannelWarningOnceFlag,
        [this] { fprintf(stderr, "WARNING: %ls %ls operation: detected padding over channel axis. Is this intended?\n",
                 NodeName().c_str(), OperationName().c_str()); });
}
virtual TensorShape ComputeOutputShape(const TensorShape& inputShape, const TensorShape& dilate, bool ceilOutDim, bool isFinalValidationPass)
{
CheckPaddingChannelAxis(inputShape);
const size_t DEAFULT_NUM_GROUPS = 1;
return ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad, dilate, DEAFULT_NUM_GROUPS, ceilOutDim,
@ -286,9 +272,6 @@ protected:
shared_ptr<Matrix<ElemType>> m_tempMatrixBackward;
std::unique_ptr<ConvolutionEngine<ElemType>> m_convEng;
private:
std::once_flag m_padChannelWarningOnceFlag;
};
#define UsingConvolutionNodeBaseMembersNonInstantiate \
@ -312,7 +295,6 @@ protected: \
using Base::m_convEng; \
using Base::InferConvolution2DReductionDims; \
using Base::InferReductionDims; \
using Base::CheckPaddingChannelAxis; \
public:
#define UsingConvolutionNodeBaseMembers \
@ -703,6 +685,26 @@ public:
LogicError("Convolution weight matrix %ls should have dimension [(filter shape) x (input channels) x (output channels)]",
Input(0)->NodeName().c_str());
}
// Check if we are padding over channel axis.
// Normally in convolution, input channel of inputShape and kernelShape should have the same size,
// with padding turned off or with stride equal to channel size.
if (m_convEng->Geometry()->IsPaddingOverChannelAxis())
{
// There are many cases that could lead to this failure.
// Type 1: users explicitly trying to pad over channel axis.
// case 1: lowerPad[channelAxis] is explicitly set to be > 0.
// case 2: autoPad[channelAxis] is explicitly set to true, along with some other satisfying conditions resulting in lowerPad[channelAxis] > 0.
// For type 1, these are behaviors that we aim to block, since cuDNN does not support padding over the channel axis.
//
// Type 2: users just used the default parameters.
// case 1: channel size > 1, while autoPadding and strides are default value.
// case 2: reduction rank = 0, outChannelSize > 1, while autoPadding and strides are default value.
// For type 2, these will also get blocked.
// This is due to a potential bug in PrimitiveFunction::ConvolutionOpOutputShape that we are not setting autoPadding to False for channel axis.
LogicError("%ls %ls operation: detected padding over channel axis. This is not supported in cuDnn. Please try setting autoPadding to false for channel axis. \n",
NodeName().c_str(), OperationName().c_str());
}
}
}
@ -759,8 +761,6 @@ private:
virtual TensorShape /*ConvolutionNode::*/ComputeOutputShape(const TensorShape& inputShape,
const TensorShape& dilate, bool ceilOutDim, bool isFinalValidationPass)
{
CheckPaddingChannelAxis(inputShape);
return ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad, dilate, m_groups, ceilOutDim,
Base::NeedsDynamicValidation(), isFinalValidationPass);

Просмотреть файл

@ -963,7 +963,10 @@ protected:
#endif
//MKL2017 does not support asymmetric padding yet
if (geometry->IsAsymmetricPadding()) return false;
if (geometry->IsAsymmetricPadding(/*useMKL=*/true)) {
fprintf(stderr, "WARNING: Detected asymmetric padding issue with lowerPad != higherPad, not supported by MKL. Switching to GEMM convolution engine. \n");
return false;
}
//MKL-DNN calls do not support the 4th dimension yet; we will update the code once MKL releases the update.
return forward ? (geometry->InputShape().GetRank() < m_dimension) : (geometry->OutputShape().GetRank() < m_dimension);

Просмотреть файл

@ -401,11 +401,30 @@ public:
return -(center - (kernSize - 1) / 2);
}
int GetUpperPad(size_t dim) const
// GetUpperPad
//
// There will be four cases
// kernelSize extraSize padSizes cuDnn & MKL (they only support symmetric padding)
// 1. odd even lower = upper supported
// 2. even odd lower = upper supported
// 3. odd odd lower = upper + 1 supported with extra 1 padding on upperPad
// 4. even even lower = upper - 1 unsupported
//
// extra size = (dim - 1) % stride
//
// So for cases where lower = upper + 1, we can simply pad one extra on upperPad,
// as it will yield the same shape and value results.
// However, for cases where lower = upper - 1, we cannot pad the extra on lowerPad,
// as doing so would shift the center of the kernel and produce different values and possibly a different shape.
//
// Parameter:
// bool trySymmetricAutoPad : if set to true, this function will return symmetric padding for case 3 by padding 1 extra on upperPad.
// This parameter is ignored if auto padding is not enabled.
int GetUpperPad(size_t dim, bool trySymmetricAutoPad = false) const
{
if (!GetAutoPad(dim))
return (int)m_upperPad[m_upperPad.size() == 1 ? 0 : dim];
int dilation = (int)GetDilation(dim);
int kernSize = ((int)m_kernelShape[dim] - 1) * dilation + 1;
int inpSize = (int)m_inputShape[dim];
@ -418,21 +437,23 @@ public:
// Extra cells, to the left and right of "cells".
int extra = inpSize - cells;
int center = extra / 2;
return (kernSize - 1) - (kernSize - 1) / 2 - (extra - center);
int upperPad = (kernSize - 1) - (kernSize - 1) / 2 - (extra - center);
if (trySymmetricAutoPad && (kernSize % 2 == 1) && (extra % 2 == 1))
{
// case 3: pad extra 1 for upperPad to enable symmetric padding.
upperPad++;
}
return upperPad;
}
// Return if padding is enabled for input channel axis.
static bool isPaddingOverChannelAxis(const TensorShape& inputShape, const TensorShape& stride, const BoolVec& autoPad,
const TensorShape& lowerPad, const TensorShape& upperPad)
bool IsPaddingOverChannelAxis() const
{
size_t channelIdx = inputShape.GetRank() - 1;
assert(inputShape.GetRank() >= 1);
size_t delta = stride[stride.GetRank() == 1 ? 0 : channelIdx];
bool autoPadCur = autoPad[autoPad.size() == 1 ? 0 : channelIdx];
size_t lo = lowerPad[lowerPad.size() == 1 ? 0 : channelIdx];
size_t hi = upperPad[upperPad.size() == 1 ? 0 : channelIdx];
// padding is enabled for channel axis, i.e. channel axis output dim != 1, when any of autoPad, lowerPad or upperPad > 0 and stride != channel size.
return (autoPadCur || (lo + hi > 0)) && delta != inputShape[channelIdx];
size_t channelIdx = m_inputShape.GetRank() - 1;
assert(m_inputShape.GetRank() >= 1);
// check for lowerPad value. This value is incorrect when out channel size > 1. Check if channel stride is >= channel size in that case.
return (GetLowerPad(channelIdx) > 0) && (GetStride(channelIdx) < m_inputShape[channelIdx]);
}
// Computes output shape given input shape and other convolution parameters.
@ -609,12 +630,14 @@ public:
return res.str();
}
bool IsAsymmetricPadding() const
// For MKL, if auto padding is enabled, in some cases we can convert asymmetric padding to symmetric padding,
// with the same output shape and value.
bool IsAsymmetricPadding(bool useMKL) const
{
for (size_t i = 0; i < KernelShape().size(); i++)
{
auto lowerPad = GetLowerPad(i);
auto upperPad = GetUpperPad(i);
auto upperPad = GetUpperPad(i, useMKL);
auto stride = GetStride(i);
if ((lowerPad != upperPad) && (stride < InputShape()[i]))
{

Просмотреть файл

@ -154,13 +154,13 @@ void TrainMNISTSeqClassifier(const DeviceDescriptor& device)
auto labelsVar = InputVariable({numOutputClasses}, AsDataType<float>(), L"labels");
auto convParam = Parameter({filterDim, filterDim, numInputChannels, filterCount}, AsDataType<float>(), GlorotUniformInitializer(), device);
auto convFunc = Convolution(convParam, packedInput, {convStrides, convStrides, numInputChannels}, {true}, {true}, {1}, 1, 1, 0, true);
auto convFunc = Convolution(convParam, packedInput, {convStrides, convStrides, numInputChannels}, {true}, {true, true, false}, {1}, 1, 1, 0, true);
auto convb = Parameter({1, filterCount}, AsDataType<float>(), GlorotUniformInitializer(), device);
auto relu = LeakyReLU(Plus(convFunc, convb), 0.01);
auto convParam2 = Parameter({filterDim, filterDim, filterCount, filterCount2}, AsDataType<float>(), GlorotUniformInitializer(), device);
auto convFunc2 = Convolution(convParam2, relu, {convStrides, convStrides, filterCount}, {true}, {true}, { 1 }, 1, 1, 0, true);
auto convFunc2 = Convolution(convParam2, relu, {convStrides, convStrides, filterCount}, {true}, {true, true, false}, { 1 }, 1, 1, 0, true);
auto convb2 = Parameter({1, filterCount2}, AsDataType<float>(), GlorotUniformInitializer(), device);
auto relu2 = LeakyReLU(Plus(convFunc2, convb2), 0.01);

Просмотреть файл

@ -115,7 +115,7 @@ void Run1DFreeDimConvLayer(const DeviceDescriptor& device, bool testFreeDimensio
input = InputVariable({10}, AsDataType<ElementType>(), L"features");
}
auto convParam = Parameter({3, 1}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
auto conv = Convolution(convParam, input, {2});
auto conv = Convolution(convParam, input, {2}, {true}, {true}, {1}, 0);
auto convb = Parameter({1}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
auto relu = LeakyReLU(Plus(conv, convb), 0.01);
@ -163,7 +163,7 @@ void Run1DFreeDimSimpConvLayer(const DeviceDescriptor& device, bool testFreeDime
input = InputVariable({10}, AsDataType<ElementType>(), L"features");
}
auto convParam = Parameter({3, 1}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
auto conv = Convolution(convParam, input, {2});
auto conv = Convolution(convParam, input, {2}, { true }, { true }, { 1 }, 0);
auto convParam2 = Parameter({2, 1}, AsDataType<ElementType>(), (ElementType) 0.5f, device);
auto conv2 = Convolution(convParam2, conv, {2});
@ -514,10 +514,10 @@ void RunConvRankTests2(const DeviceDescriptor& device)
{
auto input_ = InputVariable({36}, AsDataType<ElementType>());
auto input = Reshape(input_, {4, 3, 3});
auto params = Parameter({3, 3, 2, 1, 4}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
auto params = Parameter({3, 3, 2, 4}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
// requires kernel dim >= input dim ....
auto conv = Convolution(params, input, {2, 2});
auto conv = Convolution(params, input, {2, 2}, {true}, {true, true, true}, {1}, 0);
const size_t inputDataSize = 36;
const std::vector<size_t> sequenceSize = {2, 3};
@ -561,7 +561,7 @@ void RunConvRankTests3(const DeviceDescriptor& device)
auto params = Parameter({2, 4}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
// requires kernel dim >= input dim ....
auto conv = Convolution(params, input, {5});
auto conv = Convolution(params, input, {5}, { true }, { true }, { 1 }, 0);
const size_t inputDataSize = 6;
const std::vector<size_t> sequenceSize = {2, 3};
@ -605,7 +605,7 @@ void RunConvRankTests4(const DeviceDescriptor& device)
auto params = Parameter({2, 3, 4}, AsDataType<ElementType>(), (ElementType) 1.0f, device);
// requires kernel dim >= input dim ....
auto conv = Convolution(params, input, {2});
auto conv = Convolution(params, input, {2}, { true }, { true, true }, { 1 }, 0);
const size_t inputDataSize = 9;
const std::vector<size_t> sequenceSize = {2, 3};
@ -784,13 +784,11 @@ BOOST_AUTO_TEST_CASE(ConvolutionNetworkDifferentRankInGPU)
BOOST_AUTO_TEST_CASE(ConvolutionNetwork1DFreeDimensionInGPU)
{
// TODO: Currently failing on GPU. CUDNN_STATUS_EXECUTION_FAILED.
// This failure is not related to free dimension though, but to the specific setting(shapes) of the test.
if (ShouldRunOnGpu())
{
//auto device = DeviceDescriptor::GPUDevice(0);
//Run1DFreeDimConvLayer<float>(device, false);
//Run1DFreeDimSimpConvLayer<float>(device, false);
auto device = DeviceDescriptor::GPUDevice(0);
Run1DFreeDimConvLayer<float>(device);
Run1DFreeDimSimpConvLayer<float>(device);
}
}