Integrate chazhang/asym2 into master
Commit bed4cc9aa7
@@ -195,6 +195,22 @@ private:
         shape = TensorShape(dims);
     }
+protected:
+    // infer reduction dimensions if m_convolution2D is true, for legacy NDL branch
+    void InferConvolution2DReductionDims(const TensorShape& inputShape, size_t numChannels)
+    {
+        size_t kW = m_kernelShape[0];
+        size_t kH = m_kernelShape[1];
+        size_t sW = m_stride[0];
+        size_t sH = m_stride[1];
+        m_kernelShape = TensorShape(kW, kH, numChannels);
+        m_stride = TensorShape(sW, sH, numChannels);
+        size_t filterRank = 2;
+        FixVectorShape(filterRank, inputShape.size(), m_autoPad, false);
+        FixTensorShape(filterRank, inputShape.size(), m_lowerPad, 0);
+        FixTensorShape(filterRank, inputShape.size(), m_upperPad, 0);
+        FixVectorShape(filterRank, inputShape.size(), m_sharing, true);
+    }

     // infer reduction dimensions if not given
     void InferReductionDims(const TensorShape& inputShape, const TensorShape& fromShape)
     {
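The helper added above widens the legacy 2D kernel and stride with the channel (reduction) dimension, then pads the per-dimension attribute vectors out to the input rank. Below is a minimal standalone sketch of that vector fix-up, assuming FixVectorShape pads a vector to the input rank with a default value; the helper and names here are illustrative stand-ins, not CNTK's actual implementation.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Illustrative analogue of FixVectorShape(filterRank, inputRank, v, deflt):
    // pad v out to the input rank, filling new trailing dimensions with deflt.
    template <typename T>
    void FixVectorShapeSketch(size_t filterRank, size_t inputRank, std::vector<T>& v, T deflt)
    {
        (void)filterRank; // assumption: the real helper also checks v.size() against filterRank
        if (!v.empty())
            v.resize(inputRank, deflt);
    }

    int main()
    {
        std::vector<bool> autoPad{true, true}; // legacy NDL specifies only the W and H axes
        FixVectorShapeSketch<bool>(/*filterRank*/ 2, /*input rank (W, H, C)*/ 3, autoPad, false);
        // autoPad becomes {true, true, false}: the appended channel (reduction) axis is
        // never auto-padded, mirroring the m_autoPad fix-up in the diff above.
        printf("autoPad: %d %d %d\n", (int)autoPad[0], (int)autoPad[1], (int)autoPad[2]);
        return 0;
    }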
@@ -270,6 +286,7 @@ protected: \
     using Base::m_tempMatrixForward; \
     using Base::m_tempMatrixBackward; \
     using Base::m_convEng; \
+    using Base::InferConvolution2DReductionDims; \
     using Base::InferReductionDims; \
 public:
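The single added line makes the new helper visible to the derived node classes that expand this member-import macro. In a class template, names inherited from a dependent base class are not found by unqualified lookup, so each base member needs its own using-declaration. A minimal hypothetical analogue (names are illustrative, not CNTK's class hierarchy):

    #include <cstdio>

    template <class ElemType>
    struct ConvolutionNodeBaseSketch
    {
    protected:
        void InferConvolution2DReductionDims() { printf("inferring reduction dims\n"); }
    };

    template <class ElemType>
    struct ConvolutionNodeSketch : ConvolutionNodeBaseSketch<ElemType>
    {
        typedef ConvolutionNodeBaseSketch<ElemType> Base;
        // Without this using-declaration, the unqualified call in Validate() fails
        // to compile, because the base class depends on the template parameter.
        using Base::InferConvolution2DReductionDims;

        void Validate() { InferConvolution2DReductionDims(); }
    };

    int main()
    {
        ConvolutionNodeSketch<float>().Validate();
        return 0;
    }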
@@ -439,13 +456,10 @@ public:
             auto inDims = ImageDimensions(GetInputSampleLayout(inputIdx), m_imageLayout);
             // inputShape is used in ConvolveGeometry which supports only CHW layout.
             inputShape = inDims.AsTensorShape(ImageLayoutKind::CHW);
+            InferConvolution2DReductionDims(inputShape, inDims.m_numChannels);

             size_t kW = m_kernelShape[0];
             size_t kH = m_kernelShape[1];
-            size_t sW = m_stride[0];
-            size_t sH = m_stride[1];
-            m_kernelShape = TensorShape(kW, kH, inDims.m_numChannels);
-            m_stride = TensorShape(sW, sH, inDims.m_numChannels);

             size_t mapCount = m_mapCount.GetNumElements();
             size_t weightCols = kW * kH * inDims.m_numChannels;
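With the helper in place, the legacy 2D branch no longer widens the kernel and stride inline; kW and kH are simply re-read from the already-updated m_kernelShape. The weight-matrix geometry is unchanged: one row per output feature map, one column per kernel element. A worked example with illustrative values (not taken from the commit):

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        // Hypothetical configuration: 5x5 kernel, 3 input channels, 16 output maps.
        size_t kW = 5, kH = 5, numChannels = 3;
        size_t mapCount = 16;                      // stands in for m_mapCount.GetNumElements()
        size_t weightCols = kW * kH * numChannels; // 75, as in weightCols above
        printf("weight matrix: %zu x %zu\n", mapCount, weightCols); // prints 16 x 75
        return 0;
    }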
@@ -664,15 +664,17 @@ bool CuDnnConvolutionEngineFactory<ElemType>::IsSupported(DEVICEID_TYPE deviceId
               (poolKind == PoolKind::None ||
                inputRank <= 3 && (kernelRank < 3 || kernel[2] == 1)));

-    // cuDNN as of version 6.0 does not handle asymmetric padding for convolution correctly. We need to detect asymmetric
+    // cuDNN as of version 6.0 does not handle asymmetric padding for even size kernel convolution correctly. We need to detect asymmetric
     // padding due to auto-padding and choose the reference convolution implementation instead
     if (poolKind == PoolKind::None) // only for convolution, pooling seems fine
     {
         for (int i = 0; i < kernelRank; i++)
         {
-            if (geometry->GetLowerPad(i) < geometry->GetUpperPad(i))
+            auto lowerPad = geometry->GetLowerPad(i);
+            auto upperPad = geometry->GetUpperPad(i);
+            if (kernel[i] % 2 == 0 && lowerPad < upperPad)
             {
-                fprintf(stderr, "Detected asymmetric padding issue (lowerPad < higherPad), cuDNN will not be able to produce correct result. Switch to reference engine (VERY SLOW). \n");
+                fprintf(stderr, "WARNING: Detected asymmetric padding issue with even kernel size and lowerPad (%d) < higherPad (%d) (i=%d), cuDNN will not be able to produce correct result. Switch to reference engine (VERY SLOW). \n", lowerPad, upperPad, i);
                 retVal = false;
                 break;
             }
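The tightened check fires only for even kernel sizes because symmetric "same" padding is impossible there: the total padding of kernel - 1 cells is odd, so one side must receive an extra cell. The sketch below walks through one common auto-padding convention that is consistent with the lowerPad < upperPad test above; the arithmetic is illustrative, not code from CNTK.

    #include <cstdio>

    int main()
    {
        // With stride 1, a "same"-size output needs kernel - 1 total padding cells.
        for (int kernel = 2; kernel <= 5; kernel++)
        {
            int total = kernel - 1;
            int lower = total / 2;     // lower side rounds down
            int upper = total - lower; // upper side takes the extra cell for even kernels
            printf("kernel=%d lowerPad=%d upperPad=%d asymmetric=%s\n",
                   kernel, lower, upper, lower < upper ? "yes" : "no");
        }
        // Even kernels (2, 4) give lowerPad < upperPad, which cuDNN as of 6.0 mishandles
        // for convolution; odd kernels pad symmetrically and stay on the cuDNN engine.
        return 0;
    }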