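Removed Resize calls from the batch normalization (BN) cuDNN engine code; NormalizeBatch now asserts the expected matrix shapes and the caller in ConvolutionalNodes.h resizes the saved mean/inverse-std-dev buffers. Updated the CIFAR-10 ResNet samples.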

2016-01-12 13:37:38 -08:00 · 2016-01-12 13:37:38 -08:00 · 9e25b7e61a
--- a/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.config
+++ b/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.config
@ -34,7 +34,7 @@ Train=[
        minibatchSize=128
        learningRatesPerMB=0.1*80:0.01*40:0.001
        momentumPerMB=0.9
-        maxEpochs=160
+        maxEpochs=80
        L2RegWeight=0.0001
        dropoutRate=0
        
@ -60,7 +60,7 @@ Train=[
            cropRatio=0.8
            jitterType=UniRatio
            interpolations=Linear
-            #meanFile=
+            meanFile=$ConfigDir$/CIFAR-10_mean.xml
        ]
        labels=[
            labelDim=10
@ -97,7 +97,7 @@ Test=[
            cropRatio=1
            jitterType=UniRatio
            interpolations=Linear
-            #meanFile=
+            meanFile=$ConfigDir$/CIFAR-10_mean.xml
        ]
        labels=[
            labelDim=10
--- a/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.ndl
+++ b/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.ndl
@ -8,8 +8,8 @@ LocalMacros = [
    LabelDim = 10

    features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
-    featOffs = Const(128)
-    featScaled = Minus(features, featOffs)
+    #featOffs = Const(128)
+    #featScaled = Minus(features, featOffs)
    labels = Input(LabelDim, tag = label)
    
    convWScale = 7.07
@ -31,8 +31,9 @@ LocalMacros = [
 ]

 DNN=[
+    conv1WScale = 0.26
    cMap1 = 16
-    conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue, expAvg)
+    conv1 = ConvBNReLULayer(features, cMap1, 27, kW, kH, hStride1, vStride1, conv1WScale, convBValue, scValue, expAvg)

    rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg)
    rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg)
--- a/Source/ComputationNetworkLib/ConvolutionalNodes.h
+++ b/Source/ComputationNetworkLib/ConvolutionalNodes.h
@ -726,12 +726,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                // REVIEW alexeyk: hack, use m_expAvgFactor <= 0 to compute CMA.
                double expAvgFactor = (m_expAvgFactor > 0) ? m_expAvgFactor : (1.0 / (1.0 + m_mbCount));
+
+                if (m_saveMean->GetNumElements() != runMean.GetNumElements())
+                    m_saveMean->Resize(runMean.GetNumRows(), runMean.GetNumCols());
+                if (m_saveInvStdDev->GetNumElements() != runMean.GetNumElements())
+                    m_saveInvStdDev->Resize(runMean.GetNumRows(), runMean.GetNumCols());
+
                m_convEng->NormalizeBatch(*m_inT, sliceInputValue, *m_scaleBiasT, scale, bias, m_spatial, expAvgFactor, runMean, runInvStdDev,
                    sliceOutputValue, *m_saveMean, *m_saveInvStdDev);
+
                m_mbCount++;
            }
 #if NANCHECK
-            sliceOutputValue.HasNan("BatchNormalization");
+            sliceOutputValue.HasNan("BatchNormalization-output");
+            runMean.HasNan("BatchNormalization-runMean");
+            runInvStdDev.HasNan("BatchNormalization-runInvStdDev");
+            m_saveMean->HasNan("BatchNormalization-saveMean");
+            m_saveInvStdDev->HasNan("BatchNormalization-saveInvStdDev");
 #endif
        }

--- a/Source/Math/CuDnnConvolutionEngine.cpp
+++ b/Source/Math/CuDnnConvolutionEngine.cpp
@ -312,28 +312,35 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        void NormalizeBatch(const Tensor4D& inT, const Mat& in, const Tensor4D& scaleBiasT, const Mat& scale, const Mat& bias, 
            bool spatial, double expAvgFactor, Mat& runMean, Mat& runInvStdDev, Mat& out, Mat& saveMean, Mat& saveInvStdDev) override
        {
+            const size_t crowIn = inT.w() * inT.h() * inT.c();
+            UNUSED(crowIn); // crowIn used only in asserts.
            if (spatial)
            {
                assert(scaleBiasT.c() == inT.c());
                assert(scaleBiasT.w() == 1);
                assert(scaleBiasT.h() == 1);
+                assert(runMean.GetNumRows() == inT.c());
+                assert(runMean.GetNumCols() == 1);
+                assert(runInvStdDev.GetNumRows() == inT.c());
+                assert(runInvStdDev.GetNumCols() == 1);
            }
            else
            {
                assert(scaleBiasT.c() == inT.c());
                assert(scaleBiasT.w() == inT.w());
                assert(scaleBiasT.h() == inT.h());
+                assert(runMean.GetNumRows() == crowIn);
+                assert(runMean.GetNumCols() == 1);
+                assert(runInvStdDev.GetNumRows() == crowIn);
+                assert(runInvStdDev.GetNumCols() == 1);
            }
            assert(scaleBiasT.n() == 1);
-            const size_t crowIn = inT.w() * inT.h() * inT.c();
            assert(crowIn == in.GetNumRows());
            assert(inT.n() == in.GetNumCols());
+            assert(saveMean.GetNumElements() >= runMean.GetNumElements());
+            assert(saveInvStdDev.GetNumElements() >= runInvStdDev.GetNumElements());

            cudnnBatchNormMode_t mode = spatial ? CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION;
-            runMean.Resize(spatial ? inT.c() : crowIn, 1);
-            runInvStdDev.Resize(runMean.GetNumRows(), 1);
-            saveMean.Resize(runMean.GetNumRows(), 1);
-            saveInvStdDev.Resize(runMean.GetNumRows(), 1);
            CUDNN_CALL(cudnnBatchNormalizationForwardTraining(m_cudnn, mode, &C::One, &C::Zero, t(inT), ptr(in), t(inT), ptr(out),
                t(scaleBiasT), ptr(scale), ptr(bias), expAvgFactor, ptr(runMean), ptr(runInvStdDev), CUDNN_BN_MIN_EPSILON, ptr(saveMean), ptr(saveInvStdDev)));
        }