From 9e25b7e61a9ffede4ba903d3cfd86cb2d843c953 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Tue, 12 Jan 2016 13:37:38 -0800 Subject: [PATCH] Removed Resize from BN code. Updated samples. --- .../Miscellaneous/CIFAR-10/03_ResNet.config | 6 +++--- .../Image/Miscellaneous/CIFAR-10/03_ResNet.ndl | 7 ++++--- .../ComputationNetworkLib/ConvolutionalNodes.h | 13 ++++++++++++- Source/Math/CuDnnConvolutionEngine.cpp | 17 ++++++++++++----- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.config b/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.config index c3fd40bfe..dd6c394a6 100644 --- a/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.config +++ b/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.config @@ -34,7 +34,7 @@ Train=[ minibatchSize=128 learningRatesPerMB=0.1*80:0.01*40:0.001 momentumPerMB=0.9 - maxEpochs=160 + maxEpochs=80 L2RegWeight=0.0001 dropoutRate=0 @@ -60,7 +60,7 @@ Train=[ cropRatio=0.8 jitterType=UniRatio interpolations=Linear - #meanFile= + meanFile=$ConfigDir$/CIFAR-10_mean.xml ] labels=[ labelDim=10 @@ -97,7 +97,7 @@ Test=[ cropRatio=1 jitterType=UniRatio interpolations=Linear - #meanFile= + meanFile=$ConfigDir$/CIFAR-10_mean.xml ] labels=[ labelDim=10 diff --git a/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.ndl b/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.ndl index d84a9de37..f267db665 100644 --- a/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.ndl +++ b/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet.ndl @@ -8,8 +8,8 @@ LocalMacros = [ LabelDim = 10 features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn") - featOffs = Const(128) - featScaled = Minus(features, featOffs) + #featOffs = Const(128) + #featScaled = Minus(features, featOffs) labels = Input(LabelDim, tag = label) convWScale = 7.07 @@ -31,8 +31,9 @@ LocalMacros = [ ] DNN=[ + conv1WScale = 0.26 cMap1 = 16 - conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue, expAvg) + conv1 = ConvBNReLULayer(features, cMap1, 27, kW, kH, hStride1, vStride1, conv1WScale, convBValue, scValue, expAvg) rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg) rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg) diff --git a/Source/ComputationNetworkLib/ConvolutionalNodes.h b/Source/ComputationNetworkLib/ConvolutionalNodes.h index dffa996af..9c06b0795 100644 --- a/Source/ComputationNetworkLib/ConvolutionalNodes.h +++ b/Source/ComputationNetworkLib/ConvolutionalNodes.h @@ -726,12 +726,23 @@ namespace Microsoft { namespace MSR { namespace CNTK { { // REVIEW alexeyk: hack, use m_expAvgFactor <= 0 to compute CMA. double expAvgFactor = (m_expAvgFactor > 0) ? 
m_expAvgFactor : (1.0 / (1.0 + m_mbCount)); + + if (m_saveMean->GetNumElements() != runMean.GetNumElements()) + m_saveMean->Resize(runMean.GetNumRows(), runMean.GetNumCols()); + if (m_saveInvStdDev->GetNumElements() != runMean.GetNumElements()) + m_saveInvStdDev->Resize(runMean.GetNumRows(), runMean.GetNumCols()); + m_convEng->NormalizeBatch(*m_inT, sliceInputValue, *m_scaleBiasT, scale, bias, m_spatial, expAvgFactor, runMean, runInvStdDev, sliceOutputValue, *m_saveMean, *m_saveInvStdDev); + m_mbCount++; } #if NANCHECK - sliceOutputValue.HasNan("BatchNormalization"); + sliceOutputValue.HasNan("BatchNormalization-output"); + runMean.HasNan("BatchNormalization-runMean"); + runInvStdDev.HasNan("BatchNormalization-runInvStdDev"); + m_saveMean->HasNan("BatchNormalization-saveMean"); + m_saveInvStdDev->HasNan("BatchNormalization-saveInvStdDev"); #endif } diff --git a/Source/Math/CuDnnConvolutionEngine.cpp b/Source/Math/CuDnnConvolutionEngine.cpp index ae9336d56..e5c7c871c 100644 --- a/Source/Math/CuDnnConvolutionEngine.cpp +++ b/Source/Math/CuDnnConvolutionEngine.cpp @@ -312,28 +312,35 @@ namespace Microsoft { namespace MSR { namespace CNTK { void NormalizeBatch(const Tensor4D& inT, const Mat& in, const Tensor4D& scaleBiasT, const Mat& scale, const Mat& bias, bool spatial, double expAvgFactor, Mat& runMean, Mat& runInvStdDev, Mat& out, Mat& saveMean, Mat& saveInvStdDev) override { + const size_t crowIn = inT.w() * inT.h() * inT.c(); + UNUSED(crowIn); // crowIn used only in asserts. if (spatial) { assert(scaleBiasT.c() == inT.c()); assert(scaleBiasT.w() == 1); assert(scaleBiasT.h() == 1); + assert(runMean.GetNumRows() == inT.c()); + assert(runMean.GetNumCols() == 1); + assert(runInvStdDev.GetNumRows() == inT.c()); + assert(runInvStdDev.GetNumCols() == 1); } else { assert(scaleBiasT.c() == inT.c()); assert(scaleBiasT.w() == inT.w()); assert(scaleBiasT.h() == inT.h()); + assert(runMean.GetNumRows() == crowIn); + assert(runMean.GetNumCols() == 1); + assert(runInvStdDev.GetNumRows() == crowIn); + assert(runInvStdDev.GetNumCols() == 1); } assert(scaleBiasT.n() == 1); - const size_t crowIn = inT.w() * inT.h() * inT.c(); assert(crowIn == in.GetNumRows()); assert(inT.n() == in.GetNumCols()); + assert(saveMean.GetNumElements() >= runMean.GetNumElements()); + assert(saveInvStdDev.GetNumElements() >= runInvStdDev.GetNumElements()); cudnnBatchNormMode_t mode = spatial ? CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION; - runMean.Resize(spatial ? inT.c() : crowIn, 1); - runInvStdDev.Resize(runMean.GetNumRows(), 1); - saveMean.Resize(runMean.GetNumRows(), 1); - saveInvStdDev.Resize(runMean.GetNumRows(), 1); CUDNN_CALL(cudnnBatchNormalizationForwardTraining(m_cudnn, mode, &C::One, &C::Zero, t(inT), ptr(in), t(inT), ptr(out), t(scaleBiasT), ptr(scale), ptr(bias), expAvgFactor, ptr(runMean), ptr(runInvStdDev), CUDNN_BN_MIN_EPSILON, ptr(saveMean), ptr(saveInvStdDev))); }
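
Editor's note on the engine contract after this change: NormalizeBatch() no
longer resizes runMean/runInvStdDev or the saveMean/saveInvStdDev scratch
buffers; the caller (the batch-normalization node in ConvolutionalNodes.h)
now allocates them lazily, and the cuDNN engine only asserts their shapes.
Below is a minimal compilable sketch of that split; the toy Mat type and the
helper names PrepareSaveBuffers/CheckNormalizeBatchInputs are illustrative
stand-ins for CNTK's Matrix<ElemType> and the node/engine code, not part of
the patch:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Toy stand-in for CNTK's Matrix<ElemType>; just enough for the sketch.
    struct Mat
    {
        size_t rows = 0, cols = 0;
        std::vector<float> data;
        size_t GetNumRows() const { return rows; }
        size_t GetNumCols() const { return cols; }
        size_t GetNumElements() const { return rows * cols; }
        void Resize(size_t r, size_t c) { rows = r; cols = c; data.resize(r * c); }
    };

    // Caller side (as in ConvolutionalNodes.h): size the save buffers from
    // the running statistics before calling into the engine; this is a no-op
    // on every minibatch after the first, once the sizes agree.
    void PrepareSaveBuffers(Mat& saveMean, Mat& saveInvStdDev, const Mat& runMean)
    {
        if (saveMean.GetNumElements() != runMean.GetNumElements())
            saveMean.Resize(runMean.GetNumRows(), runMean.GetNumCols());
        if (saveInvStdDev.GetNumElements() != runMean.GetNumElements())
            saveInvStdDev.Resize(runMean.GetNumRows(), runMean.GetNumCols());
    }

    // Engine side (as in CuDnnConvolutionEngine.cpp): verify, never allocate.
    void CheckNormalizeBatchInputs(const Mat& runMean, const Mat& saveMean,
                                   const Mat& saveInvStdDev)
    {
        assert(saveMean.GetNumElements() >= runMean.GetNumElements());
        assert(saveInvStdDev.GetNumElements() >= runMean.GetNumElements());
    }

Moving the Resize calls out of the engine keeps the per-minibatch hot path
free of allocation logic and lets the engine assume cuDNN-ready buffers.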
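
Editor's note on the expAvgFactor context line: when m_expAvgFactor <= 0, the
node falls back to a cumulative moving average by passing 1/(1 + m_mbCount)
as the blend factor, so after N minibatches the running statistics equal the
plain average of the N per-minibatch statistics. A small self-contained check
of that identity, assuming cuDNN's documented running-statistics update rule
run = (1 - f)*run + f*batch:

    #include <cstdio>

    int main()
    {
        const double batchMeans[] = { 2.0, 4.0, 9.0, 1.0 };
        double run = 0.0; // initial value is irrelevant: the first factor is 1
        double sum = 0.0;
        for (int n = 0; n < 4; ++n)
        {
            const double f = 1.0 / (1.0 + n);          // expAvgFactor for CMA
            run = (1.0 - f) * run + f * batchMeans[n]; // running-stat update
            sum += batchMeans[n];
            std::printf("n=%d run=%g cma=%g\n", n + 1, run, sum / (n + 1));
        }
        return 0; // run equals the cumulative average at every step: 2, 3, 5, 4
    }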