From 937cbd7fcf1cd96362c1222644be83b051292bcb Mon Sep 17 00:00:00 2001
From: Cha Zhang
Date: Fri, 26 Aug 2016 16:53:41 -0700
Subject: [PATCH] Change initialization to use heNormal.

---
 .../Image/Miscellaneous/CIFAR-10/ConvNet.cntk | 23 +++++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/Examples/Image/Miscellaneous/CIFAR-10/ConvNet.cntk b/Examples/Image/Miscellaneous/CIFAR-10/ConvNet.cntk
index d2440f8eb..b765ad5fb 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/ConvNet.cntk
+++ b/Examples/Image/Miscellaneous/CIFAR-10/ConvNet.cntk
@@ -1,7 +1,7 @@
 # Simple CIFAR-10 convnet, without and with BatchNormalization.
 
 command = TrainConvNet:Eval
-#command = TrainConvNetWithBN:Eval
+command = TrainConvNetWithBN:Eval
 
 makeMode = false ; traceLevel = 1 ; deviceId = 0
 
@@ -21,15 +21,15 @@ TrainConvNet = [
 
     model = Sequential (
         Subtract128 :
-        ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = 0.0043} :
+        ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = "heNormal"} :
         MaxPoolingLayer {(3:3), stride = (2:2)} :
-        ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = 1.414} :
+        ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = "heNormal"} :
         MaxPoolingLayer {(3:3), stride = (2:2)} :
-        ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = 1.414} :
+        ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU, init = "heNormal"} :
         MaxPoolingLayer {(3:3), stride = (2:2)} :
-        DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} :
+        DenseLayer {64, activation = ReLU, init = "heNormal"} :
         Dropout :
-        LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5}
+        LinearLayer {labelDim, init = "heNormal"}
     )
 
     # inputs
@@ -74,7 +74,6 @@ TrainConvNet = [
 ]
 
 # Training with BN
-# Not working well on this small set. Overtrains.
 TrainConvNetWithBN = [
     action = "train"
 
@@ -86,18 +85,18 @@ TrainConvNetWithBN = [
 
     model = Sequential (
         Subtract128 :
-        ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "gaussian", initValueScale = 0.0043} :
+        ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "heNormal"} :
         BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
         MaxPoolingLayer {(3:3), stride = (2:2)} :
-        ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "gaussian", initValueScale = 1.414} :
+        ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "heNormal"} :
         BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
         MaxPoolingLayer {(3:3), stride = (2:2)} :
-        ConvolutionalLayer {64, (5:5), pad = true, bias = false, init = "gaussian", initValueScale = 1.414} :
+        ConvolutionalLayer {64, (5:5), pad = true, bias = false, init = "heNormal"} :
        BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
         MaxPoolingLayer {(3:3), stride = (2:2)} :
-        LinearLayer {64, bias = false, init = "gaussian", initValueScale = 12} :
+        LinearLayer {64, bias = false, init = "heNormal"} :
         BatchNormalizationLayer {normalizationTimeConstant = 4096} : ReLU :
-        LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5}
+        LinearLayer {labelDim, init = "heNormal"}
     )
 
     # inputs
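
A note on the "heNormal" initializer adopted throughout this patch: it refers to
the scheme of He et al. (2015, "Delving Deep into Rectifiers"), which draws each
weight from a zero-mean Gaussian with standard deviation sqrt(2 / fanIn). Because
the scale is derived automatically from each layer's fan-in, the per-layer
hand-tuned initValueScale factors deleted above (0.0043, 1.414, 12, 1.5) are no
longer needed. A minimal NumPy sketch of the rule, assuming the common convention
fanIn = kernelWidth * kernelHeight * inputChannels for a convolution (the helper
name and fan-in convention are illustrative, not CNTK API):

    import numpy as np

    def he_normal(shape, fan_in, seed=None):
        # He et al. (2015): zero-mean Gaussian with std = sqrt(2 / fan_in),
        # chosen so that activation variance stays stable through ReLU layers.
        rng = np.random.default_rng(seed)
        return rng.normal(0.0, np.sqrt(2.0 / fan_in), size=shape)

    # First conv layer in this patch: 32 filters of 5x5 over 3 RGB channels,
    # so fan_in = 5 * 5 * 3 = 75 and std = sqrt(2 / 75) ~= 0.163.
    w = he_normal((32, 3, 5, 5), fan_in=5 * 5 * 3)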