changed ImageHandsOn from "gaussian" to "heNormal" initialization, and also most layer defaults in CNTK.core.bs

Frank Seide 2016-08-19 23:34:17 -07:00
Parent e343e71db7
Commit db74d6b468
3 changed files with 19 additions and 21 deletions

View file

@@ -30,7 +30,7 @@
# LinearLayer -- create a fully-connected linear projection layer
# Note: outDim may describe a tensor as well.
-LinearLayer {outDim, bias = true, init='uniform', initValueScale=1, inputRank=0} =
+LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=0} =
{
W = ParameterTensor {_ConcatArrays (outDim, Inferred), init=init, initValueScale=initValueScale}
b = ParameterTensor {outDim, initValue=0}
@@ -42,7 +42,7 @@ LinearLayer {outDim, bias = true, init='uniform', initValueScale=1, inputRank=0}
}.apply
# DenseLayer -- create a fully-connected layer with optional non-linearity
-DenseLayer{outDim, bias = true, activation=(x=>x), init='uniform', initValueScale=1, inputRank=0} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank} : activation )
+DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale=1, inputRank=0} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank} : activation )
# EmbeddingLayer -- create a linear embedding layer
EmbeddingLayer {outDim, # dimension of embedding
@@ -65,7 +65,7 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
filterShape, # e.g. (3:3)
bias = true,
activation = (x=>x),
init = "uniform",
init = "heNormal",
initValueScale = 1, # TODO: rename to initScale
#reductionRank = 1, # TODO: support this
stride = 1, pad = false,
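
[Editor's note] The new default, "heNormal", is He initialization (He et al., 2015): each weight is drawn from a zero-mean Gaussian with standard deviation sqrt(2/fanIn), which keeps activation variance roughly constant through ReLU layers, whereas the old uniform default needed a hand-tuned initValueScale per layer. Below is a minimal C++ sketch of the sampling rule; treating initValueScale as a plain multiplier on the standard deviation is an assumption of mine, since the diff does not show how CNTK combines the two internally.

#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

// He-normal initialization: w ~ N(0, scale * sqrt(2 / fanIn)).
// 'scale' stands in for initValueScale (an assumption; this diff does
// not show the exact formula CNTK applies internally).
std::vector<float> HeNormalInit(std::size_t fanIn, std::size_t fanOut, float scale = 1.0f)
{
    std::mt19937 rng(std::random_device{}());
    std::normal_distribution<float> dist(0.0f, scale * std::sqrt(2.0f / fanIn));
    std::vector<float> weights(fanIn * fanOut);
    for (float& w : weights)
        w = dist(rng);
    return weights;
}

// Example: a 5x5 convolution kernel over 32 input channels, 64 output maps:
//   auto W = HeNormalInit(5 * 5 * 32, 64);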
@@ -346,11 +346,11 @@ CNTK2 = [
Tanh(_, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = _ /*plus the function args*/ ]
// 6. Reductions
-ReduceSum (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Sum" /*plus the function args*/ ]
-ReduceLogSum(_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "LogSum" /*plus the function args*/ ]
-ReduceMin (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Min" /*plus the function args*/ ]
-ReduceMax (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Max" /*plus the function args*/ ]
-#ReduceMean (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Mean" /*plus the function args*/ ]
+ReduceSum (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Sum" /*plus the function args*/ ]}.r
+ReduceLogSum(_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "LogSum" /*plus the function args*/ ]}.r
+ReduceMin (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Min" /*plus the function args*/ ]}.r
+ReduceMax (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Max" /*plus the function args*/ ]}.r
+#ReduceMean (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Mean" /*plus the function args*/ ]}.r
// 7. Control flow (if, composite etc.)
// None so far
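
[Editor's note] The reductions were rewritten from defaulting axis inside the node record to binding axis1 outside it, presumably because BrainScript record members resolve names within the record's own scope: a member `axis = if ... else axis` would refer to itself rather than to the function's axis parameter. (That reading is mine; the commit does not state the motivation.) The same resolve-the-default-before-constructing pattern in C++, with hypothetical stand-in types:

#include <optional>

// Stand-in for the node the BrainScript expression builds.
struct ReduceNode
{
    int axis;                // already-resolved axis; no sentinel stored
    const char* reductionOp;
};

// Mirrors the diff's rewrite: bind the resolved value (axis1) *outside*
// the aggregate, so the member named 'axis' never has to reference a
// like-named value from inside its own initializer.
ReduceNode MakeReduceSum(std::optional<int> axis = std::nullopt)
{
    int axis1 = axis.value_or(0); // the 'axis1' binding from the diff
    return ReduceNode{axis1, "Sum"};
}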

View file

@@ -866,7 +866,7 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
if (!isEnabled(ConvolutionEngineKind::Legacy))
RuntimeError("Trying to use Legacy convolution engine when it's disabled.");
// REVIEW alexeyk: should honor m_traceLevel here.
fprintf(stderr, "\n%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<LegacyConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
@@ -874,19 +874,19 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
if (isEnabled(ConvolutionEngineKind::CuDnn) &&
CuDnnConvolutionEngineFactory<ElemType>::IsSupported(deviceId, geometry, poolKind))
{
fprintf(stderr, "\n%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return CuDnnConvolutionEngineFactory<ElemType>::Create(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (isEnabled(ConvolutionEngineKind::Gemm) && GemmConvolutionEngine<ElemType>::IsSupported(deviceId, geometry))
{
fprintf(stderr, "\n%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<GemmConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (!isEnabled(ConvolutionEngineKind::Reference))
RuntimeError("Reference convolution is disabled and no other engine supports such configuratin (or disabled).");
fprintf(stderr, "\n%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<ReferenceConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
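
[Editor's note] Aside from dropping the leading '\n' so each engine announcement no longer opens with a blank line, the surrounding code is a preference-ordered fallback chain: cuDNN if supported, then GEMM, then the reference engine, with an error if even that is disabled. A compressed sketch of that control flow; the predicates and the Engine enum are stand-ins, not CNTK's API:

#include <cstdio>
#include <stdexcept>

enum class Engine { CuDnn, Gemm, Reference };

// Try each engine in order of preference and log the winner on a single
// line (matching the diff's removal of the leading '\n').
Engine PickEngine(bool cudnnSupported, bool gemmSupported, bool referenceEnabled)
{
    if (cudnnSupported)
    {
        fprintf(stderr, "using cuDNN convolution engine.\n");
        return Engine::CuDnn;
    }
    if (gemmSupported)
    {
        fprintf(stderr, "using GEMM convolution engine.\n");
        return Engine::Gemm;
    }
    if (!referenceEnabled)
        throw std::runtime_error("no enabled convolution engine supports this configuration");
    fprintf(stderr, "using reference convolution engine.\n");
    return Engine::Reference;
}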

View file

@@ -18,18 +18,16 @@ TrainConvNet = {
labelDim = 10
model (features) = {
-featNorm = features - Constant (128)
-l1 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
-init="gaussian", initValueScale=0.0043} (featNorm)
+# TODO: update these new config values in all _Solution files and web page
+featNorm = features - Constant (128) # TODO: suspicious that we don't normalize by stddev (~74 if uniform distr.)
+l1 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU, initValueScale=0.1557/256} (featNorm)
p1 = MaxPoolingLayer {(3:3), stride=(2:2)} (l1)
-l2 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
-init="gaussian", initValueScale=1.414} (p1)
+l2 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU, initValueScale=0.2} (p1)
p2 = MaxPoolingLayer {(3:3), stride=(2:2)} (l2)
-l3 = ConvolutionalLayer {64, (5:5), pad=true, activation=ReLU,
-init="gaussian", initValueScale=1.414} (p2)
+l3 = ConvolutionalLayer {64, (5:5), pad=true, activation=ReLU, initValueScale=0.2} (p2)
p3 = MaxPoolingLayer {(3:3), stride=(2:2)} (l3)
-d1 = DenseLayer {64, activation=ReLU, init="gaussian", initValueScale=12} (p3)
-z = LinearLayer {10, init="gaussian", initValueScale=1.5} (d1)
+d1 = DenseLayer {64, activation=ReLU, initValueScale=1.697} (p3)
+z = LinearLayer {10, initValueScale=0.212} (d1)
}.z
# inputs
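
[Editor's note] If heNormal scales as stddev = initValueScale * sqrt(2/fanIn) (the same assumption flagged earlier, not confirmed by this diff), the effective weight stddevs of the layers above can be checked with a few lines. The fan-in values are my reconstruction for CIFAR-10-shaped input (5x5 kernels over 3, then 32, then 32 channels); d1 is omitted because its fan-in depends on the pooled feature-map geometry.

#include <cmath>
#include <cstdio>

// Effective stddev under the assumed rule stddev = scale * sqrt(2 / fanIn).
double EffectiveStd(double scale, int fanIn)
{
    return scale * std::sqrt(2.0 / fanIn);
}

int main()
{
    printf("l1: %g\n", EffectiveStd(0.1557 / 256, 5 * 5 * 3));  // 5x5 conv over RGB
    printf("l2: %g\n", EffectiveStd(0.2,          5 * 5 * 32)); // 5x5 conv over 32 maps
    printf("l3: %g\n", EffectiveStd(0.2,          5 * 5 * 32));
    printf("z : %g\n", EffectiveStd(0.212,        64));         // linear layer after d1 (64 units)
    return 0;
}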