changed ImageHandsOn from "gaussian" to "heNormal" initialization, and also changed most layer defaults in CNTK.core.bs
This commit is contained in:
Parent: e343e71db7
Commit: db74d6b468
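For orientation (not part of the commit): 'heNormal' refers to He et al. (2015) initialization, which draws weights from a zero-mean Gaussian with variance 2/fanIn, replacing the previous 'uniform' and 'gaussian' defaults. A minimal C++ sketch of the idea; the function name and the way the scale parameter composes with the base stddev are illustrative assumptions, not CNTK's actual implementation:

    // Minimal sketch of He-normal ("heNormal") initialization:
    // weights drawn from N(0, (scale * sqrt(2/fanIn))^2).
    #include <cmath>
    #include <random>
    #include <vector>

    std::vector<float> HeNormalInit(std::size_t fanIn, std::size_t count, float scale = 1.0f)
    {
        std::mt19937 rng(std::random_device{}());
        std::normal_distribution<float> dist(
            0.0f, scale * std::sqrt(2.0f / static_cast<float>(fanIn)));
        std::vector<float> weights(count);
        for (float& w : weights)
            w = dist(rng);  // one draw per weight
        return weights;
    }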
CNTK.core.bs
@@ -30,7 +30,7 @@
 # LinearLayer -- create a fully-connected linear projection layer
 # Note: outDim may describe a tensor as well.
-LinearLayer {outDim, bias = true, init='uniform', initValueScale=1, inputRank=0} =
+LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=0} =
 {
     W = ParameterTensor {_ConcatArrays (outDim, Inferred), init=init, initValueScale=initValueScale}
     b = ParameterTensor {outDim, initValue=0}
@@ -42,7 +42,7 @@ LinearLayer {outDim, bias = true, init='uniform', initValueScale=1, inputRank=0}
 }.apply

 # DenseLayer -- create a fully-connected layer with optional non-linearity
-DenseLayer{outDim, bias = true, activation=(x=>x), init='uniform', initValueScale=1, inputRank=0} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank} : activation )
+DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale=1, inputRank=0} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank} : activation )

 # EmbeddingLayer -- create a linear embedding layer
 EmbeddingLayer {outDim, # dimension of embedding
@@ -65,7 +65,7 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
                     filterShape,          # e.g. (3:3)
                     bias = true,
                     activation = (x=>x),
-                    init = "uniform",
+                    init = "heNormal",
                     initValueScale = 1,   # TODO: rename to initScale
                     #reductionRank = 1,   # TODO: support this
                     stride = 1, pad = false,
@@ -346,11 +346,11 @@ CNTK2 = [
     Tanh(_, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = _ /*plus the function args*/ ]

     // 6. Reductions
-    ReduceSum (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Sum" /*plus the function args*/ ]
-    ReduceLogSum(_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "LogSum" /*plus the function args*/ ]
-    ReduceMin (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Min" /*plus the function args*/ ]
-    ReduceMax (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Max" /*plus the function args*/ ]
-    #ReduceMean (_, axis=None, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = if BS.Constants.IsNone (axis) then 0 else axis ; reductionOp = "Mean" /*plus the function args*/ ]
+    ReduceSum (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Sum" /*plus the function args*/ ]}.r
+    ReduceLogSum(_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "LogSum" /*plus the function args*/ ]}.r
+    ReduceMin (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Min" /*plus the function args*/ ]}.r
+    ReduceMax (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Max" /*plus the function args*/ ]}.r
+    #ReduceMean (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Mean" /*plus the function args*/ ]}.r

     // 7. Control flow (if, composite etc.)
     // None so far
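A likely motivation for the ReduceElements rewrite, though the diff doesn't state it: BrainScript records are recursive scopes, so inside the node's record the assignment `axis = if BS.Constants.IsNone (axis) then 0 else axis` would resolve `axis` to the record member itself rather than to the function parameter. Hoisting the expression into the enclosing record as `axis1` avoids the self-reference. A rough C++ analogue of the same shadowing hazard (illustrative only; the parallel to BrainScript semantics is my reading):

    #include <cstdio>

    int choose(int axis)
    {
        {
            // Self-reference through shadowing: on the line below, the 'axis'
            // on the right-hand side would already name the new (uninitialized)
            // local, not the parameter -- undefined behavior if enabled.
            // int axis = (axis == -1) ? 0 : axis;

            // The fix mirrors the diff: evaluate under a fresh name first.
            int axis1 = (axis == -1) ? 0 : axis;
            std::printf("resolved axis: %d\n", axis1);
            return axis1;
        }
    }

    int main() { return choose(-1); }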
ConvolutionEngine.cpp
@@ -866,7 +866,7 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
     if (!isEnabled(ConvolutionEngineKind::Legacy))
         RuntimeError("Trying to use Legacy convolution engine when it's disabled.");
     // REVIEW alexeyk: should honor m_traceLevel here.
-    fprintf(stderr, "\n%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
+    fprintf(stderr, "%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
     return std::make_unique<LegacyConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
 }
@@ -874,19 +874,19 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
     if (isEnabled(ConvolutionEngineKind::CuDnn) &&
         CuDnnConvolutionEngineFactory<ElemType>::IsSupported(deviceId, geometry, poolKind))
     {
-        fprintf(stderr, "\n%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
+        fprintf(stderr, "%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
         return CuDnnConvolutionEngineFactory<ElemType>::Create(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
     }

     if (isEnabled(ConvolutionEngineKind::Gemm) && GemmConvolutionEngine<ElemType>::IsSupported(deviceId, geometry))
     {
-        fprintf(stderr, "\n%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
+        fprintf(stderr, "%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
         return std::make_unique<GemmConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
     }

     if (!isEnabled(ConvolutionEngineKind::Reference))
         RuntimeError("Reference convolution is disabled and no other engine supports such configuratin (or disabled).");
-    fprintf(stderr, "\n%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
+    fprintf(stderr, "%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
     return std::make_unique<ReferenceConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
 }
ImageHandsOn.cntk
@@ -18,18 +18,16 @@ TrainConvNet = {
     labelDim = 10

     model (features) = {
-        featNorm = features - Constant (128)
-        l1 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
-                                 init="gaussian", initValueScale=0.0043} (featNorm)
+        # TODO: update these new config values in all _Solution files and web page
+        featNorm = features - Constant (128)  # TODO: suspicious that we don't normalize by stddev (~74 if uniform distr.)
+        l1 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU, initValueScale=0.1557/256} (featNorm)
         p1 = MaxPoolingLayer {(3:3), stride=(2:2)} (l1)
-        l2 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU,
-                                 init="gaussian", initValueScale=1.414} (p1)
+        l2 = ConvolutionalLayer {32, (5:5), pad=true, activation=ReLU, initValueScale=0.2} (p1)
         p2 = MaxPoolingLayer {(3:3), stride=(2:2)} (l2)
-        l3 = ConvolutionalLayer {64, (5:5), pad=true, activation=ReLU,
-                                 init="gaussian", initValueScale=1.414} (p2)
+        l3 = ConvolutionalLayer {64, (5:5), pad=true, activation=ReLU, initValueScale=0.2} (p2)
         p3 = MaxPoolingLayer {(3:3), stride=(2:2)} (l3)
-        d1 = DenseLayer {64, activation=ReLU, init="gaussian", initValueScale=12} (p3)
-        z = LinearLayer {10, init="gaussian", initValueScale=1.5} (d1)
+        d1 = DenseLayer {64, activation=ReLU, initValueScale=1.697} (p3)
+        z = LinearLayer {10, initValueScale=0.212} (d1)
     }.z

     # inputs
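A back-of-the-envelope check of the new ImageHandsOn conv scales (again not part of the commit), under two assumptions of mine: that initValueScale simply multiplies the He base stddev sqrt(2/fanIn), and that fanIn = kernel width x kernel height x input channels, with 3-channel CIFAR-10 input for l1:

    // Hypothetical check of the new ImageHandsOn scales, assuming
    // effective stddev = initValueScale * sqrt(2 / fanIn).
    #include <cmath>
    #include <cstdio>

    int main()
    {
        struct Layer { const char* name; double scale; double fanIn; };
        const Layer layers[] = {
            {"l1 (5x5, 3 input channels)",  0.1557 / 256, 5 * 5 * 3},  // assumes RGB input
            {"l2 (5x5, 32 input channels)", 0.2,          5 * 5 * 32},
            {"l3 (5x5, 32 input channels)", 0.2,          5 * 5 * 32},
        };
        for (const Layer& l : layers)
            std::printf("%-30s stddev ~= %g\n", l.name, l.scale * std::sqrt(2.0 / l.fanIn));
        return 0;
    }

The very small l1 stddev presumably compensates for the fact noted in the TODO above: featNorm is only mean-shifted, not divided by its stddev, so the first layer sees inputs on the order of a hundred.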