Updated ResNet samples.
This commit is contained in:
Родитель
31a164602c
Коммит
08e4b993e0
|
@ -1,146 +1,95 @@
|
|||
# Convolution followed by batch normalization (no non-linearity), using a
# convolution weight node supplied by the caller.
#   W                - convolution weights (created by the caller)
#   inp              - input node
#   outMap           - number of output feature maps; also the row count of the
#                      batch-normalization parameters created here
#   kW, kH           - kernel width / height passed to Convolution
#   hStride, vStride - horizontal / vertical stride passed to Convolution
#   bValue, scValue  - initial values of the batch-norm bias (b) and scale (sc)
#   expAvg           - forwarded to BatchNormalization as expAvgFactor
ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
{
    b = Parameter(outMap, 1, init = fixedValue, value = bValue)
    sc = Parameter(outMap, 1, init = fixedValue, value = scValue)
    # m and isd are not trained (needGradient = false); they are handed to
    # BatchNormalization as its 4th and 5th inputs.
    m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
    isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)

    c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
    y = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
}
|
||||
|
||||
# Convolution + batch normalization (no ReLU) that creates its own weights.
#   inWCount - column count of the weight matrix W (W is outMap x inWCount)
#   wScale   - initValueScale of the Gaussian weight initialization
# All remaining arguments are forwarded unchanged to ConvBNLayerW.
ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
{
    W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
    c = ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
}
|
||||
|
||||
# Convolution + batch normalization + ReLU.
# Takes the same arguments as ConvBNLayer and applies RectifiedLinear to its output.
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
{
    c = ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
    y = RectifiedLinear(c)
}
|
||||
|
||||
# Standard building block for ResNet with identity shortcut (option A).
# Two kW x kH stride-1 convolutions, each followed by batch normalization;
# the block input is added to the second one before the final ReLU.
#   expAvg - forwarded to both convolution layers. It is declared as the last
#            parameter because the body uses it and the callers pass it as
#            the 9th argument.
# NOTE(review): Plus(c2, inp) presumably requires inp to already have outMap
# feature maps - confirm against the callers.
ResNetNode2A(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue, expAvg)
{
    # First convolution layer.
    c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
    # Second convolution layer, no ReLU.
    c2 = ConvBNLayer(c1, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
    # Identity shortcut.
    p = Plus(c2, inp)
    y = RectifiedLinear(p)
}
|
||||
|
||||
# Standard building block for ResNet with padding (option A), used where the
# number of feature maps changes.
# The first convolution uses stride 2; the shortcut is a 1x1, stride-2
# convolution with the caller-supplied weights Wproj, followed by batch
# normalization.
#   inWCount - weight column count of the first convolution
#   wCount   - weight column count of the second convolution
#   Wproj    - weight node for the shortcut convolution (created by the caller)
ResNetNode2AInc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, expAvg, Wproj)
{
    # First convolution layer.
    c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, expAvg)
    # Second convolution layer, no ReLU.
    c2 = ConvBNLayer(c1, outMap, wCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)

    # Projection convolution layer.
    c_proj = ConvBNLayerW(Wproj, inp, outMap, 1, 1, 2, 2, bValue, scValue, expAvg)

    p = Plus(c2, c_proj)
    y2 = RectifiedLinear(p)
}
|
||||
|
||||
# Bottleneck building block for ResNet with identity shortcut.
# 1x1 (inMap -> convMap), 3x3 (convMap -> convMap) and 1x1 (convMap -> outMap)
# convolutions, all stride 1 and each followed by batch normalization; the
# block input is added before the final ReLU.
#   convWCount - weight column count of the 3x3 convolution
# NOTE(review): Plus(c3, inp) presumably requires inp to already have outMap
# feature maps - confirm against the callers.
ResNetNode3A(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, expAvg)
{
    # 1x1 reducing convolution.
    c1 = ConvBNReLULayer(inp, convMap, inMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
    # 3x3 convolution.
    c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, expAvg)
    # 1x1 expanding convolution, no ReLU.
    c3 = ConvBNLayer(c2, outMap, convMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)

    p = Plus(c3, inp)
    y = RectifiedLinear(p)
}
|
||||
|
||||
# Bottleneck building block for ResNet whose shortcut uses caller-supplied
# projection weights.
# The 1x1 reducing convolution and the shortcut convolution both use
# projStride as their horizontal and vertical stride.
#   convWCount - weight column count of the 3x3 convolution
#   wProj      - weight node for the shortcut convolution (created by the caller)
#   projStride - stride of the reducing and shortcut convolutions
ResNetNode3AInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, expAvg, wProj, projStride)
{
    # 1x1 reducing convolution.
    c1 = ConvBNReLULayer(inp, convMap, inMap, 1, 1, projStride, projStride, wScale, bValue, scValue, expAvg)
    # 3x3 convolution.
    c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, expAvg)
    # 1x1 expanding convolution, no ReLU.
    c3 = ConvBNLayer(c2, outMap, convMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
    # Input-to-output mapping convolution.
    # ConvBNLayerW receives ready-made weights and has no wScale parameter.
    c_proj = ConvBNLayerW(wProj, inp, outMap, 1, 1, projStride, projStride, bValue, scValue, expAvg)

    p = Plus(c3, c_proj)
    y = RectifiedLinear(p)
}
|
||||
|
||||
# Bottleneck building block for ResNet whose shortcut is a 1x1 convolution
# with its own Gaussian-initialized weights (created inside ConvBNLayer).
# The 1x1 reducing convolution and the shortcut convolution both use
# projStride as their horizontal and vertical stride.
#   convWCount - weight column count of the 3x3 convolution
#   projStride - stride of the reducing and shortcut convolutions
ResNetNode3BInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, expAvg, projStride)
{
    # 1x1 reducing convolution.
    c1 = ConvBNReLULayer(inp, convMap, inMap, 1, 1, projStride, projStride, wScale, bValue, scValue, expAvg)
    # 3x3 convolution.
    c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, expAvg)
    # 1x1 expanding convolution, no ReLU.
    c3 = ConvBNLayer(c2, outMap, convMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
    # Input-to-output mapping convolution.
    c_proj = ConvBNLayer(inp, outMap, inMap, 1, 1, projStride, projStride, wScale, bValue, scValue, expAvg)

    p = Plus(c3, c_proj)
    y = RectifiedLinear(p)
}
|
||||
|
||||
DnnLayer(hiddenDim, labelDim, x, wScale, bValue)
|
||||
|
|
|
@ -10,12 +10,13 @@ ndlMacros=$ConfigDir$/Macros.ndl
|
|||
precision=float
|
||||
deviceId=Auto
|
||||
|
||||
command=Train:AddTop5Eval:Test
|
||||
command=Train:CreateEval:Test
|
||||
|
||||
parallelTrain=false
|
||||
|
||||
stderr=$OutputDir$/ResNet_152
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
Proj64to256Filename = $ConfigDir$/64to256.txt
|
||||
Proj256to512Filename = $ConfigDir$/256to512.txt
|
||||
|
@ -32,10 +33,12 @@ Train=[
|
|||
|
||||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=32
|
||||
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
|
||||
minibatchSize=256
|
||||
# Note that learning rates are 10x higher than in the paper due to a different
|
||||
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
|
||||
learningRatesPerMB=1.0*35:0.1*35:0.01
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=120
|
||||
maxEpochs=125
|
||||
gradUpdateType=None
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
@ -45,11 +48,9 @@ Train=[
|
|||
distributedMBReading=true
|
||||
parallelizationStartEpoch=1
|
||||
DataParallelSGD=[
|
||||
gradientBits=1
|
||||
gradientBits=32
|
||||
]
|
||||
]
|
||||
|
||||
numMBsToShowResult=100
|
||||
]
|
||||
|
||||
reader=[
|
||||
|
@ -88,16 +89,16 @@ Train=[
|
|||
]
|
||||
]
|
||||
|
||||
AddTop5Eval=[
|
||||
CreateEval=[
|
||||
action=edit
|
||||
CurModel=$ModelDir$/ResNet_152
|
||||
NewModel=$ModelDir$/ResNet_152.Top5
|
||||
editPath=$ConfigDir$/add_top5_layer.mel
|
||||
NewModel=$ModelDir$/ResNet_152.Eval
|
||||
editPath=$ConfigDir$/create_eval_model.mel
|
||||
]
|
||||
|
||||
Test=[
|
||||
action=test
|
||||
modelPath=$ModelDir$/ResNet_152.Top5
|
||||
modelPath=$ModelDir$/ResNet_152.Eval
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=32
|
||||
|
||||
|
|
|
@ -17,18 +17,16 @@ ndlMacros = [
|
|||
hs = 1
|
||||
vs = 1
|
||||
|
||||
# Pooling settings.
|
||||
poolW = 2
|
||||
poolH = 2
|
||||
poolhs = 2
|
||||
poolvs = 2
|
||||
|
||||
# Initial parameter values.
|
||||
convWScale = 7.07
|
||||
convBValue = 0
|
||||
|
||||
fcWScale = 2.26
|
||||
fcBValue = 0
|
||||
|
||||
scValue = 1
|
||||
fcWScale = 3.0
|
||||
fcBValue = 1
|
||||
|
||||
expAvg = 1
|
||||
]
|
||||
|
||||
DNN=[
|
||||
|
@ -39,7 +37,8 @@ DNN=[
|
|||
cMap5 = 1024
|
||||
cMap6 = 2048
|
||||
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
|
||||
conv1WScale = 0.6
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, conv1WScale, convBValue, scValue, expAvg)
|
||||
# Max pooling
|
||||
pool1W = 2
|
||||
pool1H = 2
|
||||
|
@ -47,63 +46,59 @@ DNN=[
|
|||
pool1vs = 2
|
||||
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
|
||||
|
||||
rn1_1_Wproj = Parameter(cMap3, cMap1, init = fromFile, initFromFilePath = "$Proj64to256Filename$", needGradient = false)
|
||||
rn1_1 = ResNetNode3Inc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, rn1_1_Wproj, 1)
|
||||
rn1_2 = ResNetNode3(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
|
||||
rn1_3 = ResNetNode3(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
|
||||
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg, 1)
|
||||
rn1_2 = ResNetNode3A(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
|
||||
rn1_3 = ResNetNode3A(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
rn2_1_Wproj = Parameter(cMap4, cMap3, init = fromFile, initFromFilePath = "$Proj256to512Filename$", needGradient = false)
|
||||
rn2_1 = ResNetNode3Inc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, rn2_1_Wproj, 2)
|
||||
rn2_2 = ResNetNode3(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_3 = ResNetNode3(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_4 = ResNetNode3(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_5 = ResNetNode3(rn2_4, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_6 = ResNetNode3(rn2_5, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_7 = ResNetNode3(rn2_6, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_8 = ResNetNode3(rn2_7, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg, 2)
|
||||
rn2_2 = ResNetNode3A(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_3 = ResNetNode3A(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_4 = ResNetNode3A(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_5 = ResNetNode3A(rn2_4, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_6 = ResNetNode3A(rn2_5, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_7 = ResNetNode3A(rn2_6, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_8 = ResNetNode3A(rn2_7, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
rn3_1_Wproj = Parameter(cMap5, cMap4, init = fromFile, initFromFilePath = "$Proj512to1024Filename$", needGradient = false)
|
||||
rn3_1 = ResNetNode3Inc(rn2_8, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, rn3_1_Wproj, 2)
|
||||
rn3_2 = ResNetNode3(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_3 = ResNetNode3(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_4 = ResNetNode3(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_5 = ResNetNode3(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_6 = ResNetNode3(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_7 = ResNetNode3(rn3_6, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_8 = ResNetNode3(rn3_7, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_9 = ResNetNode3(rn3_8, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_10= ResNetNode3(rn3_9, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_11= ResNetNode3(rn3_10, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_12= ResNetNode3(rn3_11, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_13= ResNetNode3(rn3_12, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_14= ResNetNode3(rn3_13, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_15= ResNetNode3(rn3_14, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_16= ResNetNode3(rn3_15, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_17= ResNetNode3(rn3_16, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_18= ResNetNode3(rn3_17, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_19= ResNetNode3(rn3_18, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_20= ResNetNode3(rn3_19, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_21= ResNetNode3(rn3_20, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_22= ResNetNode3(rn3_21, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_23= ResNetNode3(rn3_22, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_24= ResNetNode3(rn3_23, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_25= ResNetNode3(rn3_24, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_26= ResNetNode3(rn3_25, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_27= ResNetNode3(rn3_26, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_28= ResNetNode3(rn3_27, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_29= ResNetNode3(rn3_28, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_30= ResNetNode3(rn3_29, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_31= ResNetNode3(rn3_30, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_32= ResNetNode3(rn3_31, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_33= ResNetNode3(rn3_32, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_34= ResNetNode3(rn3_33, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_35= ResNetNode3(rn3_34, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_36= ResNetNode3(rn3_35, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_1 = ResNetNode3BInc(rn2_8, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg, 2)
|
||||
rn3_2 = ResNetNode3A(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_3 = ResNetNode3A(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_4 = ResNetNode3A(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_5 = ResNetNode3A(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_6 = ResNetNode3A(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_7 = ResNetNode3A(rn3_6, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_8 = ResNetNode3A(rn3_7, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_9 = ResNetNode3A(rn3_8, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_10= ResNetNode3A(rn3_9, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_11= ResNetNode3A(rn3_10, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_12= ResNetNode3A(rn3_11, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_13= ResNetNode3A(rn3_12, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_14= ResNetNode3A(rn3_13, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_15= ResNetNode3A(rn3_14, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_16= ResNetNode3A(rn3_15, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_17= ResNetNode3A(rn3_16, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_18= ResNetNode3A(rn3_17, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_19= ResNetNode3A(rn3_18, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_20= ResNetNode3A(rn3_19, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_21= ResNetNode3A(rn3_20, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_22= ResNetNode3A(rn3_21, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_23= ResNetNode3A(rn3_22, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_24= ResNetNode3A(rn3_23, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_25= ResNetNode3A(rn3_24, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_26= ResNetNode3A(rn3_25, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_27= ResNetNode3A(rn3_26, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_28= ResNetNode3A(rn3_27, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_29= ResNetNode3A(rn3_28, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_30= ResNetNode3A(rn3_29, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_31= ResNetNode3A(rn3_30, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_32= ResNetNode3A(rn3_31, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_33= ResNetNode3A(rn3_32, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_34= ResNetNode3A(rn3_33, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_35= ResNetNode3A(rn3_34, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_36= ResNetNode3A(rn3_35, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
rn4_1_Wproj = Parameter(cMap6, cMap5, init = fromFile, initFromFilePath = "$Proj1024to2048Filename$", needGradient = false)
|
||||
rn4_1 = ResNetNode3Inc(rn3_36, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, rn4_1_Wproj, 2)
|
||||
rn4_2 = ResNetNode3(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
|
||||
rn4_3 = ResNetNode3(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
|
||||
rn4_1 = ResNetNode3BInc(rn3_36, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg, 2)
|
||||
rn4_2 = ResNetNode3A(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
|
||||
rn4_3 = ResNetNode3A(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
# Global average pooling
|
||||
pool2W = 7
|
||||
|
|
|
@ -10,12 +10,13 @@ ndlMacros=$ConfigDir$/Macros.ndl
|
|||
precision=float
|
||||
deviceId=Auto
|
||||
|
||||
command=Train:AddTop5Eval:Test
|
||||
command=Train:CreateEval:Test
|
||||
|
||||
parallelTrain=false
|
||||
|
||||
stderr=$OutputDir$/ResNet_34
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
Proj64to128Filename = $ConfigDir$/64to128.txt
|
||||
Proj128to256Filename = $ConfigDir$/128to256.txt
|
||||
|
@ -31,10 +32,12 @@ Train=[
|
|||
|
||||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=64
|
||||
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
|
||||
minibatchSize=256
|
||||
# Note that learning rates are 10x higher than in the paper due to a different
|
||||
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
|
||||
learningRatesPerMB=1.0*35:0.1*35:0.01
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=120
|
||||
maxEpochs=125
|
||||
gradUpdateType=None
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
@ -44,11 +47,9 @@ Train=[
|
|||
distributedMBReading=true
|
||||
parallelizationStartEpoch=1
|
||||
DataParallelSGD=[
|
||||
gradientBits=1
|
||||
gradientBits=32
|
||||
]
|
||||
]
|
||||
|
||||
numMBsToShowResult=100
|
||||
]
|
||||
|
||||
reader=[
|
||||
|
@ -87,16 +88,16 @@ Train=[
|
|||
]
|
||||
]
|
||||
|
||||
AddTop5Eval=[
|
||||
CreateEval=[
|
||||
action=edit
|
||||
CurModel=$ModelDir$/ResNet_34
|
||||
NewModel=$ModelDir$/ResNet_34.Top5
|
||||
editPath=$ConfigDir$/add_top5_layer.mel
|
||||
NewModel=$ModelDir$/ResNet_34.Eval
|
||||
editPath=$ConfigDir$/create_eval_model.mel
|
||||
]
|
||||
|
||||
Test=[
|
||||
action=test
|
||||
modelPath=$ModelDir$/ResNet_34.Top5
|
||||
modelPath=$ModelDir$/ResNet_34.Eval
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=64
|
||||
|
||||
|
|
|
@ -20,14 +20,19 @@ ndlMacros = [
|
|||
# Initial parameter values.
|
||||
convWScale = 7.07
|
||||
convBValue = 0
|
||||
|
||||
fcWScale = 1.13
|
||||
fcBValue = 0
|
||||
|
||||
scValue = 1
|
||||
fcWScale = 3.0
|
||||
fcBValue = 1
|
||||
|
||||
expAvg = 1
|
||||
]
|
||||
|
||||
DNN=[
|
||||
conv1WScale = 0.6
|
||||
cMap1 = 64
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, conv1WScale, convBValue, scValue, expAvg)
|
||||
# Max pooling
|
||||
pool1W = 2
|
||||
pool1H = 2
|
||||
|
@ -35,31 +40,31 @@ DNN=[
|
|||
pool1vs = 2
|
||||
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
|
||||
|
||||
rn1_1 = ResNetNode2(pool1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
rn1_2 = ResNetNode2(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
rn1_3 = ResNetNode2(rn1_2, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
rn1_1 = ResNetNode2A(pool1, cMap1, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn1_2 = ResNetNode2A(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn1_3 = ResNetNode2A(rn1_2, cMap1, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap2 = 128
|
||||
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj64to128Filename$", needGradient = false)
|
||||
rn2_1 = ResNetNode2Conv(rn1_3, cMap2, 576, 1152, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
|
||||
rn2_2 = ResNetNode2(rn2_1, cMap2, 1152, kW, kH, convWScale, convBValue, scValue)
|
||||
rn2_3 = ResNetNode2(rn2_2, cMap2, 1152, kW, kH, convWScale, convBValue, scValue)
|
||||
rn2_4 = ResNetNode2(rn2_3, cMap2, 1152, kW, kH, convWScale, convBValue, scValue)
|
||||
rn2_1 = ResNetNode2AInc(rn1_3, cMap2, 576, 1152, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
|
||||
rn2_2 = ResNetNode2A(rn2_1, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_3 = ResNetNode2A(rn2_2, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_4 = ResNetNode2A(rn2_3, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap3 = 256
|
||||
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj128to256Filename$", needGradient = false)
|
||||
rn3_1 = ResNetNode2Conv(rn2_4, cMap3, 1152, 2304, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
|
||||
rn3_2 = ResNetNode2(rn3_1, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_3 = ResNetNode2(rn3_2, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_4 = ResNetNode2(rn3_3, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_5 = ResNetNode2(rn3_4, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_6 = ResNetNode2(rn3_5, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_1 = ResNetNode2AInc(rn2_4, cMap3, 1152, 2304, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
|
||||
rn3_2 = ResNetNode2A(rn3_1, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_3 = ResNetNode2A(rn3_2, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_4 = ResNetNode2A(rn3_3, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_5 = ResNetNode2A(rn3_4, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_6 = ResNetNode2A(rn3_5, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap4 = 512
|
||||
rn4_1_Wproj = Parameter(cMap4, cMap3, init = fromFile, initFromFilePath = "$Proj256to512Filename$", needGradient = false)
|
||||
rn4_1 = ResNetNode2Conv(rn3_6, cMap4, 2304, 4608, kW, kH, convWScale, convBValue, scValue, rn4_1_Wproj)
|
||||
rn4_2 = ResNetNode2(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
|
||||
rn4_3 = ResNetNode2(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
|
||||
rn4_1 = ResNetNode2AInc(rn3_6, cMap4, 2304, 4608, kW, kH, convWScale, convBValue, scValue, expAvg, rn4_1_Wproj)
|
||||
rn4_2 = ResNetNode2A(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn4_3 = ResNetNode2A(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
# Global average pooling
|
||||
pool2W = 7
|
||||
|
|
|
@ -10,12 +10,13 @@ ndlMacros=$ConfigDir$/Macros.ndl
|
|||
# Top-level settings of the ResNet_50 training configuration.
# NOTE(review): this is a diff rendering without +/- markers; "command" appears twice and
# only one of the two lines exists in the actual file - confirm against the commit.
precision=float
|
||||
deviceId=Auto
|
||||
|
||||
# Colon-separated list of config sections to run. The CreateEval variant matches the
# CreateEval=[...] section of this config; the AddTop5Eval variant matches AddTop5Eval=[...].
command=Train:AddTop5Eval:Test
|
||||
command=Train:CreateEval:Test
|
||||
|
||||
parallelTrain=false
|
||||
|
||||
# Log/trace output settings; stderr is placed under $OutputDir$.
stderr=$OutputDir$/ResNet_50
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
# Paths of projection-matrix files, referenced elsewhere as $Proj64to256Filename$ and
# $Proj256to512Filename$.
Proj64to256Filename = $ConfigDir$/64to256.txt
|
||||
Proj256to512Filename = $ConfigDir$/256to512.txt
|
||||
|
@ -32,10 +33,12 @@ Train=[
|
|||
|
||||
# SGD hyper-parameters for the Train command.
# NOTE(review): this is a diff rendering without +/- markers; minibatchSize,
# learningRatesPerMB and maxEpochs each appear twice (old and new value). Confirm against
# the commit which value is current.
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=32
|
||||
learningRatesPerMB=0.1*30:0.03*30:0.01*25:0.003*25:0.001
|
||||
minibatchSize=256
|
||||
# Note that learning rates are 10x higher than in the paper due to a different
|
||||
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
# With momentumPerMB=0.9 the gradient term is scaled by (1 - 0.9) = 0.1, hence the 10x.
|
||||
# NOTE(review): presumably "value*N" means N epochs at that rate and the last value holds
# until maxEpochs - confirm in the CNTK SGD documentation.
learningRatesPerMB=1.0*35:0.1*35:0.01
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=120
|
||||
maxEpochs=125
|
||||
gradUpdateType=None
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
@ -45,11 +48,9 @@ Train=[
|
|||
# Parallel-training settings (tail of an enclosing section whose opening line is outside
# this hunk).
# NOTE(review): this is a diff rendering without +/- markers; gradientBits appears twice
# (1 and 32) and only one value exists in the actual file - confirm against the commit.
distributedMBReading=true
|
||||
parallelizationStartEpoch=1
|
||||
DataParallelSGD=[
|
||||
# NOTE(review): presumably the number of bits used per gradient value when gradients are
# exchanged (1 = quantized, 32 = full float) - confirm in the CNTK documentation.
gradientBits=1
|
||||
gradientBits=32
|
||||
]
|
||||
]
|
||||
|
||||
# NOTE(review): the hunk header (11 old lines, 9 new lines) suggests this line was removed
# here; numMBsToShowResult=500 is set at the top level of the config in the same commit.
numMBsToShowResult=100
|
||||
]
|
||||
|
||||
reader=[
|
||||
|
@ -88,16 +89,16 @@ Train=[
|
|||
]
|
||||
]
|
||||
|
||||
# Model-edit and test commands.
# NOTE(review): this is a diff rendering without +/- markers. The section header,
# NewModel, editPath and modelPath each appear twice: the "Top5"/add_top5_layer.mel lines
# and the "Eval"/create_eval_model.mel lines are alternative versions of the same
# statements - confirm against the commit which is current.
AddTop5Eval=[
|
||||
CreateEval=[
|
||||
# Edit action: runs the script given by editPath, which reads $CurModel$ and writes $NewModel$.
action=edit
|
||||
CurModel=$ModelDir$/ResNet_50
|
||||
NewModel=$ModelDir$/ResNet_50.Top5
|
||||
editPath=$ConfigDir$/add_top5_layer.mel
|
||||
NewModel=$ModelDir$/ResNet_50.Eval
|
||||
editPath=$ConfigDir$/create_eval_model.mel
|
||||
]
|
||||
|
||||
# Test command: evaluates the model written by the edit step (modelPath equals the edit
# step's NewModel in each variant).
Test=[
|
||||
action=test
|
||||
modelPath=$ModelDir$/ResNet_50.Top5
|
||||
modelPath=$ModelDir$/ResNet_50.Eval
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=32
|
||||
|
||||
|
|
|
@ -20,9 +20,13 @@ ndlMacros = [
|
|||
# Initial parameter values.
# These constants are passed as arguments to the layer macros used in the DNN section
# (convWScale, convBValue, scValue and expAvg appear in the ResNetNode3* calls).
# NOTE(review): this is a diff rendering without +/- markers; fcWScale and fcBValue each
# appear twice with different values (2.26 / 3.0 and 0 / 1). Only one pair exists in the
# actual file - confirm against the commit.
|
||||
convWScale = 7.07
|
||||
convBValue = 0
|
||||
|
||||
fcWScale = 2.26
|
||||
fcBValue = 0
|
||||
|
||||
scValue = 1
|
||||
fcWScale = 3.0
|
||||
fcBValue = 1
|
||||
|
||||
# Passed to the macros as their expAvg argument, which the macro definitions at the top of
# this commit forward to BatchNormalization as expAvgFactor.
expAvg = 1
|
||||
]
|
||||
|
||||
DNN=[
|
||||
|
@ -33,7 +37,8 @@ DNN=[
|
|||
# Map counts used by the later residual groups.
cMap5 = 1024
|
||||
cMap6 = 2048
|
||||
|
||||
# First layer: convolution + batch normalization + ReLU applied to the input features,
# with a 7x7 kernel and stride 2 in both directions, producing cMap1 maps.
# NOTE(review): 147 = 7 * 7 * 3, so the input presumably has 3 channels - confirm.
# NOTE(review): this is a diff rendering without +/- markers; conv1 is assigned twice. The
# second form uses a dedicated conv1WScale and passes expAvg, matching the expAvg-taking
# macros introduced in this commit - confirm it is the current line.
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
|
||||
# Weight-init scale used only for conv1 (the other layers use convWScale).
conv1WScale = 0.6
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, conv1WScale, convBValue, scValue, expAvg)
|
||||
# Max pooling
|
||||
pool1W = 2
|
||||
pool1H = 2
|
||||
|
@ -41,29 +46,25 @@ DNN=[
|
|||
# Max pooling over conv1 followed by four groups of residual nodes (rn1_* .. rn4_*).
# NOTE(review): this is a diff rendering without +/- markers. Every rnN_M node is assigned
# twice: once via ResNetNode3Inc/ResNetNode3 and once via ResNetNode3BInc/ResNetNode3A with
# an extra expAvg argument. The ResNetNode3BInc calls do not take the rnN_1_Wproj
# parameters, so the Parameter(... init = fromFile ...) lines appear to belong to the
# ResNetNode3Inc variant only. Confirm against the commit which variant is current.
pool1vs = 2
|
||||
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
|
||||
|
||||
# Group 1: three nodes on top of pool1. The ...Inc node receives a trailing argument of 1
# here and 2 in the later groups.
# NOTE(review): that trailing argument is presumably the stride - confirm in the macro
# definitions.
rn1_1_Wproj = Parameter(cMap3, cMap1, init = fromFile, initFromFilePath = "$Proj64to256Filename$", needGradient = false)
|
||||
rn1_1 = ResNetNode3Inc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, rn1_1_Wproj, 1)
|
||||
rn1_2 = ResNetNode3(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
|
||||
rn1_3 = ResNetNode3(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
|
||||
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg, 1)
|
||||
rn1_2 = ResNetNode3A(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
|
||||
rn1_3 = ResNetNode3A(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
# Group 2: four nodes on top of rn1_3.
rn2_1_Wproj = Parameter(cMap4, cMap3, init = fromFile, initFromFilePath = "$Proj256to512Filename$", needGradient = false)
|
||||
rn2_1 = ResNetNode3Inc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, rn2_1_Wproj, 2)
|
||||
rn2_2 = ResNetNode3(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_3 = ResNetNode3(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_4 = ResNetNode3(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
|
||||
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg, 2)
|
||||
rn2_2 = ResNetNode3A(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_3 = ResNetNode3A(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_4 = ResNetNode3A(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
# Group 3: six nodes on top of rn2_4.
rn3_1_Wproj = Parameter(cMap5, cMap4, init = fromFile, initFromFilePath = "$Proj512to1024Filename$", needGradient = false)
|
||||
rn3_1 = ResNetNode3Inc(rn2_4, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, rn3_1_Wproj, 2)
|
||||
rn3_2 = ResNetNode3(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_3 = ResNetNode3(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_4 = ResNetNode3(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_5 = ResNetNode3(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_6 = ResNetNode3(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
|
||||
rn3_1 = ResNetNode3BInc(rn2_4, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg, 2)
|
||||
rn3_2 = ResNetNode3A(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_3 = ResNetNode3A(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_4 = ResNetNode3A(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_5 = ResNetNode3A(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_6 = ResNetNode3A(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
# Group 4: three nodes on top of rn3_6.
rn4_1_Wproj = Parameter(cMap6, cMap5, init = fromFile, initFromFilePath = "$Proj1024to2048Filename$", needGradient = false)
|
||||
rn4_1 = ResNetNode3Inc(rn3_6, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, rn4_1_Wproj, 2)
|
||||
rn4_2 = ResNetNode3(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
|
||||
rn4_3 = ResNetNode3(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
|
||||
rn4_1 = ResNetNode3BInc(rn3_6, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg, 2)
|
||||
rn4_2 = ResNetNode3A(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
|
||||
rn4_3 = ResNetNode3A(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
# Global average pooling
|
||||
pool2W = 7
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
# Model-edit script: loads the model at $CurModel$, adds a top-5 error node and saves the
# result to $NewModel$.
# NOTE(review): the hunk header "-1,6 +0,0" indicates this file is deleted by the commit.
m1=LoadModel($CurModel$, format=cntk)
|
||||
SetDefaultModel(m1)
|
||||
|
||||
# Error-prediction node over labels and OutputNodes.z with Const(5), tagged as an Eval node.
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
|
||||
|
||||
SaveModel(m1, $NewModel$, format=cntk)
|
|
@ -0,0 +1,10 @@
|
|||
# Model-edit script: loads the model at $CurModel$, switches batch normalization to
# evaluation mode, adds a top-5 error node and saves the result to $NewModel$.
m1=LoadModel($CurModel$, format=cntk)
|
||||
SetDefaultModel(m1)
|
||||
|
||||
# Switch batch normalization to eval mode.
# NOTE(review): the property is set for the subtree of node CE, presumably the training
# criterion node defined in the NDL - confirm that every batch-normalization node is
# reachable from it.
|
||||
SetPropertyForSubTree(CE, batchNormEvalMode, true)
|
||||
|
||||
# Add top-5 error prediction node.
|
||||
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
|
||||
|
||||
SaveModel(m1, $NewModel$, format=cntk)
|
Загрузка…
Ссылка в новой задаче