This commit is contained in:
Alexey Kamenev 2016-01-21 15:19:48 -08:00
Родитель 31a164602c
Коммит 08e4b993e0
9 изменённых файлов: 215 добавлений и 258 удалений

Просмотреть файл

@ -1,146 +1,95 @@
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc = Parameter(outMap, 1, init = fixedValue, value = scValue)
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y = RectifiedLinear(bn);
y = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
}
# Standard building block for ResNet.
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
p = Plus(bn2, inp)
y2 = RectifiedLinear(p);
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
c = ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
}
# Standard building block for ResNet with padding.
ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
y = RectifiedLinear(c)
}
# Standard building block for ResNet with identity shortcut (option A).
# Stacks two convolution + batch-normalization layers with stride 1 (ReLU after the
# first one only), adds the block input back to the result and applies ReLU to the sum.
#   inp       - input node; it is also the identity shortcut added to the second layer's output.
#   outMap    - output feature map count handed to both convolution layers.
#   inWCount  - weight count per output map handed to both convolution layers.
#   kW, kH    - kernel width and height.
#   wScale    - Gaussian init scale for the convolution weights.
#   bValue    - initial value of the batch-normalization bias.
#   scValue   - initial value of the batch-normalization scale.
#   expAvg    - forwarded to the ConvBN layers, which pass it on as expAvgFactor.
# expAvg was used in the body but missing from the parameter list, although every
# caller already passes it as the ninth argument.
ResNetNode2A(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue, expAvg)
{
    # First convolution layer.
    c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
    # Second convolution layer, no ReLU.
    c2 = ConvBNLayer(c1, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
    # Identity shortcut.
    p = Plus(c2, inp)
    y = RectifiedLinear(p)
}
# Standard building block for ResNet with padding (option A).
ResNetNode2AInc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, expAvg, Wproj)
{
# First convolution layer.
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, expAvg)
# Second convolution layer, no ReLU.
c2 = ConvBNLayer(c1, outMap, wCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
# Projection convolution layer.
c_proj = ConvBNLayerW(Wproj, inp, outMap, 1, 1, 2, 2, bValue, scValue, expAvg)
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
p = Plus(bn2, cproj)
y2 = RectifiedLinear(p);
p = Plus(c2, c_proj)
y2 = RectifiedLinear(p)
}
# Bottleneck building block for ResNet.
ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
ResNetNode3A(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, expAvg)
{
# 1x1 reducing convolution.
W1 = Parameter(convMap, inMap, init = Gaussian, initValueScale = wScale)
b1 = Parameter(convMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(convMap, 1, init = fixedValue, value = scValue)
m1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
c1 = ConvBNReLULayer(inp, convMap, inMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
# 3x3 convolution.
W2 = Parameter(convMap, convWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(convMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(convMap, 1, init = fixedValue, value = scValue)
m2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, expAvg)
# 1x1 expanding convolution, no ReLU.
c3 = ConvBNLayer(c2, outMap, convMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
c2 = Convolution(W2, y1, 3, 3, convMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y2 = RectifiedLinear(bn2);
# 1x1 expanding convolution.
W3 = Parameter(outMap, convMap, init = Gaussian, initValueScale = wScale)
b3 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc3 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true, imageLayout = "cudnn")
p = Plus(bn3, inp)
y3 = RectifiedLinear(p);
p = Plus(c3, inp)
y = RectifiedLinear(p)
}
ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, wProj, projStride)
ResNetNode3AInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, expAvg, wProj, projStride)
{
# 1x1 reducing convolution.
W1 = Parameter(convMap, inMap, init = Gaussian, initValueScale = wScale)
b1 = Parameter(convMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(convMap, 1, init = fixedValue, value = scValue)
m1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
c1 = ConvBNReLULayer(inp, convMap, inMap, 1, 1, projStride, projStride, wScale, bValue, scValue, expAvg)
# 3x3 convolution.
W2 = Parameter(convMap, convWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(convMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(convMap, 1, init = fixedValue, value = scValue)
m2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, expAvg)
# 1x1 expanding convolution, no ReLU.
c3 = ConvBNLayer(c2, outMap, convMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
# Input-to-output mapping convolution.
# ConvBNLayerW receives the weight node directly and declares no wScale parameter,
# so only bValue, scValue and expAvg follow the strides.
c_proj = ConvBNLayerW(wProj, inp, outMap, 1, 1, projStride, projStride, bValue, scValue, expAvg)
c2 = Convolution(W2, y1, 3, 3, convMap, projStride, projStride, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y2 = RectifiedLinear(bn2);
p = Plus(c3, c_proj)
y = RectifiedLinear(p)
}
ResNetNode3BInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, expAvg, projStride)
{
# 1x1 reducing convolution.
c1 = ConvBNReLULayer(inp, convMap, inMap, 1, 1, projStride, projStride, wScale, bValue, scValue, expAvg)
# 3x3 convolution.
c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, expAvg)
# 1x1 expanding convolution, no ReLU.
c3 = ConvBNLayer(c2, outMap, convMap, 1, 1, 1, 1, wScale, bValue, scValue, expAvg)
# Input-to-output mapping convolution.
c_proj = ConvBNLayer(inp, outMap, inMap, 1, 1, projStride, projStride, wScale, bValue, scValue, expAvg)
# 1x1 expanding convolution.
W3 = Parameter(outMap, convMap, init = Gaussian, initValueScale = wScale)
b3 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc3 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true, imageLayout = "cudnn")
# Increasing input dimension convolution
cProj = Convolution(wProj, inp, 1, 1, outMap, projStride, projStride, zeroPadding = false, imageLayout = "cudnn")
p = Plus(bn3, cProj)
y3 = RectifiedLinear(p);
p = Plus(c3, c_proj)
y = RectifiedLinear(p)
}
DnnLayer(hiddenDim, labelDim, x, wScale, bValue)

Просмотреть файл

@ -10,12 +10,13 @@ ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
command=Train:AddTop5Eval:Test
command=Train:CreateEval:Test
parallelTrain=false
stderr=$OutputDir$/ResNet_152
traceLevel=1
numMBsToShowResult=500
Proj64to256Filename = $ConfigDir$/64to256.txt
Proj256to512Filename = $ConfigDir$/256to512.txt
@ -32,10 +33,12 @@ Train=[
SGD=[
epochSize=0
minibatchSize=32
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
minibatchSize=256
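# Note that learning rates are 10x higher than in the paper because CNTK uses a different
# momentum update rule: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t},
# so with momentum = 0.9 the gradient term is scaled by an extra factor of (1 - momentum) = 0.1.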
learningRatesPerMB=1.0*35:0.1*35:0.01
momentumPerMB=0.9
maxEpochs=120
maxEpochs=125
gradUpdateType=None
L2RegWeight=0.0001
dropoutRate=0
@ -45,11 +48,9 @@ Train=[
distributedMBReading=true
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=1
gradientBits=32
]
]
numMBsToShowResult=100
]
reader=[
@ -88,16 +89,16 @@ Train=[
]
]
AddTop5Eval=[
CreateEval=[
action=edit
CurModel=$ModelDir$/ResNet_152
NewModel=$ModelDir$/ResNet_152.Top5
editPath=$ConfigDir$/add_top5_layer.mel
NewModel=$ModelDir$/ResNet_152.Eval
editPath=$ConfigDir$/create_eval_model.mel
]
Test=[
action=test
modelPath=$ModelDir$/ResNet_152.Top5
modelPath=$ModelDir$/ResNet_152.Eval
# Set minibatch size for testing.
minibatchSize=32

Просмотреть файл

@ -17,18 +17,16 @@ ndlMacros = [
hs = 1
vs = 1
# Pooling settings.
poolW = 2
poolH = 2
poolhs = 2
poolvs = 2
# Initial parameter values.
convWScale = 7.07
convBValue = 0
fcWScale = 2.26
fcBValue = 0
scValue = 1
fcWScale = 3.0
fcBValue = 1
expAvg = 1
]
DNN=[
@ -39,7 +37,8 @@ DNN=[
cMap5 = 1024
cMap6 = 2048
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
conv1WScale = 0.6
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, conv1WScale, convBValue, scValue, expAvg)
# Max pooling
pool1W = 2
pool1H = 2
@ -47,63 +46,59 @@ DNN=[
pool1vs = 2
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
rn1_1_Wproj = Parameter(cMap3, cMap1, init = fromFile, initFromFilePath = "$Proj64to256Filename$", needGradient = false)
rn1_1 = ResNetNode3Inc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, rn1_1_Wproj, 1)
rn1_2 = ResNetNode3(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
rn1_3 = ResNetNode3(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg, 1)
rn1_2 = ResNetNode3A(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
rn1_3 = ResNetNode3A(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
rn2_1_Wproj = Parameter(cMap4, cMap3, init = fromFile, initFromFilePath = "$Proj256to512Filename$", needGradient = false)
rn2_1 = ResNetNode3Inc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, rn2_1_Wproj, 2)
rn2_2 = ResNetNode3(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_3 = ResNetNode3(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_4 = ResNetNode3(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_5 = ResNetNode3(rn2_4, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_6 = ResNetNode3(rn2_5, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_7 = ResNetNode3(rn2_6, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_8 = ResNetNode3(rn2_7, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg, 2)
rn2_2 = ResNetNode3A(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_3 = ResNetNode3A(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_4 = ResNetNode3A(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_5 = ResNetNode3A(rn2_4, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_6 = ResNetNode3A(rn2_5, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_7 = ResNetNode3A(rn2_6, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_8 = ResNetNode3A(rn2_7, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn3_1_Wproj = Parameter(cMap5, cMap4, init = fromFile, initFromFilePath = "$Proj512to1024Filename$", needGradient = false)
rn3_1 = ResNetNode3Inc(rn2_8, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, rn3_1_Wproj, 2)
rn3_2 = ResNetNode3(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_3 = ResNetNode3(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_4 = ResNetNode3(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_5 = ResNetNode3(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_6 = ResNetNode3(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_7 = ResNetNode3(rn3_6, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_8 = ResNetNode3(rn3_7, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_9 = ResNetNode3(rn3_8, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_10= ResNetNode3(rn3_9, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_11= ResNetNode3(rn3_10, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_12= ResNetNode3(rn3_11, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_13= ResNetNode3(rn3_12, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_14= ResNetNode3(rn3_13, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_15= ResNetNode3(rn3_14, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_16= ResNetNode3(rn3_15, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_17= ResNetNode3(rn3_16, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_18= ResNetNode3(rn3_17, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_19= ResNetNode3(rn3_18, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_20= ResNetNode3(rn3_19, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_21= ResNetNode3(rn3_20, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_22= ResNetNode3(rn3_21, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_23= ResNetNode3(rn3_22, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_24= ResNetNode3(rn3_23, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_25= ResNetNode3(rn3_24, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_26= ResNetNode3(rn3_25, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_27= ResNetNode3(rn3_26, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_28= ResNetNode3(rn3_27, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_29= ResNetNode3(rn3_28, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_30= ResNetNode3(rn3_29, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_31= ResNetNode3(rn3_30, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_32= ResNetNode3(rn3_31, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_33= ResNetNode3(rn3_32, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_34= ResNetNode3(rn3_33, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_35= ResNetNode3(rn3_34, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_36= ResNetNode3(rn3_35, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_1 = ResNetNode3BInc(rn2_8, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg, 2)
rn3_2 = ResNetNode3A(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_3 = ResNetNode3A(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_4 = ResNetNode3A(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_5 = ResNetNode3A(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_6 = ResNetNode3A(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_7 = ResNetNode3A(rn3_6, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_8 = ResNetNode3A(rn3_7, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_9 = ResNetNode3A(rn3_8, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_10= ResNetNode3A(rn3_9, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_11= ResNetNode3A(rn3_10, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_12= ResNetNode3A(rn3_11, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_13= ResNetNode3A(rn3_12, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_14= ResNetNode3A(rn3_13, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_15= ResNetNode3A(rn3_14, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_16= ResNetNode3A(rn3_15, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_17= ResNetNode3A(rn3_16, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_18= ResNetNode3A(rn3_17, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_19= ResNetNode3A(rn3_18, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_20= ResNetNode3A(rn3_19, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_21= ResNetNode3A(rn3_20, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_22= ResNetNode3A(rn3_21, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_23= ResNetNode3A(rn3_22, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_24= ResNetNode3A(rn3_23, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_25= ResNetNode3A(rn3_24, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_26= ResNetNode3A(rn3_25, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_27= ResNetNode3A(rn3_26, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_28= ResNetNode3A(rn3_27, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_29= ResNetNode3A(rn3_28, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_30= ResNetNode3A(rn3_29, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_31= ResNetNode3A(rn3_30, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_32= ResNetNode3A(rn3_31, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_33= ResNetNode3A(rn3_32, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_34= ResNetNode3A(rn3_33, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_35= ResNetNode3A(rn3_34, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_36= ResNetNode3A(rn3_35, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn4_1_Wproj = Parameter(cMap6, cMap5, init = fromFile, initFromFilePath = "$Proj1024to2048Filename$", needGradient = false)
rn4_1 = ResNetNode3Inc(rn3_36, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, rn4_1_Wproj, 2)
rn4_2 = ResNetNode3(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
rn4_3 = ResNetNode3(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
rn4_1 = ResNetNode3BInc(rn3_36, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg, 2)
rn4_2 = ResNetNode3A(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
rn4_3 = ResNetNode3A(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
# Global average pooling
pool2W = 7

Просмотреть файл

@ -10,12 +10,13 @@ ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
command=Train:AddTop5Eval:Test
command=Train:CreateEval:Test
parallelTrain=false
stderr=$OutputDir$/ResNet_34
traceLevel=1
numMBsToShowResult=500
Proj64to128Filename = $ConfigDir$/64to128.txt
Proj128to256Filename = $ConfigDir$/128to256.txt
@ -31,10 +32,12 @@ Train=[
SGD=[
epochSize=0
minibatchSize=64
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
minibatchSize=256
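# Note that learning rates are 10x higher than in the paper because CNTK uses a different
# momentum update rule: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t},
# so with momentum = 0.9 the gradient term is scaled by an extra factor of (1 - momentum) = 0.1.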
learningRatesPerMB=1.0*35:0.1*35:0.01
momentumPerMB=0.9
maxEpochs=120
maxEpochs=125
gradUpdateType=None
L2RegWeight=0.0001
dropoutRate=0
@ -44,11 +47,9 @@ Train=[
distributedMBReading=true
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=1
gradientBits=32
]
]
numMBsToShowResult=100
]
reader=[
@ -87,16 +88,16 @@ Train=[
]
]
AddTop5Eval=[
CreateEval=[
action=edit
CurModel=$ModelDir$/ResNet_34
NewModel=$ModelDir$/ResNet_34.Top5
editPath=$ConfigDir$/add_top5_layer.mel
NewModel=$ModelDir$/ResNet_34.Eval
editPath=$ConfigDir$/create_eval_model.mel
]
Test=[
action=test
modelPath=$ModelDir$/ResNet_34.Top5
modelPath=$ModelDir$/ResNet_34.Eval
# Set minibatch size for testing.
minibatchSize=64

Просмотреть файл

@ -20,14 +20,19 @@ ndlMacros = [
# Initial parameter values.
convWScale = 7.07
convBValue = 0
fcWScale = 1.13
fcBValue = 0
scValue = 1
fcWScale = 3.0
fcBValue = 1
expAvg = 1
]
DNN=[
conv1WScale = 0.6
cMap1 = 64
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, conv1WScale, convBValue, scValue, expAvg)
# Max pooling
pool1W = 2
pool1H = 2
@ -35,31 +40,31 @@ DNN=[
pool1vs = 2
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
rn1_1 = ResNetNode2(pool1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
rn1_2 = ResNetNode2(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
rn1_3 = ResNetNode2(rn1_2, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
rn1_1 = ResNetNode2A(pool1, cMap1, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
rn1_2 = ResNetNode2A(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
rn1_3 = ResNetNode2A(rn1_2, cMap1, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap2 = 128
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj64to128Filename$", needGradient = false)
rn2_1 = ResNetNode2Conv(rn1_3, cMap2, 576, 1152, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
rn2_2 = ResNetNode2(rn2_1, cMap2, 1152, kW, kH, convWScale, convBValue, scValue)
rn2_3 = ResNetNode2(rn2_2, cMap2, 1152, kW, kH, convWScale, convBValue, scValue)
rn2_4 = ResNetNode2(rn2_3, cMap2, 1152, kW, kH, convWScale, convBValue, scValue)
rn2_1 = ResNetNode2AInc(rn1_3, cMap2, 576, 1152, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
rn2_2 = ResNetNode2A(rn2_1, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, expAvg)
rn2_3 = ResNetNode2A(rn2_2, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, expAvg)
rn2_4 = ResNetNode2A(rn2_3, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap3 = 256
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj128to256Filename$", needGradient = false)
rn3_1 = ResNetNode2Conv(rn2_4, cMap3, 1152, 2304, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
rn3_2 = ResNetNode2(rn3_1, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
rn3_3 = ResNetNode2(rn3_2, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
rn3_4 = ResNetNode2(rn3_3, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
rn3_5 = ResNetNode2(rn3_4, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
rn3_6 = ResNetNode2(rn3_5, cMap3, 2304, kW, kH, convWScale, convBValue, scValue)
rn3_1 = ResNetNode2AInc(rn2_4, cMap3, 1152, 2304, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
rn3_2 = ResNetNode2A(rn3_1, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_3 = ResNetNode2A(rn3_2, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_4 = ResNetNode2A(rn3_3, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_5 = ResNetNode2A(rn3_4, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_6 = ResNetNode2A(rn3_5, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap4 = 512
rn4_1_Wproj = Parameter(cMap4, cMap3, init = fromFile, initFromFilePath = "$Proj256to512Filename$", needGradient = false)
rn4_1 = ResNetNode2Conv(rn3_6, cMap4, 2304, 4608, kW, kH, convWScale, convBValue, scValue, rn4_1_Wproj)
rn4_2 = ResNetNode2(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
rn4_3 = ResNetNode2(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
rn4_1 = ResNetNode2AInc(rn3_6, cMap4, 2304, 4608, kW, kH, convWScale, convBValue, scValue, expAvg, rn4_1_Wproj)
rn4_2 = ResNetNode2A(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, expAvg)
rn4_3 = ResNetNode2A(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, expAvg)
# Global average pooling
pool2W = 7

Просмотреть файл

@ -10,12 +10,13 @@ ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
command=Train:AddTop5Eval:Test
command=Train:CreateEval:Test
parallelTrain=false
stderr=$OutputDir$/ResNet_50
traceLevel=1
numMBsToShowResult=500
Proj64to256Filename = $ConfigDir$/64to256.txt
Proj256to512Filename = $ConfigDir$/256to512.txt
@ -32,10 +33,12 @@ Train=[
SGD=[
epochSize=0
minibatchSize=32
learningRatesPerMB=0.1*30:0.03*30:0.01*25:0.003*25:0.001
minibatchSize=256
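# Note that learning rates are 10x higher than in the paper because CNTK uses a different
# momentum update rule: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t},
# so with momentum = 0.9 the gradient term is scaled by an extra factor of (1 - momentum) = 0.1.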
learningRatesPerMB=1.0*35:0.1*35:0.01
momentumPerMB=0.9
maxEpochs=120
maxEpochs=125
gradUpdateType=None
L2RegWeight=0.0001
dropoutRate=0
@ -45,11 +48,9 @@ Train=[
distributedMBReading=true
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=1
gradientBits=32
]
]
numMBsToShowResult=100
]
reader=[
@ -88,16 +89,16 @@ Train=[
]
]
AddTop5Eval=[
CreateEval=[
action=edit
CurModel=$ModelDir$/ResNet_50
NewModel=$ModelDir$/ResNet_50.Top5
editPath=$ConfigDir$/add_top5_layer.mel
NewModel=$ModelDir$/ResNet_50.Eval
editPath=$ConfigDir$/create_eval_model.mel
]
Test=[
action=test
modelPath=$ModelDir$/ResNet_50.Top5
modelPath=$ModelDir$/ResNet_50.Eval
# Set minibatch size for testing.
minibatchSize=32

Просмотреть файл

@ -20,9 +20,13 @@ ndlMacros = [
# Initial parameter values.
convWScale = 7.07
convBValue = 0
fcWScale = 2.26
fcBValue = 0
scValue = 1
fcWScale = 3.0
fcBValue = 1
expAvg = 1
]
DNN=[
@ -33,7 +37,8 @@ DNN=[
cMap5 = 1024
cMap6 = 2048
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
conv1WScale = 0.6
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, conv1WScale, convBValue, scValue, expAvg)
# Max pooling
pool1W = 2
pool1H = 2
@ -41,29 +46,25 @@ DNN=[
pool1vs = 2
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
rn1_1_Wproj = Parameter(cMap3, cMap1, init = fromFile, initFromFilePath = "$Proj64to256Filename$", needGradient = false)
rn1_1 = ResNetNode3Inc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, rn1_1_Wproj, 1)
rn1_2 = ResNetNode3(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
rn1_3 = ResNetNode3(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue)
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg, 1)
rn1_2 = ResNetNode3A(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
rn1_3 = ResNetNode3A(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, expAvg)
rn2_1_Wproj = Parameter(cMap4, cMap3, init = fromFile, initFromFilePath = "$Proj256to512Filename$", needGradient = false)
rn2_1 = ResNetNode3Inc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, rn2_1_Wproj, 2)
rn2_2 = ResNetNode3(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_3 = ResNetNode3(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_4 = ResNetNode3(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue)
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg, 2)
rn2_2 = ResNetNode3A(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_3 = ResNetNode3A(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn2_4 = ResNetNode3A(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, expAvg)
rn3_1_Wproj = Parameter(cMap5, cMap4, init = fromFile, initFromFilePath = "$Proj512to1024Filename$", needGradient = false)
rn3_1 = ResNetNode3Inc(rn2_4, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, rn3_1_Wproj, 2)
rn3_2 = ResNetNode3(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_3 = ResNetNode3(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_4 = ResNetNode3(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_5 = ResNetNode3(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_6 = ResNetNode3(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue)
rn3_1 = ResNetNode3BInc(rn2_4, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg, 2)
rn3_2 = ResNetNode3A(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_3 = ResNetNode3A(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_4 = ResNetNode3A(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_5 = ResNetNode3A(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn3_6 = ResNetNode3A(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, expAvg)
rn4_1_Wproj = Parameter(cMap6, cMap5, init = fromFile, initFromFilePath = "$Proj1024to2048Filename$", needGradient = false)
rn4_1 = ResNetNode3Inc(rn3_6, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, rn4_1_Wproj, 2)
rn4_2 = ResNetNode3(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
rn4_3 = ResNetNode3(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
rn4_1 = ResNetNode3BInc(rn3_6, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg, 2)
rn4_2 = ResNetNode3A(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
rn4_3 = ResNetNode3A(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, expAvg)
# Global average pooling
pool2W = 7

Просмотреть файл

@ -1,6 +0,0 @@
m1=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m1)
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
SaveModel(m1, $NewModel$, format=cntk)

Просмотреть файл

@ -0,0 +1,10 @@
# Model-editing script that derives an evaluation model from a trained network.
# $CurModel$ and $NewModel$ are substituted from the config section that runs this script.
m1=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m1)
# Switch batch normalization to eval mode.
# NOTE(review): CE is presumably the training criterion node from the network description - confirm.
SetPropertyForSubTree(CE, batchNormEvalMode, true)
# Add top-5 error prediction node.
# Const(5) is passed as the third input and the new node is tagged Eval.
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
# Write the edited model to $NewModel$ in cntk format.
SaveModel(m1, $NewModel$, format=cntk)