Updated ResNet samples.
This commit is contained in:
Родитель
81ee9f2908
Коммит
949c30473b
|
@ -32,7 +32,9 @@ Train=[
|
|||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=128
|
||||
learningRatesPerMB=0.1*80:0.01*40:0.001
|
||||
# Note that learning rates are 10x higher than in the paper due to a different
|
||||
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
|
||||
learningRatesPerMB=1.0*80:0.1*40:0.01
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=160
|
||||
L2RegWeight=0.0001
|
||||
|
@ -43,7 +45,7 @@ Train=[
|
|||
distributedMBReading=true
|
||||
parallelizationStartEpoch=1
|
||||
DataParallelSGD=[
|
||||
gradientBits=1
|
||||
gradientBits=32
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -12,7 +12,8 @@ LocalMacros = [
|
|||
|
||||
convWScale = 7.07
|
||||
convBValue = 0
|
||||
fc1WScale = 12
|
||||
|
||||
fc1WScale = 0.4
|
||||
fc1BValue = 0
|
||||
|
||||
scValue = 1
|
||||
|
@ -24,8 +25,6 @@ LocalMacros = [
|
|||
|
||||
hStride1 = 1
|
||||
vStride1 = 1
|
||||
hStride2 = 2
|
||||
vStride2 = 2
|
||||
]
|
||||
|
||||
DNN=[
|
||||
|
@ -38,14 +37,16 @@ DNN=[
|
|||
rn1_3 = ResNetNode2(rn1_2, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap2 = 32
|
||||
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
|
||||
rn2_1 = ResNetNode2Inc(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
|
||||
#rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
|
||||
#rn2_1 = ResNetNode2Inc(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
|
||||
rn2_1 = ResNetNode2Inc2(rn1_3, cMap1, cMap2, 144, 288, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
|
||||
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap3 = 64
|
||||
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
|
||||
rn3_1 = ResNetNode2Inc(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
|
||||
#rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
|
||||
#rn3_1 = ResNetNode2Inc(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
|
||||
rn3_1 = ResNetNode2Inc2(rn2_3, cMap2, cMap3, 288, 576, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
|
||||
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
|
|
|
@ -32,9 +32,11 @@ Train=[
|
|||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=128
|
||||
learningRatesPerMB=0.1*80:0.01*40:0.001
|
||||
# Note that learning rates are 10x higher than in the paper due to a different
|
||||
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
|
||||
learningRatesPerMB=0.1*1:1.0*80:0.1*40:0.01
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=1
|
||||
maxEpochs=160
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
||||
|
@ -43,7 +45,7 @@ Train=[
|
|||
distributedMBReading=true
|
||||
parallelizationStartEpoch=1
|
||||
DataParallelSGD=[
|
||||
gradientBits=1
|
||||
gradientBits=32
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -12,7 +12,8 @@ LocalMacros = [
|
|||
|
||||
convWScale = 7.07
|
||||
convBValue = 0
|
||||
fc1WScale = 12
|
||||
|
||||
fc1WScale = 0.4
|
||||
fc1BValue = 0
|
||||
|
||||
scValue = 1
|
||||
|
@ -24,8 +25,6 @@ LocalMacros = [
|
|||
|
||||
hStride1 = 1
|
||||
vStride1 = 1
|
||||
hStride2 = 2
|
||||
vStride2 = 2
|
||||
]
|
||||
|
||||
DNN=[
|
||||
|
@ -53,8 +52,9 @@ DNN=[
|
|||
rn1_18= ResNetNode2(rn1_17, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap2 = 32
|
||||
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
|
||||
rn2_1 = ResNetNode2Inc(rn1_18, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
|
||||
#rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
|
||||
#rn2_1 = ResNetNode2Inc(rn1_18, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
|
||||
rn2_1 = ResNetNode2Inc2(rn1_18, cMap1, cMap2, 144, 288, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
|
||||
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn2_4 = ResNetNode2(rn2_3, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
@ -74,8 +74,9 @@ DNN=[
|
|||
rn2_18= ResNetNode2(rn2_17, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
||||
cMap3 = 64
|
||||
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
|
||||
rn3_1 = ResNetNode2Inc(rn2_18, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
|
||||
#rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
|
||||
#rn3_1 = ResNetNode2Inc(rn2_18, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
|
||||
rn3_1 = ResNetNode2Inc2(rn2_18, cMap2, cMap3, 288, 576, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
|
||||
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
rn3_4 = ResNetNode2(rn3_3, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
|
||||
|
|
|
@ -3,82 +3,70 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
|
|||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
p = Plus(c, b);
|
||||
y = RectifiedLinear(p);
|
||||
p = Plus(c, b)
|
||||
y = RectifiedLinear(p)
|
||||
}
|
||||
|
||||
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
|
||||
ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
|
||||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(outMap, 1, init = fixedValue, value = scValue)
|
||||
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
y = RectifiedLinear(bn);
|
||||
y = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
}
|
||||
|
||||
ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
|
||||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
c = ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
|
||||
}
|
||||
|
||||
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
|
||||
{
|
||||
c = ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
|
||||
y = RectifiedLinear(c)
|
||||
}
|
||||
|
||||
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue, expAvg)
|
||||
{
|
||||
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc1 = Parameter(outMap, 1, init = fixedValue, value = scValue)
|
||||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc2 = Parameter(outMap, 1, init = fixedValue, value = scValue)
|
||||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
p = Plus(bn2, inp)
|
||||
y2 = RectifiedLinear(p);
|
||||
# First convolution layer.
|
||||
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
|
||||
# Second convolution layer, no ReLU.
|
||||
c2 = ConvBNLayer(c1, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
|
||||
p = Plus(c2, inp)
|
||||
y = RectifiedLinear(p)
|
||||
}
|
||||
|
||||
ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, expAvg, Wproj)
|
||||
{
|
||||
# First convolution layer.
|
||||
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc1 = Parameter(outMap, 1, init = fixedValue, value = scValue)
|
||||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
# Second convolution layer.
|
||||
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
|
||||
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc2 = Parameter(outMap, 1, init = fixedValue, value = scValue)
|
||||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, expAvg)
|
||||
# Second convolution layer, no ReLU.
|
||||
c2 = ConvBNLayer(c1, outMap, wCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
|
||||
|
||||
# Projection convolution layer.
|
||||
#b_proj = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
#sc_proj = Parameter(outMap, 1, init = fixedValue, value = scValue)
|
||||
#m_proj = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
#isd_proj = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
c_proj = ConvBNLayerW(Wproj, inp, outMap, 1, 1, 2, 2, bValue, scValue, expAvg)
|
||||
#c_proj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
|
||||
|
||||
c_proj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
|
||||
#bn_proj = BatchNormalization(c_proj, sc_proj, b_proj, m_proj, isd_proj, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
|
||||
p = Plus(c2, c_proj)
|
||||
y = RectifiedLinear(p)
|
||||
}
|
||||
|
||||
ResNetNode2Inc2(inp, inMap, outMap, inWCount, wCount, kW, kH, wScale, w1Scale, bValue, scValue, expAvg)
|
||||
{
|
||||
pool = MaxPooling(inp, 1, 1, 2, 2, imageLayout = "cudnn")
|
||||
# First convolution layer.
|
||||
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, expAvg)
|
||||
# Second convolution layer, no ReLU.
|
||||
c2 = ConvBNLayer(c1, inMap, wCount, kW, kH, 1, 1, w1Scale, bValue, scValue, expAvg)
|
||||
c3 = ConvBNLayer(c1, inMap, wCount, kW, kH, 1, 1, w1Scale, bValue, scValue, expAvg)
|
||||
|
||||
#p = Plus(bn2, bn_proj)
|
||||
p = Plus(bn2, c_proj)
|
||||
y2 = RectifiedLinear(p);
|
||||
p = Plus(c2, pool)
|
||||
r = RowStack(p, c3)
|
||||
y = RectifiedLinear(r)
|
||||
}
|
||||
|
||||
DnnReLULayer(inDim, outDim, x, wScale, bValue)
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче