This commit is contained in:
Alexey Kamenev 2016-01-14 10:49:33 -08:00
Parent 81ee9f2908
Commit 949c30473b
6 changed files with 6481 additions and 73 deletions

View File

@@ -32,7 +32,9 @@ Train=[
SGD=[
epochSize=0
minibatchSize=128
learningRatesPerMB=0.1*80:0.01*40:0.001
# Note that learning rates are 10x more than in the paper due to a different
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
learningRatesPerMB=1.0*80:0.1*40:0.01
momentumPerMB=0.9
maxEpochs=160
L2RegWeight=0.0001
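
Note on the hunk above: the comment is why the schedule changes from 0.1*80:0.01*40:0.001 to 1.0*80:0.1*40:0.01. A minimal plain-Python check of that claim, assuming momentum = 0.9 and an arbitrary gradient value; this is an illustration, not CNTK code:

# Illustrative only, not part of the CNTK configuration above.
momentum = 0.9
g = 1.0                        # an arbitrary gradient value
# Update rule assumed in the paper: v <- momentum * v + lr * g
lr_paper = 0.1
step_paper = lr_paper * g
# CNTK rule quoted in the comment: v <- momentum * v + lr * (1 - momentum) * g
lr_cntk = 1.0                  # 10x larger, as the comment says
step_cntk = lr_cntk * (1.0 - momentum) * g
print(step_paper, step_cntk)   # both 0.1: the (1 - momentum) factor absorbs the 10x
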
@@ -43,7 +45,7 @@ Train=[
distributedMBReading=true
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=1
gradientBits=32
]
]
]
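
Note on the hunk above: gradientBits=32 matches the full float width, which in effect disables CNTK's 1-bit SGD gradient quantization and exchanges full-precision gradients in data-parallel training. For context, a rough NumPy sketch of the general 1-bit-with-error-feedback idea; the function and names are illustrative, not CNTK's actual implementation:

import numpy as np

def one_bit_quantize(grad, residual):
    # Illustrative sketch of 1-bit gradient quantization with error feedback,
    # not CNTK's implementation.
    g = grad + residual                  # add the error carried over from the last step
    scale = np.mean(np.abs(g))           # one shared magnitude per tensor
    signs = np.where(g >= 0, 1.0, -1.0)  # the single bit kept per element
    quantized = signs * scale            # what would actually be exchanged
    new_residual = g - quantized         # remember what was lost for the next step
    return quantized, new_residual

grad = np.array([0.3, -0.1, 0.05, -0.4])
residual = np.zeros_like(grad)
q, residual = one_bit_quantize(grad, residual)
print(q, residual)
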

View File

@@ -12,7 +12,8 @@ LocalMacros = [
convWScale = 7.07
convBValue = 0
fc1WScale = 12
fc1WScale = 0.4
fc1BValue = 0
scValue = 1
@@ -24,8 +25,6 @@ LocalMacros = [
hStride1 = 1
vStride1 = 1
hStride2 = 2
vStride2 = 2
]
DNN=[
@@ -38,14 +37,16 @@ DNN=[
rn1_3 = ResNetNode2(rn1_2, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap2 = 32
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
rn2_1 = ResNetNode2Inc(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
#rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
#rn2_1 = ResNetNode2Inc(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
rn2_1 = ResNetNode2Inc2(rn1_3, cMap1, cMap2, 144, 288, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap3 = 64
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
rn3_1 = ResNetNode2Inc(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
#rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
#rn3_1 = ResNetNode2Inc(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
rn3_1 = ResNetNode2Inc2(rn2_3, cMap2, cMap3, 288, 576, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)

View File

@@ -32,9 +32,11 @@ Train=[
SGD=[
epochSize=0
minibatchSize=128
learningRatesPerMB=0.1*80:0.01*40:0.001
# Note that learning rates are 10x more than in the paper due to a different
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
learningRatesPerMB=0.1*1:1.0*80:0.1*40:0.01
momentumPerMB=0.9
maxEpochs=1
maxEpochs=160
L2RegWeight=0.0001
dropoutRate=0
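
Note on the hunk above: the extra leading 0.1*1 entry gives one warm-up epoch at a low rate before switching to 1.0. learningRatesPerMB uses CNTK's rate*epochs:rate*epochs:...:rate schedule syntax, with the last rate repeated for all remaining epochs; a small plain-Python helper (not CNTK code) that expands such a schedule under that assumption:

def expand_lr_schedule(spec, max_epochs):
    # Expand a "rate*count:rate*count:...:rate" schedule into a per-epoch list;
    # the final rate repeats for any remaining epochs. Illustrative only.
    rates = []
    for part in spec.split(":"):
        if "*" in part:
            rate, count = part.split("*")
            rates.extend([float(rate)] * int(count))
        else:
            rates.append(float(part))
    rates += [rates[-1]] * max(0, max_epochs - len(rates))
    return rates[:max_epochs]

schedule = expand_lr_schedule("0.1*1:1.0*80:0.1*40:0.01", 160)
print(schedule[0], schedule[1], schedule[81], schedule[121])
# 0.1 for epoch 1, 1.0 for epochs 2-81, 0.1 for epochs 82-121, then 0.01
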
@@ -43,7 +45,7 @@ Train=[
distributedMBReading=true
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=1
gradientBits=32
]
]
]

View File

@@ -12,7 +12,8 @@ LocalMacros = [
convWScale = 7.07
convBValue = 0
fc1WScale = 12
fc1WScale = 0.4
fc1BValue = 0
scValue = 1
@@ -24,8 +25,6 @@ LocalMacros = [
hStride1 = 1
vStride1 = 1
hStride2 = 2
vStride2 = 2
]
DNN=[
@@ -53,8 +52,9 @@ DNN=[
rn1_18= ResNetNode2(rn1_17, cMap1, 144, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap2 = 32
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
rn2_1 = ResNetNode2Inc(rn1_18, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
#rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
#rn2_1 = ResNetNode2Inc(rn1_18, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, expAvg, rn2_1_Wproj)
rn2_1 = ResNetNode2Inc2(rn1_18, cMap1, cMap2, 144, 288, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
rn2_4 = ResNetNode2(rn2_3, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
@@ -74,8 +74,9 @@ DNN=[
rn2_18= ResNetNode2(rn2_17, cMap2, 288, kW, kH, convWScale, convBValue, scValue, expAvg)
cMap3 = 64
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
rn3_1 = ResNetNode2Inc(rn2_18, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
#rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
#rn3_1 = ResNetNode2Inc(rn2_18, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, expAvg, rn3_1_Wproj)
rn3_1 = ResNetNode2Inc2(rn2_18, cMap2, cMap3, 288, 576, kW, kH, convWScale, 3.5, convBValue, scValue, expAvg)
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)
rn3_4 = ResNetNode2(rn3_3, cMap3, 576, kW, kH, convWScale, convBValue, scValue, expAvg)

View File

@@ -3,82 +3,70 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
p = Plus(c, b);
y = RectifiedLinear(p);
p = Plus(c, b)
y = RectifiedLinear(p)
}
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc = Parameter(outMap, 1, init = fixedValue, value = scValue)
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
y = RectifiedLinear(bn);
y = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
}
ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
c = ConvBNLayerW(W, inp, outMap, kW, kH, hStride, vStride, bValue, scValue, expAvg)
}
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
{
c = ConvBNLayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue, expAvg)
y = RectifiedLinear(c)
}
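
Note on the macros above: they now layer on top of each other. ConvBNLayerW runs convolution plus batch normalization with a weight supplied by the caller (so a fixed projection weight such as Wproj can reuse it), ConvBNLayer allocates its own weight and delegates to ConvBNLayerW, and ConvBNReLULayer appends the ReLU. A loose PyTorch-flavoured sketch of that layering; the torch dependency and all names here are my own and purely illustrative, the NDL macros above are the actual definitions:

import torch
import torch.nn as nn
import torch.nn.functional as F

def conv_bn_w(w, x, bn, stride):
    # ConvBNLayerW analogue: convolution with an externally supplied weight w,
    # followed by batch normalization. Illustrative sketch only.
    c = F.conv2d(x, w, stride=stride, padding=w.shape[-1] // 2)
    return bn(c)

class ConvBN(nn.Module):
    # ConvBNLayer analogue: owns its weight, delegates to conv_bn_w.
    def __init__(self, cin, cout, k, stride):
        super().__init__()
        self.w = nn.Parameter(torch.randn(cout, cin, k, k) * 0.01)
        self.bn = nn.BatchNorm2d(cout)
        self.stride = stride
    def forward(self, x):
        return conv_bn_w(self.w, x, self.bn, self.stride)

class ConvBNReLU(ConvBN):
    # ConvBNReLULayer analogue: the same block with a ReLU appended.
    def forward(self, x):
        return F.relu(super().forward(x))
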
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue, expAvg)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
p = Plus(bn2, inp)
y2 = RectifiedLinear(p);
# First convolution layer.
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
# Second convolution layer, no ReLU.
c2 = ConvBNLayer(c1, outMap, inWCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
p = Plus(c2, inp)
y = RectifiedLinear(p)
}
ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, expAvg, Wproj)
{
# First convolution layer.
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
# Second convolution layer.
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = fixedValue, value = scValue)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, expAvg)
# Second convolution layer, no ReLU.
c2 = ConvBNLayer(c1, outMap, wCount, kW, kH, 1, 1, wScale, bValue, scValue, expAvg)
# Projection convolution layer.
#b_proj = Parameter(outMap, 1, init = fixedValue, value = bValue)
#sc_proj = Parameter(outMap, 1, init = fixedValue, value = scValue)
#m_proj = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
#isd_proj = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c_proj = ConvBNLayerW(Wproj, inp, outMap, 1, 1, 2, 2, bValue, scValue, expAvg)
#c_proj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
c_proj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
#bn_proj = BatchNormalization(c_proj, sc_proj, b_proj, m_proj, isd_proj, eval = false, spatial = true, expAvgFactor = expAvg, imageLayout = "cudnn")
p = Plus(c2, c_proj)
y = RectifiedLinear(p)
}
ResNetNode2Inc2(inp, inMap, outMap, inWCount, wCount, kW, kH, wScale, w1Scale, bValue, scValue, expAvg)
{
pool = MaxPooling(inp, 1, 1, 2, 2, imageLayout = "cudnn")
# First convolution layer.
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, expAvg)
# Second convolution layer, no ReLU.
c2 = ConvBNLayer(c1, inMap, wCount, kW, kH, 1, 1, w1Scale, bValue, scValue, expAvg)
c3 = ConvBNLayer(c1, inMap, wCount, kW, kH, 1, 1, w1Scale, bValue, scValue, expAvg)
#p = Plus(bn2, bn_proj)
p = Plus(bn2, c_proj)
y2 = RectifiedLinear(p);
p = Plus(c2, pool)
r = RowStack(p, c3)
y = RectifiedLinear(r)
}
DnnReLULayer(inDim, outDim, x, wScale, bValue)
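
Note on ResNetNode2Inc2 above (used by the rn2_1/rn3_1 nodes in the model files): it replaces the learned 1x1 projection shortcut of ResNetNode2Inc. The input is subsampled with a 1x1, stride-2 MaxPooling, added to one stride-reduced convolution branch, and the sum is stacked via RowStack with a second branch, which in effect doubles the number of feature maps without a projection matrix. A self-contained PyTorch-style sketch of that structure; the helper names, the torch dependency, and the reading of RowStack as channel concatenation are my own assumptions:

import torch
import torch.nn as nn

def conv_bn(cin, cout, stride):
    # 3x3 convolution + batch normalization, no ReLU (cf. ConvBNLayer). Sketch only.
    return nn.Sequential(nn.Conv2d(cin, cout, 3, stride=stride, padding=1, bias=False),
                         nn.BatchNorm2d(cout))

class ResNetNodeInc2Sketch(nn.Module):
    # Illustrative re-expression of ResNetNode2Inc2, not the CNTK implementation.
    def __init__(self, in_maps, out_maps):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=1, stride=2)                    # MaxPooling(inp, 1, 1, 2, 2)
        self.c1 = nn.Sequential(conv_bn(in_maps, out_maps, 2), nn.ReLU())    # first conv, stride 2, with ReLU
        self.c2 = conv_bn(out_maps, in_maps, 1)                              # branch added to the pooled input
        self.c3 = conv_bn(out_maps, in_maps, 1)                              # branch supplying the extra maps
        self.relu = nn.ReLU()

    def forward(self, x):
        pooled = self.pool(x)                    # identity path, spatially subsampled
        c1 = self.c1(x)
        p = self.c2(c1) + pooled                 # Plus(c2, pool)
        r = torch.cat([p, self.c3(c1)], dim=1)   # RowStack(p, c3): in_maps + in_maps = out_maps channels
        return self.relu(r)

block = ResNetNodeInc2Sketch(16, 32)
print(block(torch.randn(1, 16, 32, 32)).shape)   # -> (1, 32, 16, 16)
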

File diff not shown because it is too large.