This commit is contained in:
Frank Seide 2015-12-23 15:29:34 -08:00
Parent 28ffb10895 fd9e7923c3
Commit 44e7343f71
15 changed files with 425 additions and 179 deletions

View file

@@ -0,0 +1,12 @@
import sys
import numpy as np

def writeConvWeights(fname, cmapIn):
    cmapOut = 2 * cmapIn
    w = np.eye(cmapOut, cmapIn)
    np.savetxt(fname, w, fmt = '%d', delimiter = ' ')

if __name__ == "__main__":
    cmapIn = int(sys.argv[1])
    fname = sys.argv[2]
    writeConvWeights(fname, cmapIn)
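Presumably this script generated the 16to32.txt and 32to64.txt projection files added below; the first argument is the input map count, the second the output path. The script's filename and these invocations are assumptions, not recorded in the commit:

python writeConvWeights.py 16 16to32.txt    # 32x16: identity over the first 16 rows, zeros below
python writeConvWeights.py 32 32to64.txt    # 64x32: identity over the first 32 rows, zeros below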

View file

@@ -17,6 +17,9 @@ command=Train:AddBNEval:Test
stderr=$OutputDir$/03_ResNet
traceLevel=1
Proj16to32Filename = $ConfigDir$/16to32.txt
Proj32to64Filename = $ConfigDir$/32to64.txt
Train=[
action=train
modelPath=$ModelDir$/03_ResNet
@@ -39,7 +42,7 @@ Train=[
distributedMBReading=true
parallelizationStartEpoch=1
DataParallelSGD=[
gradientBits=32
gradientBits=1
]
]

View file

@@ -32,20 +32,22 @@ DNN=[
cMap1 = 16
conv1 = ConvBNReLULayer2(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue)
rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
rn1_3 = ResNetNode2(rn1_2, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
rn1_3 = ResNetNode2(rn1_2, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
cMap2 = 32
rn2_1 = ResNetNode2Reduce(rn1_3, cMap2, 144, 288, 16384, 8192, kW, kH, convWScale, convBValue, scValue)
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
rn2_1 = ResNetNode2Conv(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
cMap3 = 64
rn3_1 = ResNetNode2Reduce(rn2_3, cMap3, 288, 576, 8192, 4096, kW, kH, convWScale, convBValue, scValue)
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
rn3_1 = ResNetNode2Conv(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
# pool
poolW = 3
poolH = 3
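The change above replaces the learned ResNetNode2Reduce projection with ResNetNode2Conv and a fixed, non-trainable Wproj loaded from the text files below: a 1x1 convolution with stride 2 that subsamples the shortcut spatially and zero-pads the new channels. A rough NumPy sketch of its effect (shapes and names are illustrative, not CNTK semantics):

import numpy as np

def shortcut_projection(x, stride=2):
    # x: one feature map of shape (cMapIn, H, W)
    c = x.shape[0]
    w_proj = np.eye(2 * c, c)           # the same matrix writeConvWeights emits
    x_sub = x[:, ::stride, ::stride]    # a 1x1 kernel with stride 2 just subsamples
    # output channels 0..c-1 copy the input; channels c..2c-1 stay zero
    return np.einsum('oc,chw->ohw', w_proj, x_sub)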

View file

@@ -0,0 +1,32 @@
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

View file

@@ -0,0 +1,64 @@
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

View file

@@ -41,7 +41,7 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 0.9)
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
@@ -51,12 +51,12 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 0.9)
p = Plus(bn2, inp)
y2 = RectifiedLinear(p);
}
ResNetNode2Reduce(inp, outMap, inWCount, wCount, inDim, outDim, kW, kH, wScale, bValue, scValue)
ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
@@ -65,7 +65,7 @@ ResNetNode2Reduce(inp, outMap, inWCount, wCount, inDim, outDim, kW, kH, wScale,
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 0.9)
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
@@ -75,10 +75,10 @@ ResNetNode2Reduce(inp, outMap, inWCount, wCount, inDim, outDim, kW, kH, wScale,
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true)
WP = Parameter(outDim, inDim)
t = Times(WP, inp, init = Gaussian, initValueScale = wScale)
p = Plus(bn2, t)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 0.9)
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
p = Plus(bn2, cproj)
y2 = RectifiedLinear(p);
}
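This file's other change adds expAvgFactor = 0.9 to every BatchNormalization call. Assuming expAvgFactor is the weight given to the current minibatch statistic when updating the running mean/variance estimates used at eval time, the update is roughly this sketch (names illustrative, not the CNTK implementation):

def update_running_stat(running, batch, expAvgFactor=0.9):
    # larger expAvgFactor -> the running estimate tracks recent minibatches more closely
    return expAvgFactor * batch + (1 - expAvgFactor) * running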

View file

@@ -8,6 +8,20 @@ DnnReLULayer(inDim, outDim, x, wScale, bValue)
y = RectifiedLinear(z)
}
# Fully-connected layer with batch normalization and ReLU activation.
DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
{
    W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
    b = Parameter(inDim, 1, init = fixedValue, value = bValue)
    sc = Parameter(inDim, 1, init = Gaussian, initValueScale = 0.01)
    m = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
    isd = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
    bn = BatchNormalization(x, sc, b, m, isd, eval = false, spatial = false)
    t = Times(W, bn)
    y = RectifiedLinear(t)
}
# Fully-connected layer.
DnnLayer(inDim, outDim, x, wScale, bValue)
{
@@ -27,3 +41,16 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
y = RectifiedLinear(z);
}
# Convolutional layer with batch normalization and ReLU activation.
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
{
    W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
    b = Parameter(outMap, 1, init = fixedValue, value = bValue)
    sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
    m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
    isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
    c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
    bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true)
    y = RectifiedLinear(bn);
}

View file

@@ -67,7 +67,7 @@ Train=[
# Horizontal random flip, will be enabled by default if cropType=Random
#hflip=0
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.9
cropRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
jitterType=UniRatio

View file

@@ -1,7 +1,7 @@
load=ndlMnistMacros
load=ndlMacros
run=DNN
ndlMnistMacros = [
ndlMacros = [
ImageW = 224
ImageH = 224
ImageC = 3

View file

@@ -0,0 +1,118 @@
RootDir = "."

ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"

ndlMacros=$ConfigDir$/Macros.ndl

precision=float
deviceId=Auto

command=Train:AddTop5Eval:Test

parallelTrain=false

stderr=$OutputDir$/VGG_E_BN
traceLevel=1

Train=[
    action=train
    modelPath=$ModelDir$/VGG_E_BN

    NDLNetworkBuilder=[
        networkDescription=$ConfigDir$/VGG_E_BN.ndl
    ]

    SGD=[
        epochSize=0
        minibatchSize=16
        learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
        momentumPerMB=0.9
        maxEpochs=70
        gradUpdateType=None
        L2RegWeight=0.0005
        dropoutRate=0*5:0.5

        ParallelTrain=[
            parallelizationMethod=DataParallelSGD
            distributedMBReading=true
            parallelizationStartEpoch=1
            DataParallelSGD=[
                gradientBits=1
            ]
        ]

        numMBsToShowResult=10
    ]

    reader=[
        readerType=ImageReader
        # Map file which maps images to labels using the following format:
        # <full path to image><tab><numerical label (0-based class id)>
        # Example:
        # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
        file=$DataDir$/train_map.txt
        # Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
        randomize=Auto
        features=[
            # Below are the required parameters.
            width=224
            height=224
            channels=3
            # Below are the optional parameters.
            # Possible values: Center, Random. Default: Center
            cropType=Random
            # Horizontal random flip, will be enabled by default if cropType=Random
            #hflip=0
            # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
            cropRatio=0.875
            # Crop scale ratio jitter type.
            # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
            jitterType=UniRatio
            # Interpolation to use when scaling image to width x height size.
            # Possible values: nearest, linear, cubic, lanczos. Default: linear.
            interpolations=Linear
            # Stores mean values for each pixel in OpenCV matrix XML format.
            meanFile=$ConfigDir$/ImageNet1K_mean.xml
        ]
        labels=[
            labelDim=1000
        ]
    ]
]

AddTop5Eval=[
    action=edit
    CurModel=$ModelDir$/VGG_E_BN
    NewModel=$ModelDir$/VGG_E_BN.Top5
    editPath=$ConfigDir$/add_top5_layer.mel
]

Test=[
    action=test
    modelPath=$ModelDir$/VGG_E_BN.Top5
    # Set minibatch size for testing.
    minibatchSize=128

    NDLNetworkBuilder=[
        networkDescription=$ConfigDir$/VGG_E_BN.ndl
    ]

    reader=[
        readerType=ImageReader
        file=$DataDir$/val_map.txt
        randomize=None
        features=[
            width=224
            height=224
            channels=3
            cropType=Center
            meanFile=$ConfigDir$/ImageNet1K_mean.xml
        ]
        labels=[
            labelDim=1000
        ]
    ]
]
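The learningRatesPerMB schedule above uses the rate*epochs syntax. Assuming each entry holds for its epoch count and the final entry holds for the remainder, it expands like this sketch (helper name is made up):

def expand_schedule(spec, max_epochs):
    rates = []
    for part in spec.split(':'):
        rate, _, count = part.partition('*')
        n = int(count) if count else max_epochs - len(rates)
        rates += [float(rate)] * n
    return rates[:max_epochs]

# expand_schedule("0.01*20:0.003*12:0.001*28:0.0003", 70)
# -> 20 epochs at 0.01, 12 at 0.003, 28 at 0.001, the last 10 at 0.0003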

View file

@@ -0,0 +1,87 @@
load=ndlMacros
run=DNN

ndlMacros = [
    ImageW = 224
    ImageH = 224
    ImageC = 3
    LabelDim = 1000

    features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
    featOffs = Const(0, rows = 150528)
    featScaled = Plus(features, featOffs)
    labels = Input(LabelDim, tag = label)

    # Kernels width and height.
    kW = 3
    kH = 3
    # Kernel stride.
    hs = 1
    vs = 1

    # Pooling settings.
    poolW = 2
    poolH = 2
    poolhs = 2
    poolvs = 2

    # Initial parameter values.
    convWScale = 7.07
    convBValue = 0
    scValue = 0.03
    fc1WScale = 3.0
    fc1BValue = 1
    fc2WScale = 3.0
    fc2BValue = 1
    fc3WScale = 1.0
    fc3BValue = 1
]

DNN=[
    cMap1 = 64
    conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv2 = ConvBNReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
    pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)

    cMap3 = 128
    conv3 = ConvBNReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv4 = ConvBNReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
    pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)

    cMap5 = 256
    conv5 = ConvBNReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv6 = ConvBNReLULayer(conv5, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv7 = ConvBNReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv8 = ConvBNReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
    pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)

    cMap9 = 512
    conv9 = ConvBNReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv10 = ConvBNReLULayer(conv9, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv11 = ConvBNReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv12 = ConvBNReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs)

    cMap13 = 512
    conv13 = ConvBNReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv14 = ConvBNReLULayer(conv13, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv15 = ConvBNReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    conv16 = ConvBNReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
    pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs)

    hiddenDim = 4096
    h1 = DnnBNReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
    h1_d = Dropout(h1)
    h2 = DnnBNReLULayer(hiddenDim, hiddenDim, h1_d, fc2WScale, fc2BValue)
    h2_d = Dropout(h2)
    ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)

    CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
    Err = ErrorPrediction(labels, ol, tag = Eval)
    OutputNodes = ol
]
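Throughout this network (and the ResNet one above), the inWCount argument appears to be kW * kH * (input map count), i.e. the number of weights per output pixel of the convolution; a sanity check:

def in_w_count(kW, kH, cMapIn):
    # 27 = 3*3*3 (RGB input), 576 = 3*3*64, 1152 = 3*3*128,
    # 2304 = 3*3*256, 4608 = 3*3*512
    return kW * kH * cMapIn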

View file

@@ -171,9 +171,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Base::Validate(isFinalValidationPass);
if (m_horizontalSubsample > m_kernelWidth || m_verticalSubsample > m_kernelHeight)
InvalidArgument("In ConvolutionNode horizontalSubsample must <= kernelWidth and verticalSubsample must <= kernelHeight.");
InferMBLayoutFromInputsForStandardCase();
InferImageDimsFromInputs();
@@ -387,9 +384,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Base::Validate(isFinalValidationPass);
if (m_horizontalSubsample > m_windowWidth || m_verticalSubsample > m_windowHeight)
InvalidArgument("PoolingNodeBase: horizontalSubsample must <= windowWidth and verticalSubsample must <= windowHeight.");
InferMBLayoutFromInputsForStandardCase();
InferImageDimsFromInputs();

View file

@@ -1960,7 +1960,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
// _elementWiseSigmoidOnCuda has an implementation that avoids possible overflow errors, but is slightly slower and may have an accuracy regression.
// We have a new non-branching implementation (yay!) that Frank will check in.
#if 0
_elementWiseSigmoidOnCuda<<<blocksPerGrid, threadsPerBlock, 0, t_stream>>>(a.m_pArray, m_pArray, N);
#else
_assignSigmoidOf<<<blocksPerGrid,GridDim::maxThreadsPerBlock,0,t_stream>>>(a.m_pArray,m_pArray,N);
#endif
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
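The branching implementation referenced in the comment avoids overflow by only ever exponentiating a non-positive value; in scalar form the idea is (Python sketch, not the CUDA code itself):

import math

def stable_sigmoid(x):
    if x >= 0:
        e = math.exp(-x)    # -x <= 0, so exp cannot overflow
        return 1 / (1 + e)
    else:
        e = math.exp(x)     # x < 0, so exp cannot overflow
        return e / (1 + e)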
@@ -2213,19 +2219,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceTruncateBottom (const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncateBottom: Matrix is empty.");
CUDA_LONG N=(CUDA_LONG)GetNumElements();
int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_inplaceTruncateBottom<ElemType><<<blocksPerGrid,GridDim::maxThreadsPerBlock,0,t_stream>>>(m_pArray,threshold,N);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
return *this;
return AssignTruncateBottomOf(*this, threshold);
}
template<class ElemType>
@@ -2255,18 +2249,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceTruncateTop (const ElemType threshold)
{
if (IsEmpty())
LogicError("InplaceTruncateTop: Matrix is empty.");
CUDA_LONG N=(CUDA_LONG)GetNumElements();
int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_inplaceTruncateTop<ElemType><<<blocksPerGrid,GridDim::maxThreadsPerBlock,0,t_stream>>>(m_pArray,threshold,N);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
return *this;
return AssignTruncateTopOf(*this, threshold);
}
template<class ElemType>
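Both in-place truncation methods are now thin wrappers over their assign-style counterparts with the destination aliased to the source, which is safe because the kernels are purely elementwise. A NumPy analogue of the pattern:

import numpy as np

def assign_truncate_bottom(dst, src, threshold):
    np.maximum(src, threshold, out=dst)      # dst[i] = max(src[i], threshold)

def inplace_truncate_bottom(a, threshold):
    assign_truncate_bottom(a, a, threshold)  # dst == src is fine for elementwise ops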

View file

@@ -90,6 +90,25 @@ static __inline__ __device__ double atomicAdd(double* address, double val) UNUSE
// CUDA kernels follow, lots of them
// ===========================================================================
// _elementWise*() kernels
//
// Designed to operate on contiguous blocks of memory, where the output is a simple function of the inputs.
// The first parameters of every function are inputs, and the last two arguments to each function are always
(ElemType *res, CUDA_LONG N), a pointer and length of the output block. Each thread computes a function
// of the inputs for one value in the output.
// This macro overloads _x() with float and double arguments, and inlines the correct library function. This simplifies templated kernel code.
// TODO: merge with similar definition in TensorOps.h
#define DEF_ELEMENT_PRIMITIVE(x) __device__ __forceinline__ float _##x(float f) { return x##f(f); } __device__ __forceinline__ double _##x(double f) { return x(f); }
DEF_ELEMENT_PRIMITIVE(exp)
DEF_ELEMENT_PRIMITIVE(log)
DEF_ELEMENT_PRIMITIVE(tanh)
DEF_ELEMENT_PRIMITIVE(sqrt)
DEF_ELEMENT_PRIMITIVE(fabs)
DEF_ELEMENT_PRIMITIVE(cos)
DEF_ELEMENT_PRIMITIVE(sin)
template<class ElemType>
__global__ void _elementWisePowerOnCuda(
const ElemType alpha,
@@ -134,48 +153,19 @@ __global__ void _elementWiseSigmoidOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
if (a[id] >= 0)
{
if (a[id]>=0)
{
double e = exp(-1*a[id]);
res[id]=1/(1+e);
double e = _exp(-a[id]);
res[id] = 1 / (1 + e);
}
else
{
double e = exp(a[id]);
res[id]=e/(1+e);
double e = _exp(a[id]);
res[id] = e / (1 + e);
}
}
else
{
if (res[id]>=0)
{
float e = expf(-1*a[id]);
res[id]=1/(1+e);
}
else
{
float e = exp(a[id]); // BUGBUG: Looks like this should be expf().
res[id]=e/(1+e);
}
}
};
__device__ __forceinline__ float _exp(float f)
{
return expf(f);
}
__device__ __forceinline__ double _exp(double f)
{
return exp(f);
}
//#define TENSOR_OPS_DECL __device__ __host__
//#include "TensorOps.h"
template<class ElemType>
__global__ void _assignSigmoidOf(
const ElemType* a,
@@ -224,16 +214,8 @@ __global__ void _elementWiseTanhOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
{
res[id]=tanh(a[id]);
}
else
{
res[id]=tanhf(a[id]);
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = _tanh(a[id]);
};
//to prevent negative values caused by floating operations, we force inputs to be >=0
@@ -244,15 +226,8 @@ __global__ void _elementWiseSqrtOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
{
res[id]=sqrt(max((ElemType)0, a[id]));
}
else
{
res[id]=sqrtf(max(ElemType(0), a[id]));
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = _sqrt(max((ElemType)0, a[id]));
};
template<class ElemType>
@@ -261,15 +236,8 @@ __global__ void _elementWiseExpOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
{
res[id]=exp(a[id]);
}
else
{
res[id]=expf(a[id]);
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = _exp(a[id]);
};
template<class ElemType>
@@ -278,22 +246,8 @@ __global__ void _elementWiseLogOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (a[id]<EPS_IN_LOG)
{
res[id]=LOG_OF_EPS_IN_LOG;
}
else
{
if (sizeof(ElemType)==sizeof(double))
{
res[id]=log(a[id]);
}
else
{
res[id]=logf(a[id]);
}
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = (a[id] < EPS_IN_LOG) ? LOG_OF_EPS_IN_LOG : _log(a[id]);
};
template<class ElemType>
@@ -302,15 +256,8 @@ __global__ void _elementWiseAbsOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
{
res[id]=fabs(a[id]);
}
else
{
res[id]=fabsf(a[id]);
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = _fabs(a[id]);
};
template<class ElemType>
@@ -319,15 +266,8 @@ __global__ void _elementWiseCosineOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
{
res[id]=cos(a[id]);
}
else
{
res[id]=cosf(a[id]);
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = _cos(a[id]);
};
template<class ElemType>
@@ -336,18 +276,10 @@ __global__ void _elementWiseNegativeSineOnCuda(
ElemType *res,
const CUDA_LONG N)
{
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
if (sizeof(ElemType)==sizeof(double))
{
res[id]=-sin(a[id]);
}
else
{
res[id]=-sinf(a[id]);
}
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
res[id] = -_sin(a[id]);
};
template<class ElemType>
__global__ void _setValue(
ElemType* a,
@@ -1141,6 +1073,7 @@ __global__ void _assignColumnwiseHardmaxOf(
}
}
#if 0
template<class ElemType>
__global__ void _inplaceTruncateBottom(
ElemType* a,
@@ -1153,6 +1086,7 @@ __global__ void _inplaceTruncateBottom(
if (a[id]<threshold)
a[id]=threshold;
}
#endif
template<class ElemType>
__global__ void _assignTruncateBottom(
@@ -1161,15 +1095,11 @@ __global__ void _assignTruncateBottom(
const ElemType threshold,
const CUDA_LONG N)
{
CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
if (id>=N)
return;
if (a[id]<threshold)
us[id]=threshold;
else
us[id]=a[id];
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
us[id] = a[id] < threshold ? threshold : a[id];
}
#if 0
template<class ElemType>
__global__ void _inplaceTruncateTop(
ElemType* a,
@@ -1182,6 +1112,7 @@ __global__ void _inplaceTruncateTop(
if (a[id]>threshold)
a[id]=threshold;
}
#endif
template<class ElemType>
__global__ void _assignTruncateTop(
@@ -1190,13 +1121,8 @@ __global__ void _assignTruncateTop(
const ElemType threshold,
const CUDA_LONG N)
{
CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
if (id>=N)
return;
if (a[id]>threshold)
us[id]=threshold;
else
us[id]=a[id];
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
us[id] = a[id] > threshold ? threshold : a[id];
}
template<class ElemType>
@@ -3716,9 +3642,7 @@ __global__ void _inplaceTruncate(
const ElemType threshold,
const CUDA_LONG N)
{
CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
if (id>=N)
return;
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
ElemType locThresholdPos = abs(threshold);
ElemType locTHresholdNeg = -locThresholdPos;
if (a[id] > locThresholdPos)

View file

@@ -2526,7 +2526,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_inplaceTruncateBottom<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), threshold, N);
_assignTruncateBottom<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), NzValues(), threshold, N);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
@@ -2570,7 +2570,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_inplaceTruncateTop<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), threshold, N);
_assignTruncateTop<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), NzValues(), threshold, N);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));