merged from master
Commit 44e7343f71

@@ -0,0 +1,12 @@
+import sys
+import numpy as np
+
+def writeConvWeights(fname, cmapIn):
+    cmapOut = 2 * cmapIn
+    w = np.eye(cmapOut, cmapIn)
+    np.savetxt(fname, w, fmt = '%d', delimiter = ' ')
+
+if __name__ == "__main__":
+    cmapIn = int(sys.argv[1])
+    fname = sys.argv[2]
+    writeConvWeights(fname, cmapIn)
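
Note: writeConvWeights() emits a rectangular identity matrix, so a cmapIn-channel input is copied into the first cmapIn output channels and the remaining cmapIn channels are zero. A quick check of the shape it produces:

    import numpy as np

    # np.eye(4, 2) is what writeConvWeights() would save for cmapIn = 2:
    # the top 2x2 block is the identity, the bottom rows are all zero.
    print(np.eye(4, 2))
    # [[1. 0.]
    #  [0. 1.]
    #  [0. 0.]
    #  [0. 0.]]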

@@ -17,6 +17,9 @@ command=Train:AddBNEval:Test
 stderr=$OutputDir$/03_ResNet
 traceLevel=1
 
+Proj16to32Filename = $ConfigDir$/16to32.txt
+Proj32to64Filename = $ConfigDir$/32to64.txt
+
 Train=[
     action=train
     modelPath=$ModelDir$/03_ResNet

@@ -39,7 +42,7 @@ Train=[
         distributedMBReading=true
         parallelizationStartEpoch=1
         DataParallelSGD=[
-            gradientBits=32
+            gradientBits=1
         ]
     ]
 

@@ -32,20 +32,22 @@ DNN=[
     cMap1 = 16
     conv1 = ConvBNReLULayer2(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue)
 
-    rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
-    rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
-    rn1_3 = ResNetNode2(rn1_2, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
+    rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
+    rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
+    rn1_3 = ResNetNode2(rn1_2, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
 
     cMap2 = 32
-    rn2_1 = ResNetNode2Reduce(rn1_3, cMap2, 144, 288, 16384, 8192, kW, kH, convWScale, convBValue, scValue)
-    rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
-    rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
+    rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
+    rn2_1 = ResNetNode2Conv(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
+    rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
+    rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
 
     cMap3 = 64
-    rn3_1 = ResNetNode2Reduce(rn2_3, cMap3, 288, 576, 8192, 4096, kW, kH, convWScale, convBValue, scValue)
-    rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
-    rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
+    rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
+    rn3_1 = ResNetNode2Conv(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
+    rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
+    rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
 
     # pool
     poolW = 3
     poolH = 3
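
Note: the rn2_1/rn3_1 rewrite replaces the Times-based ResNetNode2Reduce shortcut with ResNetNode2Conv, whose projection weights (rn2_1_Wproj, rn3_1_Wproj) are loaded from the 16to32.txt / 32to64.txt identity files (added below) and frozen with needGradient = false. Per spatial position such a projection simply copies the existing channels and zero-fills the new ones; a numpy sketch of that per-pixel mapping:

    import numpy as np

    wproj = np.eye(32, 16)      # same matrix writeConvWeights() emits for cmapIn = 16
    x = np.random.randn(16)     # features of one spatial position, 16 channels
    y = wproj @ x               # 32 channels: first 16 copied, last 16 zero
    assert np.allclose(y[:16], x) and np.allclose(y[16:], 0.0)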

@@ -0,0 +1,32 @@
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

@@ -0,0 +1,64 @@
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

@@ -41,7 +41,7 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
     isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
 
     c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
-    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
+    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 0.9)
     y1 = RectifiedLinear(bn1);
 
     W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
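
Note: the only change in this hunk is the explicit expAvgFactor = 0.9 on BatchNormalization. Assuming this is the smoothing coefficient of the running mean/variance estimates (the usual convention for such a parameter), each minibatch would update the stored statistics roughly as:

    # hedged sketch of an exponential moving average with expAvgFactor = 0.9;
    # CNTK's exact update convention may differ
    def update_running_stat(running, batch_stat, expAvgFactor=0.9):
        return expAvgFactor * running + (1 - expAvgFactor) * batch_stat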

@@ -51,12 +51,12 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
     isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
 
     c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
-    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true)
+    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 0.9)
     p = Plus(bn2, inp)
     y2 = RectifiedLinear(p);
 }
 
-ResNetNode2Reduce(inp, outMap, inWCount, wCount, inDim, outDim, kW, kH, wScale, bValue, scValue)
+ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
 {
     W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
     b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)

@@ -65,7 +65,7 @@ ResNetNode2Reduce(inp, outMap, inWCount, wCount, inDim, outDim, kW, kH, wScale,
     isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
 
     c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
-    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
+    bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 0.9)
     y1 = RectifiedLinear(bn1);
 
     W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)

@@ -75,10 +75,10 @@ ResNetNode2Reduce(inp, outMap, inWCount, wCount, inDim, outDim, kW, kH, wScale,
     isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
 
     c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
-    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true)
-    WP = Parameter(outDim, inDim)
-    t = Times(WP, inp, init = Gaussian, initValueScale = wScale)
-    p = Plus(bn2, t)
+    bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 0.9)
+
+    cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
+    p = Plus(bn2, cproj)
     y2 = RectifiedLinear(p);
 }
 
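
Note: the new shortcut applies Wproj as a 1x1 convolution with stride 2 (cproj), so it downsamples spatially by 2 while projecting channels, matching the shape of the stride-2 bn2 branch it is added to. A numpy sketch of what such a fixed 1x1/stride-2 projection computes, assuming a (channels, height, width) layout:

    import numpy as np

    def shortcut_1x1_stride2(x, wproj):           # x: (C_in, H, W)
        xs = x[:, ::2, ::2]                       # stride 2 keeps every other pixel
        return np.tensordot(wproj, xs, axes=1)    # (C_out, H/2, W/2)

    y = shortcut_1x1_stride2(np.random.randn(16, 32, 32), np.eye(32, 16))
    assert y.shape == (32, 16, 16)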

@@ -8,6 +8,20 @@ DnnReLULayer(inDim, outDim, x, wScale, bValue)
     y = RectifiedLinear(z)
 }
 
+# Fully-connected layer with batch normalization and ReLU activation.
+DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
+{
+    W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
+    b = Parameter(inDim, 1, init = fixedValue, value = bValue)
+    sc = Parameter(inDim, 1, init = Gaussian, initValueScale = 0.01)
+    m = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
+    isd = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
+
+    bn = BatchNormalization(x, sc, b, m, isd, eval = false, spatial = false)
+    t = Times(W, bn)
+    y = RectifiedLinear(t)
+}
+
 # Fully-connected layer.
 DnnLayer(inDim, outDim, x, wScale, bValue)
 {

@@ -27,3 +41,16 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
     y = RectifiedLinear(z);
 }
 
+# Convolutional layer with batch normalization and ReLU activation.
+ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
+{
+    W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
+    b = Parameter(outMap, 1, init = fixedValue, value = bValue)
+    sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
+    m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
+    isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
+
+    c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
+    bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true)
+    y = RectifiedLinear(bn);
+}
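
Note: in both new macros, m and isd are declared with needGradient = false: they hold the running statistics that BatchNormalization maintains rather than learns (by their names, the mean and an inverse standard deviation). A rough sketch of the per-channel transform those parameters describe, under that naming assumption:

    # sketch, not CNTK's implementation: sc/b are the learned scale and bias,
    # m/isd the stored mean and inverse standard deviation
    def batch_norm_eval(x, sc, b, m, isd):
        return sc * (x - m) * isd + b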

@@ -67,7 +67,7 @@ Train=[
         # Horizontal random flip, will be enabled by default if cropType=Random
         #hflip=0
         # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
-        cropRatio=0.9
+        cropRatio=0.875
         # Crop scale ratio jitter type.
         # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
         jitterType=UniRatio

@@ -1,7 +1,7 @@
-load=ndlMnistMacros
+load=ndlMacros
 run=DNN
 
-ndlMnistMacros = [
+ndlMacros = [
     ImageW = 224
     ImageH = 224
     ImageC = 3

@@ -0,0 +1,118 @@
+RootDir = "."
+
+ConfigDir = "$RootDir$"
+DataDir = "$RootDir$"
+OutputDir = "$RootDir$/Output"
+ModelDir = "$OutputDir$/Models"
+
+ndlMacros=$ConfigDir$/Macros.ndl
+
+precision=float
+deviceId=Auto
+
+command=Train:AddTop5Eval:Test
+
+parallelTrain=false
+
+stderr=$OutputDir$/VGG_E_BN
+traceLevel=1
+
+Train=[
+    action=train
+    modelPath=$ModelDir$/VGG_E_BN
+
+    NDLNetworkBuilder=[
+        networkDescription=$ConfigDir$/VGG_E_BN.ndl
+    ]
+
+    SGD=[
+        epochSize=0
+        minibatchSize=16
+        learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
+        momentumPerMB=0.9
+        maxEpochs=70
+        gradUpdateType=None
+        L2RegWeight=0.0005
+        dropoutRate=0*5:0.5
+
+        ParallelTrain=[
+            parallelizationMethod=DataParallelSGD
+            distributedMBReading=true
+            parallelizationStartEpoch=1
+            DataParallelSGD=[
+                gradientBits=1
+            ]
+        ]
+
+        numMBsToShowResult=10
+    ]
+
+    reader=[
+        readerType=ImageReader
+        # Map file which maps images to labels using the following format:
+        # <full path to image><tab><numerical label (0-based class id)>
+        # Example:
+        # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
+        file=$DataDir$/train_map.txt
+        # Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
+        randomize=Auto
+        features=[
+            # Below are the required parameters.
+            width=224
+            height=224
+            channels=3
+            # Below are the optional parameters.
+            # Possible values: Center, Random. Default: Center
+            cropType=Random
+            # Horizontal random flip, will be enabled by default if cropType=Random
+            #hflip=0
+            # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
+            cropRatio=0.875
+            # Crop scale ratio jitter type.
+            # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
+            jitterType=UniRatio
+            # Interpolation to use when scaling image to width x height size.
+            # Possible values: nearest, linear, cubic, lanczos. Default: linear.
+            interpolations=Linear
+            # Stores mean values for each pixel in OpenCV matrix XML format.
+            meanFile=$ConfigDir$/ImageNet1K_mean.xml
+        ]
+        labels=[
+            labelDim=1000
+        ]
+    ]
+]
+
+AddTop5Eval=[
+    action=edit
+    CurModel=$ModelDir$/VGG_E_BN
+    NewModel=$ModelDir$/VGG_E_BN.Top5
+    editPath=$ConfigDir$/add_top5_layer.mel
+]
+
+Test=[
+    action=test
+    modelPath=$ModelDir$/VGG_E_BN.Top5
+    # Set minibatch size for testing.
+    minibatchSize=128
+
+    NDLNetworkBuilder=[
+        networkDescription=$ConfigDir$/VGG_E_BN.ndl
+    ]
+
+    reader=[
+        readerType=ImageReader
+        file=$DataDir$/val_map.txt
+        randomize=None
+        features=[
+            width=224
+            height=224
+            channels=3
+            cropType=Center
+            meanFile=$ConfigDir$/ImageNet1K_mean.xml
+        ]
+        labels=[
+            labelDim=1000
+        ]
+    ]
+]
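
Note: learningRatesPerMB and dropoutRate use CNTK's value*count schedule syntax: 0.01*20:0.003*12:0.001*28:0.0003 means 0.01 for 20 epochs, then 0.003 for 12, then 0.001 for 28, with the last value repeating to the end of training. A small sketch that expands such a spec:

    # hypothetical helper for illustration, not part of CNTK
    def expand_schedule(spec, num_epochs):
        out = []
        for part in spec.split(':'):
            value, _, count = part.partition('*')
            n = int(count) if count else num_epochs - len(out)
            out += [float(value)] * n
        return out[:num_epochs]

    lr = expand_schedule("0.01*20:0.003*12:0.001*28:0.0003", 70)
    assert lr[0] == 0.01 and lr[20] == 0.003 and lr[-1] == 0.0003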

@@ -0,0 +1,87 @@
+load=ndlMacros
+run=DNN
+
+ndlMacros = [
+    ImageW = 224
+    ImageH = 224
+    ImageC = 3
+    LabelDim = 1000
+
+    features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
+    featOffs = Const(0, rows = 150528)
+    featScaled = Plus(features, featOffs)
+    labels = Input(LabelDim, tag = label)
+
+    # Kernels width and height.
+    kW = 3
+    kH = 3
+    # Kernel stride.
+    hs = 1
+    vs = 1
+
+    # Pooling settings.
+    poolW = 2
+    poolH = 2
+    poolhs = 2
+    poolvs = 2
+
+    # Initial parameter values.
+    convWScale = 7.07
+    convBValue = 0
+    scValue = 0.03
+    fc1WScale = 3.0
+    fc1BValue = 1
+    fc2WScale = 3.0
+    fc2BValue = 1
+    fc3WScale = 1.0
+    fc3BValue = 1
+]
+
+DNN=[
+    cMap1 = 64
+    conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv2 = ConvBNReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
+
+    pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
+
+    cMap3 = 128
+    conv3 = ConvBNReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv4 = ConvBNReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
+
+    pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
+
+    cMap5 = 256
+    conv5 = ConvBNReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv6 = ConvBNReLULayer(conv5, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv7 = ConvBNReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv8 = ConvBNReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
+
+    pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
+
+    cMap9 = 512
+    conv9 = ConvBNReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv10 = ConvBNReLULayer(conv9, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv11 = ConvBNReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv12 = ConvBNReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+
+    pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs)
+
+    cMap13 = 512
+    conv13 = ConvBNReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv14 = ConvBNReLULayer(conv13, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv15 = ConvBNReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+    conv16 = ConvBNReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
+
+    pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs)
+
+    hiddenDim = 4096
+    h1 = DnnBNReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
+    h1_d = Dropout(h1)
+    h2 = DnnBNReLULayer(hiddenDim, hiddenDim, h1_d, fc2WScale, fc2BValue)
+    h2_d = Dropout(h2)
+    ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
+
+    CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
+    Err = ErrorPrediction(labels, ol, tag = Eval)
+    OutputNodes = ol
+]
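
Note: the inWCount arguments are kW * kH * input channels (3*3*3 = 27 for conv1, 3*3*64 = 576 for conv2, and so on), and the 25088 fed to h1 is the flattened pool5 output: five 2x2/stride-2 poolings take 224 down to 7, and 7 * 7 * 512 = 25088. A quick arithmetic check:

    side, cmap = 224, 512
    for _ in range(5):          # pool1..pool5, each halves the spatial size
        side //= 2
    assert side == 7 and side * side * cmap == 25088
    assert 3 * 3 * 3 == 27 and 3 * 3 * 64 == 576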

@@ -171,9 +171,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     {
         Base::Validate(isFinalValidationPass);
 
-        if (m_horizontalSubsample > m_kernelWidth || m_verticalSubsample > m_kernelHeight)
-            InvalidArgument("In ConvolutionNode horizontalSubsample must <= kernelWidth and verticalSubsample must <= kernelHeight.");
-
         InferMBLayoutFromInputsForStandardCase();
         InferImageDimsFromInputs();
 

@@ -387,9 +384,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     {
         Base::Validate(isFinalValidationPass);
 
-        if (m_horizontalSubsample > m_windowWidth || m_verticalSubsample > m_windowHeight)
-            InvalidArgument("PoolingNodeBase: horizontalSubsample must <= windowWidth and verticalSubsample must <= windowHeight.");
-
         InferMBLayoutFromInputsForStandardCase();
         InferImageDimsFromInputs();
 

@@ -1960,7 +1960,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         PrepareDevice();
         cudaEvent_t done = nullptr;
         if (do_sync) CUDA_CALL(cudaEventCreate(&done));
+        // _elementWiseSigmoidOnCuda has an implementation that avoids possible overflow errors, but is slightly slower and may have an accuracy regression.
+        // We have a new implementation that is non-branching (yay!) that Frank will check in.
+#if 0
         _elementWiseSigmoidOnCuda<<<blocksPerGrid, threadsPerBlock, 0, t_stream>>>(a.m_pArray, m_pArray, N);
+#else
+        _assignSigmoidOf<<<blocksPerGrid,GridDim::maxThreadsPerBlock,0,t_stream>>>(a.m_pArray,m_pArray,N);
+#endif
         if (do_sync) CUDA_CALL(cudaEventRecord(done));
         if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
         if (do_sync) CUDA_CALL(cudaEventDestroy(done));

@@ -2213,19 +2219,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template<class ElemType>
     GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceTruncateBottom (const ElemType threshold)
     {
-        if (IsEmpty())
-            LogicError("InplaceTruncateBottom: Matrix is empty.");
-
-        CUDA_LONG N=(CUDA_LONG)GetNumElements();
-        int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
-        PrepareDevice();
-        cudaEvent_t done = nullptr;
-        if (do_sync) CUDA_CALL(cudaEventCreate(&done));
-        _inplaceTruncateBottom<ElemType><<<blocksPerGrid,GridDim::maxThreadsPerBlock,0,t_stream>>>(m_pArray,threshold,N);
-        if (do_sync) CUDA_CALL(cudaEventRecord(done));
-        if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
-        if (do_sync) CUDA_CALL(cudaEventDestroy(done));
-        return *this;
+        return AssignTruncateBottomOf(*this, threshold);
     }
 
     template<class ElemType>

@@ -2255,18 +2249,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template<class ElemType>
     GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceTruncateTop (const ElemType threshold)
     {
-        if (IsEmpty())
-            LogicError("InplaceTruncateTop: Matrix is empty.");
-        CUDA_LONG N=(CUDA_LONG)GetNumElements();
-        int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
-        PrepareDevice();
-        cudaEvent_t done = nullptr;
-        if (do_sync) CUDA_CALL(cudaEventCreate(&done));
-        _inplaceTruncateTop<ElemType><<<blocksPerGrid,GridDim::maxThreadsPerBlock,0,t_stream>>>(m_pArray,threshold,N);
-        if (do_sync) CUDA_CALL(cudaEventRecord(done));
-        if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
-        if (do_sync) CUDA_CALL(cudaEventDestroy(done));
-        return *this;
+        return AssignTruncateTopOf(*this, threshold);
     }
 
     template<class ElemType>
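
Note: both in-place truncations now delegate to their Assign*Of counterparts with the matrix as its own source, removing two copies of the kernel-launch/event boilerplate. In sketch form:

    # the in-place form reuses the out-of-place clamp with dst == src
    def assign_truncate_bottom_of(dst, src, threshold):
        dst[:] = [max(v, threshold) for v in src]
        return dst

    def inplace_truncate_bottom(m, threshold):
        return assign_truncate_bottom_of(m, m, threshold)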

@@ -90,6 +90,25 @@ static __inline__ __device__ double atomicAdd(double* address, double val) UNUSE
 // CUDA kernels follow, lots of them
 // ===========================================================================
 
+// _elementWise*() kernels
+//
+// Designed to operate on contiguous blocks of memory, where the output is a simple function of the inputs.
+// The first parameters of every function are inputs, and the last two arguments to each function are always
+// (ElemType *res, CUDA_LONG N), a pointer and length of the output block. Each thread computes a function
+// of the inputs for one value in the output.
+
+// This macro overloads _x() with float and double arguments, and inlines the correct library function. This simplifies templated kernel code.
+// TODO: merge with similar definition in TensorOps.h
+#define DEF_ELEMENT_PRIMITIVE(x) __device__ __forceinline__ float _##x(float f) { return x##f(f); } __device__ __forceinline__ double _##x(double f) { return x(f); }
+
+DEF_ELEMENT_PRIMITIVE(exp)
+DEF_ELEMENT_PRIMITIVE(log)
+DEF_ELEMENT_PRIMITIVE(tanh)
+DEF_ELEMENT_PRIMITIVE(sqrt)
+DEF_ELEMENT_PRIMITIVE(fabs)
+DEF_ELEMENT_PRIMITIVE(cos)
+DEF_ELEMENT_PRIMITIVE(sin)
+
 template<class ElemType>
 __global__ void _elementWisePowerOnCuda(
     const ElemType alpha,
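
Note: for example, DEF_ELEMENT_PRIMITIVE(exp) expands to the pair of overloads "__device__ __forceinline__ float _exp(float f) { return expf(f); }" and "__device__ __forceinline__ double _exp(double f) { return exp(f); }" — exactly the hand-written pair this commit deletes further down. The x##f token pasting is what selects the float variant (expf, logf, ...) of each CUDA math function.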

@@ -134,48 +153,19 @@ __global__ void _elementWiseSigmoidOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        if (a[id]>=0)
-        {
-            double e = exp(-1*a[id]);
-            res[id]=1/(1+e);
-        }
-        else
-        {
-            double e = exp(a[id]);
-            res[id]=e/(1+e);
-        }
-    }
-    else
-    {
-        if (res[id]>=0)
-        {
-            float e = expf(-1*a[id]);
-            res[id]=1/(1+e);
-        }
-        else
-        {
-            float e = exp(a[id]); // BUGBUG: Looks like this should be expf().
-            res[id]=e/(1+e);
-        }
-    }
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    if (a[id] >= 0)
+    {
+        double e = _exp(-a[id]);
+        res[id] = 1 / (1 + e);
+    }
+    else
+    {
+        double e = _exp(a[id]);
+        res[id] = e / (1 + e);
+    }
 };
 
-__device__ __forceinline__ float _exp(float f)
-{
-    return expf(f);
-}
-
-__device__ __forceinline__ double _exp(double f)
-{
-    return exp(f);
-}
-
-//#define TENSOR_OPS_DECL __device__ __host__
-//#include "TensorOps.h"
-
 template<class ElemType>
 __global__ void _assignSigmoidOf(
     const ElemType* a,
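
Note: the rewritten kernel picks one of two algebraically equal forms of the sigmoid so that exp() is only ever evaluated at a non-positive argument and cannot overflow, as the comment in AssignSigmoidOf above explains. The same trick in plain Python:

    import math

    def stable_sigmoid(x):
        if x >= 0:
            e = math.exp(-x)        # x >= 0, so exp(-x) <= 1: no overflow
            return 1 / (1 + e)
        e = math.exp(x)             # x < 0, so exp(x) < 1: no overflow
        return e / (1 + e)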

@@ -224,16 +214,8 @@ __global__ void _elementWiseTanhOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        res[id]=tanh(a[id]);
-    }
-    else
-    {
-        res[id]=tanhf(a[id]);
-    }
-
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = _tanh(a[id]);
 };
 
 //to prevent negative values caused by floating operations, we force inputs to be >=0

@@ -244,15 +226,8 @@ __global__ void _elementWiseSqrtOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        res[id]=sqrt(max((ElemType)0, a[id]));
-    }
-    else
-    {
-        res[id]=sqrtf(max(ElemType(0), a[id]));
-    }
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = _sqrt(max((ElemType)0, a[id]));
 };
 
 template<class ElemType>

@@ -261,15 +236,8 @@ __global__ void _elementWiseExpOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        res[id]=exp(a[id]);
-    }
-    else
-    {
-        res[id]=expf(a[id]);
-    }
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = _exp(a[id]);
 };
 
 template<class ElemType>

@@ -278,22 +246,8 @@ __global__ void _elementWiseLogOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (a[id]<EPS_IN_LOG)
-    {
-        res[id]=LOG_OF_EPS_IN_LOG;
-    }
-    else
-    {
-        if (sizeof(ElemType)==sizeof(double))
-        {
-            res[id]=log(a[id]);
-        }
-        else
-        {
-            res[id]=logf(a[id]);
-        }
-    }
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = (a[id] < EPS_IN_LOG) ? LOG_OF_EPS_IN_LOG : _log(a[id]);
 };
 
 template<class ElemType>

@@ -302,15 +256,8 @@ __global__ void _elementWiseAbsOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        res[id]=fabs(a[id]);
-    }
-    else
-    {
-        res[id]=fabsf(a[id]);
-    }
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = _fabs(a[id]);
 };
 
 template<class ElemType>

@@ -319,15 +266,8 @@ __global__ void _elementWiseCosineOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        res[id]=cos(a[id]);
-    }
-    else
-    {
-        res[id]=cosf(a[id]);
-    }
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = _cos(a[id]);
 };
 
 template<class ElemType>

@@ -336,18 +276,10 @@ __global__ void _elementWiseNegativeSineOnCuda(
     ElemType *res,
     const CUDA_LONG N)
 {
-    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id,N);
-    if (sizeof(ElemType)==sizeof(double))
-    {
-        res[id]=-sin(a[id]);
-    }
-    else
-    {
-        res[id]=-sinf(a[id]);
-    }
-
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    res[id] = -_sin(a[id]);
 };
 
 template<class ElemType>
 __global__ void _setValue(
     ElemType* a,

@@ -1141,6 +1073,7 @@ __global__ void _assignColumnwiseHardmaxOf(
     }
 }
 
+#if 0
 template<class ElemType>
 __global__ void _inplaceTruncateBottom(
     ElemType* a,

@@ -1153,6 +1086,7 @@ __global__ void _inplaceTruncateBottom(
     if (a[id]<threshold)
         a[id]=threshold;
 }
+#endif
 
 template<class ElemType>
 __global__ void _assignTruncateBottom(

@@ -1161,15 +1095,11 @@ __global__ void _assignTruncateBottom(
     const ElemType threshold,
     const CUDA_LONG N)
 {
-    CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
-    if (id>=N)
-        return;
-    if (a[id]<threshold)
-        us[id]=threshold;
-    else
-        us[id]=a[id];
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    us[id] = a[id] < threshold ? threshold : a[id];
 }
 
+#if 0
 template<class ElemType>
 __global__ void _inplaceTruncateTop(
     ElemType* a,

@@ -1182,6 +1112,7 @@ __global__ void _inplaceTruncateTop(
     if (a[id]>threshold)
         a[id]=threshold;
 }
+#endif
 
 template<class ElemType>
 __global__ void _assignTruncateTop(

@@ -1190,13 +1121,8 @@ __global__ void _assignTruncateTop(
     const ElemType threshold,
     const CUDA_LONG N)
 {
-    CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
-    if (id>=N)
-        return;
-    if (a[id]>threshold)
-        us[id]=threshold;
-    else
-        us[id]=a[id];
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT;
+    us[id] = a[id] > threshold ? threshold : a[id];
 }
 
 template<class ElemType>
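
Note: after the rewrite, both assign-truncate kernels are one-line clamps: _assignTruncateBottom clips from below at the threshold and _assignTruncateTop clips from above. Element-wise, that is:

    def truncate_bottom(a, threshold):
        return [threshold if v < threshold else v for v in a]

    def truncate_top(a, threshold):
        return [threshold if v > threshold else v for v in a]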

@@ -3716,9 +3642,7 @@ __global__ void _inplaceTruncate(
     const ElemType threshold,
     const CUDA_LONG N)
 {
-    CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
-    if (id>=N)
-        return;
+    CALCULATE_ELEMENTWISE_INDEX_OR_EXIT
     ElemType locThresholdPos = abs(threshold);
     ElemType locTHresholdNeg = -locThresholdPos;
     if (a[id] > locThresholdPos)

@@ -2526,7 +2526,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
         cudaEvent_t done = nullptr;
         if (do_sync) CUDA_CALL(cudaEventCreate(&done));
-        _inplaceTruncateBottom<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), threshold, N);
+        _assignTruncateBottom<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), NzValues(), threshold, N);
         if (do_sync) CUDA_CALL(cudaEventRecord(done));
         if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
         if (do_sync) CUDA_CALL(cudaEventDestroy(done));

@@ -2570,7 +2570,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         int blocksPerGrid =(int)ceil(N*1.0/GridDim::maxThreadsPerBlock);
         cudaEvent_t done = nullptr;
        if (do_sync) CUDA_CALL(cudaEventCreate(&done));
-        _inplaceTruncateTop<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), threshold, N);
+        _assignTruncateTop<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(NzValues(), NzValues(), threshold, N);
         if (do_sync) CUDA_CALL(cudaEventRecord(done));
         if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
         if (do_sync) CUDA_CALL(cudaEventDestroy(done));