Merge branch 'master' into qiwye/multiverso
This commit is contained in:
Коммит
7628026b05
|
@ -19,6 +19,8 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
|
|||
|
||||
# comment the following line to write logs to the console
|
||||
stderr = "$OutputDir$/01_OneHidden_out"
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
#######################################
|
||||
# TRAINING CONFIG #
|
||||
|
@ -63,6 +65,7 @@ train = [
|
|||
|
||||
test = [
|
||||
action = "test"
|
||||
minibatchSize = 16
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription = "$ConfigDir$/01_OneHidden.ndl"
|
||||
|
|
|
@ -19,6 +19,10 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
|
|||
|
||||
# comment the following line to write logs to the console
|
||||
stderr = "$OutputDir$/02_Convolution_out"
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
prefetch=true
|
||||
|
||||
#######################################
|
||||
# TRAINING CONFIG #
|
||||
|
@ -63,6 +67,7 @@ train = [
|
|||
|
||||
test = [
|
||||
action = test
|
||||
minibatchSize = 16
|
||||
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/02_Convolution.ndl"
|
||||
|
|
|
@ -1,20 +1,28 @@
|
|||
WorkDir=.
|
||||
ModelDir=$WorkDir$/_out/$ConfigName$
|
||||
stderr=$WorkDir$/_out/$ConfigName$
|
||||
RootDir = "."
|
||||
|
||||
ndlMacros=$WorkDir$/Macros.ndl
|
||||
ConfigDir = "$RootDir$"
|
||||
DataDir = "$RootDir$"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
|
||||
ndlMacros=$ConfigDir$/Macros.ndl
|
||||
|
||||
precision=float
|
||||
deviceId=Auto
|
||||
prefetch=true
|
||||
|
||||
command=Train:Test
|
||||
|
||||
stderr=$OutputDir$/01_Conv
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
Train=[
|
||||
action=train
|
||||
modelPath=$ModelDir$/01_Convolution
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$WorkDir$/01_Convolution.ndl
|
||||
networkDescription=$ConfigDir$/01_Convolution.ndl
|
||||
]
|
||||
|
||||
SGD=[
|
||||
|
@ -29,7 +37,7 @@ Train=[
|
|||
|
||||
reader=[
|
||||
readerType=UCIFastReader
|
||||
file=$WorkDir$/Train.txt
|
||||
file=$DataDir$/Train.txt
|
||||
randomize=None
|
||||
features=[
|
||||
dim=3072
|
||||
|
@ -39,7 +47,7 @@ Train=[
|
|||
dim=1
|
||||
start=0
|
||||
labelDim=10
|
||||
labelMappingFile=$WorkDir$/labelsmap.txt
|
||||
labelMappingFile=$DataDir$/labelsmap.txt
|
||||
]
|
||||
]
|
||||
]
|
||||
|
@ -48,15 +56,15 @@ Test=[
|
|||
action=test
|
||||
modelPath=$ModelDir$/01_Convolution
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=16
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$WorkDir$/01_Convolution.ndl
|
||||
networkDescription=$ConfigDir$/01_Convolution.ndl
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=UCIFastReader
|
||||
file=$WorkDir$/Test.txt
|
||||
file=$DataDir$/Test.txt
|
||||
randomize=None
|
||||
features=[
|
||||
dim=3072
|
||||
|
@ -66,7 +74,7 @@ Test=[
|
|||
dim=1
|
||||
start=0
|
||||
labelDim=10
|
||||
labelMappingFile=$WorkDir$/labelsmap.txt
|
||||
labelMappingFile=$DataDir$/labelsmap.txt
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -7,8 +7,8 @@ ndlMnistMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 10
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
featOffs = Const(128, rows = 3072)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
featOffs = Const(128)
|
||||
featScaled = Minus(features, featOffs)
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
|
@ -39,7 +39,7 @@ DNN=[
|
|||
pool1H = 3
|
||||
pool1hStride = 2
|
||||
pool1vStride = 2
|
||||
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
|
||||
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
|
||||
|
||||
# conv2
|
||||
kW2 = 5
|
||||
|
@ -55,7 +55,7 @@ DNN=[
|
|||
pool2H = 3
|
||||
pool2hStride = 2
|
||||
pool2vStride = 2
|
||||
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
|
||||
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
|
||||
|
||||
# conv3
|
||||
kW3 = 5
|
||||
|
@ -71,7 +71,7 @@ DNN=[
|
|||
pool3H = 3
|
||||
pool3hStride = 2
|
||||
pool3vStride = 2
|
||||
pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride)
|
||||
pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
|
||||
|
||||
hiddenDim = 64
|
||||
h1 = DNNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)
|
||||
|
|
|
@ -1,37 +1,43 @@
|
|||
WorkDir=.
|
||||
ModelDir=$WorkDir$/_out/$ConfigName$
|
||||
stderr=$WorkDir$/_out/$ConfigName$
|
||||
RootDir = "."
|
||||
|
||||
ndlMacros=$WorkDir$/Macros.ndl
|
||||
ConfigDir = "$RootDir$"
|
||||
DataDir = "$RootDir$"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
|
||||
ndlMacros=$ConfigDir$/Macros.ndl
|
||||
|
||||
precision=float
|
||||
deviceId=Auto
|
||||
prefetch=true
|
||||
parallelTrain=false
|
||||
|
||||
command=Train:AddBNEval:Test
|
||||
|
||||
stderr=$OutputDir$/02_BatchNormConv
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
Train=[
|
||||
action=train
|
||||
modelPath=$ModelDir$/02_BatchNormConv
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$WorkDir$/02_BatchNormConv.ndl
|
||||
networkDescription=$ConfigDir$/02_BatchNormConv.ndl
|
||||
]
|
||||
|
||||
SGD=[
|
||||
epochSize=49984
|
||||
minibatchSize=64
|
||||
learningRatesPerMB=0.03*7:0.01*8:0.003
|
||||
#momentumPerMB=0.9*10:0.99
|
||||
learningRatesPerMB=0.03*7:0.01
|
||||
momentumPerMB=0
|
||||
maxEpochs=10
|
||||
#L2RegWeight=0.03
|
||||
dropoutRate=0*1:0.5
|
||||
L2RegWeight=0
|
||||
dropoutRate=0
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=UCIFastReader
|
||||
file=$WorkDir$/Train.txt
|
||||
file=$DataDir$/Train.txt
|
||||
randomize=None
|
||||
features=[
|
||||
dim=3072
|
||||
|
@ -41,7 +47,7 @@ Train=[
|
|||
dim=1
|
||||
start=0
|
||||
labelDim=10
|
||||
labelMappingFile=$WorkDir$/labelsmap.txt
|
||||
labelMappingFile=$DataDir$/labelsmap.txt
|
||||
]
|
||||
]
|
||||
]
|
||||
|
@ -50,22 +56,22 @@ AddBNEval=[
|
|||
action=edit
|
||||
CurModel=$ModelDir$/02_BatchNormConv
|
||||
NewModel=$ModelDir$/02_BatchNormConv.Eval
|
||||
editPath=$WorkDir$/02_BatchNormConv.mel
|
||||
editPath=$ConfigDir$/02_BatchNormConv.mel
|
||||
]
|
||||
|
||||
Test=[
|
||||
action=test
|
||||
modelPath=$ModelDir$/02_BatchNormConv.Eval
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=16
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$WorkDir$/02_BatchNormConv.ndl
|
||||
networkDescription=$ConfigDir$/02_BatchNormConv.ndl
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=UCIFastReader
|
||||
file=$WorkDir$/Test.txt
|
||||
file=$DataDir$/Test.txt
|
||||
randomize=None
|
||||
features=[
|
||||
dim=3072
|
||||
|
@ -75,7 +81,7 @@ Test=[
|
|||
dim=1
|
||||
start=0
|
||||
labelDim=10
|
||||
labelMappingFile=$WorkDir$/labelsmap.txt
|
||||
labelMappingFile=$DataDir$/labelsmap.txt
|
||||
]
|
||||
]
|
||||
]
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
m=LoadModel($CurModel$, format=cntk)
|
||||
SetDefaultModel(m)
|
||||
|
||||
ibn_e = BatchNormalization(featScaled, isc, ib, im, iisd, eval = true, spatial = true)
|
||||
SetNodeInput(conv1.c, 1, ibn_e)
|
||||
conv1.bn_e = BatchNormalization(conv1.c, conv1.sc, conv1.b, conv1.m, conv1.isd, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(conv1.y, 0, conv1.bn_e)
|
||||
|
||||
conv2.bn_e = BatchNormalization(pool1, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true)
|
||||
SetNodeInput(conv2.c, 1, conv2.bn_e)
|
||||
conv2.bn_e = BatchNormalization(conv2.c, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(conv2.y, 0, conv2.bn_e)
|
||||
|
||||
conv3.bn_e = BatchNormalization(pool2, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true)
|
||||
SetNodeInput(conv3.c, 1, conv3.bn_e)
|
||||
conv3.bn_e = BatchNormalization(conv3.c, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(conv3.y, 0, conv3.bn_e)
|
||||
|
||||
h1.bn_e = BatchNormalization(pool3, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false)
|
||||
SetNodeInput(h1.t, 1, h1.bn_e)
|
||||
h1.bn_e = BatchNormalization(h1.t, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false)
|
||||
SetNodeInput(h1.y, 0, h1.bn_e)
|
||||
|
||||
SaveModel(m, $NewModel$, format=cntk)
|
|
@ -7,8 +7,8 @@ ndlMnistMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 10
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
featOffs = Const(128, rows = 3072)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
featOffs = Const(128)
|
||||
featScaled = Minus(features, featOffs)
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
|
@ -18,6 +18,9 @@ ndlMnistMacros = [
|
|||
conv2BValue = 0
|
||||
conv3WScale = 1.414
|
||||
conv3BValue = 0
|
||||
|
||||
scScale = 0.03
|
||||
|
||||
fc1WScale = 12
|
||||
fc1BValue = 0
|
||||
fc2WScale = 1.5
|
||||
|
@ -25,12 +28,6 @@ ndlMnistMacros = [
|
|||
]
|
||||
|
||||
DNN=[
|
||||
ib = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
|
||||
isc = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
|
||||
im = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
iisd = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
ibn = BatchNormalization(featScaled, isc, ib, im, iisd, eval = false, spatial = true)
|
||||
|
||||
# conv1
|
||||
kW1 = 5
|
||||
kH1 = 5
|
||||
|
@ -38,14 +35,14 @@ DNN=[
|
|||
hStride1 = 1
|
||||
vStride1 = 1
|
||||
# weight[cMap1, kW1 * kH1 * ImageC]
|
||||
conv1 = ConvReLULayer(ibn, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
|
||||
conv1 = ConvBNReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue, scScale)
|
||||
|
||||
# pool1
|
||||
pool1W = 3
|
||||
pool1H = 3
|
||||
pool1hStride = 2
|
||||
pool1vStride = 2
|
||||
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride)
|
||||
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
|
||||
|
||||
# conv2
|
||||
kW2 = 5
|
||||
|
@ -54,14 +51,14 @@ DNN=[
|
|||
hStride2 = 1
|
||||
vStride2 = 1
|
||||
# weight[cMap2, kW2 * kH2 * cMap1]
|
||||
conv2 = ConvBNReLULayer(pool1, cMap1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
|
||||
conv2 = ConvBNReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue, scScale)
|
||||
|
||||
# pool2
|
||||
pool2W = 3
|
||||
pool2H = 3
|
||||
pool2hStride = 2
|
||||
pool2vStride = 2
|
||||
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride)
|
||||
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
|
||||
|
||||
# conv3
|
||||
kW3 = 5
|
||||
|
@ -70,19 +67,18 @@ DNN=[
|
|||
hStride3 = 1
|
||||
vStride3 = 1
|
||||
# weight[cMap3, kW3 * kH3 * cMap2]
|
||||
conv3 = ConvBNReLULayer(pool2, cMap2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
|
||||
conv3 = ConvBNReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue, scScale)
|
||||
|
||||
# pool3
|
||||
pool3W = 3
|
||||
pool3H = 3
|
||||
pool3hStride = 2
|
||||
pool3vStride = 2
|
||||
pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride)
|
||||
pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
|
||||
|
||||
hiddenDim = 64
|
||||
h1 = DnnBNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)
|
||||
h1_d = Dropout(h1)
|
||||
ol = DNNLastLayer(hiddenDim, labelDim, h1_d, fc2WScale, fc2BValue)
|
||||
ol = DNNLastLayer(hiddenDim, labelDim, h1, fc2WScale, fc2BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
|
|
|
@ -16,6 +16,7 @@ command=Train:AddBNEval:Test
|
|||
|
||||
stderr=$OutputDir$/03_ResNet
|
||||
traceLevel=1
|
||||
numMBsToShowResult=200
|
||||
|
||||
Proj16to32Filename = $ConfigDir$/16to32.txt
|
||||
Proj32to64Filename = $ConfigDir$/32to64.txt
|
||||
|
@ -45,8 +46,6 @@ Train=[
|
|||
gradientBits=1
|
||||
]
|
||||
]
|
||||
|
||||
numMBsToShowResult=10
|
||||
]
|
||||
|
||||
reader=[
|
||||
|
|
|
@ -1,52 +1,52 @@
|
|||
m=LoadModel($CurModel$, format=cntk)
|
||||
SetDefaultModel(m)
|
||||
|
||||
conv1.bn_e = BatchNormalization(conv1.c, conv1.sc, conv1.b, conv1.m, conv1.isd, eval = true, spatial = true)
|
||||
conv1.bn_e = BatchNormalization(conv1.c, conv1.sc, conv1.b, conv1.m, conv1.isd, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(conv1.y, 0, conv1.bn_e)
|
||||
|
||||
rn1_1.bn1_e = BatchNormalization(rn1_1.c1, rn1_1.sc1, rn1_1.b1, rn1_1.m1, rn1_1.isd1, eval = true, spatial = true)
|
||||
rn1_1.bn1_e = BatchNormalization(rn1_1.c1, rn1_1.sc1, rn1_1.b1, rn1_1.m1, rn1_1.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn1_1.y1, 0, rn1_1.bn1_e)
|
||||
rn1_1.bn2_e = BatchNormalization(rn1_1.c2, rn1_1.sc2, rn1_1.b2, rn1_1.m2, rn1_1.isd2, eval = true, spatial = true)
|
||||
rn1_1.bn2_e = BatchNormalization(rn1_1.c2, rn1_1.sc2, rn1_1.b2, rn1_1.m2, rn1_1.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn1_1.p, 0, rn1_1.bn2_e)
|
||||
|
||||
rn1_2.bn1_e = BatchNormalization(rn1_2.c1, rn1_2.sc1, rn1_2.b1, rn1_2.m1, rn1_2.isd1, eval = true, spatial = true)
|
||||
rn1_2.bn1_e = BatchNormalization(rn1_2.c1, rn1_2.sc1, rn1_2.b1, rn1_2.m1, rn1_2.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn1_2.y1, 0, rn1_2.bn1_e)
|
||||
rn1_2.bn2_e = BatchNormalization(rn1_2.c2, rn1_2.sc2, rn1_2.b2, rn1_2.m2, rn1_2.isd2, eval = true, spatial = true)
|
||||
rn1_2.bn2_e = BatchNormalization(rn1_2.c2, rn1_2.sc2, rn1_2.b2, rn1_2.m2, rn1_2.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn1_2.p, 0, rn1_2.bn2_e)
|
||||
|
||||
rn1_3.bn1_e = BatchNormalization(rn1_3.c1, rn1_3.sc1, rn1_3.b1, rn1_3.m1, rn1_3.isd1, eval = true, spatial = true)
|
||||
rn1_3.bn1_e = BatchNormalization(rn1_3.c1, rn1_3.sc1, rn1_3.b1, rn1_3.m1, rn1_3.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn1_3.y1, 0, rn1_3.bn1_e)
|
||||
rn1_3.bn2_e = BatchNormalization(rn1_3.c2, rn1_3.sc2, rn1_3.b2, rn1_3.m2, rn1_3.isd2, eval = true, spatial = true)
|
||||
rn1_3.bn2_e = BatchNormalization(rn1_3.c2, rn1_3.sc2, rn1_3.b2, rn1_3.m2, rn1_3.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn1_3.p, 0, rn1_3.bn2_e)
|
||||
|
||||
rn2_1.bn1_e = BatchNormalization(rn2_1.c1, rn2_1.sc1, rn2_1.b1, rn2_1.m1, rn2_1.isd1, eval = true, spatial = true)
|
||||
rn2_1.bn1_e = BatchNormalization(rn2_1.c1, rn2_1.sc1, rn2_1.b1, rn2_1.m1, rn2_1.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn2_1.y1, 0, rn2_1.bn1_e)
|
||||
rn2_1.bn2_e = BatchNormalization(rn2_1.c2, rn2_1.sc2, rn2_1.b2, rn2_1.m2, rn2_1.isd2, eval = true, spatial = true)
|
||||
rn2_1.bn2_e = BatchNormalization(rn2_1.c2, rn2_1.sc2, rn2_1.b2, rn2_1.m2, rn2_1.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn2_1.p, 0, rn2_1.bn2_e)
|
||||
|
||||
rn2_2.bn1_e = BatchNormalization(rn2_2.c1, rn2_2.sc1, rn2_2.b1, rn2_2.m1, rn2_2.isd1, eval = true, spatial = true)
|
||||
rn2_2.bn1_e = BatchNormalization(rn2_2.c1, rn2_2.sc1, rn2_2.b1, rn2_2.m1, rn2_2.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn2_2.y1, 0, rn2_2.bn1_e)
|
||||
rn2_2.bn2_e = BatchNormalization(rn2_2.c2, rn2_2.sc2, rn2_2.b2, rn2_2.m2, rn2_2.isd2, eval = true, spatial = true)
|
||||
rn2_2.bn2_e = BatchNormalization(rn2_2.c2, rn2_2.sc2, rn2_2.b2, rn2_2.m2, rn2_2.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn2_2.p, 0, rn2_2.bn2_e)
|
||||
|
||||
rn2_3.bn1_e = BatchNormalization(rn2_3.c1, rn2_3.sc1, rn2_3.b1, rn2_3.m1, rn2_3.isd1, eval = true, spatial = true)
|
||||
rn2_3.bn1_e = BatchNormalization(rn2_3.c1, rn2_3.sc1, rn2_3.b1, rn2_3.m1, rn2_3.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn2_3.y1, 0, rn2_3.bn1_e)
|
||||
rn2_3.bn2_e = BatchNormalization(rn2_3.c2, rn2_3.sc2, rn2_3.b2, rn2_3.m2, rn2_3.isd2, eval = true, spatial = true)
|
||||
rn2_3.bn2_e = BatchNormalization(rn2_3.c2, rn2_3.sc2, rn2_3.b2, rn2_3.m2, rn2_3.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn2_3.p, 0, rn2_3.bn2_e)
|
||||
|
||||
rn3_1.bn1_e = BatchNormalization(rn3_1.c1, rn3_1.sc1, rn3_1.b1, rn3_1.m1, rn3_1.isd1, eval = true, spatial = true)
|
||||
rn3_1.bn1_e = BatchNormalization(rn3_1.c1, rn3_1.sc1, rn3_1.b1, rn3_1.m1, rn3_1.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn3_1.y1, 0, rn3_1.bn1_e)
|
||||
rn3_1.bn2_e = BatchNormalization(rn3_1.c2, rn3_1.sc2, rn3_1.b2, rn3_1.m2, rn3_1.isd2, eval = true, spatial = true)
|
||||
rn3_1.bn2_e = BatchNormalization(rn3_1.c2, rn3_1.sc2, rn3_1.b2, rn3_1.m2, rn3_1.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn3_1.p, 0, rn3_1.bn2_e)
|
||||
|
||||
rn3_2.bn1_e = BatchNormalization(rn3_2.c1, rn3_2.sc1, rn3_2.b1, rn3_2.m1, rn3_2.isd1, eval = true, spatial = true)
|
||||
rn3_2.bn1_e = BatchNormalization(rn3_2.c1, rn3_2.sc1, rn3_2.b1, rn3_2.m1, rn3_2.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn3_2.y1, 0, rn3_2.bn1_e)
|
||||
rn3_2.bn2_e = BatchNormalization(rn3_2.c2, rn3_2.sc2, rn3_2.b2, rn3_2.m2, rn3_2.isd2, eval = true, spatial = true)
|
||||
rn3_2.bn2_e = BatchNormalization(rn3_2.c2, rn3_2.sc2, rn3_2.b2, rn3_2.m2, rn3_2.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn3_2.p, 0, rn3_2.bn2_e)
|
||||
|
||||
rn3_3.bn1_e = BatchNormalization(rn3_3.c1, rn3_3.sc1, rn3_3.b1, rn3_3.m1, rn3_3.isd1, eval = true, spatial = true)
|
||||
rn3_3.bn1_e = BatchNormalization(rn3_3.c1, rn3_3.sc1, rn3_3.b1, rn3_3.m1, rn3_3.isd1, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn3_3.y1, 0, rn3_3.bn1_e)
|
||||
rn3_3.bn2_e = BatchNormalization(rn3_3.c2, rn3_3.sc2, rn3_3.b2, rn3_3.m2, rn3_3.isd2, eval = true, spatial = true)
|
||||
rn3_3.bn2_e = BatchNormalization(rn3_3.c2, rn3_3.sc2, rn3_3.b2, rn3_3.m2, rn3_3.isd2, eval = true, spatial = true, imageLayout = "cudnn")
|
||||
SetNodeInput(rn3_3.p, 0, rn3_3.bn2_e)
|
||||
|
||||
SaveModel(m, $NewModel$, format=cntk)
|
|
@ -7,8 +7,8 @@ LocalMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 10
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
featOffs = Const(128, rows = 3072)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
featOffs = Const(128)
|
||||
featScaled = Minus(features, featOffs)
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
|
@ -30,7 +30,7 @@ LocalMacros = [
|
|||
|
||||
DNN=[
|
||||
cMap1 = 16
|
||||
conv1 = ConvBNReLULayer2(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue)
|
||||
conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue)
|
||||
|
||||
rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
|
||||
rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
|
||||
|
@ -38,13 +38,13 @@ DNN=[
|
|||
|
||||
cMap2 = 32
|
||||
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
|
||||
rn2_1 = ResNetNode2Conv(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
|
||||
rn2_1 = ResNetNode2Inc(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
|
||||
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
|
||||
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
|
||||
|
||||
cMap3 = 64
|
||||
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
|
||||
rn3_1 = ResNetNode2Conv(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
|
||||
rn3_1 = ResNetNode2Inc(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
|
||||
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
|
||||
|
@ -53,7 +53,7 @@ DNN=[
|
|||
poolH = 3
|
||||
poolhStride = 2
|
||||
poolvStride = 2
|
||||
pool = AveragePooling(rn3_3, poolW, poolH, poolhStride, poolvStride)
|
||||
pool = AveragePooling(rn3_3, poolW, poolH, poolhStride, poolvStride, imageLayout = "cudnn")
|
||||
|
||||
ol = DnnLastLayer(576, labelDim, pool, fc1WScale, fc1BValue)
|
||||
|
||||
|
|
|
@ -1,83 +1,71 @@
|
|||
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
|
||||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
p = Plus(c, b);
|
||||
y = RectifiedLinear(p);
|
||||
}
|
||||
|
||||
ConvBNReLULayer(inp, inMap, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
|
||||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
|
||||
sc = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
|
||||
m = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
bn = BatchNormalization(inp, sc, b, m, isd, eval = false, spatial = true)
|
||||
c = Convolution(W, bn, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
y = RectifiedLinear(c);
|
||||
}
|
||||
|
||||
ConvBNReLULayer2(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
|
||||
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scScale)
|
||||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
|
||||
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y = RectifiedLinear(bn);
|
||||
}
|
||||
|
||||
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
|
||||
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
|
||||
{
|
||||
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
|
||||
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
|
||||
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
p = Plus(bn2, inp)
|
||||
y2 = RectifiedLinear(p);
|
||||
}
|
||||
|
||||
ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
|
||||
ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, Wproj)
|
||||
{
|
||||
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
|
||||
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
|
||||
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
|
||||
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
|
||||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
|
||||
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
|
||||
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
|
||||
p = Plus(bn2, cproj)
|
||||
y2 = RectifiedLinear(p);
|
||||
}
|
||||
|
@ -94,13 +82,13 @@ DnnReLULayer(inDim, outDim, x, wScale, bValue)
|
|||
DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
|
||||
{
|
||||
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(inDim, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(inDim, 1, init = Gaussian, initValueScale = 0.01)
|
||||
m = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
bn = BatchNormalization(x, sc, b, m, isd, eval = false, spatial = false)
|
||||
t = Times(W, bn)
|
||||
y = RectifiedLinear(t)
|
||||
b = Parameter(outDim, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(outDim, 1, init = Gaussian, initValueScale = 0.01)
|
||||
m = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
t = Times(W, x)
|
||||
bn = BatchNormalization(t, sc, b, m, isd, eval = false, spatial = false, imageLayout = "cudnn")
|
||||
y = RectifiedLinear(bn)
|
||||
}
|
||||
|
||||
DnnLastLayer(hiddenDim, labelDim, x, wScale, bValue)
|
||||
|
|
|
@ -15,7 +15,7 @@ Short description of the network:
|
|||
01_Convolution.ndl is a convolutional network which has 3 convolutional and 3 max pooling layers and resembles the network described here:
|
||||
https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-80sec.cfg
|
||||
(main differences are usage of max pooling layers everywhere rather than mix of max and average pooling, as well as dropout in fully-connected layer).
|
||||
The network produces 22% of error after training for about 4 minutes on GPU.
|
||||
The network produces 21% of error after training for about 3 minutes on GPU.
|
||||
To run the sample, navigate to this folder and run the following command:
|
||||
<path to CNTK executable> configFile=01_Conv.config configName=01_Conv
|
||||
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
RootDir = "."
|
||||
|
||||
ConfigDir = "$RootDir$"
|
||||
DataDir = "$RootDir$"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
|
||||
ndlMacros=$ConfigDir$/Macros.ndl
|
||||
|
||||
precision=float
|
||||
|
@ -7,10 +14,13 @@ command=Train:AddTop5Eval:Test
|
|||
|
||||
parallelTrain=false
|
||||
|
||||
stderr=$OutputDir$/AlexNet
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
Train=[
|
||||
action=train
|
||||
modelPath=$ModelDir$/AlexNet
|
||||
traceLevel=1
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/AlexNet.ndl
|
||||
|
@ -35,7 +45,7 @@ Train=[
|
|||
]
|
||||
]
|
||||
|
||||
numMBsToShowResult=10
|
||||
numMBsToShowResult=100
|
||||
]
|
||||
|
||||
reader=[
|
||||
|
@ -44,7 +54,7 @@ Train=[
|
|||
# <full path to image><tab><numerical label (0-based class id)>
|
||||
# Example:
|
||||
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
|
||||
file=$ConfigDir$/train_map_nfs.txt
|
||||
file=$ConfigDir$/train_map.txt
|
||||
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
|
||||
randomize=Auto
|
||||
features=[
|
||||
|
@ -93,7 +103,7 @@ Test=[
|
|||
|
||||
reader=[
|
||||
readerType=ImageReader
|
||||
file=$ConfigDir$/val_map_nfs.txt
|
||||
file=$ConfigDir$/val_map.txt
|
||||
randomize=None
|
||||
features=[
|
||||
width=224
|
||||
|
|
|
@ -7,7 +7,7 @@ ndlMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 1000
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
conv1WScale = 0.95
|
||||
|
@ -36,14 +36,14 @@ DNN=[
|
|||
hStride1 = 4
|
||||
vStride1 = 4
|
||||
# weight[cMap1, kW1 * kH1 * ImageC]
|
||||
conv1_act = ConvReLULayer(features, cMap1, 363, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
|
||||
conv1 = ConvReLULayer(features, cMap1, 363, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
|
||||
|
||||
# pool1
|
||||
pool1W = 3
|
||||
pool1H = 3
|
||||
pool1hStride = 2
|
||||
pool1vStride = 2
|
||||
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
|
||||
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
|
||||
|
||||
# conv2
|
||||
kW2 = 5
|
||||
|
@ -52,14 +52,14 @@ DNN=[
|
|||
hStride2 = 1
|
||||
vStride2 = 1
|
||||
# weight[cMap2, kW2 * kH2 * cMap1]
|
||||
conv2_act = ConvReLULayer(pool1, cMap2, 1600, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
|
||||
conv2 = ConvReLULayer(pool1, cMap2, 1600, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
|
||||
|
||||
# pool2
|
||||
pool2W = 3
|
||||
pool2H = 3
|
||||
pool2hStride = 2
|
||||
pool2vStride = 2
|
||||
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
|
||||
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
|
||||
|
||||
# conv3
|
||||
kW3 = 3
|
||||
|
@ -68,7 +68,7 @@ DNN=[
|
|||
hStride3 = 1
|
||||
vStride3 = 1
|
||||
# weight[cMap3, kW3 * kH3 * cMap2]
|
||||
conv3_act = ConvReLULayer(pool2, cMap3, 1728, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
|
||||
conv3 = ConvReLULayer(pool2, cMap3, 1728, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
|
||||
|
||||
# conv4
|
||||
kW4 = 3
|
||||
|
@ -77,7 +77,7 @@ DNN=[
|
|||
hStride4 = 1
|
||||
vStride4 = 1
|
||||
# weight[cMap4, kW4 * kH4 * cMap3]
|
||||
conv4_act = ConvReLULayer(conv3_act, cMap4, 3456, kW4, kH4, hStride4, vStride4, conv4WScale, conv4BValue)
|
||||
conv4 = ConvReLULayer(conv3, cMap4, 3456, kW4, kH4, hStride4, vStride4, conv4WScale, conv4BValue)
|
||||
|
||||
# conv5
|
||||
kW5 = 3
|
||||
|
@ -86,14 +86,14 @@ DNN=[
|
|||
hStride5 = 1
|
||||
vStride5 = 1
|
||||
# weight[cMap5, kW5 * kH5 * cMap4]
|
||||
conv5_act = ConvReLULayer(conv4_act, cMap5, 2304, kW5, kH5, hStride5, vStride5, conv5WScale, conv5BValue)
|
||||
conv5 = ConvReLULayer(conv4, cMap5, 2304, kW5, kH5, hStride5, vStride5, conv5WScale, conv5BValue)
|
||||
|
||||
# pool3
|
||||
pool3W = 3
|
||||
pool3H = 3
|
||||
pool3hStride = 2
|
||||
pool3vStride = 2
|
||||
pool3 = MaxPooling(conv5_act, pool3W, pool3H, pool3hStride, pool3vStride)
|
||||
pool3 = MaxPooling(conv5, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
|
||||
|
||||
hiddenDim = 4096
|
||||
h1 = DNNReLULayer(9216, hiddenDim, pool3, fc1WScale, fc1BValue)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
|
||||
{
|
||||
convW = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
convB = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
convPlusB = Plus(conv, convB);
|
||||
act = RectifiedLinear(convPlusB);
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
z = Plus(c, b);
|
||||
y = RectifiedLinear(z);
|
||||
}
|
||||
|
||||
DNNReLULayer(inDim, outDim, x, wScale, bValue)
|
||||
|
|
|
@ -6,8 +6,8 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
|
|||
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y = RectifiedLinear(bn);
|
||||
}
|
||||
|
||||
|
@ -20,8 +20,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
|
|||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
|
@ -30,8 +30,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
|
|||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
p = Plus(bn2, inp)
|
||||
y2 = RectifiedLinear(p);
|
||||
}
|
||||
|
@ -45,8 +45,8 @@ ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue,
|
|||
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
|
||||
|
@ -55,10 +55,10 @@ ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue,
|
|||
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
|
||||
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
|
||||
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
|
||||
p = Plus(bn2, cproj)
|
||||
y2 = RectifiedLinear(p);
|
||||
}
|
||||
|
@ -73,8 +73,8 @@ ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
|
|||
m1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false)
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
|
||||
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
# 3x3 convolution.
|
||||
|
@ -84,8 +84,8 @@ ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
|
|||
m2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, 3, 3, convMap, 1, 1, zeroPadding = true)
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c2 = Convolution(W2, y1, 3, 3, convMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y2 = RectifiedLinear(bn2);
|
||||
|
||||
# 1x1 expanding convolution.
|
||||
|
@ -95,8 +95,8 @@ ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
|
|||
m3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false)
|
||||
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true)
|
||||
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
|
||||
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true, imageLayout = "cudnn")
|
||||
|
||||
p = Plus(bn3, inp)
|
||||
y3 = RectifiedLinear(p);
|
||||
|
@ -111,8 +111,8 @@ ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue,
|
|||
m1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false)
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
|
||||
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
|
||||
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, imageLayout = "cudnn")
|
||||
y1 = RectifiedLinear(bn1);
|
||||
|
||||
# 3x3 convolution.
|
||||
|
@ -122,8 +122,8 @@ ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue,
|
|||
m2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c2 = Convolution(W2, y1, 3, 3, convMap, 2, 2, zeroPadding = true)
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
|
||||
c2 = Convolution(W2, y1, 3, 3, convMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
|
||||
y2 = RectifiedLinear(bn2);
|
||||
|
||||
# 1x1 expanding convolution.
|
||||
|
@ -133,11 +133,11 @@ ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue,
|
|||
m3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false)
|
||||
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true)
|
||||
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
|
||||
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true, imageLayout = "cudnn")
|
||||
|
||||
# Increasing input dimension convolution
|
||||
cProj = Convolution(wProj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
|
||||
cProj = Convolution(wProj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
|
||||
|
||||
p = Plus(bn3, cProj)
|
||||
y3 = RectifiedLinear(p);
|
||||
|
|
|
@ -32,10 +32,10 @@ Train=[
|
|||
|
||||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=2
|
||||
learningRatesPerMB=0.1*20:0.03*10:0.01*30:0.003
|
||||
minibatchSize=32
|
||||
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=100
|
||||
maxEpochs=120
|
||||
gradUpdateType=None
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
@ -72,7 +72,7 @@ Train=[
|
|||
# Horizontal random flip, will be enabled by default if cropType=Random
|
||||
#hflip=0
|
||||
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
|
||||
cropRatio=0.875
|
||||
cropRatio=0.46666:0.875
|
||||
# Crop scale ratio jitter type.
|
||||
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
|
||||
jitterType=UniRatio
|
||||
|
@ -99,7 +99,7 @@ Test=[
|
|||
action=test
|
||||
modelPath=$ModelDir$/ResNet_152.Top5
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=32
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/ResNet_152.ndl
|
||||
|
|
|
@ -7,7 +7,7 @@ ndlMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 1000
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
featOffs = Const(0, rows = 150528)
|
||||
featScaled = Plus(features, featOffs)
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
@ -42,7 +42,7 @@ DNN=[
|
|||
cMap6 = 2048
|
||||
|
||||
conv1 = ConvBNReLULayer(featScaled, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
|
||||
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs)
|
||||
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
rn1_1_Wproj = Parameter(cMap3, cMap1, init = fromFile, initFromFilePath = "$Proj64to256Filename$", needGradient = false)
|
||||
rn1_1 = ResNetNode3Inc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, rn1_1_Wproj)
|
||||
|
@ -102,7 +102,7 @@ DNN=[
|
|||
rn4_2 = ResNetNode3(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
|
||||
rn4_3 = ResNetNode3(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
|
||||
|
||||
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs)
|
||||
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
ol = DnnLayer(8192, labelDim, pool5, fcWScale, fcBValue)
|
||||
|
||||
|
|
|
@ -32,9 +32,9 @@ Train=[
|
|||
SGD=[
|
||||
epochSize=0
|
||||
minibatchSize=64
|
||||
learningRatesPerMB=0.1*20:0.03*10:0.01*30:0.003
|
||||
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
|
||||
momentumPerMB=0.9
|
||||
maxEpochs=100
|
||||
maxEpochs=120
|
||||
gradUpdateType=None
|
||||
L2RegWeight=0.0001
|
||||
dropoutRate=0
|
||||
|
@ -71,7 +71,7 @@ Train=[
|
|||
# Horizontal random flip, will be enabled by default if cropType=Random
|
||||
#hflip=0
|
||||
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
|
||||
cropRatio=0.875
|
||||
cropRatio=0.46666:0.875
|
||||
# Crop scale ratio jitter type.
|
||||
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
|
||||
jitterType=UniRatio
|
||||
|
@ -98,7 +98,7 @@ Test=[
|
|||
action=test
|
||||
modelPath=$ModelDir$/ResNet_34.Top5
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=64
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/ResNet_34.ndl
|
||||
|
|
|
@ -7,9 +7,7 @@ ndlMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 1000
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
featOffs = Const(0, rows = 150528)
|
||||
featScaled = Plus(features, featOffs)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
# Kernels width and height.
|
||||
|
@ -35,8 +33,8 @@ ndlMacros = [
|
|||
|
||||
DNN=[
|
||||
cMap1 = 64
|
||||
conv1 = ConvBNReLULayer(featScaled, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
|
||||
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs)
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
|
||||
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
rn1_1 = ResNetNode2(pool1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
rn1_2 = ResNetNode2(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
|
||||
|
@ -64,7 +62,7 @@ DNN=[
|
|||
rn4_2 = ResNetNode2(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
|
||||
rn4_3 = ResNetNode2(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
|
||||
|
||||
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs)
|
||||
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
ol = DnnLayer(4608, labelDim, pool5, fcWScale, fcBValue)
|
||||
|
||||
|
|
|
@ -12,14 +12,13 @@ DnnReLULayer(inDim, outDim, x, wScale, bValue)
|
|||
DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
|
||||
{
|
||||
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
|
||||
b = Parameter(inDim, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(inDim, 1, init = Gaussian, initValueScale = 0.01)
|
||||
m = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
bn = BatchNormalization(x, sc, b, m, isd, eval = false, spatial = false)
|
||||
t = Times(W, bn)
|
||||
y = RectifiedLinear(t)
|
||||
b = Parameter(outDim, 1, init = fixedValue, value = bValue)
|
||||
sc = Parameter(outDim, 1, init = Gaussian, initValueScale = 0.01)
|
||||
m = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
t = Times(W, x)
|
||||
bn = BatchNormalization(t, sc, b, m, isd, eval = false, spatial = false)
|
||||
y = RectifiedLinear(bn)
|
||||
}
|
||||
|
||||
# Fully-connected layer.
|
||||
|
@ -35,8 +34,8 @@ DnnLayer(inDim, outDim, x, wScale, bValue)
|
|||
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
|
||||
{
|
||||
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
|
||||
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
z = Plus(c, b);
|
||||
y = RectifiedLinear(z);
|
||||
}
|
||||
|
@ -50,7 +49,7 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
|
|||
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
|
||||
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true)
|
||||
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
|
||||
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, imageLayout = "cudnn")
|
||||
y = RectifiedLinear(bn);
|
||||
}
|
||||
|
|
|
@ -1,21 +1,28 @@
|
|||
WorkDir=.
|
||||
ModelDir=$WorkDir$/_out/$ConfigName$
|
||||
stderr=$WorkDir$/_out/$ConfigName$
|
||||
RootDir = "."
|
||||
|
||||
ndlMacros=$WorkDir$/Macros.ndl
|
||||
ConfigDir = "$RootDir$"
|
||||
DataDir = "$RootDir$"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
|
||||
ndlMacros=$ConfigDir$/Macros.ndl
|
||||
|
||||
precision=float
|
||||
deviceId=Auto
|
||||
|
||||
command=Train:AddTop5Eval:Test
|
||||
|
||||
stderr=$OutputDir$/VGG_A
|
||||
traceLevel=1
|
||||
numMBsToShowResult=500
|
||||
|
||||
Train=[
|
||||
action=train
|
||||
modelPath=$ModelDir$/VGG_A
|
||||
traceLevel=1
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$WorkDir$/VGG_A.ndl
|
||||
networkDescription=$ConfigDir$/VGG_A.ndl
|
||||
]
|
||||
|
||||
SGD=[
|
||||
|
@ -37,7 +44,7 @@ Train=[
|
|||
# <full path to image><tab><numerical label (0-based class id)>
|
||||
# Example:
|
||||
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
|
||||
file=$WorkDir$/train_map.txt
|
||||
file=$ConfigDir$/train_map.txt
|
||||
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
|
||||
randomize=Auto
|
||||
features=[
|
||||
|
@ -59,7 +66,7 @@ Train=[
|
|||
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
|
||||
interpolations=Linear
|
||||
# Stores mean values for each pixel in OpenCV matrix XML format.
|
||||
meanFile=$WorkDir$/ImageNet1K_mean.xml
|
||||
meanFile=$ConfigDir$/ImageNet1K_mean.xml
|
||||
]
|
||||
labels=[
|
||||
labelDim=1000
|
||||
|
@ -71,29 +78,29 @@ AddTop5Eval=[
|
|||
action=edit
|
||||
CurModel=$ModelDir$/VGG_A
|
||||
NewModel=$ModelDir$/VGG_A.Top5
|
||||
editPath=$WorkDir$/add_top5_layer.mel
|
||||
editPath=$ConfigDir$/add_top5_layer.mel
|
||||
]
|
||||
|
||||
Test=[
|
||||
action=test
|
||||
modelPath=$ModelDir$/VGG_A.Top5
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=32
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$WorkDir$/VGG_A.ndl
|
||||
networkDescription=$ConfigDir$/VGG_A.ndl
|
||||
]
|
||||
|
||||
reader=[
|
||||
readerType=ImageReader
|
||||
file=$WorkDir$/val_map.txt
|
||||
file=$ConfigDir$/val_map.txt
|
||||
randomize=None
|
||||
features=[
|
||||
width=224
|
||||
height=224
|
||||
channels=3
|
||||
cropType=Center
|
||||
meanFile=$WorkDir$/ImageNet1K_mean.xml
|
||||
meanFile=$ConfigDir$/ImageNet1K_mean.xml
|
||||
]
|
||||
labels=[
|
||||
labelDim=1000
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
load=ndlMnistMacros
|
||||
load=ndlMacros
|
||||
run=DNN
|
||||
|
||||
ndlMnistMacros = [
|
||||
ndlMacros = [
|
||||
ImageW = 224
|
||||
ImageH = 224
|
||||
ImageC = 3
|
||||
LabelDim = 1000
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
# Kernels width and height.
|
||||
|
@ -38,30 +38,30 @@ DNN=[
|
|||
cMap1 = 64
|
||||
conv1 = ConvReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs)
|
||||
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap2 = 128
|
||||
conv2 = ConvReLULayer(pool1, cMap2, 576, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool2 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
|
||||
pool2 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap3 = 256
|
||||
conv3 = ConvReLULayer(pool2, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv4 = ConvReLULayer(conv3, cMap3, 2304, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool3 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
|
||||
pool3 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap5 = 512
|
||||
conv5 = ConvReLULayer(pool3, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv6 = ConvReLULayer(conv5, cMap5, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool4 = MaxPooling(conv6, poolW, poolH, poolhs, poolvs)
|
||||
pool4 = MaxPooling(conv6, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap6 = 512
|
||||
conv7 = ConvReLULayer(pool4, cMap6, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv8 = ConvReLULayer(conv7, cMap6, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool5 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
|
||||
pool5 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
hiddenDim = 4096
|
||||
h1 = DnnReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
|
||||
|
|
|
@ -94,7 +94,7 @@ Test=[
|
|||
action=test
|
||||
modelPath=$ModelDir$/VGG_E.Top5
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=16
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/VGG_E.ndl
|
||||
|
|
|
@ -7,7 +7,7 @@ ndlMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 1000
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
# Kernels width and height.
|
||||
|
@ -39,13 +39,13 @@ DNN=[
|
|||
conv1 = ConvReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv2 = ConvReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
|
||||
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap3 = 128
|
||||
conv3 = ConvReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv4 = ConvReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
|
||||
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap5 = 256
|
||||
conv5 = ConvReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
@ -53,7 +53,7 @@ DNN=[
|
|||
conv7 = ConvReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv8 = ConvReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
|
||||
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap9 = 512
|
||||
conv9 = ConvReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
@ -61,7 +61,7 @@ DNN=[
|
|||
conv11 = ConvReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv12 = ConvReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs)
|
||||
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap13 = 512
|
||||
conv13 = ConvReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
@ -69,7 +69,7 @@ DNN=[
|
|||
conv15 = ConvReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
conv16 = ConvReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
|
||||
|
||||
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs)
|
||||
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
hiddenDim = 4096
|
||||
h1 = DnnReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
|
||||
|
|
|
@ -94,7 +94,7 @@ Test=[
|
|||
action=test
|
||||
modelPath=$ModelDir$/VGG_E_BN.Top5
|
||||
# Set minibatch size for testing.
|
||||
minibatchSize=128
|
||||
minibatchSize=16
|
||||
|
||||
NDLNetworkBuilder=[
|
||||
networkDescription=$ConfigDir$/VGG_E_BN.ndl
|
||||
|
|
|
@ -7,9 +7,7 @@ ndlMacros = [
|
|||
ImageC = 3
|
||||
LabelDim = 1000
|
||||
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
|
||||
featOffs = Const(0, rows = 150528)
|
||||
featScaled = Plus(features, featOffs)
|
||||
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
|
||||
labels = Input(LabelDim, tag = label)
|
||||
|
||||
# Kernels width and height.
|
||||
|
@ -39,16 +37,16 @@ ndlMacros = [
|
|||
|
||||
DNN=[
|
||||
cMap1 = 64
|
||||
conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
conv1 = ConvBNReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
conv2 = ConvBNReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
||||
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
|
||||
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap3 = 128
|
||||
conv3 = ConvBNReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
conv4 = ConvBNReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
||||
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
|
||||
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap5 = 256
|
||||
conv5 = ConvBNReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
@ -56,7 +54,7 @@ DNN=[
|
|||
conv7 = ConvBNReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
conv8 = ConvBNReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
||||
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
|
||||
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap9 = 512
|
||||
conv9 = ConvBNReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
@ -64,7 +62,7 @@ DNN=[
|
|||
conv11 = ConvBNReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
conv12 = ConvBNReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
||||
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs)
|
||||
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
cMap13 = 512
|
||||
conv13 = ConvBNReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
@ -72,7 +70,7 @@ DNN=[
|
|||
conv15 = ConvBNReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
conv16 = ConvBNReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
|
||||
|
||||
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs)
|
||||
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
|
||||
|
||||
hiddenDim = 4096
|
||||
h1 = DnnBNReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)
|
||||
|
|
|
@ -47,6 +47,7 @@ using namespace std;
L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
L"Shift(input, fromOffsets, boundaryValue, dim=-1, offsetRanges=1, multiOffsetDim=0, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) ; fromOffset = new IntVector [ items = fromOffsets ] ; offsetRange = new SizeVector [items= new SizeVector [ items = offsetRanges ] ]/*plus the function args*/ ]\n"
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"

@ -699,5 +699,4 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
}
template<class V> /*static*/ const std::vector<typename V::value_type> & IConfigRecord::Array(const V & vec) { return static_cast<const std::vector<typename V::value_type> &>(vec); } // use this specifically for XXXargvector

}}} // end namespaces
@ -208,7 +208,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}

void Load(File& fstream, bool acceptLegacyFormat = false)
const TensorShape & Load(File& fstream, bool acceptLegacyFormat = false)
{
// format: uint32_t n, dim[0], dim[1], ..., dim[n-1]
// We are also able to read (but not write) an older format, which stores 3-dimensional tensors as size_t W, H, C

@ -232,6 +232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> m_dims[2] >> m_dims[0]; // stored in order C, W, H
}
InitAsNoSlice();
return *this;
}

// accessors

@ -404,7 +405,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_strides.resize(m_dims.size());
for (size_t k = 0; k < m_dims.size(); k++)
m_strides[k] = k > 0 ? m_strides[k - 1] * (ptrdiff_t)m_dims[k - 1] : 1;
m_allocation = m_dims.empty() ? 0 : m_dims.back() * (size_t)m_strides.back();
m_allocation = m_dims.empty() ? 0 : m_dims.back() * (size_t)m_strides.back(); // TODO: Or should an empty shape mean it's a scalar?
}
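// Worked example of the stride computation above (illustration only; the dims are hypothetical):
// for m_dims = { 3, 4, 5 } the loop produces the dense column-major strides m_strides = { 1, 3, 12 },
// and m_allocation = 5 * 12 = 60 elements, i.e. the product of all dimensions.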
private:
@ -24,6 +24,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// The methods below determine evaluation order, which is tricky in presence of recurrent loops.
// TODO: Can this be moved to a separate class?

static const vector<int> & GetRecurrenceDirections(const ComputationNodeBasePtr &);

// FormRecurrentLoops() -- MAIN ENTRY POINT for network recurrent-loop analysis. All other functions in this CPP are called only from this one.
// This function analysis the networks for recurrent loops present in the computation of 'rootNode.'
// This sets/updates:

@ -83,16 +85,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
unordered_set<ComputationNodeBasePtr> visited;
unordered_set<ComputationNodeBasePtr> recStack;

// set m_indexInLoop for all nodes except Past/FutureValueNodes in all loops
// set m_indexInLoop for all nodes except recurrent nodes in all loops
// This value is only used in the block right after this.
for (size_t j = 0; j < iter->m_nestedNodes.size(); j++)
{
ComputationNodeBasePtr node = iter->m_nestedNodes[j];
const auto & node = iter->m_nestedNodes[j];
for (size_t i = 0; i < node->GetNumInputs(); i++)
{
if (node->Input(i)->m_loopId == node->m_loopId &&
node->OperationName() != OperationNameOf(PastValueNode) &&
node->OperationName() != OperationNameOf(FutureValueNode)) // TODO: test for type RecurrentNode instead?
if (node->Input(i)->m_loopId == node->m_loopId && GetRecurrenceDirections(node).empty())
{
//assert(node->Input(i)->m_indexInLoop == 0); // No. It seems this variable really counts the number of parents.
node->Input(i)->m_indexInLoop++; // BUGBUG: this is bumping up the m_indexInLoop, but I don't think it is initialized anywhere other than PurgeStateForFormingRecurrentLoops(). i-1?

@ -146,7 +146,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}

// log the loops
for (auto & iter : m_allSEQNodes)
{

@ -168,6 +167,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}

// checks whether a node is recurrent, and which direction
static vector<int> emptyVector;
static const vector<int> & GetRecurrenceDirections(const ComputationNodeBasePtr & node)
{
if (node->Is<IRecurrentNode>())
return node->As<IRecurrentNode>()->GetRecurrenceDirections();
else
return emptyVector;
}

static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr> & nestedNodes);
// get the strongly connected components from the graph
// This sets index, lowLink, m_visited, and m_inStack.

@ -299,8 +308,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
visited.insert(cur);
recStack.insert(cur);

if (cur->OperationName() != OperationNameOf(PastValueNode) && // recurrence stops at delays
cur->OperationName() != OperationNameOf(FutureValueNode))
if (GetRecurrenceDirections(cur).empty()) // recurrence stops at delays
{
for (size_t i = 0; i < cur->GetNumInputs(); i++)
if (cur->Input(i)->m_loopId == cur->m_loopId)
@ -384,28 +392,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {

// set m_steppingDirection for all loops
// TODO: Move this up to where it is used (in a separate commit since git cannot track moving and changing at the same time).
// BUGBUG: Need to extend to multi-dimensional loop directions. Use a vector<int>.
static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr> & nestedNodes)
{
bool hasPastValueNode = false;
bool hasFutureValueNode = false;
vector<int> recurrenceDirections;

for (auto & node : nestedNodes)
{
if (node->OperationName() == OperationNameOf(PastValueNode))
hasPastValueNode = true;
else if (node->OperationName() == OperationNameOf(FutureValueNode))
hasFutureValueNode = true;
const auto & dirs = GetRecurrenceDirections(node);
if (dirs.empty()) // not a recurrent node
continue;
if (recurrenceDirections.empty())
recurrenceDirections = dirs;
else if (recurrenceDirections != dirs)
InvalidArgument("It is not allowed to have multiple different recurrence directions in the same loop (loop connected to %ls %ls operation).",
nestedNodes.front()->NodeName().c_str(), nestedNodes.front()->OperationName().c_str());
}

if (hasPastValueNode && !hasFutureValueNode)
return +1;
else if (hasFutureValueNode && !hasPastValueNode)
return -1;
else if (hasPastValueNode && hasFutureValueNode)
InvalidArgument("It is not allowed to have both PastValue and FutureValue nodes in the same loop. How do you think that should work??");
else
LogicError("There is neither PastValue nor FutureValue nodes in the loop.");
if (recurrenceDirections.empty())
LogicError("There is no recurrent node in the loop connected to %ls %ls operation.",
nestedNodes.front()->NodeName().c_str(), nestedNodes.front()->OperationName().c_str());
// BUGBUG: Multiple recurrence dimensions not yet supported beyond this point.
return -recurrenceDirections[0];
}
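// Reading of the mapping above (illustrative note, assuming the directions set up in
// DelayedValueNodeBase: PastValueNode reports -1, FutureValueNode reports +1): a loop built from
// PastValue delays yields DetermineLoopDirection() == +1 and is executed left-to-right, while a
// FutureValue loop yields -1 and is executed right-to-left, matching the old hasPast/hasFuture logic.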
}}}
@ -46,6 +46,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else if (nodeType == OperationNameOf(CrossEntropyNode)) return New<CrossEntropyNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(CrossEntropyWithSoftmaxNode)) return New<CrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(SequenceWithSoftmaxNode)) return New<SequenceWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(DiagonalNode)) return New<DiagonalNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(DiagTimesNode)) return New<DiagTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(DropoutNode)) return New<DropoutNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(DummyCriterionNode)) return New<DummyCriterionNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -82,7 +83,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else if (nodeType == OperationNameOf(RowElementTimesNode)) return New<RowElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else if (nodeType == OperationNameOf(RowRepeatNode)) return New<RowRepeatNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(DiagonalNode)) return New<DiagonalNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(RowSliceNode)) return New<RowSliceNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(RowStackNode)) return New<RowStackNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
|
||||
|
@ -91,6 +91,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else if (nodeType == OperationNameOf(ScaleNode)) return New<ScaleNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else if (nodeType == OperationNameOf(SequenceDecoderNode)) return New<SequenceDecoderNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ShiftNode)) return New<ShiftNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(SigmoidNode)) return New<SigmoidNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(SoftmaxNode)) return New<SoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(SquareErrorNode)) return New<SquareErrorNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
|
|
@ -305,8 +305,9 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
|
|||
static TensorShape TensorShapeFromConfig(const IConfigRecord & config)
|
||||
{
|
||||
const auto & valp = config[L"dims"];
|
||||
// TODO: Add code that if input is already a tensor shape it is also OK.
|
||||
if (valp.Is<ConfigArray>())
|
||||
if (valp.Is<TensorShape>())
|
||||
return valp.AsRef<TensorShape>(); // UNTESTED
|
||||
else if (valp.Is<ConfigArray>())
|
||||
return TensorShape(valp.AsRef<ConfigArray>().AsVector<size_t>([&](const wstring & msg){ valp.Fail(msg); }));
|
||||
else
|
||||
return TensorShape(std::vector<size_t>(1, (size_t)valp)); // single element
|
||||
|
@ -315,6 +316,26 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
|
|||
BoxedTensorShape(const IConfigRecordPtr configp) : BoxOf<TensorShape>(TensorShapeFromConfig(*configp)) { }
|
||||
};
|
||||
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedTensorShape> registerTensoShape(L"TensorShape");
|
||||
template<typename E>
|
||||
class BoxedVector : public BoxOf<vector<E>>
|
||||
{
|
||||
// create a vector from config
|
||||
static vector<E> VectorFromConfig(const IConfigRecord & config)
|
||||
{
|
||||
const auto & valp = config[L"items"];
|
||||
if (valp.Is<vector<E>>())
|
||||
return valp.AsRef<vector<E>>(); // UNTESTED
|
||||
else if (valp.Is<ConfigArray>())
|
||||
return valp.AsRef<ConfigArray>().AsVector<E>([&](const wstring & msg){ valp.Fail(msg); });
|
||||
else
|
||||
return std::vector<E>(1, (E)valp); // single element
|
||||
}
|
||||
public:
|
||||
BoxedVector(const IConfigRecordPtr configp) : BoxOf<vector<E>>(VectorFromConfig(*configp)) { }
|
||||
};
|
||||
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedTensorShape> registerTensorShape(L"TensorShape");
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedVector<int>> registerIntVector(L"IntVector");
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedVector<size_t>> registerSizeVector(L"SizeVector");
|
||||
|
||||
}}}
|
||||
|
|
|
@ -132,12 +132,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual ~INodeState() {}
};

struct /*interface*/ IStateFulNode
struct /*interface*/ IStatefulNode
{
typedef std::shared_ptr<INodeState> NodeStatePtr;
virtual NodeStatePtr ExportState() = 0;
virtual void ImportState(const NodeStatePtr& pImportedState) = 0;
virtual void ImportState(NodeStatePtr && state) = 0;
};
typedef IStatefulNode::NodeStatePtr NodeStatePtr;

// =======================================================================
// ComputationNetworkOwnedNodeState -- class to collect ComputationNode members that are really owned by ComputationNetwork
@ -444,7 +445,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
void LinkToMBLayout(MBLayoutPtr pMBLayout) { m_pMBLayout = pMBLayout; }
|
||||
//MBLayoutPtr GetMBLayout() { return m_pMBLayout; }
|
||||
const MBLayoutPtr & GetMBLayout() const { return m_pMBLayout; }
|
||||
bool HasMBLayout() const { return !!m_pMBLayout; }
|
||||
|
||||
|
@ -1505,6 +1505,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
};

// =======================================================================
// IRecurrentNode -- helper wrapper class for ComputationNodes that can be recurrent
// =======================================================================

struct IRecurrentNode { virtual const std::vector<int> & GetRecurrenceDirections() const = 0; };

// =======================================================================
// helper macro to ease access to base members in presence of C++ two-phase name lookup
@ -734,20 +734,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (isFinalValidationPass)
|
||||
{
|
||||
const auto m_imageLayoutKind = ImageLayoutKind::CHW; // BUGBUG: Finish this. Must be serialized.
|
||||
auto dims = ImageDimensions(GetSampleLayout(), m_imageLayoutKind);
|
||||
|
||||
auto shape = GetSampleLayout();
|
||||
|
||||
if (m_factory == nullptr)
|
||||
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
|
||||
if (m_convEng == nullptr)
|
||||
m_convEng = m_factory->CreateConvEngine(m_deviceId, 0);
|
||||
if (m_spatial)
|
||||
{
|
||||
auto dims = ImageDimensions(shape, m_imageLayoutKind);
|
||||
if (m_inT == nullptr)
|
||||
m_inT = m_factory->CreateTensor(dims.m_width, dims.m_height, dims.m_numChannels, 1);
|
||||
if (m_scaleBiasT == nullptr)
|
||||
{
|
||||
if (m_spatial)
|
||||
m_scaleBiasT = m_factory->CreateTensor(1, 1, dims.m_numChannels, 1);
|
||||
}
|
||||
else
|
||||
m_scaleBiasT = m_factory->CreateTensor(dims.m_width, dims.m_height, dims.m_numChannels, 1);
|
||||
{
|
||||
if (m_inT == nullptr)
|
||||
m_inT = m_factory->CreateTensor(shape.GetNumElements(), 1, 1, 1);
|
||||
if (m_scaleBiasT == nullptr)
|
||||
m_scaleBiasT = m_factory->CreateTensor(shape.GetNumElements(), 1, 1, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,8 +72,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ValidateUnaryMap(isFinalValidationPass);
|
||||
}
|
||||
|
||||
// We don't need our output values in backprop.
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override { return gradientFromOutput; }
|
||||
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return !gradientFromOutput; }
|
||||
};
|
||||
|
||||
#define UnaryElementWiseWithOpCodeNodeBaseMembers UsingComputationNodeMembersBoilerplate;
|
||||
|
|
|
@ -25,6 +25,256 @@

namespace Microsoft { namespace MSR { namespace CNTK {

// -----------------------------------------------------------------------
// ShiftNode (input, fromOffset, boundaryValue, dim=-1, offsetRange=1, multiOffsetDim=0) -- delay and rolling window
//
// This shifts the input by (-fromOffset) steps. In other words, output(t) will be input(t+fromOffset).
// E.g. for fromOffset=-1, this gives the past value.
// This node has quite some options that make it powerful for many use cases.
//
// This node can be used in a recurrent loop. This requires special handling by the ComputationNetwork,
// for both execution (sequential execution) and creation (avoiding circular references).
// TODO: When outside a recurrent loop and used with frame randomization, this will communicate to the reader
// that additional frames are needed, which will then return a frame range. TODO: This will not match
// the labels, which are still 1 frame. Think through which dimension this should go in.
//
// Values shifted in from beyond sequence boundaries will be copied from boundaryValue.
// Normally, this is a scalar Constant(). However, it can be any node, which will be indexed from the end
// (e.g. for fromOffset=-1, the last frame of boundaryValue will be used). This can implement
// sequence-to-sequence models. Broadcasting is supported, so it can be e.g. a single output-dimension vector
// applied to all sequences.
//
// To delay (past value), use negative fromOffset. To access future value, use positive fromOffset.
//
// To pull in multiple offsets, use offsetRange>1. This will pull in offsetRange consecutive offsets starting
// with fromOffset. This implements a rolling window. A new dimension will be inserted at multiOffsetDim
// (default 0 means after the last sample dimension). Special considerations:
//  - If the boundaryValue is not wide enough, the sequence will be dropped (e.g. if you pull in 5 history frames,
//    but the sequence in boundaryValue only has 4 samples).
//  - If you feed back such an expanded output into this node in a loop, you get an inconsistency
//    and will eventually fail. You must pull the dimensions apart.
//  - If the current time step (offset 0) is included in the range (e.g. fromOffset=-1, offsetRange=3) then
//    this node cannot participate in a recurrence.
//
// By default, this shifts over the time dimension, but you can choose to shift over any
// sample tensor dimension instead using 'dim' (-1 stands for time). This will only work, however,
// when all involved nodes are implemented using the tensor library. Nodes implemented using
// Matrix slices can only support iterating over time.
//
// The fromOffset can also be a tensor, e.g. (1,1). In that case, iteration will be over multiple
// consecutive dimensions. offsetRange must have the same number of dimensions.
//
// If the boundaryValue has 0 elements, the sequence will be trimmed (frames reaching beyond the boundary
// are dropped). This will initially not be implemented for the time dimension (as it would require
// change of MBLayout).
// -----------------------------------------------------------------------
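// Minimal sketch of the semantics described above (illustration only, for the simple case
// dim=-1, offsetRange=1; 'T' is the sequence length and 'boundary' stands for boundaryValue):
//
//     // output(t) = input(t + fromOffset); frames outside the sequence come from boundaryValue
//     for (int t = 0; t < T; t++)
//     {
//         int s = t + fromOffset;            // source frame; e.g. fromOffset = -1 reads the past value
//         output[t] = (0 <= s && s < T) ? input[s]
//                                       : boundary[/*indexed from its end*/];
//     }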
template<class ElemType>
|
||||
class ShiftNode : public ComputationNode<ElemType>, public IRecurrentNode, public ILateAttachingNode, public IStatefulNode, public NumInputs<2>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
|
||||
static const std::wstring TypeName() { return L"Shift"; }
|
||||
public:
|
||||
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name, const std::vector<int> & fromOffset, int shiftDimension, const std::vector<size_t> & offsetRange, int expandDimension) :
|
||||
Base(deviceId, name), m_fromOffsetBegin(fromOffset),
|
||||
m_shiftDimension(shiftDimension), m_expandDimension(expandDimension),
|
||||
m_insertExpandShapeAt(SIZE_MAX/*uninitialized at this point*/)
|
||||
{
|
||||
// determine m_fromOffsetEnd from fromOffset/offsetRange
|
||||
bool anyNonRecurrent = false;
|
||||
for (size_t k = 0; k < m_fromOffsetBegin.size(); k++)
|
||||
{
|
||||
m_fromOffsetEnd.push_back(m_fromOffsetBegin[k] + (k < offsetRange.size() ? (int)offsetRange[k] : 1));
|
||||
if (m_fromOffsetEnd[k] <= 0)
|
||||
m_recurrenceDirections.push_back(-1);
|
||||
else if (m_fromOffsetBegin[k] > 0)
|
||||
m_recurrenceDirections.push_back(+1);
|
||||
else
|
||||
m_recurrenceDirections.push_back(0);
|
||||
anyNonRecurrent |= m_recurrenceDirections[k] == 0;
|
||||
}
|
||||
if (anyNonRecurrent)
|
||||
m_recurrenceDirections.clear();
|
||||
CreateMatrixIfNull(m_value);
|
||||
SetDims(TensorShape(), 0); // empty for now
|
||||
}
|
||||
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name) :
|
||||
ShiftNode(deviceId, name, std::vector<int> { 1 }, -1, std::vector<size_t> { 1 }, 0)
|
||||
{ }
|
||||
ShiftNode(const ScriptableObjects::IConfigRecordPtr configp) :
|
||||
ShiftNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"fromOffset"), configp->Get(L"dim"), configp->Get(L"offsetRange"), configp->Get(L"multiOffsetDim"))
|
||||
{
|
||||
// We do NOT attach the inputs, as we cannot resolve the main input without causing a circular reference.
|
||||
// Instead, we capture them in a lambda, which will be called by ComputationNetwork during the build process through LateAttachInputs() below.
|
||||
// This is a contract between ComputationNetwork and this specific node type.
|
||||
// (TODO: We could force-evaluate the boundary input here.)
|
||||
m_attachInputsFn = [this, configp]() // This is the lambda to complete the process. Note that config captured as a shared_ptr.
|
||||
{
|
||||
AttachInputs(GetInputsFromConfig(configp)); // this is executed by network builder while iterating the nodes
|
||||
};
|
||||
}
|
||||
virtual void /*ILateAttachingNode::*/LateAttachInputs() override final
|
||||
{
|
||||
m_attachInputsFn();
|
||||
m_attachInputsFn = [](){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
|
||||
}
|
||||
public:
|
||||
void Save(File& fstream) const
|
||||
{
|
||||
Base::Save(fstream);
|
||||
|
||||
fstream << m_fromOffsetBegin;
|
||||
fstream << m_fromOffsetEnd;
|
||||
fstream << m_shiftDimension;
|
||||
fstream << m_expandDimension;
|
||||
fstream << m_recurrenceDirections;
|
||||
}
|
||||
|
||||
virtual void Load(File& fstream, size_t modelVersion) override
|
||||
{
|
||||
Base::Load(fstream, modelVersion);
|
||||
|
||||
fstream >> m_fromOffsetBegin;
|
||||
fstream >> m_fromOffsetEnd;
|
||||
fstream >> m_shiftDimension;
|
||||
fstream >> m_expandDimension;
|
||||
fstream >> m_recurrenceDirections;
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
|
||||
{
|
||||
assert(inputIndex == 0); inputIndex;
|
||||
fr;
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
|
||||
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override {return false; }
|
||||
|
||||
virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
|
||||
{
|
||||
Base::EndForwardProp();
|
||||
|
||||
// In BPTT, we carry over left-to-right state across minibatches.
|
||||
// TODO: package up the state using ExportState(). Then in BeginForwardProp() bring it back. In-between, the packages can be moved around.
|
||||
}
|
||||
|
||||
// This function assumes BeginForwardProp/EndForwardProp() to be called before/after the iteration loop.
|
||||
// TODO: In the future, there may be value for one more way of handling the boundary condition: Fill as 'NoInput'. Then we can use this to implement rolling windows (albeit inefficiently). Would require to unshare the layout.
|
||||
virtual void ForwardProp(const FrameRange & fr) override
|
||||
{
|
||||
fr;
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
assert(m_inputs.size() == 2);
|
||||
ComputationNodeBase::Validate(isFinalValidationPass);
|
||||
|
||||
if (isFinalValidationPass)
|
||||
sin(1.0f);
|
||||
|
||||
// MBLayout is just inherited
|
||||
m_pMBLayout = Input(0)->GetMBLayout();
|
||||
if (isFinalValidationPass && !m_pMBLayout)
|
||||
InvalidArgument("%ls %ls operation must operate on data (must have an MB Layout).", NodeName().c_str(), OperationName().c_str());
|
||||
|
||||
// determine expandShape--empty if no multiple offsets; otherwise the 1 or more dimensions that need to be added at m_expandDimension
|
||||
m_expandShape.clear();
|
||||
for (size_t k = 0; k < m_fromOffsetBegin.size(); k++)
|
||||
{
|
||||
size_t dim = m_fromOffsetEnd[k] - m_fromOffsetBegin[k];
|
||||
if (dim > 1)
|
||||
{
|
||||
m_expandShape.resize(k, 1);
|
||||
m_expandShape.push_back(dim);
|
||||
}
|
||||
}
|
||||
if (!m_expandShape.empty())
|
||||
m_expandShape.resize(m_fromOffsetBegin.size(), 1); // pad ones to end
|
||||
// now it either matches the dimensions to insert, or is empty if none to append
|
||||
|
||||
// determine final sample layout
|
||||
auto inputSampleLayout = Input(0)->GetSampleLayout();
|
||||
auto inputDims = inputSampleLayout.GetDims();
|
||||
if (m_expandDimension < 0)
|
||||
InvalidArgument("%ls %ls operation: Specified insertion location %d refers to a time dimension, but this is not allowed.",
|
||||
NodeName().c_str(), OperationName().c_str(), m_expandDimension);
|
||||
m_insertExpandShapeAt = m_expandShape.empty() ? 0 : (m_expandDimension > 0 ? m_expandDimension - 1 : inputDims.size());
|
||||
if (m_insertExpandShapeAt > inputDims.size())
|
||||
if (isFinalValidationPass)
|
||||
InvalidArgument("%ls %ls operation: Specified insertion location %d beyond end of input sample layout [%s].",
|
||||
NodeName().c_str(), OperationName().c_str(), m_expandDimension, string(inputSampleLayout).c_str());
|
||||
else
|
||||
m_insertExpandShapeAt = inputDims.size(); // this may be an error, but we want to catch that only in the final pass
|
||||
SmallVector<size_t> dims;
|
||||
if (!m_expandShape.empty() && inputDims.size() + m_expandShape.size() > dims.capacity())
|
||||
InvalidArgument("%ls %ls operation: Too many dimensions. Did you feed back output of this node without stripping the extra dimensions?",
|
||||
NodeName().c_str(), OperationName().c_str());
|
||||
dims.append(inputDims.begin(), inputDims.begin() + m_insertExpandShapeAt);
|
||||
dims.append(m_expandShape.begin(), m_expandShape.end());
|
||||
dims.append(inputDims.begin() + m_insertExpandShapeAt, inputDims.end());
|
||||
auto sampleLayout = TensorShape(dims);
|
||||
|
||||
SetDims(sampleLayout, 0);
|
||||
}
|
||||
|
||||
// special interface for use by loop detection
|
||||
virtual const std::vector<int> & /*IRecurrentNode::*/GetRecurrenceDirections() const override
|
||||
{
|
||||
return m_recurrenceDirections;
|
||||
}
|
||||
|
||||
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
{
|
||||
Base::CopyTo(nodeP, newName, flags);
|
||||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
{
|
||||
auto node = dynamic_pointer_cast<ShiftNode<ElemType>>(nodeP);
|
||||
node->m_fromOffsetBegin = m_fromOffsetBegin;
|
||||
node->m_fromOffsetEnd = m_fromOffsetEnd;
|
||||
node->m_recurrenceDirections = m_recurrenceDirections;
|
||||
node->m_shiftDimension = m_shiftDimension;
|
||||
node->m_expandDimension = m_expandDimension;
|
||||
node->m_expandShape = m_expandShape;
|
||||
node->m_insertExpandShapeAt = m_insertExpandShapeAt;
|
||||
node->m_state = m_state;
|
||||
}
|
||||
}
|
||||
|
||||
class ShiftNodeState : public INodeState
{
Matrix<ElemType> m_delayedActivation; // saves the activation of the previous step that this node points to
};
typedef std::shared_ptr<ShiftNodeState> ShiftNodeStatePtr;

// state export/import
// This is done with a shared_ptr. The moment state is exported, the internal state is cleared; ownership is transferred to the exporting entity.
// This way, the next invocation does not overwrite the exported state, but is required to create a new one if needed.
// On the other hand, once imported, the state object is owned by the node and will be overwritten with the next state.
virtual NodeStatePtr ExportState() { return std::move(m_state); }
virtual void ImportState(NodeStatePtr && state) override
{
m_state = dynamic_pointer_cast<ShiftNodeState>(state);
if (state && !m_state)
LogicError("ImportState: Wrong state object passed (wrong type).");
}
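// Sketch of the intended hand-off (illustration only; the caller is hypothetical, e.g. the
// sub-minibatch code, which keeps one state object per sub-minibatch):
//
//     NodeStatePtr saved = node->ExportState();   // node's m_state is moved out and left empty
//     ...                                         // run other sub-minibatches on the same node
//     node->ImportState(std::move(saved));        // node takes ownership of the state again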
protected:
|
||||
// parameters remembered from construction
|
||||
std::vector<int> m_fromOffsetBegin; // offset to pull from; first offset in case of offset range
|
||||
std::vector<int> m_fromOffsetEnd; // end of offset range
|
||||
int m_shiftDimension; // dimension to shift (default: time)
|
||||
int m_expandDimension; // in case of offset range, this is where a new dimension will be inserted
|
||||
|
||||
// derived params set up in Validate()
|
||||
SmallVector<size_t> m_expandShape; // offsetEnd-offsetBegin if >1 offset in any dimension; empty otherwise
|
||||
size_t m_insertExpandShapeAt; // at which dimension to insert (internal 0-based index)
|
||||
std::vector<int> m_recurrenceDirections; // for GetRecurrenceDirections()
|
||||
|
||||
ShiftNodeStatePtr m_state; // saves the activation of the previous step that this node points to
|
||||
|
||||
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DelayedValueNodeState -- helper class for exporting/importing state from/to DelayedValueNodes.
|
||||
// This is used for sub-minibatching in case of truncated BPTT.
|
||||
|
@ -76,12 +326,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// -----------------------------------------------------------------------
|
||||
// DelayedValueNodeBase (input) -- abstract base class for PastValueNode and FutureValueNode to hold all shared code
|
||||
// The two differ in the step direction, some loop directions, and sequence-boundary flags.
|
||||
// This is an old node which will be replaced by ShiftNode (with Past/FutureValueNode being emulated).
|
||||
//
|
||||
// This is planned:
|
||||
// - carrying over state at sentence boundaries from other nodes (for s2s)
|
||||
// - ranges of neighbor frames as a secondary tensor dimension (i.e. can be used to implement a rolling window)
|
||||
// - full support/efficiency of non-recurrent use (in which case the range can be from negative to positive, e.g. a symmetric rolling window)
|
||||
// - denoting which tensor dimension to loop over (this may not be completed, but I will plant a seed)
|
||||
// - support for Yongqiang’s sub-minibatching with BPTT (export/import state)
|
||||
// - more efficient storage of carried-over state (only store the needed frames, not a full copy of the previous MB as currently; which will on the other hand also allow windows that reach back beyond a minibatch)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// TODO: 'direction' is really too general. signOfTimeOffset?
|
||||
template<class ElemType, int direction/*-1 for Past/left-to-right or +1 for Future/right-to-left*/ /*, MinibatchPackingFlags SequenceStart_or_End/*-Start or -End*/>
|
||||
class DelayedValueNodeBase : public ComputationNode<ElemType>, public
|
||||
ILateAttachingNode, public IStateFulNode, public NumInputs<1>
|
||||
class DelayedValueNodeBase : public ComputationNode<ElemType>, public IRecurrentNode,
|
||||
public ILateAttachingNode, public IStatefulNode, public NumInputs<1>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
|
||||
typedef std::shared_ptr<DelayedValueNodeState<ElemType>> DelayedNodeStatePtr;
|
||||
|
@ -91,9 +350,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
m_initialActivationValue = initialActivationValue;
|
||||
m_timeStep = 1;
|
||||
m_recurrenceDirections.push_back(direction);
|
||||
CreateMatrixIfNull(m_value);
|
||||
SetDims(sampleLayout, 0); // TODO: needed? Can we not infer it? How about setting a sample layout?
|
||||
m_isHistoryCarryOverManagedExternally = false; // used for PairNetworkNode/PastValueNode combination, which is deprecated
|
||||
SetDims(sampleLayout, 0);
|
||||
m_value->SetValue(m_initialActivationValue); // is this needed?
|
||||
}
|
||||
protected:
|
||||
|
@ -139,7 +398,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
virtual void Load(File& fstream, size_t modelVersion) override
|
||||
{
|
||||
// the node has already been initialized e.g. w.r.t. direction and sequence flags
|
||||
// the node has already been initialized e.g. w.r.t. direction
|
||||
Base::Load(fstream, modelVersion);
|
||||
|
||||
fstream >> m_timeStep;
|
||||
|
@ -155,63 +414,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fstream >> m_initialActivationValue;
|
||||
}
|
||||
|
||||
#if 0
|
||||
private:
|
||||
// cache a post-processed version of m_pMBLayout (depends on the actual minibatch)
|
||||
// This post-processed layout has its bits spread out over m_timeStep, to help detect if we'd hop across a boundary.
|
||||
void CacheMBLayout()
|
||||
{
|
||||
if (m_timeStep <= 0)
|
||||
LogicError("timeStep should be 1 or larger");
|
||||
|
||||
m_pShiftedMBLayout->CopyFrom(m_pMBLayout); // it gets modified below
|
||||
if (m_timeStep == 1)
|
||||
return;
|
||||
|
||||
#if 1
|
||||
LogicError("CacheMBLayout: m_timeStep > 1 temporarily disabled until MBLayout update completed.");
|
||||
#else
|
||||
// modify m_pShiftedMBLayout
|
||||
// If two utterances are packed together (S: start, E: end, N: no input) and we need to get values 2 steps in the past
|
||||
// S X X X E S X X X X E N N
|
||||
// then this becomes
|
||||
// S S X X E S S X X X E N N
|
||||
|
||||
size_t numSeq = GetNumParallelSequences();
|
||||
|
||||
// each row has a number to indicate how many values should be reset for that utterance
|
||||
// TODO: This algorithm is not obvious and should be explained. E.g. how come it is direction independent?
|
||||
vector<int> numResetLeft(numSeq, 0);
|
||||
for (size_t i = 0; i < GetNumTimeSteps(); i++) // i = frame index (time)
|
||||
{
|
||||
if (m_pMBLayout->Is(i, SequenceStart_or_End | MinibatchPackingFlags::NoFeature))
|
||||
{
|
||||
// we set timeStep-1 elements following it to be SequenceStart until met NoInput
|
||||
for (size_t j = 0; j < numSeq; j++) // j = stream
|
||||
{
|
||||
// we use & since ((int) MinibatchPackingFlags::SequenceStart) may come with NoLabel
|
||||
if (m_pMBLayout->Is(j, i, SequenceStart_or_End))
|
||||
numResetLeft[j] = m_timeStep;
|
||||
else if (m_pMBLayout->Is(j, i, MinibatchPackingFlags::NoFeature))
|
||||
numResetLeft[j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// now set the sequence-boundary flag
|
||||
for (size_t j = 0; j < numSeq; j++)
|
||||
{
|
||||
if (numResetLeft[j]-- > 0)
|
||||
{
|
||||
m_pShiftedMBLayout->Mask(j, i, MinibatchPackingFlags::NoLabel); // keep only this flag
|
||||
m_pShiftedMBLayout->Set(j, i, SequenceStart_or_End); // now implant the boundary flag
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
public:
|
||||
#endif
|
||||
|
||||
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
|
||||
{
|
||||
assert(inputIndex == 0); inputIndex;
|
||||
|
@ -283,12 +485,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return false;
|
||||
}
|
||||
|
||||
//virtual void BeginForwardProp() override // called before first iteration step of ForwardProp()
|
||||
//{
|
||||
// Base::BeginForwardProp();
|
||||
// CacheMBLayout();
|
||||
//}
|
||||
|
||||
virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
|
||||
{
|
||||
// In BPTT, we carry over left-to-right state across minibatches.
|
||||
|
@ -299,12 +495,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// - we don't need to keep anything if all sequences are closed (sentence end)
|
||||
// This condition includes full-sequence mode.
|
||||
// TODO: Can we optimize this and only copy if there is a sequence spanning across the end of the MB? And add a check to BeginForwardProp() to make sure we got one if there is a boundary at the start?
|
||||
if (!m_isHistoryCarryOverManagedExternally) // means it's externally managed (for PairNetworkNode)
|
||||
{
|
||||
m_delayedActivation = Input(0)->Value();
|
||||
if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
|
||||
m_delayedActivationMBLayout->CopyFrom(m_pMBLayout);
|
||||
}
|
||||
|
||||
Base::EndForwardProp();
|
||||
}
|
||||
|
@ -350,6 +543,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
for (size_t id = 0; id < GetNumParallelSequences(); id++)
|
||||
{
|
||||
if (m_pMBLayout->IsGap(fr.Sequence(id))) // if output is in a gap then don't bother filling it
|
||||
continue;
|
||||
|
||||
Matrix<ElemType> out = ValueFor(fr.Sequence(id));
|
||||
|
||||
//assert(m_pShiftedMBLayout->Is(id, t, SequenceStart_or_End) == m_pMBLayout->IsBeyondStartOrEnd(frDelayed.Sequence(id)));
|
||||
|
@ -391,34 +587,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ValidateUnaryMap(isFinalValidationPass);
|
||||
}
|
||||
|
||||
// this function is only used for PairNetworkNode (on PastValueNode)
|
||||
// BUGBUG: Need to transfer the layout as well. PairNetworkNode will go away.
|
||||
bool GetHistory(Matrix<ElemType>& hist, bool)
|
||||
// special interface for use by loop detection
|
||||
virtual const std::vector<int> & /*IRecurrentNode::*/GetRecurrenceDirections() const override
|
||||
{
|
||||
DEVICEID_TYPE device = hist.GetDeviceId();
|
||||
hist.TransferFromDeviceToDevice(device, m_deviceId, true);
|
||||
|
||||
hist.SetValue(Input(0)->Value());
|
||||
|
||||
hist.TransferFromDeviceToDevice(m_deviceId, device, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
// this function is only used for PairNetworkNode (on PastValueNode)
|
||||
void SetHistory(const Matrix<ElemType>& hist)
|
||||
{
|
||||
DEVICEID_TYPE device = hist.GetDeviceId();
|
||||
hist.TransferFromDeviceToDevice(device, m_deviceId, true);
|
||||
|
||||
m_delayedActivation.SetValue(hist);
|
||||
m_isHistoryCarryOverManagedExternally = true;
|
||||
|
||||
hist.TransferFromDeviceToDevice(m_deviceId, device, true);
|
||||
|
||||
// need a layout as well
|
||||
// ForwardProp() expects it to have the same number of parallel sequences.
|
||||
if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
|
||||
m_delayedActivationMBLayout->Init(GetNumParallelSequences(), hist.GetNumCols() / GetNumParallelSequences());
|
||||
return m_recurrenceDirections;
|
||||
}
|
||||
|
||||
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
|
@ -434,15 +606,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
(node->m_delayedActivationMBLayout = make_shared<MBLayout>())->CopyFrom(m_delayedActivationMBLayout);
|
||||
else
|
||||
node->m_delayedActivationMBLayout = nullptr;
|
||||
node->m_isHistoryCarryOverManagedExternally = false;
|
||||
}
|
||||
}
|
||||
|
||||
//========================================
|
||||
// implement the IStateFulNode interface
|
||||
//========================================
|
||||
|
||||
virtual NodeStatePtr ExportState()
|
||||
virtual NodeStatePtr /*IStatefulNode::*/ExportState() override
|
||||
{
|
||||
NodeStatePtr pExportedState;
|
||||
size_t nT = m_pMBLayout->GetNumTimeSteps();
|
||||
|
@ -530,7 +697,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
return pExportedState;
|
||||
}
|
||||
virtual void ImportState(const NodeStatePtr& pImportedState) override
|
||||
|
||||
virtual void /*IStatefulNode::*/ImportState(NodeStatePtr && pImportedState) override
|
||||
{
|
||||
DelayedNodeStatePtr pState = dynamic_pointer_cast<DelayedValueNodeState<ElemType>> (pImportedState);
|
||||
|
||||
|
@ -561,7 +729,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{// it is really a compile error ?
|
||||
RuntimeError("Unrecognized direction in DelayedValueNodeBase");
|
||||
}
|
||||
|
||||
}
|
||||
protected:
|
||||
|
||||
|
@ -569,14 +736,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
Matrix<ElemType> m_delayedActivation; // saves the activation of the previous step that this node points to
|
||||
MBLayoutPtr m_delayedActivationMBLayout; // layout for m_delayedActivation
|
||||
int m_timeStep; // delay in frames (typ. 1)
|
||||
//MBLayoutPtr m_pShiftedMBLayout; // individual sentence boundary information --TODO: do we actually need this separate variable?
|
||||
bool m_isHistoryCarryOverManagedExternally; // for PastValueNode only
|
||||
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
|
||||
std::vector<int> m_recurrenceDirections; // for GetRecurrenceDirections()
|
||||
};
|
||||
|
||||
#define UsingDelayedValueNodeMembers UsingComputationNodeMembersBoilerplate; \
|
||||
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep; \
|
||||
/*using Base::m_pShiftedMBLayout;*/ using Base::m_isHistoryCarryOverManagedExternally;
|
||||
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// PastValueNode (input) -- delay node
|
||||
|
@ -606,7 +771,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template class PastValueNode<float>;
|
||||
template class PastValueNode<double>;
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// FutureValueNode (input) -- delay node in future direction
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
@ -5621,7 +5621,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
#pragma omp parallel for
|
||||
for (int k = 0; k < (int)K; k++)
|
||||
TensorOpIteration<ElemType, OPFN, 3, true/*vectorizable*/, -1/*no reduction*/, -1/*scalar*/>::Loop(0, array<ElemType*, 3> { pa + k, pb + k, pc + k }, 1, opfn, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
// TODO: somehow this does not use 4-way parallelism with SSE (VS 2013), and the signedness of k (required for omp) causes an extra sign-extend
|
||||
// TODO: According to Amit, the VS compiler is not able to vectorize into lambdas. Solution: change the lambda to take an N, or to implement the loop inside (with 1 element by default).
|
||||
// TODO: The signedness of k (required for omp) causes an extra sign-extend.
|
||||
// TODO: OMP adds LOTS of overhead. Do we need a guard, a min size when to use it?
|
||||
}
|
||||
};
|
||||
|
@ -5737,6 +5738,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const SmallVector<size_t> & regularOpDims, const array<SmallVector<ptrdiff_t>, 2> & regularStrides,
|
||||
const SmallVector<size_t> & reducingOpDims, const array<SmallVector<ptrdiff_t>, 2> & reducingStrides)
|
||||
{
|
||||
// TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
|
||||
#define CaseUnaryTensorOp(oper) \
|
||||
case ElementWiseOperator::op ## oper: \
|
||||
return TensorOpWithFn(beta, pointers, alpha, [](const array<ElemType*, 2> & pp) { return Op ## oper((*(pp[0]))); }, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides)
|
||||
|
|
|
@ -46,7 +46,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t batchSize = inT.n();
|
||||
size_t maxTempMemSizeInSamples = (m_maxTempMemSizeInSamples == 0 ? batchSize : m_maxTempMemSizeInSamples);
|
||||
|
||||
assert(filter.GetNumCols() == packedInputRows && filter.GetNumRows() == outT.c());
|
||||
assert(filter.GetNumCols() == packedInputRows && filter.GetNumRows() == outT.c()); UNUSED(packedInputRows);
|
||||
|
||||
// GPU and 1-dimensional image
|
||||
bool gpuSparse1D = (inT.h() == 1 &&
|
||||
|
@ -100,7 +100,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
Mat outputSubBatch = out.ColumnSlice(outputSizePerChannel * startSampleId, outputSizePerChannel * smallBatchSize);
|
||||
|
||||
workspace.Resize(packedInputRows, packedInputColsPerSample * smallBatchSize);
|
||||
//workspace.Resize(packedInputRows, packedInputColsPerSample * smallBatchSize);
|
||||
// BUGBUG: This ^^ destroys the content of the matrix. Also it seems not to change the size. Does it? Should this be a Reshape()?
|
||||
Mat::Multiply(filter, false, workspace, false, outputSubBatch);
|
||||
}
|
||||
}
|
||||
|
@ -454,8 +455,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else if (engType == EngineType::Legacy)
|
||||
{
|
||||
// REVIEW alexeyk: temp hack to allow this to work in MEL scenarios. InvalidArgument should be used instead.
|
||||
if (imageLayoutKind != ImageLayoutKind::HWC)
|
||||
InvalidArgument("ConvolutionEngineFactory: ImageLayout '%s' is not compatible with the legacy convolution engine.", ToString(imageLayoutKind).c_str());
|
||||
fprintf(stderr, "WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.\n");
|
||||
//InvalidArgument("ConvolutionEngineFactory: ImageLayout '%s' is not compatible with the legacy convolution engine.", ToString(imageLayoutKind).c_str());
|
||||
return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>();
|
||||
}
|
||||
|
||||
|
|
|
@ -378,7 +378,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (tid < i && tid + i < tids) accumulators[tid] += accumulators[tid + i];
|
||||
if (0 + i < tids) __syncthreads(); // sync if condition true for at least one thread
|
||||
// TODO: use volatile* and then we can skip the __syncthreads() for the last 32 values
|
||||
// TODO: use volatile* and then we can skip the __syncthreads() for the last 32 values. See Amit's allreduce() function implementation in MatrixQuantizer_kernel.cu.
|
||||
}
|
||||
|
||||
// now set final value to output coordinate
|
||||
|
|
|
@ -230,12 +230,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
std::map<wstring, vector<shared_ptr<INodeState>>> m_NetStates; // m_NetStatefulNodes[node][i] caches the state of i-th subminibatch of node
|
||||
bool m_hasLattices;
|
||||
|
||||
Matrices m_CachedGraident;
|
||||
Matrices m_cachedGradient;
|
||||
// we also need to remember where to put into the net
|
||||
MBLayoutPtr m_NetMBLayoutPtr;
|
||||
std::map<wstring, shared_ptr<ComputationNode<ElemType>>> m_LearnableNodePtr;
|
||||
// followings are lattice-related
|
||||
Matrices m_NetInputMatrixPtr;
|
||||
Matrices m_NetInputMatrixPtr; // TODO: camelCase for all m_Net...
|
||||
LatticePtr m_NetLatticePtr;
|
||||
UidPtr m_NetUidPtr;
|
||||
ExtrauttMapPtr m_NetExtrauttMapPtr;
|
||||
|
@ -248,18 +248,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetCriterionNodes;
|
||||
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetEvaluationNodes;
|
||||
std::map<wstring, shared_ptr<IStateFulNode>> m_NetStatefulNodes; // we need to Export/Import states of stateful nodes when we swtich subminibatches
|
||||
std::map<wstring, shared_ptr<IStatefulNode>> m_NetStatefulNodes; // we need to Export/Import states of stateful nodes when we swtich subminibatches
|
||||
|
||||
private:
|
||||
|
||||
void EnumerateStatefulNodeWithRoot(ComputationNetwork& net, ComputationNodeBasePtr root, std::map<wstring, shared_ptr<IStateFulNode>>& statefulnode)
|
||||
void EnumerateStatefulNodeWithRoot(ComputationNetwork& net, ComputationNodeBasePtr root, std::map<wstring, shared_ptr<IStatefulNode>>& statefulnode)
|
||||
{
|
||||
const std::list<ComputationNodeBasePtr> evalorder = net.GetEvalOrder(root);
|
||||
for (auto& x : evalorder)
|
||||
{
|
||||
wstring name = x->GetName();
|
||||
if (statefulnode.find(name) != statefulnode.end()) continue; // already in the list
|
||||
shared_ptr<IStateFulNode> pNode = dynamic_pointer_cast<IStateFulNode>(x);
|
||||
shared_ptr<IStatefulNode> pNode = dynamic_pointer_cast<IStatefulNode>(x);
|
||||
if (pNode)
|
||||
{
|
||||
statefulnode[name] = pNode;
|
||||
|
@ -267,20 +267,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
std::map<wstring, shared_ptr<IStateFulNode>> EnumerateStatefulNode(ComputationNetwork& net,
|
||||
std::map<wstring, shared_ptr<IStatefulNode>> EnumerateStatefulNode(ComputationNetwork& net,
|
||||
const std::vector<ComputationNodeBasePtr>& criterionNode,
|
||||
const std::vector<ComputationNodeBasePtr>& evaluationNode)
|
||||
{
|
||||
std::map<wstring, shared_ptr<IStateFulNode>> statefulnodes;
|
||||
std::map<wstring, shared_ptr<IStatefulNode>> statefulNodes;
|
||||
for (auto& root : criterionNode)
|
||||
{
|
||||
EnumerateStatefulNodeWithRoot(net, root, statefulnodes);
|
||||
EnumerateStatefulNodeWithRoot(net, root, statefulNodes);
|
||||
}
|
||||
for (auto& root : evaluationNode)
|
||||
{
|
||||
EnumerateStatefulNodeWithRoot(net, root, statefulnodes);
|
||||
EnumerateStatefulNodeWithRoot(net, root, statefulNodes);
|
||||
}
|
||||
return statefulnodes;
|
||||
return statefulNodes;
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -353,7 +353,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
delete x.second;
|
||||
}
|
||||
|
||||
for (auto x : m_CachedGraident)
|
||||
for (auto x : m_cachedGradient)
|
||||
{
|
||||
delete x.second;
|
||||
}
|
||||
|
@ -418,11 +418,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
auto funvalue = pLearnableNode->Value(); // gradient may not be allocated when this function is first called
|
||||
size_t nrow = funvalue.GetNumRows();
|
||||
size_t ncol = funvalue.GetNumCols();
|
||||
if (m_CachedGraident.find(nodeName) == m_CachedGraident.end())
|
||||
if (m_cachedGradient.find(nodeName) == m_cachedGradient.end())
|
||||
{
|
||||
// not allocated yet
|
||||
m_CachedGraident[nodeName] = new Matrix<ElemType>(nrow, ncol, funvalue.GetDeviceId());
|
||||
m_CachedGraident[nodeName]->SetValue((ElemType)0);
|
||||
m_cachedGradient[nodeName] = new Matrix<ElemType>(nrow, ncol, funvalue.GetDeviceId());
|
||||
m_cachedGradient[nodeName]->SetValue((ElemType)0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -511,9 +511,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (auto& x : m_NetStatefulNodes)
|
||||
{
|
||||
wstring name = x.first;
|
||||
shared_ptr<IStateFulNode> pNode = x.second;
|
||||
shared_ptr<IStatefulNode> pNode = x.second;
|
||||
if (m_NetStates[name][iSubminibatch])
|
||||
pNode->ImportState(m_NetStates[name][iSubminibatch]);
|
||||
pNode->ImportState(std::move(m_NetStates[name][iSubminibatch]));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -521,7 +521,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void DoneWithCurrentSubMinibatch(size_t iSubminibatch)
|
||||
{
|
||||
// accumulate gradient here
|
||||
for (auto x : m_CachedGraident)
|
||||
for (auto x : m_cachedGradient)
|
||||
{
|
||||
wstring nodename = x.first;
|
||||
if (m_LearnableNodePtr.find(nodename) == m_LearnableNodePtr.end())
|
||||
|
@ -529,7 +529,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
RuntimeError("ERROR: in DoneWithCurrentSubMinibatch: node %ls not found in LeanrableNode", nodename.c_str());
|
||||
}
|
||||
shared_ptr<ComputationNode<ElemType>> pNode = m_LearnableNodePtr[nodename];
|
||||
m_CachedGraident[nodename]->operator+=(pNode->Gradient());
|
||||
m_cachedGradient[nodename]->operator+=(pNode->Gradient());
|
||||
pNode->Gradient().SetValue((ElemType)0);
|
||||
}
|
||||
// accumulate criterion value
|
||||
|
@ -554,7 +554,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
void DoneWithCurrentMinibatch()
|
||||
{
|
||||
for (auto& x : m_CachedGraident)
|
||||
for (auto& x : m_cachedGradient)
|
||||
{
|
||||
wstring name = x.first;
|
||||
Matrix<ElemType>* accumulategrad = x.second;
|
||||
|
|
|
@ -66,8 +66,12 @@ speechTrain = [
C(c) = DiagTimes(WeightParam(cellDim, 1), Stabilize(c)) // cell-to-hiddden

// LSTM cell
dh = PastValue(outputDim, output); // hidden state(t-1)
dc = PastValue(cellDim, ct); // cell(t-1)
# TODO: This is temporary test code for the new ShiftNode (until we switch PastValue() itself over)
PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1, offsetRanges=1, multiOffsetDim=2)
PastValue1 = PastValue
#PastValue1 = PastValueShift
dh = PastValue1(outputDim, output); // hidden state(t-1)
dc = PastValue1(cellDim, ct); // cell(t-1)

// note: the W(inputx) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
it = Sigmoid(W(inputx) + B() + H(dh) + C(dc)) // input gate(t)
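// Illustrative note: the line above appears to be the standard peephole LSTM input gate,
// i_t = sigmoid(W_xi x_t + W_hi h_{t-1} + w_ci . c_{t-1} + b_i), with W(inputx), H(dh), C(dc)
// and B() supplying the four terms; C() uses DiagTimes, i.e. a diagonal (peephole) connection.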