Merge branch 'master' into qiwye/multiverso

Qiwei Ye 2016-01-08 21:03:14 +08:00
Parent 6c2ee1aa51 c1c818c85b
Commit 7628026b05
42 changed files with 708 additions and 474 deletions

View file

@ -19,6 +19,8 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
# comment the following line to write logs to the console
stderr = "$OutputDir$/01_OneHidden_out"
traceLevel=1
numMBsToShowResult=500
#######################################
# TRAINING CONFIG #
@ -63,6 +65,7 @@ train = [
test = [
action = "test"
minibatchSize = 16
NDLNetworkBuilder=[
networkDescription = "$ConfigDir$/01_OneHidden.ndl"

View file

@ -19,6 +19,10 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
# comment the following line to write logs to the console
stderr = "$OutputDir$/02_Convolution_out"
traceLevel=1
numMBsToShowResult=500
prefetch=true
#######################################
# TRAINING CONFIG #
@ -63,6 +67,7 @@ train = [
test = [
action = test
minibatchSize = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/02_Convolution.ndl"

View file

@ -1,20 +1,28 @@
WorkDir=.
ModelDir=$WorkDir$/_out/$ConfigName$
stderr=$WorkDir$/_out/$ConfigName$
RootDir = "."
ndlMacros=$WorkDir$/Macros.ndl
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
prefetch=true
command=Train:Test
stderr=$OutputDir$/01_Conv
traceLevel=1
numMBsToShowResult=500
Train=[
action=train
modelPath=$ModelDir$/01_Convolution
NDLNetworkBuilder=[
networkDescription=$WorkDir$/01_Convolution.ndl
networkDescription=$ConfigDir$/01_Convolution.ndl
]
SGD=[
@ -29,7 +37,7 @@ Train=[
reader=[
readerType=UCIFastReader
file=$WorkDir$/Train.txt
file=$DataDir$/Train.txt
randomize=None
features=[
dim=3072
@ -39,7 +47,7 @@ Train=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]
@ -48,15 +56,15 @@ Test=[
action=test
modelPath=$ModelDir$/01_Convolution
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=16
NDLNetworkBuilder=[
networkDescription=$WorkDir$/01_Convolution.ndl
networkDescription=$ConfigDir$/01_Convolution.ndl
]
reader=[
readerType=UCIFastReader
file=$WorkDir$/Test.txt
file=$DataDir$/Test.txt
randomize=None
features=[
dim=3072
@ -66,7 +74,7 @@ Test=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]

View file

@ -7,8 +7,8 @@ ndlMnistMacros = [
ImageC = 3
LabelDim = 10
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
featOffs = Const(128, rows = 3072)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
featOffs = Const(128)
featScaled = Minus(features, featOffs)
labels = Input(LabelDim, tag = label)
@ -39,7 +39,7 @@ DNN=[
pool1H = 3
pool1hStride = 2
pool1vStride = 2
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
# conv2
kW2 = 5
@ -55,7 +55,7 @@ DNN=[
pool2H = 3
pool2hStride = 2
pool2vStride = 2
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
# conv3
kW3 = 5
@ -71,7 +71,7 @@ DNN=[
pool3H = 3
pool3hStride = 2
pool3vStride = 2
pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride)
pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
hiddenDim = 64
h1 = DNNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)

View file

@ -1,37 +1,43 @@
WorkDir=.
ModelDir=$WorkDir$/_out/$ConfigName$
stderr=$WorkDir$/_out/$ConfigName$
RootDir = "."
ndlMacros=$WorkDir$/Macros.ndl
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
prefetch=true
parallelTrain=false
command=Train:AddBNEval:Test
stderr=$OutputDir$/02_BatchNormConv
traceLevel=1
numMBsToShowResult=500
Train=[
action=train
modelPath=$ModelDir$/02_BatchNormConv
NDLNetworkBuilder=[
networkDescription=$WorkDir$/02_BatchNormConv.ndl
networkDescription=$ConfigDir$/02_BatchNormConv.ndl
]
SGD=[
epochSize=49984
minibatchSize=64
learningRatesPerMB=0.03*7:0.01*8:0.003
#momentumPerMB=0.9*10:0.99
learningRatesPerMB=0.03*7:0.01
momentumPerMB=0
maxEpochs=10
#L2RegWeight=0.03
dropoutRate=0*1:0.5
L2RegWeight=0
dropoutRate=0
]
reader=[
readerType=UCIFastReader
file=$WorkDir$/Train.txt
file=$DataDir$/Train.txt
randomize=None
features=[
dim=3072
@ -41,7 +47,7 @@ Train=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]
@ -50,22 +56,22 @@ AddBNEval=[
action=edit
CurModel=$ModelDir$/02_BatchNormConv
NewModel=$ModelDir$/02_BatchNormConv.Eval
editPath=$WorkDir$/02_BatchNormConv.mel
editPath=$ConfigDir$/02_BatchNormConv.mel
]
Test=[
action=test
modelPath=$ModelDir$/02_BatchNormConv.Eval
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=16
NDLNetworkBuilder=[
networkDescription=$WorkDir$/02_BatchNormConv.ndl
networkDescription=$ConfigDir$/02_BatchNormConv.ndl
]
reader=[
readerType=UCIFastReader
file=$WorkDir$/Test.txt
file=$DataDir$/Test.txt
randomize=None
features=[
dim=3072
@ -75,7 +81,7 @@ Test=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]

View file

@ -1,16 +1,16 @@
m=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m)
ibn_e = BatchNormalization(featScaled, isc, ib, im, iisd, eval = true, spatial = true)
SetNodeInput(conv1.c, 1, ibn_e)
conv1.bn_e = BatchNormalization(conv1.c, conv1.sc, conv1.b, conv1.m, conv1.isd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv1.y, 0, conv1.bn_e)
conv2.bn_e = BatchNormalization(pool1, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true)
SetNodeInput(conv2.c, 1, conv2.bn_e)
conv2.bn_e = BatchNormalization(conv2.c, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv2.y, 0, conv2.bn_e)
conv3.bn_e = BatchNormalization(pool2, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true)
SetNodeInput(conv3.c, 1, conv3.bn_e)
conv3.bn_e = BatchNormalization(conv3.c, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv3.y, 0, conv3.bn_e)
h1.bn_e = BatchNormalization(pool3, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false)
SetNodeInput(h1.t, 1, h1.bn_e)
h1.bn_e = BatchNormalization(h1.t, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false)
SetNodeInput(h1.y, 0, h1.bn_e)
SaveModel(m, $NewModel$, format=cntk)

View file

@ -7,8 +7,8 @@ ndlMnistMacros = [
ImageC = 3
LabelDim = 10
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
featOffs = Const(128, rows = 3072)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
featOffs = Const(128)
featScaled = Minus(features, featOffs)
labels = Input(LabelDim, tag = label)
@ -18,6 +18,9 @@ ndlMnistMacros = [
conv2BValue = 0
conv3WScale = 1.414
conv3BValue = 0
scScale = 0.03
fc1WScale = 12
fc1BValue = 0
fc2WScale = 1.5
@ -25,12 +28,6 @@ ndlMnistMacros = [
]
DNN=[
ib = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
isc = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
im = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
iisd = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
ibn = BatchNormalization(featScaled, isc, ib, im, iisd, eval = false, spatial = true)
# conv1
kW1 = 5
kH1 = 5
@ -38,14 +35,14 @@ DNN=[
hStride1 = 1
vStride1 = 1
# weight[cMap1, kW1 * kH1 * ImageC]
conv1 = ConvReLULayer(ibn, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
conv1 = ConvBNReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue, scScale)
# pool1
pool1W = 3
pool1H = 3
pool1hStride = 2
pool1vStride = 2
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride)
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
# conv2
kW2 = 5
@ -54,14 +51,14 @@ DNN=[
hStride2 = 1
vStride2 = 1
# weight[cMap2, kW2 * kH2 * cMap1]
conv2 = ConvBNReLULayer(pool1, cMap1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
conv2 = ConvBNReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue, scScale)
# pool2
pool2W = 3
pool2H = 3
pool2hStride = 2
pool2vStride = 2
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride)
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
# conv3
kW3 = 5
@ -70,19 +67,18 @@ DNN=[
hStride3 = 1
vStride3 = 1
# weight[cMap3, kW3 * kH3 * cMap2]
conv3 = ConvBNReLULayer(pool2, cMap2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
conv3 = ConvBNReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue, scScale)
# pool3
pool3W = 3
pool3H = 3
pool3hStride = 2
pool3vStride = 2
pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride)
pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
hiddenDim = 64
h1 = DnnBNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)
h1_d = Dropout(h1)
ol = DNNLastLayer(hiddenDim, labelDim, h1_d, fc2WScale, fc2BValue)
ol = DNNLastLayer(hiddenDim, labelDim, h1, fc2WScale, fc2BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)

View file

@ -16,6 +16,7 @@ command=Train:AddBNEval:Test
stderr=$OutputDir$/03_ResNet
traceLevel=1
numMBsToShowResult=200
Proj16to32Filename = $ConfigDir$/16to32.txt
Proj32to64Filename = $ConfigDir$/32to64.txt
@ -45,8 +46,6 @@ Train=[
gradientBits=1
]
]
numMBsToShowResult=10
]
reader=[

View file

@ -1,52 +1,52 @@
m=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m)
conv1.bn_e = BatchNormalization(conv1.c, conv1.sc, conv1.b, conv1.m, conv1.isd, eval = true, spatial = true)
conv1.bn_e = BatchNormalization(conv1.c, conv1.sc, conv1.b, conv1.m, conv1.isd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv1.y, 0, conv1.bn_e)
rn1_1.bn1_e = BatchNormalization(rn1_1.c1, rn1_1.sc1, rn1_1.b1, rn1_1.m1, rn1_1.isd1, eval = true, spatial = true)
rn1_1.bn1_e = BatchNormalization(rn1_1.c1, rn1_1.sc1, rn1_1.b1, rn1_1.m1, rn1_1.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn1_1.y1, 0, rn1_1.bn1_e)
rn1_1.bn2_e = BatchNormalization(rn1_1.c2, rn1_1.sc2, rn1_1.b2, rn1_1.m2, rn1_1.isd2, eval = true, spatial = true)
rn1_1.bn2_e = BatchNormalization(rn1_1.c2, rn1_1.sc2, rn1_1.b2, rn1_1.m2, rn1_1.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn1_1.p, 0, rn1_1.bn2_e)
rn1_2.bn1_e = BatchNormalization(rn1_2.c1, rn1_2.sc1, rn1_2.b1, rn1_2.m1, rn1_2.isd1, eval = true, spatial = true)
rn1_2.bn1_e = BatchNormalization(rn1_2.c1, rn1_2.sc1, rn1_2.b1, rn1_2.m1, rn1_2.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn1_2.y1, 0, rn1_2.bn1_e)
rn1_2.bn2_e = BatchNormalization(rn1_2.c2, rn1_2.sc2, rn1_2.b2, rn1_2.m2, rn1_2.isd2, eval = true, spatial = true)
rn1_2.bn2_e = BatchNormalization(rn1_2.c2, rn1_2.sc2, rn1_2.b2, rn1_2.m2, rn1_2.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn1_2.p, 0, rn1_2.bn2_e)
rn1_3.bn1_e = BatchNormalization(rn1_3.c1, rn1_3.sc1, rn1_3.b1, rn1_3.m1, rn1_3.isd1, eval = true, spatial = true)
rn1_3.bn1_e = BatchNormalization(rn1_3.c1, rn1_3.sc1, rn1_3.b1, rn1_3.m1, rn1_3.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn1_3.y1, 0, rn1_3.bn1_e)
rn1_3.bn2_e = BatchNormalization(rn1_3.c2, rn1_3.sc2, rn1_3.b2, rn1_3.m2, rn1_3.isd2, eval = true, spatial = true)
rn1_3.bn2_e = BatchNormalization(rn1_3.c2, rn1_3.sc2, rn1_3.b2, rn1_3.m2, rn1_3.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn1_3.p, 0, rn1_3.bn2_e)
rn2_1.bn1_e = BatchNormalization(rn2_1.c1, rn2_1.sc1, rn2_1.b1, rn2_1.m1, rn2_1.isd1, eval = true, spatial = true)
rn2_1.bn1_e = BatchNormalization(rn2_1.c1, rn2_1.sc1, rn2_1.b1, rn2_1.m1, rn2_1.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn2_1.y1, 0, rn2_1.bn1_e)
rn2_1.bn2_e = BatchNormalization(rn2_1.c2, rn2_1.sc2, rn2_1.b2, rn2_1.m2, rn2_1.isd2, eval = true, spatial = true)
rn2_1.bn2_e = BatchNormalization(rn2_1.c2, rn2_1.sc2, rn2_1.b2, rn2_1.m2, rn2_1.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn2_1.p, 0, rn2_1.bn2_e)
rn2_2.bn1_e = BatchNormalization(rn2_2.c1, rn2_2.sc1, rn2_2.b1, rn2_2.m1, rn2_2.isd1, eval = true, spatial = true)
rn2_2.bn1_e = BatchNormalization(rn2_2.c1, rn2_2.sc1, rn2_2.b1, rn2_2.m1, rn2_2.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn2_2.y1, 0, rn2_2.bn1_e)
rn2_2.bn2_e = BatchNormalization(rn2_2.c2, rn2_2.sc2, rn2_2.b2, rn2_2.m2, rn2_2.isd2, eval = true, spatial = true)
rn2_2.bn2_e = BatchNormalization(rn2_2.c2, rn2_2.sc2, rn2_2.b2, rn2_2.m2, rn2_2.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn2_2.p, 0, rn2_2.bn2_e)
rn2_3.bn1_e = BatchNormalization(rn2_3.c1, rn2_3.sc1, rn2_3.b1, rn2_3.m1, rn2_3.isd1, eval = true, spatial = true)
rn2_3.bn1_e = BatchNormalization(rn2_3.c1, rn2_3.sc1, rn2_3.b1, rn2_3.m1, rn2_3.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn2_3.y1, 0, rn2_3.bn1_e)
rn2_3.bn2_e = BatchNormalization(rn2_3.c2, rn2_3.sc2, rn2_3.b2, rn2_3.m2, rn2_3.isd2, eval = true, spatial = true)
rn2_3.bn2_e = BatchNormalization(rn2_3.c2, rn2_3.sc2, rn2_3.b2, rn2_3.m2, rn2_3.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn2_3.p, 0, rn2_3.bn2_e)
rn3_1.bn1_e = BatchNormalization(rn3_1.c1, rn3_1.sc1, rn3_1.b1, rn3_1.m1, rn3_1.isd1, eval = true, spatial = true)
rn3_1.bn1_e = BatchNormalization(rn3_1.c1, rn3_1.sc1, rn3_1.b1, rn3_1.m1, rn3_1.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn3_1.y1, 0, rn3_1.bn1_e)
rn3_1.bn2_e = BatchNormalization(rn3_1.c2, rn3_1.sc2, rn3_1.b2, rn3_1.m2, rn3_1.isd2, eval = true, spatial = true)
rn3_1.bn2_e = BatchNormalization(rn3_1.c2, rn3_1.sc2, rn3_1.b2, rn3_1.m2, rn3_1.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn3_1.p, 0, rn3_1.bn2_e)
rn3_2.bn1_e = BatchNormalization(rn3_2.c1, rn3_2.sc1, rn3_2.b1, rn3_2.m1, rn3_2.isd1, eval = true, spatial = true)
rn3_2.bn1_e = BatchNormalization(rn3_2.c1, rn3_2.sc1, rn3_2.b1, rn3_2.m1, rn3_2.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn3_2.y1, 0, rn3_2.bn1_e)
rn3_2.bn2_e = BatchNormalization(rn3_2.c2, rn3_2.sc2, rn3_2.b2, rn3_2.m2, rn3_2.isd2, eval = true, spatial = true)
rn3_2.bn2_e = BatchNormalization(rn3_2.c2, rn3_2.sc2, rn3_2.b2, rn3_2.m2, rn3_2.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn3_2.p, 0, rn3_2.bn2_e)
rn3_3.bn1_e = BatchNormalization(rn3_3.c1, rn3_3.sc1, rn3_3.b1, rn3_3.m1, rn3_3.isd1, eval = true, spatial = true)
rn3_3.bn1_e = BatchNormalization(rn3_3.c1, rn3_3.sc1, rn3_3.b1, rn3_3.m1, rn3_3.isd1, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn3_3.y1, 0, rn3_3.bn1_e)
rn3_3.bn2_e = BatchNormalization(rn3_3.c2, rn3_3.sc2, rn3_3.b2, rn3_3.m2, rn3_3.isd2, eval = true, spatial = true)
rn3_3.bn2_e = BatchNormalization(rn3_3.c2, rn3_3.sc2, rn3_3.b2, rn3_3.m2, rn3_3.isd2, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(rn3_3.p, 0, rn3_3.bn2_e)
SaveModel(m, $NewModel$, format=cntk)

View file

@ -7,8 +7,8 @@ LocalMacros = [
ImageC = 3
LabelDim = 10
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
featOffs = Const(128, rows = 3072)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
featOffs = Const(128)
featScaled = Minus(features, featOffs)
labels = Input(LabelDim, tag = label)
@ -30,7 +30,7 @@ LocalMacros = [
DNN=[
cMap1 = 16
conv1 = ConvBNReLULayer2(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue)
conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hStride1, vStride1, convWScale, convBValue, scValue)
rn1_1 = ResNetNode2(conv1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
rn1_2 = ResNetNode2(rn1_1, cMap1, 144, kW, kH, convWScale, convBValue, scValue)
@ -38,13 +38,13 @@ DNN=[
cMap2 = 32
rn2_1_Wproj = Parameter(cMap2, cMap1, init = fromFile, initFromFilePath = "$Proj16to32Filename$", needGradient = false)
rn2_1 = ResNetNode2Conv(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
rn2_1 = ResNetNode2Inc(rn1_3, cMap2, 144, 288, kW, kH, convWScale, convBValue, scValue, rn2_1_Wproj)
rn2_2 = ResNetNode2(rn2_1, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
rn2_3 = ResNetNode2(rn2_2, cMap2, 288, kW, kH, convWScale, convBValue, scValue)
cMap3 = 64
rn3_1_Wproj = Parameter(cMap3, cMap2, init = fromFile, initFromFilePath = "$Proj32to64Filename$", needGradient = false)
rn3_1 = ResNetNode2Conv(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
rn3_1 = ResNetNode2Inc(rn2_3, cMap3, 288, 576, kW, kH, convWScale, convBValue, scValue, rn3_1_Wproj)
rn3_2 = ResNetNode2(rn3_1, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
rn3_3 = ResNetNode2(rn3_2, cMap3, 576, kW, kH, convWScale, convBValue, scValue)
@ -53,7 +53,7 @@ DNN=[
poolH = 3
poolhStride = 2
poolvStride = 2
pool = AveragePooling(rn3_3, poolW, poolH, poolhStride, poolvStride)
pool = AveragePooling(rn3_3, poolW, poolH, poolhStride, poolvStride, imageLayout = "cudnn")
ol = DnnLastLayer(576, labelDim, pool, fc1WScale, fc1BValue)

View file

@ -1,83 +1,71 @@
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
p = Plus(c, b);
y = RectifiedLinear(p);
}
ConvBNReLULayer(inp, inMap, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
sc = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
m = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
bn = BatchNormalization(inp, sc, b, m, isd, eval = false, spatial = true)
c = Convolution(W, bn, kW, kH, outMap, hStride, vStride, zeroPadding = true)
y = RectifiedLinear(c);
}
ConvBNReLULayer2(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scScale)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y = RectifiedLinear(bn);
}
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
p = Plus(bn2, inp)
y2 = RectifiedLinear(p);
}
ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
ResNetNode2Inc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, Wproj)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
p = Plus(bn2, cproj)
y2 = RectifiedLinear(p);
}
@ -94,13 +82,13 @@ DnnReLULayer(inDim, outDim, x, wScale, bValue)
DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
{
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
b = Parameter(inDim, 1, init = fixedValue, value = bValue)
sc = Parameter(inDim, 1, init = Gaussian, initValueScale = 0.01)
m = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
bn = BatchNormalization(x, sc, b, m, isd, eval = false, spatial = false)
t = Times(W, bn)
y = RectifiedLinear(t)
b = Parameter(outDim, 1, init = fixedValue, value = bValue)
sc = Parameter(outDim, 1, init = Gaussian, initValueScale = 0.01)
m = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
t = Times(W, x)
bn = BatchNormalization(t, sc, b, m, isd, eval = false, spatial = false, imageLayout = "cudnn")
y = RectifiedLinear(bn)
}
DnnLastLayer(hiddenDim, labelDim, x, wScale, bValue)

View file

@ -15,7 +15,7 @@ Short description of the network:
01_Convolution.ndl is a convolutional network which has 3 convolutional and 3 max pooling layers and resembles the network described here:
https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-80sec.cfg
(main differences are usage of max pooling layers everywhere rather than mix of max and average pooling, as well as dropout in fully-connected layer).
The network produces 22% of error after training for about 4 minutes on GPU.
The network produces 21% of error after training for about 3 minutes on GPU.
To run the sample, navigate to this folder and run the following command:
<path to CNTK executable> configFile=01_Conv.config configName=01_Conv
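
As a hedged illustration (not one of this commit's files): with the RootDir / ConfigDir / DataDir / OutputDir variables introduced here, the data and output locations can presumably be redirected by overriding those variables on the CNTK command line, along these lines:

<path to CNTK executable> configFile=01_Conv.config configName=01_Conv DataDir=<path to CIFAR-10 data> OutputDir=<path for models and logs>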

View file

@ -1,3 +1,10 @@
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros=$ConfigDir$/Macros.ndl
precision=float
@ -7,10 +14,13 @@ command=Train:AddTop5Eval:Test
parallelTrain=false
stderr=$OutputDir$/AlexNet
traceLevel=1
numMBsToShowResult=500
Train=[
action=train
modelPath=$ModelDir$/AlexNet
traceLevel=1
NDLNetworkBuilder=[
networkDescription=$ConfigDir$/AlexNet.ndl
@ -35,7 +45,7 @@ Train=[
]
]
numMBsToShowResult=10
numMBsToShowResult=100
]
reader=[
@ -44,7 +54,7 @@ Train=[
# <full path to image><tab><numerical label (0-based class id)>
# Example:
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
file=$ConfigDir$/train_map_nfs.txt
file=$ConfigDir$/train_map.txt
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
randomize=Auto
features=[
@ -93,7 +103,7 @@ Test=[
reader=[
readerType=ImageReader
file=$ConfigDir$/val_map_nfs.txt
file=$ConfigDir$/val_map.txt
randomize=None
features=[
width=224

View file

@ -7,7 +7,7 @@ ndlMacros = [
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
conv1WScale = 0.95
@ -36,14 +36,14 @@ DNN=[
hStride1 = 4
vStride1 = 4
# weight[cMap1, kW1 * kH1 * ImageC]
conv1_act = ConvReLULayer(features, cMap1, 363, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
conv1 = ConvReLULayer(features, cMap1, 363, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
# pool1
pool1W = 3
pool1H = 3
pool1hStride = 2
pool1vStride = 2
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
# conv2
kW2 = 5
@ -52,14 +52,14 @@ DNN=[
hStride2 = 1
vStride2 = 1
# weight[cMap2, kW2 * kH2 * cMap1]
conv2_act = ConvReLULayer(pool1, cMap2, 1600, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
conv2 = ConvReLULayer(pool1, cMap2, 1600, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
# pool2
pool2W = 3
pool2H = 3
pool2hStride = 2
pool2vStride = 2
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
# conv3
kW3 = 3
@ -68,7 +68,7 @@ DNN=[
hStride3 = 1
vStride3 = 1
# weight[cMap3, kW3 * kH3 * cMap2]
conv3_act = ConvReLULayer(pool2, cMap3, 1728, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
conv3 = ConvReLULayer(pool2, cMap3, 1728, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
# conv4
kW4 = 3
@ -77,7 +77,7 @@ DNN=[
hStride4 = 1
vStride4 = 1
# weight[cMap4, kW4 * kH4 * cMap3]
conv4_act = ConvReLULayer(conv3_act, cMap4, 3456, kW4, kH4, hStride4, vStride4, conv4WScale, conv4BValue)
conv4 = ConvReLULayer(conv3, cMap4, 3456, kW4, kH4, hStride4, vStride4, conv4WScale, conv4BValue)
# conv5
kW5 = 3
@ -86,14 +86,14 @@ DNN=[
hStride5 = 1
vStride5 = 1
# weight[cMap5, kW5 * kH5 * cMap4]
conv5_act = ConvReLULayer(conv4_act, cMap5, 2304, kW5, kH5, hStride5, vStride5, conv5WScale, conv5BValue)
conv5 = ConvReLULayer(conv4, cMap5, 2304, kW5, kH5, hStride5, vStride5, conv5WScale, conv5BValue)
# pool3
pool3W = 3
pool3H = 3
pool3hStride = 2
pool3vStride = 2
pool3 = MaxPooling(conv5_act, pool3W, pool3H, pool3hStride, pool3vStride)
pool3 = MaxPooling(conv5, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DNNReLULayer(9216, hiddenDim, pool3, fc1WScale, fc1BValue)

View file

@ -1,10 +1,10 @@
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
convW = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
convB = Parameter(outMap, 1, init = fixedValue, value = bValue)
convPlusB = Plus(conv, convB);
act = RectifiedLinear(convPlusB);
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
z = Plus(c, b);
y = RectifiedLinear(z);
}
DNNReLULayer(inDim, outDim, x, wScale, bValue)

View file

@ -6,8 +6,8 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y = RectifiedLinear(bn);
}
@ -20,8 +20,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
@ -30,8 +30,8 @@ ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
p = Plus(bn2, inp)
y2 = RectifiedLinear(p);
}
@ -45,8 +45,8 @@ ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue,
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
@ -55,10 +55,10 @@ ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue,
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
p = Plus(bn2, cproj)
y2 = RectifiedLinear(p);
}
@ -73,8 +73,8 @@ ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
m1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
# 3x3 convolution.
@ -84,8 +84,8 @@ ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
m2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, 3, 3, convMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, 3, 3, convMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y2 = RectifiedLinear(bn2);
# 1x1 expanding convolution.
@ -95,8 +95,8 @@ ResNetNode3(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue)
m3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false)
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true)
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true, imageLayout = "cudnn")
p = Plus(bn3, inp)
y3 = RectifiedLinear(p);
@ -111,8 +111,8 @@ ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue,
m1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true)
c1 = Convolution(W1, inp, 1, 1, convMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
# 3x3 convolution.
@ -122,8 +122,8 @@ ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue,
m2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(convMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, 3, 3, convMap, 2, 2, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, 3, 3, convMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y2 = RectifiedLinear(bn2);
# 1x1 expanding convolution.
@ -133,11 +133,11 @@ ResNetNode3Inc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue,
m3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd3 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false)
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true)
c3 = Convolution(W3, y2, 1, 1, outMap, 1, 1, zeroPadding = false, imageLayout = "cudnn")
bn3 = BatchNormalization(c3, sc3, b3, m3, isd3, eval = false, spatial = true, imageLayout = "cudnn")
# Increasing input dimension convolution
cProj = Convolution(wProj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
cProj = Convolution(wProj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
p = Plus(bn3, cProj)
y3 = RectifiedLinear(p);

View file

@ -32,10 +32,10 @@ Train=[
SGD=[
epochSize=0
minibatchSize=2
learningRatesPerMB=0.1*20:0.03*10:0.01*30:0.003
minibatchSize=32
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
momentumPerMB=0.9
maxEpochs=100
maxEpochs=120
gradUpdateType=None
L2RegWeight=0.0001
dropoutRate=0
@ -72,7 +72,7 @@ Train=[
# Horizontal random flip, will be enabled by default if cropType=Random
#hflip=0
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
cropRatio=0.46666:0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
jitterType=UniRatio
@ -99,7 +99,7 @@ Test=[
action=test
modelPath=$ModelDir$/ResNet_152.Top5
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=32
NDLNetworkBuilder=[
networkDescription=$ConfigDir$/ResNet_152.ndl

View file

@ -7,7 +7,7 @@ ndlMacros = [
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
featOffs = Const(0, rows = 150528)
featScaled = Plus(features, featOffs)
labels = Input(LabelDim, tag = label)
@ -42,7 +42,7 @@ DNN=[
cMap6 = 2048
conv1 = ConvBNReLULayer(featScaled, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
rn1_1_Wproj = Parameter(cMap3, cMap1, init = fromFile, initFromFilePath = "$Proj64to256Filename$", needGradient = false)
rn1_1 = ResNetNode3Inc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, rn1_1_Wproj)
@ -102,7 +102,7 @@ DNN=[
rn4_2 = ResNetNode3(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
rn4_3 = ResNetNode3(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue)
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs)
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
ol = DnnLayer(8192, labelDim, pool5, fcWScale, fcBValue)

View file

@ -32,9 +32,9 @@ Train=[
SGD=[
epochSize=0
minibatchSize=64
learningRatesPerMB=0.1*20:0.03*10:0.01*30:0.003
learningRatesPerMB=0.1*30:0.03*25:0.01*25:0.003*25:0.001
momentumPerMB=0.9
maxEpochs=100
maxEpochs=120
gradUpdateType=None
L2RegWeight=0.0001
dropoutRate=0
@ -71,7 +71,7 @@ Train=[
# Horizontal random flip, will be enabled by default if cropType=Random
#hflip=0
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
cropRatio=0.46666:0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
jitterType=UniRatio
@ -98,7 +98,7 @@ Test=[
action=test
modelPath=$ModelDir$/ResNet_34.Top5
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=64
NDLNetworkBuilder=[
networkDescription=$ConfigDir$/ResNet_34.ndl

View file

@ -7,9 +7,7 @@ ndlMacros = [
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
featOffs = Const(0, rows = 150528)
featScaled = Plus(features, featOffs)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
@ -35,8 +33,8 @@ ndlMacros = [
DNN=[
cMap1 = 64
conv1 = ConvBNReLULayer(featScaled, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs)
conv1 = ConvBNReLULayer(features, cMap1, 147, 7, 7, 2, 2, convWScale, convBValue, scValue)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
rn1_1 = ResNetNode2(pool1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
rn1_2 = ResNetNode2(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue)
@ -64,7 +62,7 @@ DNN=[
rn4_2 = ResNetNode2(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
rn4_3 = ResNetNode2(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue)
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs)
pool5 = AveragePooling(rn4_3, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
ol = DnnLayer(4608, labelDim, pool5, fcWScale, fcBValue)

View file

@ -12,14 +12,13 @@ DnnReLULayer(inDim, outDim, x, wScale, bValue)
DnnBNReLULayer(inDim, outDim, x, wScale, bValue)
{
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
b = Parameter(inDim, 1, init = fixedValue, value = bValue)
sc = Parameter(inDim, 1, init = Gaussian, initValueScale = 0.01)
m = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(inDim, 1, init = fixedValue, value = 0, needGradient = false)
bn = BatchNormalization(x, sc, b, m, isd, eval = false, spatial = false)
t = Times(W, bn)
y = RectifiedLinear(t)
b = Parameter(outDim, 1, init = fixedValue, value = bValue)
sc = Parameter(outDim, 1, init = Gaussian, initValueScale = 0.01)
m = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outDim, 1, init = fixedValue, value = 0, needGradient = false)
t = Times(W, x)
bn = BatchNormalization(t, sc, b, m, isd, eval = false, spatial = false)
y = RectifiedLinear(bn)
}
# Fully-connected layer.
@ -35,8 +34,8 @@ DnnLayer(inDim, outDim, x, wScale, bValue)
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
z = Plus(c, b);
y = RectifiedLinear(z);
}
@ -50,7 +49,7 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, imageLayout = "cudnn")
y = RectifiedLinear(bn);
}

View file

@ -1,21 +1,28 @@
WorkDir=.
ModelDir=$WorkDir$/_out/$ConfigName$
stderr=$WorkDir$/_out/$ConfigName$
RootDir = "."
ndlMacros=$WorkDir$/Macros.ndl
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
command=Train:AddTop5Eval:Test
stderr=$OutputDir$/VGG_A
traceLevel=1
numMBsToShowResult=500
Train=[
action=train
modelPath=$ModelDir$/VGG_A
traceLevel=1
NDLNetworkBuilder=[
networkDescription=$WorkDir$/VGG_A.ndl
networkDescription=$ConfigDir$/VGG_A.ndl
]
SGD=[
@ -37,7 +44,7 @@ Train=[
# <full path to image><tab><numerical label (0-based class id)>
# Example:
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
file=$WorkDir$/train_map.txt
file=$ConfigDir$/train_map.txt
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
randomize=Auto
features=[
@ -59,7 +66,7 @@ Train=[
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations=Linear
# Stores mean values for each pixel in OpenCV matrix XML format.
meanFile=$WorkDir$/ImageNet1K_mean.xml
meanFile=$ConfigDir$/ImageNet1K_mean.xml
]
labels=[
labelDim=1000
@ -71,29 +78,29 @@ AddTop5Eval=[
action=edit
CurModel=$ModelDir$/VGG_A
NewModel=$ModelDir$/VGG_A.Top5
editPath=$WorkDir$/add_top5_layer.mel
editPath=$ConfigDir$/add_top5_layer.mel
]
Test=[
action=test
modelPath=$ModelDir$/VGG_A.Top5
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=32
NDLNetworkBuilder=[
networkDescription=$WorkDir$/VGG_A.ndl
networkDescription=$ConfigDir$/VGG_A.ndl
]
reader=[
readerType=ImageReader
file=$WorkDir$/val_map.txt
file=$ConfigDir$/val_map.txt
randomize=None
features=[
width=224
height=224
channels=3
cropType=Center
meanFile=$WorkDir$/ImageNet1K_mean.xml
meanFile=$ConfigDir$/ImageNet1K_mean.xml
]
labels=[
labelDim=1000

View file

@ -1,13 +1,13 @@
load=ndlMnistMacros
load=ndlMacros
run=DNN
ndlMnistMacros = [
ndlMacros = [
ImageW = 224
ImageH = 224
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
@ -38,30 +38,30 @@ DNN=[
cMap1 = 64
conv1 = ConvReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs)
pool1 = MaxPooling(conv1, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap2 = 128
conv2 = ConvReLULayer(pool1, cMap2, 576, kW, kH, hs, vs, convWScale, convBValue)
pool2 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
pool2 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap3 = 256
conv3 = ConvReLULayer(pool2, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue)
conv4 = ConvReLULayer(conv3, cMap3, 2304, kW, kH, hs, vs, convWScale, convBValue)
pool3 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
pool3 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap5 = 512
conv5 = ConvReLULayer(pool3, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
conv6 = ConvReLULayer(conv5, cMap5, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool4 = MaxPooling(conv6, poolW, poolH, poolhs, poolvs)
pool4 = MaxPooling(conv6, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap6 = 512
conv7 = ConvReLULayer(pool4, cMap6, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv8 = ConvReLULayer(conv7, cMap6, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool5 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
pool5 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DnnReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)

View file

@ -94,7 +94,7 @@ Test=[
action=test
modelPath=$ModelDir$/VGG_E.Top5
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=16
NDLNetworkBuilder=[
networkDescription=$ConfigDir$/VGG_E.ndl

View file

@ -7,7 +7,7 @@ ndlMacros = [
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
@ -39,13 +39,13 @@ DNN=[
conv1 = ConvReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue)
conv2 = ConvReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue)
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap3 = 128
conv3 = ConvReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue)
conv4 = ConvReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue)
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap5 = 256
conv5 = ConvReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue)
@ -53,7 +53,7 @@ DNN=[
conv7 = ConvReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
conv8 = ConvReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue)
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap9 = 512
conv9 = ConvReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue)
@ -61,7 +61,7 @@ DNN=[
conv11 = ConvReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv12 = ConvReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs)
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap13 = 512
conv13 = ConvReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
@ -69,7 +69,7 @@ DNN=[
conv15 = ConvReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
conv16 = ConvReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue)
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs)
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DnnReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)

View file

@ -94,7 +94,7 @@ Test=[
action=test
modelPath=$ModelDir$/VGG_E_BN.Top5
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=16
NDLNetworkBuilder=[
networkDescription=$ConfigDir$/VGG_E_BN.ndl

View file

@ -7,9 +7,7 @@ ndlMacros = [
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
featOffs = Const(0, rows = 150528)
featScaled = Plus(features, featOffs)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
labels = Input(LabelDim, tag = label)
# Kernels width and height.
@ -39,16 +37,16 @@ ndlMacros = [
DNN=[
cMap1 = 64
conv1 = ConvBNReLULayer(featScaled, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv1 = ConvBNReLULayer(features, cMap1, 27, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv2 = ConvBNReLULayer(conv1, cMap1, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs)
pool1 = MaxPooling(conv2, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap3 = 128
conv3 = ConvBNReLULayer(pool1, cMap3, 576, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv4 = ConvBNReLULayer(conv3, cMap3, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs)
pool2 = MaxPooling(conv4, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap5 = 256
conv5 = ConvBNReLULayer(pool2, cMap5, 1152, kW, kH, hs, vs, convWScale, convBValue, scValue)
@ -56,7 +54,7 @@ DNN=[
conv7 = ConvBNReLULayer(conv6, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv8 = ConvBNReLULayer(conv7, cMap5, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs)
pool3 = MaxPooling(conv8, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap9 = 512
conv9 = ConvBNReLULayer(pool3, cMap9, 2304, kW, kH, hs, vs, convWScale, convBValue, scValue)
@ -64,7 +62,7 @@ DNN=[
conv11 = ConvBNReLULayer(conv10, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv12 = ConvBNReLULayer(conv11, cMap9, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs)
pool4 = MaxPooling(conv12, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
cMap13 = 512
conv13 = ConvBNReLULayer(pool4, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
@ -72,7 +70,7 @@ DNN=[
conv15 = ConvBNReLULayer(conv14, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
conv16 = ConvBNReLULayer(conv15, cMap13, 4608, kW, kH, hs, vs, convWScale, convBValue, scValue)
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs)
pool5 = MaxPooling(conv16, poolW, poolH, poolhs, poolvs, imageLayout = "cudnn")
hiddenDim = 4096
h1 = DnnBNReLULayer(25088, hiddenDim, pool5, fc1WScale, fc1BValue)

View file

@ -47,6 +47,7 @@ using namespace std;
L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
L"Shift(input, fromOffsets, boundaryValue, dim=-1, offsetRanges=1, multiOffsetDim=0, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) ; fromOffset = new IntVector [ items = fromOffsets ] ; offsetRange = new SizeVector [items= new SizeVector [ items = offsetRanges ] ]/*plus the function args*/ ]\n"
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"

View file

@ -699,5 +699,4 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
}
template<class V> /*static*/ const std::vector<typename V::value_type> & IConfigRecord::Array(const V & vec) { return static_cast<const std::vector<typename V::value_type> &>(vec); } // use this specifically for XXXargvector
}}} // end namespaces

View file

@ -208,7 +208,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
void Load(File& fstream, bool acceptLegacyFormat = false)
const TensorShape & Load(File& fstream, bool acceptLegacyFormat = false)
{
// format: uint32_t n, dim[0], dim[1], ..., dim[n-1]
// We are also able to read (but not write) an older format, which stores 3-dimensional tensors as size_t W, H, C
@ -232,6 +232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> m_dims[2] >> m_dims[0]; // stored in order C, W, H
}
InitAsNoSlice();
return *this;
}
// accessors
@ -404,7 +405,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_strides.resize(m_dims.size());
for (size_t k = 0; k < m_dims.size(); k++)
m_strides[k] = k > 0 ? m_strides[k - 1] * (ptrdiff_t)m_dims[k - 1] : 1;
m_allocation = m_dims.empty() ? 0 : m_dims.back() * (size_t)m_strides.back();
m_allocation = m_dims.empty() ? 0 : m_dims.back() * (size_t)m_strides.back(); // TODO: Or should an empty shape mean it's a scalar?
}
private:
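
A minimal sketch of what returning *this from Load enables (illustrative only; the caller and member name below are hypothetical, not from this commit): a shape can now be constructed, loaded, and assigned in one expression.

// hypothetical caller; assumes TensorShape is default-constructible and copy-assignable, as in the surrounding code
TensorShape m_shape;
void LoadShape(File& fstream)
{
    m_shape = TensorShape().Load(fstream, /*acceptLegacyFormat=*/true);
}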

View file

@ -24,6 +24,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// The methods below determine evaluation order, which is tricky in presence of recurrent loops.
// TODO: Can this be moved to a separate class?
static const vector<int> & GetRecurrenceDirections(const ComputationNodeBasePtr &);
// FormRecurrentLoops() -- MAIN ENTRY POINT for network recurrent-loop analysis. All other functions in this CPP are called only from this one.
// This function analysis the networks for recurrent loops present in the computation of 'rootNode.'
// This sets/updates:
@ -83,16 +85,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
unordered_set<ComputationNodeBasePtr> visited;
unordered_set<ComputationNodeBasePtr> recStack;
// set m_indexInLoop for all nodes except Past/FutureValueNodes in all loops
// set m_indexInLoop for all nodes except recurrent nodes in all loops
// This value is only used in the block right after this.
for (size_t j = 0; j < iter->m_nestedNodes.size(); j++)
{
ComputationNodeBasePtr node = iter->m_nestedNodes[j];
const auto & node = iter->m_nestedNodes[j];
for (size_t i = 0; i < node->GetNumInputs(); i++)
{
if (node->Input(i)->m_loopId == node->m_loopId &&
node->OperationName() != OperationNameOf(PastValueNode) &&
node->OperationName() != OperationNameOf(FutureValueNode)) // TODO: test for type RecurrentNode instead?
if (node->Input(i)->m_loopId == node->m_loopId && GetRecurrenceDirections(node).empty())
{
//assert(node->Input(i)->m_indexInLoop == 0); // No. It seems this variable really counts the number of parents.
node->Input(i)->m_indexInLoop++; // BUGBUG: this is bumping up the m_indexInLoop, but I don't think it is initialized anywhere other than PurgeStateForFormingRecurrentLoops(). i-1?
@ -146,7 +146,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
// log the loops
for (auto & iter : m_allSEQNodes)
{
@ -168,6 +167,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
// checks whether a node is recurrent, and in which direction
static vector<int> emptyVector;
static const vector<int> & GetRecurrenceDirections(const ComputationNodeBasePtr & node)
{
if (node->Is<IRecurrentNode>())
return node->As<IRecurrentNode>()->GetRecurrenceDirections();
else
return emptyVector;
}
static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr> & nestedNodes);
// get the strongly connected components from the graph
// This sets index, lowLink, m_visited, and m_inStack.
@ -299,8 +308,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
visited.insert(cur);
recStack.insert(cur);
if (cur->OperationName() != OperationNameOf(PastValueNode) && // recurrence stops at delays
cur->OperationName() != OperationNameOf(FutureValueNode))
if (GetRecurrenceDirections(cur).empty()) // recurrence stops at delays
{
for (size_t i = 0; i < cur->GetNumInputs(); i++)
if (cur->Input(i)->m_loopId == cur->m_loopId)
@ -384,28 +392,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// set m_steppingDirection for all loops
// TODO: Move this up to where it is used (in a separate commit since git cannot track moving and changing at the same time).
// BUGBUG: Need to extend to multi-dimensional loop directions. Use a vector<int>.
static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr> & nestedNodes)
{
bool hasPastValueNode = false;
bool hasFutureValueNode = false;
vector<int> recurrenceDirections;
for (auto & node : nestedNodes)
{
if (node->OperationName() == OperationNameOf(PastValueNode))
hasPastValueNode = true;
else if (node->OperationName() == OperationNameOf(FutureValueNode))
hasFutureValueNode = true;
const auto & dirs = GetRecurrenceDirections(node);
if (dirs.empty()) // not a recurrent node
continue;
if (recurrenceDirections.empty())
recurrenceDirections = dirs;
else if (recurrenceDirections != dirs)
InvalidArgument("It is not allowed to have multiple different recurrence directions in the same loop (loop connected to %ls %ls operation).",
nestedNodes.front()->NodeName().c_str(), nestedNodes.front()->OperationName().c_str());
}
if (hasPastValueNode && !hasFutureValueNode)
return +1;
else if (hasFutureValueNode && !hasPastValueNode)
return -1;
else if (hasPastValueNode && hasFutureValueNode)
InvalidArgument("It is not allowed to have both PastValue and FutureValue nodes in the same loop. How do you think that should work??");
else
LogicError("There is neither PastValue nor FutureValue nodes in the loop.");
if (recurrenceDirections.empty())
LogicError("There is no recurrent node in the loop connected to %ls %ls operation.",
nestedNodes.front()->NodeName().c_str(), nestedNodes.front()->OperationName().c_str());
// BUGBUG: Multiple recurrence dimensions not yet supported beyond this point.
return -recurrenceDirections[0];
}
}}}
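A quick illustration of the sign convention used above: a node whose recurrence direction is -1 (a PastValue-style node, reading from earlier frames) forces the loop to run left to right, i.e. DetermineLoopDirection returns +1, and vice versa. The following minimal, self-contained C++ sketch (hypothetical helper name, not part of the CNTK sources) just restates that last line:

#include <cassert>
#include <vector>

// Mirrors the final statement of DetermineLoopDirection above: the loop direction is
// the negative of the (single, shared) recurrence direction of the nodes in the loop.
static int DetermineLoopDirectionSketch(const std::vector<int>& recurrenceDirections)
{
    return -recurrenceDirections[0];
}

int main()
{
    assert(DetermineLoopDirectionSketch({ -1 }) == +1); // PastValue-style node  -> forward (left-to-right) loop
    assert(DetermineLoopDirectionSketch({ +1 }) == -1); // FutureValue-style node -> backward (right-to-left) loop
    return 0;
}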

View file

@ -46,6 +46,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else if (nodeType == OperationNameOf(CrossEntropyNode)) return New<CrossEntropyNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CrossEntropyWithSoftmaxNode)) return New<CrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(SequenceWithSoftmaxNode)) return New<SequenceWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DiagonalNode)) return New<DiagonalNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DiagTimesNode)) return New<DiagTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DropoutNode)) return New<DropoutNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DummyCriterionNode)) return New<DummyCriterionNode<ElemType>>(forward<_Types>(_Args)...);
@ -82,7 +83,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else if (nodeType == OperationNameOf(RowElementTimesNode)) return New<RowElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(RowRepeatNode)) return New<RowRepeatNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DiagonalNode)) return New<DiagonalNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RowSliceNode)) return New<RowSliceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RowStackNode)) return New<RowStackNode<ElemType>>(forward<_Types>(_Args)...);
#ifdef ENABLE_BROADCASTING_ELEMENTTIMES
@ -91,6 +91,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else if (nodeType == OperationNameOf(ScaleNode)) return New<ScaleNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(SequenceDecoderNode)) return New<SequenceDecoderNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ShiftNode)) return New<ShiftNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(SigmoidNode)) return New<SigmoidNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(SoftmaxNode)) return New<SoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(SquareErrorNode)) return New<SquareErrorNode<ElemType>>(forward<_Types>(_Args)...);

View file

@ -305,8 +305,9 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
static TensorShape TensorShapeFromConfig(const IConfigRecord & config)
{
const auto & valp = config[L"dims"];
// TODO: Add code that if input is already a tensor shape it is also OK.
if (valp.Is<ConfigArray>())
if (valp.Is<TensorShape>())
return valp.AsRef<TensorShape>(); // UNTESTED
else if (valp.Is<ConfigArray>())
return TensorShape(valp.AsRef<ConfigArray>().AsVector<size_t>([&](const wstring & msg){ valp.Fail(msg); }));
else
return TensorShape(std::vector<size_t>(1, (size_t)valp)); // single element
@ -315,6 +316,26 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
BoxedTensorShape(const IConfigRecordPtr configp) : BoxOf<TensorShape>(TensorShapeFromConfig(*configp)) { }
};
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedTensorShape> registerTensoShape(L"TensorShape");
template<typename E>
class BoxedVector : public BoxOf<vector<E>>
{
// create a vector from config
static vector<E> VectorFromConfig(const IConfigRecord & config)
{
const auto & valp = config[L"items"];
if (valp.Is<vector<E>>())
return valp.AsRef<vector<E>>(); // UNTESTED
else if (valp.Is<ConfigArray>())
return valp.AsRef<ConfigArray>().AsVector<E>([&](const wstring & msg){ valp.Fail(msg); });
else
return std::vector<E>(1, (E)valp); // single element
}
public:
BoxedVector(const IConfigRecordPtr configp) : BoxOf<vector<E>>(VectorFromConfig(*configp)) { }
};
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedTensorShape> registerTensorShape(L"TensorShape");
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedVector<int>> registerIntVector(L"IntVector");
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<BoxedVector<size_t>> registerSizeVector(L"SizeVector");
}}}
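For context, the IntVector and SizeVector runtime types registered here are what the BrainScript expressions new IntVector [ items = fromOffsets ] and new SizeVector [ items = offsetRanges ] in the Shift definition earlier resolve to. The coercion rule of VectorFromConfig (pass an existing vector or array through, wrap a single scalar) can be sketched in plain C++ as follows (illustrative only; std::variant stands in for the ScriptableObjects value type):

#include <variant>
#include <vector>

// Coerce a config value that is either a single scalar or an array into a vector,
// following the same rule as VectorFromConfig above.
template <typename E>
std::vector<E> CoerceToVector(const std::variant<E, std::vector<E>>& value)
{
    if (auto pVec = std::get_if<std::vector<E>>(&value))
        return *pVec;                             // already an array: pass it through
    return std::vector<E>(1, std::get<E>(value)); // single element: wrap into a 1-element vector
}

int main()
{
    auto a = CoerceToVector<int>(std::vector<int>{ -1, -1 }); // array input  -> size 2
    auto b = CoerceToVector<int>(-1);                         // scalar input -> size 1
    return (a.size() == 2 && b.size() == 1) ? 0 : 1;
}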

View file

@ -132,12 +132,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual ~INodeState() {}
};
struct /*interface*/ IStateFulNode
struct /*interface*/ IStatefulNode
{
typedef std::shared_ptr<INodeState> NodeStatePtr;
virtual NodeStatePtr ExportState() = 0;
virtual void ImportState(const NodeStatePtr& pImportedState) = 0;
virtual void ImportState(NodeStatePtr && state) = 0;
};
typedef IStatefulNode::NodeStatePtr NodeStatePtr;
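The move-only ImportState signature is worth a quick illustration: ExportState() hands the state out of a node, and the caller moves it back in (possibly into another node, or at a later sub-minibatch), so exactly one owner holds it at any time. A minimal, self-contained sketch with toy stand-in types (not the real CNTK node classes):

#include <memory>
#include <utility>

struct INodeState { virtual ~INodeState() {} };

struct IStatefulNode
{
    typedef std::shared_ptr<INodeState> NodeStatePtr;
    virtual NodeStatePtr ExportState() = 0;
    virtual void ImportState(NodeStatePtr&& state) = 0;
};

// Toy node: exporting moves the state out (the node keeps nothing), importing moves it back in.
struct ToyStatefulNode : public IStatefulNode
{
    NodeStatePtr m_state = std::make_shared<INodeState>();
    NodeStatePtr ExportState() override { return std::move(m_state); }
    void ImportState(NodeStatePtr&& state) override { m_state = std::move(state); }
};

int main()
{
    ToyStatefulNode a, b;
    b.ImportState(a.ExportState()); // ownership of a's state moves into b; a.m_state is now empty
    return 0;
}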
// =======================================================================
// ComputationNetworkOwnedNodeState -- class to collect ComputationNode members that are really owned by ComputationNetwork
@ -444,7 +445,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
void LinkToMBLayout(MBLayoutPtr pMBLayout) { m_pMBLayout = pMBLayout; }
//MBLayoutPtr GetMBLayout() { return m_pMBLayout; }
const MBLayoutPtr & GetMBLayout() const { return m_pMBLayout; }
bool HasMBLayout() const { return !!m_pMBLayout; }
@ -1505,6 +1505,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
};
// =======================================================================
// IRecurrentNode -- helper wrapper class for ComputationNodes that can be recurrent
// =======================================================================
struct IRecurrentNode { virtual const std::vector<int> & GetRecurrenceDirections() const = 0; };
// =======================================================================
// helper macro to ease access to base members in presence of C++ two-phase name lookup

View file

@ -734,20 +734,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (isFinalValidationPass)
{
const auto m_imageLayoutKind = ImageLayoutKind::CHW; // BUGBUG: Finish this. Must be serialized.
auto dims = ImageDimensions(GetSampleLayout(), m_imageLayoutKind);
auto shape = GetSampleLayout();
if (m_factory == nullptr)
m_factory = ConvolutionEngineFactory<ElemType>::Create(m_deviceId, ConvolutionEngineFactory<ElemType>::EngineType::Auto, m_imageLayoutKind);
if (m_convEng == nullptr)
m_convEng = m_factory->CreateConvEngine(m_deviceId, 0);
if (m_spatial)
{
auto dims = ImageDimensions(shape, m_imageLayoutKind);
if (m_inT == nullptr)
m_inT = m_factory->CreateTensor(dims.m_width, dims.m_height, dims.m_numChannels, 1);
if (m_scaleBiasT == nullptr)
{
if (m_spatial)
m_scaleBiasT = m_factory->CreateTensor(1, 1, dims.m_numChannels, 1);
}
else
m_scaleBiasT = m_factory->CreateTensor(dims.m_width, dims.m_height, dims.m_numChannels, 1);
{
if (m_inT == nullptr)
m_inT = m_factory->CreateTensor(shape.GetNumElements(), 1, 1, 1);
if (m_scaleBiasT == nullptr)
m_scaleBiasT = m_factory->CreateTensor(shape.GetNumElements(), 1, 1, 1);
}
}
}

View file

@ -72,8 +72,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ValidateUnaryMap(isFinalValidationPass);
}
// We don't need our output values in backprop.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool OutputUsedInComputingInputNodesGradients() const override { return gradientFromOutput; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return !gradientFromOutput; }
};
#define UnaryElementWiseWithOpCodeNodeBaseMembers UsingComputationNodeMembersBoilerplate;

View file

@ -25,6 +25,256 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// ShiftNode (input, fromOffset, boundaryValue, dim=-1, offsetRange=1, multiOffsetDim=0) -- delay and rolling window
//
// This shifts the input by (-fromOffset) steps. In other words, output(t) will be input(t+fromOffset).
// E.g. for fromOffset=-1, this gives the past value.
// This node has quite some options that make it powerful for many use cases.
//
// This node can be used in a recurrent loop. This requires special handling by the ComputationNetwork,
// for both execution (sequential execution) and creation (avoiding circular references).
// TODO: When outside a recurrent loop and used with frame randomization, this will communicate to the reader
// that additional frames are needed, which will then return a frame range. TODO: This will not match
// the labels, which are still 1 frame. Think through which dimension this should go in.
//
// Values shifted in from beyond sequence boundaries will be copied from boundaryValue.
// Normally, this is a scalar Constant(). However, it can be any node, which will be indexed from the end
// (e.g. for fromOffset=-1, the last frame of boundaryValue will be used). This can implement
// sequence-to-sequence models. Broadcasting is supported, so it can be e.g. a single output-dimension vector
// applied to all sequences.
//
// To delay (past value), use negative fromOffset. To access future value, use positive fromOffset.
//
// To pull in multiple offsets, use offsetRange>1. This will pull in offsetRange consecutive offsets starting
// with fromOffset. This implements a rolling window. A new dimension will be inserted at multiOffsetDim
// (default 0 means after the last sample dimension). Special considerations:
// - If the boundaryValue is not wide enough, the sequence will be dropped (e.g. if you pull in 5 history frames,
// but the sequence in boundaryValue only has 4 samples).
// - If you feed back such an expanded output into this node in a loop, you get an inconsistency
// and will eventually fail. You must pull the dimensions apart.
// - If the current time step (offset 0) is included in the range (e.g. fromOffset=-1, offsetRange=3) then
// this node cannot participate in a recurrence.
//
// By default, this shifts over the time dimension, but you can choose to shift over any
// sample tensor dimension instead using 'dim' (-1 stands for time). This will only work, however,
// when all involved nodes are implemented using the tensor library. Nodes implemented using
// Matrix slices can only support iterating over time.
//
// The fromOffset can also be a tensor, e.g. (1,1). In that case, iteration will be over multiple
// consecutive dimensions. offsetRange must have the same number of dimensions.
//
// If the boundaryValue has 0 elements, the sequence will be trimmed (frames reaching beyond the boundary
// are dropped). This will initially not be implemented for the time dimension (as it would require
// change of MBLayout).
// -----------------------------------------------------------------------
template<class ElemType>
class ShiftNode : public ComputationNode<ElemType>, public IRecurrentNode, public ILateAttachingNode, public IStatefulNode, public NumInputs<2>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"Shift"; }
public:
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name, const std::vector<int> & fromOffset, int shiftDimension, const std::vector<size_t> & offsetRange, int expandDimension) :
Base(deviceId, name), m_fromOffsetBegin(fromOffset),
m_shiftDimension(shiftDimension), m_expandDimension(expandDimension),
m_insertExpandShapeAt(SIZE_MAX/*uninitialized at this point*/)
{
// determine m_fromOffsetEnd from fromOffset/offsetRange
bool anyNonRecurrent = false;
for (size_t k = 0; k < m_fromOffsetBegin.size(); k++)
{
m_fromOffsetEnd.push_back(m_fromOffsetBegin[k] + (k < offsetRange.size() ? (int)offsetRange[k] : 1));
if (m_fromOffsetEnd[k] <= 0)
m_recurrenceDirections.push_back(-1);
else if (m_fromOffsetBegin[k] > 0)
m_recurrenceDirections.push_back(+1);
else
m_recurrenceDirections.push_back(0);
anyNonRecurrent |= m_recurrenceDirections[k] == 0;
}
if (anyNonRecurrent)
m_recurrenceDirections.clear();
CreateMatrixIfNull(m_value);
SetDims(TensorShape(), 0); // empty for now
}
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name) :
ShiftNode(deviceId, name, std::vector<int> { 1 }, -1, std::vector<size_t> { 1 }, 0)
{ }
ShiftNode(const ScriptableObjects::IConfigRecordPtr configp) :
ShiftNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"fromOffset"), configp->Get(L"dim"), configp->Get(L"offsetRange"), configp->Get(L"multiOffsetDim"))
{
// We do NOT attach the inputs, as we cannot resolve the main input without causing a circular reference.
// Instead, we capture them in a lambda, which will be called by ComputationNetwork during the build process through LateAttachInputs() below.
// This is a contract between ComputationNetwork and this specific node type.
// (TODO: We could force-evaluate the boundary input here.)
m_attachInputsFn = [this, configp]() // This is the lambda to complete the process. Note that configp is captured as a shared_ptr.
{
AttachInputs(GetInputsFromConfig(configp)); // this is executed by network builder while iterating the nodes
};
}
virtual void /*ILateAttachingNode::*/LateAttachInputs() override final
{
m_attachInputsFn();
m_attachInputsFn = [](){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
}
public:
void Save(File& fstream) const
{
Base::Save(fstream);
fstream << m_fromOffsetBegin;
fstream << m_fromOffsetEnd;
fstream << m_shiftDimension;
fstream << m_expandDimension;
fstream << m_recurrenceDirections;
}
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::Load(fstream, modelVersion);
fstream >> m_fromOffsetBegin;
fstream >> m_fromOffsetEnd;
fstream >> m_shiftDimension;
fstream >> m_expandDimension;
fstream >> m_recurrenceDirections;
}
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
{
assert(inputIndex == 0); inputIndex;
fr;
}
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override {return false; }
virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
{
Base::EndForwardProp();
// In BPTT, we carry over left-to-right state across minibatches.
// TODO: package up the state using ExportState(). Then in BeginForwardProp() bring it back. In-between, the packages can be moved around.
}
// This function assumes BeginForwardProp/EndForwardProp() to be called before/after the iteration loop.
// TODO: In the future, there may be value for one more way of handling the boundary condition: Fill as 'NoInput'. Then we can use this to implement rolling windows (albeit inefficiently). Would require to unshare the layout.
virtual void ForwardProp(const FrameRange & fr) override
{
fr;
}
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
{
assert(m_inputs.size() == 2);
ComputationNodeBase::Validate(isFinalValidationPass);
if (isFinalValidationPass)
sin(1.0f);
// MBLayout is just inherited
m_pMBLayout = Input(0)->GetMBLayout();
if (isFinalValidationPass && !m_pMBLayout)
InvalidArgument("%ls %ls operation must operate on data (must have an MB Layout).", NodeName().c_str(), OperationName().c_str());
// determine expandShape--empty if no multiple offsets; otherwise the 1 or more dimensions that need to be added at m_expandDimension
m_expandShape.clear();
for (size_t k = 0; k < m_fromOffsetBegin.size(); k++)
{
size_t dim = m_fromOffsetEnd[k] - m_fromOffsetBegin[k];
if (dim > 1)
{
m_expandShape.resize(k, 1);
m_expandShape.push_back(dim);
}
}
if (!m_expandShape.empty())
m_expandShape.resize(m_fromOffsetBegin.size(), 1); // pad ones to end
// now it either matches the dimensions to insert, or is empty if none to append
// determine final sample layout
auto inputSampleLayout = Input(0)->GetSampleLayout();
auto inputDims = inputSampleLayout.GetDims();
if (m_expandDimension < 0)
InvalidArgument("%ls %ls operation: Specified insertion location %d refers to a time dimension, but this is not allowed.",
NodeName().c_str(), OperationName().c_str(), m_expandDimension);
m_insertExpandShapeAt = m_expandShape.empty() ? 0 : (m_expandDimension > 0 ? m_expandDimension - 1 : inputDims.size());
if (m_insertExpandShapeAt > inputDims.size())
if (isFinalValidationPass)
InvalidArgument("%ls %ls operation: Specified insertion location %d beyond end of input sample layout [%s].",
NodeName().c_str(), OperationName().c_str(), m_expandDimension, string(inputSampleLayout).c_str());
else
m_insertExpandShapeAt = inputDims.size(); // this may be an error, but we want to catch that only in the final pass
SmallVector<size_t> dims;
if (!m_expandShape.empty() && inputDims.size() + m_expandShape.size() > dims.capacity())
InvalidArgument("%ls %ls operation: Too many dimensions. Did you feed back output of this node without stripping the extra dimensions?",
NodeName().c_str(), OperationName().c_str());
dims.append(inputDims.begin(), inputDims.begin() + m_insertExpandShapeAt);
dims.append(m_expandShape.begin(), m_expandShape.end());
dims.append(inputDims.begin() + m_insertExpandShapeAt, inputDims.end());
auto sampleLayout = TensorShape(dims);
SetDims(sampleLayout, 0);
}
// special interface for use by loop detection
virtual const std::vector<int> & /*IRecurrentNode::*/GetRecurrenceDirections() const override
{
return m_recurrenceDirections;
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<ShiftNode<ElemType>>(nodeP);
node->m_fromOffsetBegin = m_fromOffsetBegin;
node->m_fromOffsetEnd = m_fromOffsetEnd;
node->m_recurrenceDirections = m_recurrenceDirections;
node->m_shiftDimension = m_shiftDimension;
node->m_expandDimension = m_expandDimension;
node->m_expandShape = m_expandShape;
node->m_insertExpandShapeAt = m_insertExpandShapeAt;
node->m_state = m_state;
}
}
class ShiftNodeState : public INodeState
{
Matrix<ElemType> m_delayedActivation; // saves the activation of the previous step that this node points to
};
typedef std::shared_ptr<ShiftNodeState> ShiftNodeStatePtr;
// state export/import
// This is done with a shared_ptr. The moment state is exported, the internal state is cleared; ownership is transferred to the exporting entity.
// This way, the next invocation does not overwrite the exported state, but is required to create a new one if needed.
// On the other hand, once imported, the state object is owned by the node and will be overwritten with the next state.
virtual NodeStatePtr ExportState() { return std::move(m_state); }
virtual void ImportState(NodeStatePtr && state) override
{
m_state = dynamic_pointer_cast<ShiftNodeState>(state);
if (state && !m_state)
LogicError("ImportState: Wrong state object passed (wrong type).");
}
protected:
// parameters remembered from construction
std::vector<int> m_fromOffsetBegin; // offset to pull from; first offset in case of offset range
std::vector<int> m_fromOffsetEnd; // end of offset range
int m_shiftDimension; // dimension to shift (default: time)
int m_expandDimension; // in case of offset range, this is where a new dimension will be inserted
// derived params set up in Validate()
SmallVector<size_t> m_expandShape; // offsetEnd-offsetBegin if >1 offset in any dimension; empty otherwise
size_t m_insertExpandShapeAt; // at which dimension to insert (internal 0-based index)
std::vector<int> m_recurrenceDirections; // for GetRecurrenceDirections()
ShiftNodeStatePtr m_state; // saves the activation of the previous step that this node points to
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
};
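The constructor logic above, which derives m_fromOffsetEnd and m_recurrenceDirections from fromOffset and offsetRange, implements the rule stated in the comment block: a range lying entirely in the past gives direction -1, a range entirely in the future gives +1, and any range touching offset 0 makes the node non-recurrent. A self-contained sketch of just that derivation (illustrative names, not part of the sources):

#include <cstdio>
#include <vector>

// Per-dimension recurrence directions as derived in the ShiftNode constructor above;
// offsetRange defaults to 1 for dimensions it does not cover.
std::vector<int> RecurrenceDirectionsSketch(const std::vector<int>& fromOffset,
                                            const std::vector<size_t>& offsetRange)
{
    std::vector<int> dirs;
    bool anyNonRecurrent = false;
    for (size_t k = 0; k < fromOffset.size(); k++)
    {
        int end = fromOffset[k] + (k < offsetRange.size() ? (int)offsetRange[k] : 1);
        if (end <= 0)               dirs.push_back(-1); // range lies entirely in the past
        else if (fromOffset[k] > 0) dirs.push_back(+1); // range lies entirely in the future
        else                        dirs.push_back(0);  // range includes the current frame
        anyNonRecurrent |= dirs[k] == 0;
    }
    if (anyNonRecurrent)
        dirs.clear(); // such a node cannot participate in a recurrent loop
    return dirs;
}

int main()
{
    printf("%zu\n", RecurrenceDirectionsSketch({ -1 }, { 1 }).size()); // 1: plain past value, recurrent
    printf("%zu\n", RecurrenceDirectionsSketch({ -1 }, { 3 }).size()); // 0: window spans offset 0, non-recurrent
    return 0;
}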
// -----------------------------------------------------------------------
// DelayedValueNodeState -- helper class for exporting/importing state from/to DelayedValueNodes.
// This is used for sub-minibatching in case of truncated BPTT.
@ -76,12 +326,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// DelayedValueNodeBase (input) -- abstract base class for PastValueNode and FutureValueNode to hold all shared code
// The two differ in the step direction, some loop directions, and sequence-boundary flags.
// This is an old node which will be replaced by ShiftNode (with Past/FutureValueNode being emulated).
//
// This is planned:
// - carrying over state at sentence boundaries from other nodes (for s2s)
// - ranges of neighbor frames as a secondary tensor dimension (i.e. can be used to implement a rolling window)
// - full support/efficiency of non-recurrent use (in which case the range can be from negative to positive, e.g. a symmetric rolling window)
// - denoting which tensor dimension to loop over (this may not be completed, but I will plant a seed)
// - support for Yongqiang's sub-minibatching with BPTT (export/import state)
// - more efficient storage of carried-over state (only store the needed frames, not a full copy of the previous MB as currently; which will on the other hand also allow windows that reach back beyond a minibatch)
// -----------------------------------------------------------------------
// TODO: 'direction' is really too general. signOfTimeOffset?
template<class ElemType, int direction/*-1 for Past/left-to-right or +1 for Future/right-to-left*/ /*, MinibatchPackingFlags SequenceStart_or_End/*-Start or -End*/>
class DelayedValueNodeBase : public ComputationNode<ElemType>, public
ILateAttachingNode, public IStateFulNode, public NumInputs<1>
class DelayedValueNodeBase : public ComputationNode<ElemType>, public IRecurrentNode,
public ILateAttachingNode, public IStatefulNode, public NumInputs<1>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
typedef std::shared_ptr<DelayedValueNodeState<ElemType>> DelayedNodeStatePtr;
@ -91,9 +350,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
m_initialActivationValue = initialActivationValue;
m_timeStep = 1;
m_recurrenceDirections.push_back(direction);
CreateMatrixIfNull(m_value);
SetDims(sampleLayout, 0); // TODO: needed? Can we not infer it? How about setting a sample layout?
m_isHistoryCarryOverManagedExternally = false; // used for PairNetworkNode/PastValueNode combination, which is deprecated
SetDims(sampleLayout, 0);
m_value->SetValue(m_initialActivationValue); // is this needed?
}
protected:
@ -139,7 +398,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void Load(File& fstream, size_t modelVersion) override
{
// the node has already been initialized e.g. w.r.t. direction and sequence flags
// the node has already been initialized e.g. w.r.t. direction
Base::Load(fstream, modelVersion);
fstream >> m_timeStep;
@ -155,63 +414,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> m_initialActivationValue;
}
#if 0
private:
// cache a post-processed version of m_pMBLayout (depends on the actual minibatch)
// This post-processed layout has its bits spread out over m_timeStep, to help detect if we'd hop across a boundary.
void CacheMBLayout()
{
if (m_timeStep <= 0)
LogicError("timeStep should be 1 or larger");
m_pShiftedMBLayout->CopyFrom(m_pMBLayout); // it gets modified below
if (m_timeStep == 1)
return;
#if 1
LogicError("CacheMBLayout: m_timeStep > 1 temporarily disabled until MBLayout update completed.");
#else
// modify m_pShiftedMBLayout
// If two utterances are packed together (S: start, E: end, N: no input) and we need to get values 2 steps in the past
// S X X X E S X X X X E N N
// then this becomes
// S S X X E S S X X X E N N
size_t numSeq = GetNumParallelSequences();
// each row has a number to indicate how many values should be reset for that utterance
// TODO: This algorithm is not obvious and should be explained. E.g. how come it is direction independent?
vector<int> numResetLeft(numSeq, 0);
for (size_t i = 0; i < GetNumTimeSteps(); i++) // i = frame index (time)
{
if (m_pMBLayout->Is(i, SequenceStart_or_End | MinibatchPackingFlags::NoFeature))
{
// we set timeStep-1 elements following it to be SequenceStart until met NoInput
for (size_t j = 0; j < numSeq; j++) // j = stream
{
// we use & since ((int) MinibatchPackingFlags::SequenceStart) may come with NoLabel
if (m_pMBLayout->Is(j, i, SequenceStart_or_End))
numResetLeft[j] = m_timeStep;
else if (m_pMBLayout->Is(j, i, MinibatchPackingFlags::NoFeature))
numResetLeft[j] = 0;
}
}
// now set the sequence-boundary flag
for (size_t j = 0; j < numSeq; j++)
{
if (numResetLeft[j]-- > 0)
{
m_pShiftedMBLayout->Mask(j, i, MinibatchPackingFlags::NoLabel); // keep only this flag
m_pShiftedMBLayout->Set(j, i, SequenceStart_or_End); // now implant the boundary flag
}
}
}
#endif
}
public:
#endif
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
{
assert(inputIndex == 0); inputIndex;
@ -283,12 +485,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return false;
}
//virtual void BeginForwardProp() override // called before first iteration step of ForwardProp()
//{
// Base::BeginForwardProp();
// CacheMBLayout();
//}
virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
{
// In BPTT, we carry over left-to-right state across minibatches.
@ -299,12 +495,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// - we don't need to keep anything if all sequences are closed (sentence end)
// This condition includes full-sequence mode.
// TODO: Can we optimize this and only copy if there is a sequence spanning across the end of the MB? And add a check to BeginForwardProp() to make sure we got one if there is a boundary at the start?
if (!m_isHistoryCarryOverManagedExternally) // means it's externally managed (for PairNetworkNode)
{
m_delayedActivation = Input(0)->Value();
if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
m_delayedActivationMBLayout->CopyFrom(m_pMBLayout);
}
Base::EndForwardProp();
}
@ -350,6 +543,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
for (size_t id = 0; id < GetNumParallelSequences(); id++)
{
if (m_pMBLayout->IsGap(fr.Sequence(id))) // if output is in a gap then don't bother filling it
continue;
Matrix<ElemType> out = ValueFor(fr.Sequence(id));
//assert(m_pShiftedMBLayout->Is(id, t, SequenceStart_or_End) == m_pMBLayout->IsBeyondStartOrEnd(frDelayed.Sequence(id)));
@ -391,34 +587,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ValidateUnaryMap(isFinalValidationPass);
}
// this function is only used for PairNetworkNode (on PastValueNode)
// BUGBUG: Need to transfer the layout as well. PairNetworkNode will go away.
bool GetHistory(Matrix<ElemType>& hist, bool)
// special interface for use by loop detection
virtual const std::vector<int> & /*IRecurrentNode::*/GetRecurrenceDirections() const override
{
DEVICEID_TYPE device = hist.GetDeviceId();
hist.TransferFromDeviceToDevice(device, m_deviceId, true);
hist.SetValue(Input(0)->Value());
hist.TransferFromDeviceToDevice(m_deviceId, device, true);
return true;
}
// this function is only used for PairNetworkNode (on PastValueNode)
void SetHistory(const Matrix<ElemType>& hist)
{
DEVICEID_TYPE device = hist.GetDeviceId();
hist.TransferFromDeviceToDevice(device, m_deviceId, true);
m_delayedActivation.SetValue(hist);
m_isHistoryCarryOverManagedExternally = true;
hist.TransferFromDeviceToDevice(m_deviceId, device, true);
// need a layout as well
// ForwardProp() expects it to have the same number of parallel sequences.
if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
m_delayedActivationMBLayout->Init(GetNumParallelSequences(), hist.GetNumCols() / GetNumParallelSequences());
return m_recurrenceDirections;
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -434,15 +606,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
(node->m_delayedActivationMBLayout = make_shared<MBLayout>())->CopyFrom(m_delayedActivationMBLayout);
else
node->m_delayedActivationMBLayout = nullptr;
node->m_isHistoryCarryOverManagedExternally = false;
}
}
//========================================
// implement the IStateFulNode interface
//========================================
virtual NodeStatePtr ExportState()
virtual NodeStatePtr /*IStatefulNode::*/ExportState() override
{
NodeStatePtr pExportedState;
size_t nT = m_pMBLayout->GetNumTimeSteps();
@ -530,7 +697,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
return pExportedState;
}
virtual void ImportState(const NodeStatePtr& pImportedState) override
virtual void /*IStatefulNode::*/ImportState(NodeStatePtr && pImportedState) override
{
DelayedNodeStatePtr pState = dynamic_pointer_cast<DelayedValueNodeState<ElemType>> (pImportedState);
@ -561,7 +729,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{// it is really a compile error ?
RuntimeError("Unrecognized direction in DelayedValueNodeBase");
}
}
protected:
@ -569,14 +736,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> m_delayedActivation; // saves the activation of the previous step that this node points to
MBLayoutPtr m_delayedActivationMBLayout; // layout for m_delayedActivation
int m_timeStep; // delay in frames (typ. 1)
//MBLayoutPtr m_pShiftedMBLayout; // individual sentence boundary information --TODO: do we actually need this separate variable?
bool m_isHistoryCarryOverManagedExternally; // for PastValueNode only
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
std::vector<int> m_recurrenceDirections; // for GetRecurrenceDirections()
};
#define UsingDelayedValueNodeMembers UsingComputationNodeMembersBoilerplate; \
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep; \
/*using Base::m_pShiftedMBLayout;*/ using Base::m_isHistoryCarryOverManagedExternally;
using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep;
// -----------------------------------------------------------------------
// PastValueNode (input) -- delay node
@ -606,7 +771,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class PastValueNode<float>;
template class PastValueNode<double>;
// -----------------------------------------------------------------------
// FutureValueNode (input) -- delay node in future direction
// -----------------------------------------------------------------------

View file

@ -5621,7 +5621,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#pragma omp parallel for
for (int k = 0; k < (int)K; k++)
TensorOpIteration<ElemType, OPFN, 3, true/*vectorizable*/, -1/*no reduction*/, -1/*scalar*/>::Loop(0, array<ElemType*, 3> { pa + k, pb + k, pc + k }, 1, opfn, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
// TODO: somehow this does not use 4-way parallelism with SSE (VS 2013), and the signedness of k (required for omp) causes an extra sign-extend
// TODO: According to Amit, the VS compiler is not able to vectorize into lambdas. Solution: change the lambda to take an N, or to implement the loop inside (with 1 element by default).
// TODO: The signedness of k (required for omp) causes an extra sign-extend.
// TODO: OMP adds LOTS of overhead. Do we need a guard, a min size when to use it?
}
};
@ -5737,6 +5738,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const SmallVector<size_t> & regularOpDims, const array<SmallVector<ptrdiff_t>, 2> & regularStrides,
const SmallVector<size_t> & reducingOpDims, const array<SmallVector<ptrdiff_t>, 2> & reducingStrides)
{
// TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
#define CaseUnaryTensorOp(oper) \
case ElementWiseOperator::op ## oper: \
return TensorOpWithFn(beta, pointers, alpha, [](const array<ElemType*, 2> & pp) { return Op ## oper((*(pp[0]))); }, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides)

View file

@ -46,7 +46,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t batchSize = inT.n();
size_t maxTempMemSizeInSamples = (m_maxTempMemSizeInSamples == 0 ? batchSize : m_maxTempMemSizeInSamples);
assert(filter.GetNumCols() == packedInputRows && filter.GetNumRows() == outT.c());
assert(filter.GetNumCols() == packedInputRows && filter.GetNumRows() == outT.c()); UNUSED(packedInputRows);
// GPU and 1-dimensional image
bool gpuSparse1D = (inT.h() == 1 &&
@ -100,7 +100,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Mat outputSubBatch = out.ColumnSlice(outputSizePerChannel * startSampleId, outputSizePerChannel * smallBatchSize);
workspace.Resize(packedInputRows, packedInputColsPerSample * smallBatchSize);
//workspace.Resize(packedInputRows, packedInputColsPerSample * smallBatchSize);
// BUGBUG: This ^^ destroys the content of the matrix. Also it seems not to change the size. Does it? Should this be a Reshape()?
Mat::Multiply(filter, false, workspace, false, outputSubBatch);
}
}
@ -454,8 +455,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else if (engType == EngineType::Legacy)
{
// REVIEW alexeyk: temp hack to allow this to work in MEL scenarios. InvalidArgument should be used instead.
if (imageLayoutKind != ImageLayoutKind::HWC)
InvalidArgument("ConvolutionEngineFactory: ImageLayout '%s' is not compatible with the legacy convolution engine.", ToString(imageLayoutKind).c_str());
fprintf(stderr, "WARNING: trying to use cuDNN on an unsupported platform. It is safe to ignore this warning if it is produced during a model editing command.\n");
//InvalidArgument("ConvolutionEngineFactory: ImageLayout '%s' is not compatible with the legacy convolution engine.", ToString(imageLayoutKind).c_str());
return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>();
}

View file

@ -378,7 +378,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (tid < i && tid + i < tids) accumulators[tid] += accumulators[tid + i];
if (0 + i < tids) __syncthreads(); // sync if condition true for at least one thread
// TODO: use volatile* and then we can skip the __syncthreads() for the last 32 values
// TODO: use volatile* and then we can skip the __syncthreads() for the last 32 values. See Amit's allreduce() function implementation in MatrixQuantizer_kernel.cu.
}
// now set final value to output coordinate

View file

@ -230,12 +230,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::map<wstring, vector<shared_ptr<INodeState>>> m_NetStates; // m_NetStatefulNodes[node][i] caches the state of i-th subminibatch of node
bool m_hasLattices;
Matrices m_CachedGraident;
Matrices m_cachedGradient;
// we also need to remember where to put into the net
MBLayoutPtr m_NetMBLayoutPtr;
std::map<wstring, shared_ptr<ComputationNode<ElemType>>> m_LearnableNodePtr;
// followings are lattice-related
Matrices m_NetInputMatrixPtr;
Matrices m_NetInputMatrixPtr; // TODO: camelCase for all m_Net...
LatticePtr m_NetLatticePtr;
UidPtr m_NetUidPtr;
ExtrauttMapPtr m_NetExtrauttMapPtr;
@ -248,18 +248,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetCriterionNodes;
std::vector<shared_ptr<ComputationNode<ElemType>>> m_NetEvaluationNodes;
std::map<wstring, shared_ptr<IStateFulNode>> m_NetStatefulNodes; // we need to Export/Import states of stateful nodes when we switch subminibatches
std::map<wstring, shared_ptr<IStatefulNode>> m_NetStatefulNodes; // we need to Export/Import states of stateful nodes when we switch subminibatches
private:
void EnumerateStatefulNodeWithRoot(ComputationNetwork& net, ComputationNodeBasePtr root, std::map<wstring, shared_ptr<IStateFulNode>>& statefulnode)
void EnumerateStatefulNodeWithRoot(ComputationNetwork& net, ComputationNodeBasePtr root, std::map<wstring, shared_ptr<IStatefulNode>>& statefulnode)
{
const std::list<ComputationNodeBasePtr> evalorder = net.GetEvalOrder(root);
for (auto& x : evalorder)
{
wstring name = x->GetName();
if (statefulnode.find(name) != statefulnode.end()) continue; // already in the list
shared_ptr<IStateFulNode> pNode = dynamic_pointer_cast<IStateFulNode>(x);
shared_ptr<IStatefulNode> pNode = dynamic_pointer_cast<IStatefulNode>(x);
if (pNode)
{
statefulnode[name] = pNode;
@ -267,20 +267,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
std::map<wstring, shared_ptr<IStateFulNode>> EnumerateStatefulNode(ComputationNetwork& net,
std::map<wstring, shared_ptr<IStatefulNode>> EnumerateStatefulNode(ComputationNetwork& net,
const std::vector<ComputationNodeBasePtr>& criterionNode,
const std::vector<ComputationNodeBasePtr>& evaluationNode)
{
std::map<wstring, shared_ptr<IStateFulNode>> statefulnodes;
std::map<wstring, shared_ptr<IStatefulNode>> statefulNodes;
for (auto& root : criterionNode)
{
EnumerateStatefulNodeWithRoot(net, root, statefulnodes);
EnumerateStatefulNodeWithRoot(net, root, statefulNodes);
}
for (auto& root : evaluationNode)
{
EnumerateStatefulNodeWithRoot(net, root, statefulnodes);
EnumerateStatefulNodeWithRoot(net, root, statefulNodes);
}
return statefulnodes;
return statefulNodes;
}
public:
@ -353,7 +353,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delete x.second;
}
for (auto x : m_CachedGraident)
for (auto x : m_cachedGradient)
{
delete x.second;
}
@ -418,11 +418,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto funvalue = pLearnableNode->Value(); // gradient may not be allocated when this function is first called
size_t nrow = funvalue.GetNumRows();
size_t ncol = funvalue.GetNumCols();
if (m_CachedGraident.find(nodeName) == m_CachedGraident.end())
if (m_cachedGradient.find(nodeName) == m_cachedGradient.end())
{
// not allocated yet
m_CachedGraident[nodeName] = new Matrix<ElemType>(nrow, ncol, funvalue.GetDeviceId());
m_CachedGraident[nodeName]->SetValue((ElemType)0);
m_cachedGradient[nodeName] = new Matrix<ElemType>(nrow, ncol, funvalue.GetDeviceId());
m_cachedGradient[nodeName]->SetValue((ElemType)0);
}
}
}
@ -511,9 +511,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto& x : m_NetStatefulNodes)
{
wstring name = x.first;
shared_ptr<IStateFulNode> pNode = x.second;
shared_ptr<IStatefulNode> pNode = x.second;
if (m_NetStates[name][iSubminibatch])
pNode->ImportState(m_NetStates[name][iSubminibatch]);
pNode->ImportState(std::move(m_NetStates[name][iSubminibatch]));
}
}
@ -521,7 +521,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void DoneWithCurrentSubMinibatch(size_t iSubminibatch)
{
// accumulate gradient here
for (auto x : m_CachedGraident)
for (auto x : m_cachedGradient)
{
wstring nodename = x.first;
if (m_LearnableNodePtr.find(nodename) == m_LearnableNodePtr.end())
@ -529,7 +529,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RuntimeError("ERROR: in DoneWithCurrentSubMinibatch: node %ls not found in LearnableNode", nodename.c_str());
}
shared_ptr<ComputationNode<ElemType>> pNode = m_LearnableNodePtr[nodename];
m_CachedGraident[nodename]->operator+=(pNode->Gradient());
m_cachedGradient[nodename]->operator+=(pNode->Gradient());
pNode->Gradient().SetValue((ElemType)0);
}
// accumulate criterion value
@ -554,7 +554,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void DoneWithCurrentMinibatch()
{
for (auto& x : m_CachedGraident)
for (auto& x : m_cachedGradient)
{
wstring name = x.first;
Matrix<ElemType>* accumulategrad = x.second;
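The accumulation scheme used here spans the two calls: DoneWithCurrentSubMinibatch adds each learnable node's gradient into m_cachedGradient and zeroes the node's own gradient, and DoneWithCurrentMinibatch then works with the accumulated total (the hunk is cut off above), so the full-minibatch gradient is the sum over sub-minibatches. A toy, stand-alone sketch of that flow (plain vectors standing in for Matrix<ElemType>):

#include <vector>

int main()
{
    std::vector<float> nodeGradient(4, 0.0f);   // stands in for pNode->Gradient()
    std::vector<float> cachedGradient(4, 0.0f); // stands in for m_cachedGradient[nodeName]

    for (int subMb = 0; subMb < 3; subMb++)     // three sub-minibatches of one minibatch
    {
        for (auto& g : nodeGradient)
            g = 1.0f;                           // pretend backprop produced some gradient
        for (size_t i = 0; i < nodeGradient.size(); i++)
        {
            cachedGradient[i] += nodeGradient[i]; // DoneWithCurrentSubMinibatch: accumulate...
            nodeGradient[i] = 0.0f;               // ...and reset the per-sub-minibatch gradient
        }
    }
    // cachedGradient now holds the sum over all sub-minibatches (3.0 in every slot here),
    // which is what DoneWithCurrentMinibatch hands back to the learnable node.
    return 0;
}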

View file

@ -66,8 +66,12 @@ speechTrain = [
C(c) = DiagTimes(WeightParam(cellDim, 1), Stabilize(c)) // cell-to-hidden
// LSTM cell
dh = PastValue(outputDim, output); // hidden state(t-1)
dc = PastValue(cellDim, ct); // cell(t-1)
# TODO: This is temporary test code for the new ShiftNode (until we switch PastValue() itself over)
PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1, offsetRanges=1, multiOffsetDim=2)
PastValue1 = PastValue
#PastValue1 = PastValueShift
dh = PastValue1(outputDim, output); // hidden state(t-1)
dc = PastValue1(cellDim, ct); // cell(t-1)
// note: the W(inputx) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
it = Sigmoid(W(inputx) + B() + H(dh) + C(dc)) // input gate(t)