This commit is contained in:
Alexey Kamenev 2016-01-05 20:55:00 -08:00
Parent 3943020d86
Commit ad2c6bc37e
10 changed files with 97 additions and 90 deletions

View file

@ -19,6 +19,8 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
# comment the following line to write logs to the console
stderr = "$OutputDir$/01_OneHidden_out"
traceLevel=1
numMBsToShowResult=500
#######################################
# TRAINING CONFIG #
@ -63,6 +65,7 @@ train = [
test = [
action = "test"
minibatchSize = 16
NDLNetworkBuilder=[
networkDescription = "$ConfigDir$/01_OneHidden.ndl"

View file

@ -19,6 +19,10 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
# comment the following line to write logs to the console
stderr = "$OutputDir$/02_Convolution_out"
traceLevel=1
numMBsToShowResult=500
prefetch=true
#######################################
# TRAINING CONFIG #
@ -63,6 +67,7 @@ train = [
test = [
action = test
minibatchSize = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/02_Convolution.ndl"

View file

@ -1,20 +1,28 @@
WorkDir=.
ModelDir=$WorkDir$/_out/$ConfigName$
stderr=$WorkDir$/_out/$ConfigName$
RootDir = "."
ndlMacros=$WorkDir$/Macros.ndl
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
prefetch=true
command=Train:Test
stderr=$OutputDir$/01_Conv
traceLevel=1
numMBsToShowResult=500
Train=[
action=train
modelPath=$ModelDir$/01_Convolution
NDLNetworkBuilder=[
networkDescription=$WorkDir$/01_Convolution.ndl
networkDescription=$ConfigDir$/01_Convolution.ndl
]
SGD=[
@ -29,7 +37,7 @@ Train=[
reader=[
readerType=UCIFastReader
file=$WorkDir$/Train.txt
file=$DataDir$/Train.txt
randomize=None
features=[
dim=3072
@ -39,7 +47,7 @@ Train=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]
@ -48,15 +56,15 @@ Test=[
action=test
modelPath=$ModelDir$/01_Convolution
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=16
NDLNetworkBuilder=[
networkDescription=$WorkDir$/01_Convolution.ndl
networkDescription=$ConfigDir$/01_Convolution.ndl
]
reader=[
readerType=UCIFastReader
file=$WorkDir$/Test.txt
file=$DataDir$/Test.txt
randomize=None
features=[
dim=3072
@ -66,7 +74,7 @@ Test=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]

View file

@ -28,50 +28,50 @@ DNN=[
# conv1
kW1 = 5
kH1 = 5
cMap1 = 36
cMap1 = 32
hStride1 = 1
vStride1 = 1
# weight[cMap1, kW1 * kH1 * ImageC]
conv1_act = ConvReLULayer(featScaled, cMap1, ImageC, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
conv1_act = ConvReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
# pool1
pool1W = 3
pool1H = 3
pool1hStride = 2
pool1vStride = 2
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
# conv2
kW2 = 5
kH2 = 5
cMap2 = 28
cMap2 = 32
hStride2 = 1
vStride2 = 1
# weight[cMap2, kW2 * kH2 * cMap1]
conv2_act = ConvReLULayer(pool1, cMap2, cMap1, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
conv2_act = ConvReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
# pool2
pool2W = 3
pool2H = 3
pool2hStride = 2
pool2vStride = 2
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
# conv3
kW3 = 5
kH3 = 5
cMap3 = 68
cMap3 = 64
hStride3 = 1
vStride3 = 1
# weight[cMap3, kW3 * kH3 * cMap2]
conv3_act = ConvReLULayer(pool2, cMap3, cMap2, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
conv3_act = ConvReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
# pool3
pool3W = 3
pool3H = 3
pool3hStride = 2
pool3vStride = 2
pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride)
pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
hiddenDim = 64
h1 = DNNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)

View file

@ -1,37 +1,43 @@
WorkDir=.
ModelDir=$WorkDir$/_out/$ConfigName$
stderr=$WorkDir$/_out/$ConfigName$
RootDir = "."
ndlMacros=$WorkDir$/Macros.ndl
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros=$ConfigDir$/Macros.ndl
precision=float
deviceId=Auto
prefetch=true
parallelTrain=false
command=Train:AddBNEval:Test
stderr=$OutputDir$/02_BatchNormConv
traceLevel=1
numMBsToShowResult=500
Train=[
action=train
modelPath=$ModelDir$/02_BatchNormConv
NDLNetworkBuilder=[
networkDescription=$WorkDir$/02_BatchNormConv.ndl
networkDescription=$ConfigDir$/02_BatchNormConv.ndl
]
SGD=[
epochSize=49984
minibatchSize=64
learningRatesPerMB=0.03*7:0.01*8:0.003
#momentumPerMB=0.9*10:0.99
maxEpochs=10
#L2RegWeight=0.03
learningRatesPerMB=0.03*7:0.01
momentumPerMB=0
maxEpochs=1
L2RegWeight=0
dropoutRate=0*1:0.5
]
reader=[
readerType=UCIFastReader
file=$WorkDir$/Train.txt
file=$DataDir$/Train.txt
randomize=None
features=[
dim=3072
@ -41,7 +47,7 @@ Train=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]
@ -50,22 +56,22 @@ AddBNEval=[
action=edit
CurModel=$ModelDir$/02_BatchNormConv
NewModel=$ModelDir$/02_BatchNormConv.Eval
editPath=$WorkDir$/02_BatchNormConv.mel
editPath=$ConfigDir$/02_BatchNormConv.mel
]
Test=[
action=test
modelPath=$ModelDir$/02_BatchNormConv.Eval
# Set minibatch size for testing.
minibatchSize=128
minibatchSize=16
NDLNetworkBuilder=[
networkDescription=$WorkDir$/02_BatchNormConv.ndl
networkDescription=$ConfigDir$/02_BatchNormConv.ndl
]
reader=[
readerType=UCIFastReader
file=$WorkDir$/Test.txt
file=$DataDir$/Test.txt
randomize=None
features=[
dim=3072
@ -75,7 +81,7 @@ Test=[
dim=1
start=0
labelDim=10
labelMappingFile=$WorkDir$/labelsmap.txt
labelMappingFile=$DataDir$/labelsmap.txt
]
]
]

View file

@ -1,16 +1,16 @@
m=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m)
ibn_e = BatchNormalization(featScaled, isc, ib, im, iisd, eval = true, spatial = true)
ibn_e = BatchNormalization(featScaled, isc, ib, im, iisd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv1.c, 1, ibn_e)
conv2.bn_e = BatchNormalization(pool1, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true)
conv2.bn_e = BatchNormalization(pool1, conv2.sc, conv2.b, conv2.m, conv2.isd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv2.c, 1, conv2.bn_e)
conv3.bn_e = BatchNormalization(pool2, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true)
conv3.bn_e = BatchNormalization(pool2, conv3.sc, conv3.b, conv3.m, conv3.isd, eval = true, spatial = true, imageLayout = "cudnn")
SetNodeInput(conv3.c, 1, conv3.bn_e)
h1.bn_e = BatchNormalization(pool3, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false)
h1.bn_e = BatchNormalization(pool3, h1.sc, h1.b, h1.m, h1.isd, eval = true, spatial = false, imageLayout = "cudnn")
SetNodeInput(h1.t, 1, h1.bn_e)
SaveModel(m, $NewModel$, format=cntk)

View file

@ -7,8 +7,8 @@ ndlMnistMacros = [
ImageC = 3
LabelDim = 10
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
featOffs = Const(128, rows = 3072)
features = ImageInput(ImageW, ImageH, ImageC, tag = feature, imageLayout = "cudnn")
featOffs = Const(128)
featScaled = Minus(features, featOffs)
labels = Input(LabelDim, tag = label)
@ -18,6 +18,9 @@ ndlMnistMacros = [
conv2BValue = 0
conv3WScale = 1.414
conv3BValue = 0
scScale = 0.03
fc1WScale = 12
fc1BValue = 0
fc2WScale = 1.5
@ -25,12 +28,6 @@ ndlMnistMacros = [
]
DNN=[
ib = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
isc = Parameter(ImageC, 1, init = Uniform, initValueScale = 100)
im = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
iisd = Parameter(ImageC, 1, init = fixedValue, value = 0, needGradient = false)
ibn = BatchNormalization(featScaled, isc, ib, im, iisd, eval = false, spatial = true)
# conv1
kW1 = 5
kH1 = 5
@ -38,14 +35,14 @@ DNN=[
hStride1 = 1
vStride1 = 1
# weight[cMap1, kW1 * kH1 * ImageC]
conv1 = ConvReLULayer(ibn, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
conv1 = ConvBNReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue, scScale)
# pool1
pool1W = 3
pool1H = 3
pool1hStride = 2
pool1vStride = 2
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride)
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout = "cudnn")
# conv2
kW2 = 5
@ -54,14 +51,14 @@ DNN=[
hStride2 = 1
vStride2 = 1
# weight[cMap2, kW2 * kH2 * cMap1]
conv2 = ConvBNReLULayer(pool1, cMap1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
conv2 = ConvBNReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue, scScale)
# pool2
pool2W = 3
pool2H = 3
pool2hStride = 2
pool2vStride = 2
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride)
pool2 = MaxPooling(conv2, pool2W, pool2H, pool2hStride, pool2vStride, imageLayout = "cudnn")
# conv3
kW3 = 5
@ -70,14 +67,14 @@ DNN=[
hStride3 = 1
vStride3 = 1
# weight[cMap3, kW3 * kH3 * cMap2]
conv3 = ConvBNReLULayer(pool2, cMap2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
conv3 = ConvBNReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue, scScale)
# pool3
pool3W = 3
pool3H = 3
pool3hStride = 2
pool3vStride = 2
pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride)
pool3 = MaxPooling(conv3, pool3W, pool3H, pool3hStride, pool3vStride, imageLayout = "cudnn")
hiddenDim = 64
h1 = DnnBNReLULayer(576, hiddenDim, pool3, fc1WScale, fc1BValue)

View file

@ -1,83 +1,71 @@
ConvReLULayer(inp, outMap, inMap, kW, kH, hStride, vStride, wScale, bValue)
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
W = ImageParameter(kW, kH, inMap, init = Gaussian, initValueScale = wScale, imageLayout = "cudnn")
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = ImageParameter(1, 1, outMap, init = fixedValue, value = bValue, imageLayout = "cudnn")
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
p = Plus(c, b);
y = RectifiedLinear(p);
}
ConvBNReLULayer(inp, inMap, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
sc = Parameter(inMap, 1, init = Gaussian, initValueScale = 0.03)
m = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(inMap, 1, init = fixedValue, value = 0, needGradient = false)
bn = BatchNormalization(inp, sc, b, m, isd, eval = false, spatial = true)
c = Convolution(W, bn, kW, kH, outMap, hStride, vStride, zeroPadding = true)
y = RectifiedLinear(c);
}
ConvBNReLULayer2(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scValue)
ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue, scScale)
{
W = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0)
c = Convolution(W, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true, imageLayout = "cudnn")
bn = BatchNormalization(c, sc, b, m, isd, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y = RectifiedLinear(bn);
}
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scValue)
ResNetNode2(inp, outMap, inWCount, kW, kH, wScale, bValue, scScale)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
c1 = Convolution(W1, inp, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
p = Plus(bn2, inp)
y2 = RectifiedLinear(p);
}
ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, Wproj)
ResNetNode2Conv(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scScale, Wproj)
{
W1 = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
b1 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc1 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd1 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true)
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0)
c1 = Convolution(W1, inp, kW, kH, outMap, 2, 2, zeroPadding = true, imageLayout = "cudnn")
bn1 = BatchNormalization(c1, sc1, b1, m1, isd1, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
y1 = RectifiedLinear(bn1);
W2 = Parameter(outMap, wCount, init = Gaussian, initValueScale = wScale)
b2 = Parameter(outMap, 1, init = fixedValue, value = bValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scValue)
sc2 = Parameter(outMap, 1, init = Gaussian, initValueScale = scScale)
m2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
isd2 = Parameter(outMap, 1, init = fixedValue, value = 0, needGradient = false)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true)
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0)
c2 = Convolution(W2, y1, kW, kH, outMap, 1, 1, zeroPadding = true, imageLayout = "cudnn")
bn2 = BatchNormalization(c2, sc2, b2, m2, isd2, eval = false, spatial = true, expAvgFactor = 1.0, imageLayout = "cudnn")
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false)
cproj = Convolution(Wproj, inp, 1, 1, outMap, 2, 2, zeroPadding = false, imageLayout = "cudnn")
p = Plus(bn2, cproj)
y2 = RectifiedLinear(p);
}

View file

@ -15,7 +15,7 @@ Short description of the network:
01_Convolution.ndl is a convolutional network which has 3 convolutional and 3 max pooling layers and resembles the network described here:
https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-80sec.cfg
(main differences are usage of max pooling layers everywhere rather than mix of max and average pooling, as well as dropout in fully-connected layer).
The network produces 22% of error after training for about 4 minutes on GPU.
The network produces 21% of error after training for about 3 minutes on GPU.
To run the sample, navigate to this folder and run the following command:
<path to CNTK executable> configFile=01_Conv.config configName=01_Conv

View file

@ -152,7 +152,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(SparseInputValue), L"SparseInput"))
ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LearnableParameter), L"Parameter"), L"ImageParameter")
else if (EqualInsensitive(nodeType, OperationNameOf(LearnableParameter), L"Parameter"))
ret = true;
else if (EqualInsensitive(nodeType, L"ImageParameter"))
ret = true;