Merge remote-tracking branch 'origin/master' into mahilleb/CuDnn5Test

Conflicts:
	Source/ComputationNetworkLib/ComputationNode.h
	Source/ComputationNetworkLib/TrainingNodes.h
	Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv/baseline.linux.txt
	Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv/baseline.windows.txt
	Tests/UnitTests/MathTests/ConvolutionEngineTests.cpp
Mark Hillebrand 2016-08-25 00:29:10 +02:00
Parent 493744d922 9d6345b2fa
Commit 0285fa9a13
426 changed files with 221026 additions and 715675 deletions

View File

@ -31,6 +31,9 @@
<HasOpenCv>false</HasOpenCv>
<HasOpenCv Condition="Exists('$(OPENCV_PATH)') Or Exists('$(OPENCV_PATH_V31)')">true</HasOpenCv>
<HasBoost>false</HasBoost>
<HasBoost Condition="Exists('$(BOOST_INCLUDE_PATH)') And Exists('$(BOOST_LIB_PATH)')">true</HasBoost>
<UseZip>false</UseZip>
<UseZip Condition="Exists('$(ZLIB_PATH)')">true</UseZip>

View File

@ -1156,6 +1156,72 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\U
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tutorials", "Tutorials", "{8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ImageHandsOn", "ImageHandsOn", "{2230BF3D-4317-4A3F-A743-DDD6160503F8}"
ProjectSection(SolutionItems) = preProject
Tutorials\ImageHandsOn\ImageHandsOn.cntk = Tutorials\ImageHandsOn\ImageHandsOn.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLUHandsOn", "SLUHandsOn", "{CC143D08-567D-4DAC-9E14-264749C19039}"
ProjectSection(SolutionItems) = preProject
Tutorials\SLUHandsOn\SLUHandsOn.cntk = Tutorials\SLUHandsOn\SLUHandsOn.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Pretrained Models", "Pretrained Models", "{0ED2EE97-0A26-4865-871F-11033867BA34}"
ProjectSection(SolutionItems) = preProject
Tutorials\ImageHandsOn\cifar10.pretrained.cmf = Tutorials\ImageHandsOn\cifar10.pretrained.cmf
Tutorials\ImageHandsOn\cifar10.ResNet.cmf = Tutorials\ImageHandsOn\cifar10.ResNet.cmf
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solutions", "Solutions", "{A2A4893C-0D5B-42E2-BFAD-C123AE7FDAFD}"
ProjectSection(SolutionItems) = preProject
Tutorials\ImageHandsOn\ImageHandsOn_Solution1.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution1.cntk
Tutorials\ImageHandsOn\ImageHandsOn_Solution2.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution2.cntk
Tutorials\ImageHandsOn\ImageHandsOn_Solution3.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution3.cntk
Tutorials\ImageHandsOn\ImageHandsOn_Solution4.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution4.cntk
Tutorials\ImageHandsOn\ImageHandsOn_Solution5.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution5.cntk
Tutorials\ImageHandsOn\ImageHandsOn_Task4_Start.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Task4_Start.cntk
Tutorials\ImageHandsOn\ImageHandsOn_Task6.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Task6.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{8CFBD0DB-5F16-48E6-984C-4401317FA10E}"
ProjectSection(SolutionItems) = preProject
Tutorials\SLUHandsOn\atis.test.ctf = Tutorials\SLUHandsOn\atis.test.ctf
Tutorials\SLUHandsOn\atis.train.ctf = Tutorials\SLUHandsOn\atis.train.ctf
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solutions", "Solutions", "{BD7FF8C0-EC3A-49CD-9D81-4A8A29B8AD8E}"
ProjectSection(SolutionItems) = preProject
Tutorials\SLUHandsOn\SLUHandsOn_Solution1.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution1.cntk
Tutorials\SLUHandsOn\SLUHandsOn_Solution2.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution2.cntk
Tutorials\SLUHandsOn\SLUHandsOn_Solution3.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution3.cntk
Tutorials\SLUHandsOn\SLUHandsOn_Solution4.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution4.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Pretrained Models", "Pretrained Models", "{4727594B-A052-4834-B0E8-57DBB9ADEF13}"
ProjectSection(SolutionItems) = preProject
Tutorials\SLUHandsOn\slu.forward.backward.cmf = Tutorials\SLUHandsOn\slu.forward.backward.cmf
Tutorials\SLUHandsOn\slu.forward.cmf = Tutorials\SLUHandsOn\slu.forward.cmf
Tutorials\SLUHandsOn\slu.forward.lookahead.cmf = Tutorials\SLUHandsOn\slu.forward.lookahead.cmf
Tutorials\SLUHandsOn\slu.forward.nobn.cmf = Tutorials\SLUHandsOn\slu.forward.nobn.cmf
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{4A59163B-1EDE-4439-9E7D-40A30B82A3A0}"
ProjectSection(SolutionItems) = preProject
Tutorials\ImageHandsOn\CifarConverter.py = Tutorials\ImageHandsOn\CifarConverter.py
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TIMIT", "TIMIT", "{B586AA4C-0BB9-4629-9EDA-25FF2618AC9F}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TrainSimpleNetwork", "TrainSimpleNetwork", "{C2102C39-BF5F-4B12-9C41-849D1ED35EE8}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.linux.txt = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.linux.txt
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.windows.txt = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.windows.txt
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\run-test = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\run-test
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\testcases.yml = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\testcases.yml
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@ -1601,5 +1667,15 @@ Global
{1C6E6C53-1AA7-4B69-913E-B97BB5A872CF} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
{CCC07E8E-F33A-4AF7-9F60-93E2AA61C75E} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
{9F999212-AFC5-4EAC-AA78-F7247D46C456} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{2230BF3D-4317-4A3F-A743-DDD6160503F8} = {8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}
{CC143D08-567D-4DAC-9E14-264749C19039} = {8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}
{0ED2EE97-0A26-4865-871F-11033867BA34} = {2230BF3D-4317-4A3F-A743-DDD6160503F8}
{A2A4893C-0D5B-42E2-BFAD-C123AE7FDAFD} = {2230BF3D-4317-4A3F-A743-DDD6160503F8}
{8CFBD0DB-5F16-48E6-984C-4401317FA10E} = {CC143D08-567D-4DAC-9E14-264749C19039}
{BD7FF8C0-EC3A-49CD-9D81-4A8A29B8AD8E} = {CC143D08-567D-4DAC-9E14-264749C19039}
{4727594B-A052-4834-B0E8-57DBB9ADEF13} = {CC143D08-567D-4DAC-9E14-264749C19039}
{4A59163B-1EDE-4439-9E7D-40A30B82A3A0} = {2230BF3D-4317-4A3F-A743-DDD6160503F8}
{B586AA4C-0BB9-4629-9EDA-25FF2618AC9F} = {FB7AF7B9-6BEA-459F-94D9-94D53916D2B6}
{C2102C39-BF5F-4B12-9C41-849D1ED35EE8} = {B586AA4C-0BB9-4629-9EDA-25FF2618AC9F}
EndGlobalSection
EndGlobal

View File

@ -259,7 +259,7 @@ CE=CrossEntropyWithSoftmax(labels, Plus2)
\begin_layout Plain Layout
ErrPredict=ErrorPrediction(labels, Plus2)
ErrPredict=ClassificationError(labels, Plus2)
\end_layout
\begin_layout Plain Layout
@ -616,7 +616,7 @@ CE=CrossEntropyWithSoftmax(labels, Plus2)
\begin_layout Plain Layout
ErrPredict=ErrorPrediction(labels, Plus2)
ErrPredict=ClassificationError(labels, Plus2)
\end_layout
\end_inset
@ -633,19 +633,19 @@ CrossEntropyWithSoftmax
\end_inset
() to compute the training criterion and the operator ErrorPrediction
() to compute the training criterion and the operator ClassificationError
\begin_inset Index idx
status open
\begin_layout Plain Layout
ErrorPrediction
ClassificationError
\end_layout
\end_inset
() to compute the testing criterion.
These operators are internally represented as computation nodes CrossEntropyWit
hSoftmaxNode and ErrorPredictionNode with names CE and ErrPredict, respectively.
hSoftmaxNode and ClassificationErrorNode with names CE and ErrPredict, respectively.
\end_layout
\begin_layout Subsubsection
@ -740,7 +740,7 @@ status open
\begin_layout Plain Layout
ErrPredict=ErrorPrediction(labels, Plus2) # classification error
ErrPredict=ClassificationError(labels, Plus2) # classification error
\end_layout
\begin_layout Plain Layout
@ -1025,7 +1025,7 @@ reference "sub:NDL-Basic-Concepts"
but is much simpler and easier to understand because of the use of macros.
One new feature shown in this network definition is the access to macro-region
variables.
ErrorPrediction() needs to access an intermediate result from SMBFF before
ClassificationError() needs to access an intermediate result from SMBFF before
the CrossEntropyWithSoftmax() is applied.
Although the needed variable is local to the macro, it can be accessed
via the
@ -1107,7 +1107,7 @@ CE = SMBFF(L1, LDim, HDim, labels)
\begin_layout Plain Layout
Err=ErrorPrediction(labels, CE.F)
Err=ClassificationError(labels, CE.F)
\end_layout
\end_inset
@ -1280,7 +1280,7 @@ CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
\begin_layout Plain Layout
Err=ErrorPrediction(labels, CE.F, tag="evaluation")
Err=ClassificationError(labels, CE.F, tag="evaluation")
\end_layout
\end_inset
@ -2900,12 +2900,12 @@ classProbBeforeSoftmax - applying softmax on this matrix will result in
\end_layout
\begin_layout Subsubsection
ErrorPrediction
ClassificationError
\begin_inset Index idx
status open
\begin_layout Plain Layout
ErrorPrediction
ClassificationError
\end_layout
\end_inset
@ -2941,7 +2941,7 @@ status open
\begin_layout Plain Layout
ErrorPrediction(labels, m)
ClassificationError(labels, m)
\end_layout
\begin_layout Plain Layout
@ -4059,7 +4059,7 @@ CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
\begin_layout Plain Layout
Err=ErrorPrediction(labels, CE.F, tag="evaluation")
Err=ClassificationError(labels, CE.F, tag="evaluation")
\end_layout
\begin_layout Plain Layout

View File

@ -290,7 +290,7 @@ cntkSpeech.dnn"
\begin_layout Plain Layout
evalCriterion="ErrorPrediction"
evalCriterion="ClassificationError"
\end_layout
\begin_layout Plain Layout
@ -1610,7 +1610,7 @@ CE1=CrossEntropyWithSoftmax(labels,BFF1.FF.P,tag="evaluation")
\begin_layout Plain Layout
FER1 = ErrorPrediction(labels,BFF1.FF.P,tag="evaluation")
FER1 = ClassificationError(labels,BFF1.FF.P,tag="evaluation")
\end_layout
\begin_layout Plain Layout
@ -1634,7 +1634,7 @@ CE2=CrossEntropyWithSoftmax(regions,BFF2.FF.P,tag="evaluation")
\begin_layout Plain Layout
FER2 = ErrorPrediction(regions,BFF2.FF.P,tag="evaluation")
FER2 = ClassificationError(regions,BFF2.FF.P,tag="evaluation")
\end_layout
\begin_layout Plain Layout

View File

@ -514,7 +514,7 @@ Simple_Demo=[
\begin_layout Plain Layout
evalCriterion="ErrorPrediction"
evalCriterion="ClassificationError"
\end_layout
\begin_layout Plain Layout

View File

@ -52,12 +52,12 @@ train = [
z = DNNLayer (hiddenDim, labelDim, h1, 1)
ce = CrossEntropyWithSoftmax (labels, z)
errs = ErrorPrediction (labels, z)
errs = ClassificationError (labels, z)
# set top5Errs as an evaluation node to compute the top-5 error rate
# This is not marked tag="evaluation" since expensive during training.
# We explicitly select it as an output node in the "test" command.
top5Errs = ErrorPrediction (labels, z, topN=5)
top5Errs = ClassificationError (labels, z, topN=5)
# declare special nodes
featureNodes = (features)

View File

@ -22,8 +22,8 @@ DNN = [
ol = DNNLayer(hiddenDim, labelDim, h1, 1)
ce = CrossEntropyWithSoftmax(labels, ol)
errs = ErrorPrediction(labels, ol)
top5Errs = ErrorPrediction(labels, ol, Const(5), tag="eval") # only used in testing
errs = ClassificationError(labels, ol)
top5Errs = ClassificationError(labels, ol, Const(5), tag="eval") # only used in testing
# Special Nodes
FeatureNodes = (features)

View File

@ -58,7 +58,7 @@ DNN=[
ol = DNNLayer(h1Dim, labelDim, h1, 1)
ce = CrossEntropyWithSoftmax(labels, ol)
errs = ErrorPrediction(labels, ol)
errs = ClassificationError(labels, ol)
# Special Nodes
FeatureNodes = (features)

View File

@ -64,7 +64,7 @@ DNN = [
ol = DNNLayer(h1Dim, labelDim, h1, 1)
ce = CrossEntropyWithSoftmax(labels, ol)
errs = ErrorPrediction(labels, ol)
errs = ClassificationError(labels, ol)
# Special Nodes
FeatureNodes = (features)

View File

@ -48,7 +48,7 @@ DNN=[
deconv1 = DeconvReLULayer(unpool1, kW1, kH1, imageC, 25, cMap1, hStride1, vStride1, lpad1, upad1, wScale1, bValue1)
mse = SquareError(featScaled, deconv1)
#err = ErrorPrediction(labels, ol)
#err = ClassificationError(labels, ol)
# Special Nodes
FeatureNodes = (features)

View File

@ -79,7 +79,7 @@ DNN=[
ol = DNNLastLayer(hiddenDim, labelDim, h1_d, fc2WScale, fc2BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -84,7 +84,7 @@ DNN=[
ol = DNNLastLayer(hiddenDim, labelDim, h1, fc2WScale, fc2BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -61,7 +61,7 @@ DNN=[
ol = DnnLastLayer(cMap3, labelDim, pool, fc1WScale, fc1BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -106,7 +106,7 @@ DNN=[
ol = DnnLastLayer(cMap3, labelDim, pool, fc1WScale, fc1BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -78,7 +78,7 @@ DNN=[
ol = DnnImageLastLayer(7, 7, cMap4, labelDim, conv4, fc1WScale, fc1BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -0,0 +1,105 @@
import sys
import urllib.request as ul
import pickle as cp
import tarfile
import shutil
import os
import struct
import numpy as np
import getopt
ImgSize = 32
NumFeat = ImgSize * ImgSize * 3
def readBatch(src, outFmt):
with open(src, 'rb') as f:
d = cp.load(f, encoding="latin1")
# Note: most of the frameworks use spatial-major (aka NCHW) input format:
# R0..RN,G0..GN,B0..BN
# There are 2 possible options in CNTK:
# 1. If CNTK is built with cuDNN then 'cudnn' (i.e. NCHW format) should be used.
# 2. Otherwise, legacy CNTK 'NHWC' format should be used. As CIFAR-10 dataset comes in
# NCHW format, it has to be converted to CNTK legacy format first.
data = d['data']
if outFmt == 'cudnn':
feat = data
elif outFmt == 'legacy':
r = data[:, : ImgSize * ImgSize]
g = data[:, ImgSize * ImgSize : 2 * ImgSize * ImgSize]
b = data[:, 2 * ImgSize * ImgSize : 3 * ImgSize * ImgSize]
feat = np.empty_like(data)
feat[:, ::3] = r
feat[:, 1::3] = g
feat[:, 2::3] = b
else:
print ('Format not supported: ' + outFmt)
usage()
sys.exit(1)
res = np.hstack((feat, np.reshape(d['labels'], (len(d['labels']), 1))))
return res.astype(np.int)
def loadData(src, outFmt):
print ('Downloading ' + src)
fname, h = ul.urlretrieve(src, './delete.me')
print ('Done.')
try:
print ('Extracting files...')
with tarfile.open(fname) as tar:
tar.extractall()
print ('Done.')
print ('Preparing train set...')
trn = np.empty((0, NumFeat + 1), dtype=np.int)
for i in range(5):
batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
trn = np.vstack((trn, readBatch(batchName, outFmt)))
print ('Done.')
print ('Preparing test set...')
tst = readBatch('./cifar-10-batches-py/test_batch', outFmt)
print ('Done.')
finally:
os.remove(fname)
return (trn, tst)
def usage():
print ('Usage: CIFAR_convert.py [-f <format>] \n where format can be either cudnn or legacy. Default is cudnn.')
def parseCmdOpt(argv):
if len(argv) == 0:
print ("Using cudnn output format.")
return "cudnn"
try:
opts, args = getopt.getopt(argv, 'hf:', ['help', 'outFormat='])
except getopt.GetoptError:
usage()
sys.exit(1)
for opt, arg in opts:
if opt in ('-h', '--help'):
usage()
sys.exit()
elif opt in ('-f', '--outFormat'):
fmt = arg
if fmt != 'cudnn' and fmt != 'legacy':
print ('Invalid output format option.')
usage()
sys.exit(1)
return fmt
def savetxt(filename, ndarray):
with open(filename, 'w') as f:
labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
for row in ndarray:
row_str = row.astype(str)
label_str = labels[row[-1]]
feature_str = ' '.join(row_str[:-1])
f.write('|labels {} |features {}\n'.format(label_str, feature_str))
if __name__ == "__main__":
fmt = parseCmdOpt(sys.argv[1:])
trn, tst = loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', fmt)
print ('Writing train text file...')
savetxt(r'./Train_cntk_text.txt', trn)
print ('Done.')
print ('Writing test text file...')
savetxt(r'./Test_cntk_text.txt', tst)
print ('Done.')
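
A minimal illustrative sketch (hypothetical, not from the diff) of the channel reshuffle done by the 'legacy' branch of readBatch above, using a toy row with 4 pixels per channel instead of the real ImgSize * ImgSize = 1024; the slicing mirrors the script, the sample values are made up:

import numpy as np
# Planar CIFAR-10 layout: all R values, then all G values, then all B values.
planar = np.array([[0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23]])
n = 4  # pixels per channel (ImgSize * ImgSize in the real script)
interleaved = np.empty_like(planar)
interleaved[:, ::3] = planar[:, :n]             # R -> positions 0, 3, 6, 9
interleaved[:, 1::3] = planar[:, n:2 * n]       # G -> positions 1, 4, 7, 10
interleaved[:, 2::3] = planar[:, 2 * n:3 * n]   # B -> positions 2, 5, 8, 11
print(interleaved)  # [[ 0 10 20  1 11 21  2 12 22  3 13 23]] -- per-pixel RGB triples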

View File

@ -0,0 +1,73 @@
import os
import sys
import struct
import pickle as cp
from PIL import Image
import numpy as np
import xml.etree.cElementTree as et
import xml.dom.minidom
imgSize = 32
def saveImage(fname, data, label, mapFile, pad, **key_parms):
# data in CIFAR-10 dataset is in CHW format.
pixData = data.reshape((3, imgSize, imgSize))
if ('mean' in key_parms):
key_parms['mean'] += pixData
if pad > 0:
pixData = np.pad(pixData, ((0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=128) # can also use mode='edge'
img = Image.new('RGB', (imgSize + 2 * pad, imgSize + 2 * pad))
pixels = img.load()
for x in range(img.size[0]):
for y in range(img.size[1]):
pixels[x, y] = (pixData[0][y][x], pixData[1][y][x], pixData[2][y][x])
img.save(fname)
mapFile.write("%s\t%d\n" % (fname, label))
def saveMean(fname, data):
root = et.Element('opencv_storage')
et.SubElement(root, 'Channel').text = '3'
et.SubElement(root, 'Row').text = str(imgSize)
et.SubElement(root, 'Col').text = str(imgSize)
meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
et.SubElement(meanImg, 'rows').text = '1'
et.SubElement(meanImg, 'cols').text = str(imgSize * imgSize * 3)
et.SubElement(meanImg, 'dt').text = 'f'
et.SubElement(meanImg, 'data').text = ' '.join(['%e' % n for n in np.reshape(data, (imgSize * imgSize * 3))])
tree = et.ElementTree(root)
tree.write(fname)
x = xml.dom.minidom.parse(fname)
with open(fname, 'w') as f:
f.write(x.toprettyxml(indent = ' '))
if __name__ == "__main__":
if len(sys.argv) != 2:
print ("Usage: CifarConverter.py <path to CIFAR-10 dataset directory>\nCIFAR-10 dataset (Python version) can be downloaded from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
sys.exit(1)
rootDir = sys.argv[1]
trainDir = os.path.join(rootDir, os.path.join('data', 'train'))
if not os.path.exists(trainDir):
os.makedirs(trainDir)
testDir = os.path.join(rootDir, os.path.join('data', 'test'))
if not os.path.exists(testDir):
os.makedirs(testDir)
data = {}
dataMean = np.zeros((3, imgSize, imgSize)) # mean is in CHW format.
with open(os.path.join(rootDir, 'train_map.txt'), 'w') as mapFile:
for ifile in range(1, 6):
with open(os.path.join(rootDir, 'data_batch_' + str(ifile)), 'rb') as f:
data = cp.load(f, encoding='latin1')
for i in range(10000):
fname = os.path.join(trainDir, ('%05d.png' % (i + (ifile - 1) * 10000)))
saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 4, mean=dataMean)
dataMean = dataMean / (50 * 1000)
saveMean(os.path.join(rootDir, 'CIFAR-10_mean.xml'), dataMean)
with open(os.path.join(rootDir, 'test_map.txt'), 'w') as mapFile:
with open(os.path.join(rootDir, 'test_batch'), 'rb') as f:
data = cp.load(f, encoding='latin1')
for i in range(10000):
fname = os.path.join(testDir, ('%05d.png' % i))
saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 0)
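
As a hypothetical usage sketch (not from the diff), each map-file line written by saveImage above is an '<image path><TAB><label>' pair, so the files can be parsed back with plain text handling; the path 'train_map.txt' assumes the current directory is the dataset root used above:

# Minimal sketch: read back a map file produced by saveImage().
with open('train_map.txt') as mapFile:
    entries = [line.rstrip('\n').split('\t') for line in mapFile]
paths = [path for path, label in entries]
labels = [int(label) for path, label in entries]
print('%d images, first entry: %s -> label %d' % (len(entries), paths[0], labels[0]))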

View File

@ -41,8 +41,8 @@ TrainConvNet = [
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ErrorPrediction (labels, z)
top5Errs = ErrorPrediction (labels, z, topN=5) # only used in Eval action
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
featureNodes = (features)
labelNodes = (labels)
@ -109,8 +109,8 @@ TrainConvNetWithBN = [
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ErrorPrediction (labels, z)
top5Errs = ErrorPrediction (labels, z, topN=5)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5)
featureNodes = (features)
labelNodes = (labels)

View File

@ -1,229 +0,0 @@
# Simple CIFAR-10 convnet, without and with BatchNormalization.
command = TrainConvNet:Eval
#command = TrainConvNetWithBN:Eval
makeMode = false ; traceLevel = 0 ; deviceId = "auto"
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
modelPath = "$ModelDir$/cifar10.cmf"
# Training without BN
TrainConvNet = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 32:32:3
labelDim = 10
# basic model
model_basic (features) =
{
featNorm = features - Constant (128)
l1 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
init = "gaussian", initValueScale = 0.0043} (featNorm)
p1 = MaxPoolingLayer {(3:3), stride = (2:2)} (l1)
l2 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
init = "gaussian", initValueScale = 1.414} (p1)
p2 = MaxPoolingLayer {(3:3), stride = (2:2)} (l2)
l3 = ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU,
init = "gaussian", initValueScale = 1.414} (p2)
p3 = MaxPoolingLayer {(3:3), stride = (2:2)} (l3)
d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (p3)
z = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1)
}.z
# with self-defined layer
MyConvReLUPoolLayer {dim, initValueScale} =
{
C = ConvolutionalLayer {dim, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = initValueScale}
P = MaxPoolingLayer {(3:3), stride = (2:2)}
apply (x) = P(C(x))
}.apply
model_layers (features) =
{
featNorm = features - Constant (128)
h1 = MyConvReLUPoolLayer {32, 0.0043} (featNorm)
h2 = MyConvReLUPoolLayer {32, 1.414} (h1)
h3 = MyConvReLUPoolLayer {64, 1.414} (h2)
d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (h3)
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (d1)
}.z
# model-composition style
# ...TODO: test this again; last run was a little worse
Subtract128 (x) = x - Constant (128)
model_compositionStyle = Sequential (
Subtract128 :
MyConvReLUPoolLayer {32, 0.0043} :
MyConvReLUPoolLayer {32, 1.414} :
MyConvReLUPoolLayer {64, 1.414} :
DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} :
LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5}
)
#model_compositionStyle =
# Subtract128 >>
# LayerStack {3, i => MyConvReLUPoolLayer {dims[i], initValueScales[i]} } >>
# MyConvReLUPoolLayer {32, 0.0043} >>
# MyConvReLUPoolLayer {32, 1.414} >>
# MyConvReLUPoolLayer {64, 1.414} >>
# DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} >>
# LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5}
// --- with BatchNorm
MyConvBNReLUPoolLayer {dim, initValueScale} =
{
C = ConvolutionalLayer {dim, (5:5), pad = true, bias = false, init = "gaussian", initValueScale = initValueScale}
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
P = MaxPoolingLayer {(3:3), stride = (2:2)}
apply (x) = P(ReLU(B(C(x))))
}.apply
MyDenseBNReLULayer {dim, initValueScale} =
{
D = DenseLayer {dim, bias = false, init = "gaussian", initValueScale = initValueScale}
B = BatchNormalizationLayer {normalizationTimeConstant = 4096}
apply (x) = ReLU(B(D(x)))
}.apply
model_withBatchNorm (features) =
{
featNorm = features - Constant (128)
h1 = MyConvBNReLUPoolLayer {32, 0.0043} (featNorm)
h2 = MyConvBNReLUPoolLayer {32, 1.414} (h1)
h3 = MyConvBNReLUPoolLayer {64, 1.414} (h2)
d1 = MyDenseBNReLULayer {64, 12} (h3)
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (d1)
}.z
// --- ResNet
MyConvBNLayer {dim, initValueScale, stride} =
{
# note: (3:3), while the macro above is (5:5)
C = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale}
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
apply (x) = B(C(x))
}.apply
ResNetNode {dim, initValueScale} =
{
C1 = MyConvBNLayer {dim, initValueScale, 1} # first convolution layer
C2 = MyConvBNLayer {dim, initValueScale, 1} # second convolution layer
#B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
# ^^ Note: Adding an extra BN to 'x' trains slightly better.
apply (x) = ReLU (x + C2(ReLU(C1(x)))) # ReLU between C1 and C2 and after summation
}.apply
ResNetIncNode {dim, initValueScale} =
{
# first branch. This doubles the #channels but halves the image size
C1 = MyConvBNLayer {dim, initValueScale, 2} # first convolution layer, stride = 2
C2 = MyConvBNLayer {dim, initValueScale, 1} # second convolution layer
# second branch:
# sub-sample spatially by a factor of 2
DownSamplingLayer {stride} = MaxPoolingLayer {(1:1), stride = stride}
# append dim/2 zero output channels
pad = ConstantTensor (0, (1:1:dim/2)) # the 1s will broadcast to image size
P(x) = Splice ((DownSamplingLayer {(2:2)} (x) : pad), axis = 3)
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
# layer sums both branches and rectifies the result
apply (x) = ReLU (B(P(x)) + C2(ReLU(C1(x)))) # ReLU between C1 and C2 and after summation
}.apply
model_resNet (features) =
{
conv1 = MyConvBNLayer {16, 0.26, 1} (features)
rl1 = ReLU (conv1)
rn1 = LayerStack {3, _ => ResNetNode {16, 7.07}} (rl1)
rn2_1 = ResNetIncNode {32, 7.07} (rn1)
rn2 = LayerStack {2, _ => ResNetNode {32, 7.07}} (rn2_1)
rn3_1 = ResNetIncNode {64, 7.07} (rn2)
rn3 = LayerStack {2, _ => ResNetNode {64, 7.07}} (rn3_1)
pool = AveragePoolingLayer {(8:8)} (rn3)
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
}.z
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model_basic (features)
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ErrorPrediction (labels, z)
top5Errs = ErrorPrediction (labels, z, topN=5) # only used in Eval action
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs) # top5Errs only used in Eval
outputNodes = (z)
}
SGD = {
epochSize = 50000 # 49984 --TODO: why 16 less?
# without BatchNormalization:
maxEpochs = 30 ; minibatchSize = 64
learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
momentumAsTimeConstant = 600*20:6400
L2RegWeight = 0.03
# with BatchNormalization:
#maxEpochs = 30 ; minibatchSize = 64
#learningRatesPerSample = 0.00046875*7:0.00015625
#momentumAsTimeConstant = 0
#L2RegWeight = 0
# ResNet
#maxEpochs = 160 ; minibatchSize = 128
#learningRatesPerSample = 0.0078125*80:0.00078125*40:0.000078125
#momentumAsTimeConstant = 1200
#L2RegWeight = 0.0001
firstMBsToShowResult = 10 ; numMBsToShowResult = 500
}
reader = {
verbosity = 0
randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}
# Eval action
Eval = {
action = "eval"
minibatchSize = 16
evalNodeNames = errs:top5Errs # also test top-5 error rate
reader = {
verbosity = 0
randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/cifar-10-batches-py/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}

View File

@ -1,6 +1,6 @@
m1 = LoadModel("$curModel$", format="cntk")
SetDefaultModel(m1)
errTop5 = ErrorPrediction(labels, outputNodes.z, Const(5), tag="eval")
errTop5 = ClassificationError(labels, outputNodes.z, Const(5), tag="eval")
SaveModel(m1, "$newModel$", format="cntk")

View File

@ -103,6 +103,6 @@ DNN=[
ol = DNNLastLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -2,6 +2,6 @@ m1=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m1)
# Add top-5 error prediction node.
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
ErrTop5 = ClassificationError(labels, OutputNodes.z, Const(5), tag = Eval)
SaveModel(m1, $NewModel$, format=cntk)

View File

@ -111,6 +111,6 @@ DNN=[
ol = DnnLayer(cMap6, labelDim, pool2, fcWScale, fcBValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -67,6 +67,6 @@ DNN=[
ol = DnnLayer(cMap4, labelDim, pool5, fcWScale, fcBValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -74,6 +74,6 @@ DNN=[
ol = DnnLayer(cMap4, labelDim, pool5, fcWScale, fcBValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -77,6 +77,6 @@ DNN=[
ol = DnnLayer(cMap6, labelDim, pool2, fcWScale, fcBValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -2,6 +2,6 @@ m1=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m1)
# Add top-5 error prediction node.
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = "eval")
ErrTop5 = ClassificationError(labels, OutputNodes.z, Const(5), tag = "eval")
SaveModel(m1, $NewModel$, format=cntk)

View File

@ -71,6 +71,6 @@ DNN=[
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -79,6 +79,6 @@ DNN=[
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -80,6 +80,6 @@ DNN=[
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
Err = ClassificationError(labels, ol, tag = Eval)
OutputNodes = ol
]

View File

@ -29,7 +29,7 @@ ndlTestCosDist=[
CD = CosDistance(L4, labels);
CDAll=SumElements(CD)
NCD=Negate(CDALL, tag="criterion")
Err=ErrorPrediction(labels, L4, tag="evaluation")
Err=ClassificationError(labels, L4, tag="evaluation")
# rootNodes defined here
OutputNodes=(L4)
@ -129,7 +129,7 @@ ndlFull=[
#SM=Softmax(Plus2)
#CE=CrossEntropy(labels, SM)
CE=CrossEntropyWithSoftmax(labels, Plus2)
ErrPredict=ErrorPrediction(labels, Plus2)
ErrPredict=ClassificationError(labels, Plus2)
FeatureNodes=(features)
LabelNodes=(labels)
CriterionNodes=(CE)
@ -233,7 +233,7 @@ ndlMacroUse2=[
L2 = RBFF(L1, HDim, HDim)
L3 = RBFF(L2, HDim, HDim)
CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
# rootNodes defined here
OutputNodes=(CE.BFF)
@ -290,7 +290,7 @@ ndlMacroUseCNNSubSample2ZeroPadding=[
HDim=128
L1 = SBFF(mp, HDim, mpoutputSizePerSample)
CE = SMBFF(L1, LDim, HDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
# rootNodes defined here
OutputNodes=(CE.BFF)
@ -349,7 +349,7 @@ ndlMacroUseCNNSubSample2=[
HDim=128
L1 = SBFF(mp, HDim, mpoutputSizePerSample)
CE = SMBFF(L1, LDim, HDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
# rootNodes defined here
OutputNodes=(CE.BFF)
@ -399,7 +399,7 @@ ndlMacroUseCNN=[
HDim=128
L1 = SBFF(mp, HDim, 0)
CE = SMBFF(L1, LDim, HDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
# rootNodes defined here
OutputNodes=(CE.BFF)
@ -430,7 +430,7 @@ ndlMacroUseNoBase=[
L2 = RFFD(L1, HDim, HDim)
L3 = RFFD(L2, HDim, HDim)
CE = SMFF(L3, LDim, SDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
# rootNodes defined here
OutputNodes=(CE.BFF)
]
@ -463,7 +463,7 @@ ndlMacroUseMask=[
L4=ElementTimes(L3, ML2)
CE = SMBFF(L4, LDim, HDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
# output nodes
Prior=Mean(labels)

View File

@ -39,7 +39,7 @@ Multigpu_Demo_Train=[
# 2 input, 2 50-element hidden, 2 output
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
evalCriterion = "ClassificationError"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true

View File

@ -32,7 +32,7 @@ Simple_Demo_Train = [
# 2 input, 2 50-element hidden, 2 output
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
evalCriterion = "ClassificationError"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@ -137,6 +137,6 @@ Simple_Demo_Output=[
# grep labels SimpleOutput.labels | awk '{print $1}' > L
# diff L P | grep "<" | wc -l
# wc -l P
# The ratio of the two numbers gives the same error rate as ErrorPrediction/Sample in the log.
# The ratio of the two numbers gives the same error rate as ClassificationError/Sample in the log.
]
]

View File

@ -29,7 +29,7 @@ speechTrain = [
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
evalCriterion = "ClassificationError"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true

Просмотреть файл

@ -199,7 +199,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3 = [
LSTMoutputW = Plus(Times(W, LSTMoutput3), b);
ce = CrossEntropyWithSoftmax(labels, LSTMoutputW);
err = ErrorPrediction(labels, LSTMoutputW);
err = ClassificationError(labels, LSTMoutputW);
logPrior = LogPrior(labels)
scaledLogLikelihood = Minus(LSTMoutputW, logPrior)

View File

@ -172,7 +172,7 @@ ndlCreateNetwork=[
criterion = Plus(Scale(cr2,criterion2), Scale(cr1,criterion1), tag=Criteria)
#CE = SMBFF(Dout,labelDim,hiddenDim,labels,tag=Criteria)
Err = ErrorPrediction(labels,DNN_A_CE_BFF,tag=Eval)
Err = ClassificationError(labels,DNN_A_CE_BFF,tag=Eval)
logPrior = LogPrior(labels)

View File

@ -33,7 +33,7 @@ ndlCreateNetwork=[
L2 = SBFF(L1,hiddenDim,hiddenDim)
L3 = SBFF(L2,hiddenDim,hiddenDim)
CE = SMBFF(L3,labelDim,hiddenDim,labels,tag=Criteria)
Err = ErrorPrediction(labels,CE.BFF.FF.P,tag=Eval)
Err = ClassificationError(labels,CE.BFF.FF.P,tag=Eval)
# define output (scaled loglikelihood)
logPrior = LogPrior(labels)

View File

@ -122,7 +122,7 @@ ndlCreateNetwork=[
L5 = SBFF(L4,hiddenDim,hiddenDim)
L6 = SBFF(L5,hiddenDim,hiddenDim)
CE = SMBFF(L6,labelDim,hiddenDim,labels,tag=Criteria)
Err = ErrorPrediction(labels,CE.BFF.FF.P,tag=Eval)
Err = ClassificationError(labels,CE.BFF.FF.P,tag=Eval)
# define output (scaled loglikelihood)
logPrior = LogPrior(labels)

View File

@ -128,7 +128,7 @@ ndlCreateNetwork=[
# same name as the corresponding node in the non-sequence training model.
CE.BFF = BFF(L6, labelDim, hiddenDim)
Cr = DummyCriterion(objectives, derivatives, CE.BFF.FF.P, tag=Criteria)
Err = ErrorPrediction(labels, CE.BFF.FF.P, tag=Eval)
Err = ClassificationError(labels, CE.BFF.FF.P, tag=Eval)
# define output (scaled loglikelihood)
logPrior = LogPrior(labels)

View File

@ -106,7 +106,7 @@ ndlCreateNetwork=[
LSTMoutputW1 = Times(W1, LSTMoutput3)
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Criteria)
Err = ErrorPrediction(labels,LSTMoutputW1,tag=Eval)
Err = ClassificationError(labels,LSTMoutputW1,tag=Eval)
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW1,logPrior,tag=Output)

View File

@ -142,7 +142,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
#LSTMoutputW = Plus(Times(W, LSTMoutput3), b);
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -184,7 +184,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -182,7 +182,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -111,7 +111,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
LSTMoutputW = Plus(Times(W, LSTMoutput3), b);
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -112,7 +112,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -187,7 +187,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -116,7 +116,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
LSTMoutputW = Plus(Times(W, LSTMoutput8), b);
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)

View File

@ -27,7 +27,7 @@ TimitTrainSimple=[
SimpleNetworkBuilder=[
layerSizes=792:512*3:183
trainingCriterion=CrossEntropyWithSoftmax
evalCriterion=ErrorPrediction
evalCriterion=ClassificationError
layerTypes=Sigmoid
initValueScale=1.0
applyMeanVarNorm=true

View File

@ -163,7 +163,7 @@ ndlCreateNetwork=[
criterion2 = CrossEntropyWithSoftmax(statelabels, DNN_B_CE_BFF)
criterion = Plus(Scale(cr2,criterion2), Scale(cr1,criterion1), tag="criterion")
Err = ErrorPrediction(labels,DNN_A_CE_BFF,tag="evaluation")
Err = ClassificationError(labels,DNN_A_CE_BFF,tag="evaluation")
logPrior = LogPrior(labels)

View File

@ -18,7 +18,7 @@ TIMIT_TrainAdaptLR=[
SimpleNetworkBuilder=[
layerSizes=792:512*3:183
trainingCriterion=CrossEntropyWithSoftmax
evalCriterion=ErrorPrediction
evalCriterion=ClassificationError
layerTypes=Sigmoid
initValueScale=1.0
applyMeanVarNorm=true

View File

@ -24,7 +24,7 @@ TIMIT_TrainSimple=[
SimpleNetworkBuilder=[
layerSizes=792:512*3:183
trainingCriterion=CrossEntropyWithSoftmax
evalCriterion=ErrorPrediction
evalCriterion=ClassificationError
layerTypes=Sigmoid
initValueScale=1.0
applyMeanVarNorm=true

View File

@ -33,7 +33,7 @@ ndlCreateNetwork=[
L2 = SBFF(L1,hiddenDim,hiddenDim)
L3 = SBFF(L2,hiddenDim,hiddenDim)
CE = SMBFF(L3,labelDim,hiddenDim,myLabels,tag="criterion")
Err = ErrorPrediction(myLabels,CE.BFF.FF.P,tag="evaluation")
Err = ClassificationError(myLabels,CE.BFF.FF.P,tag="evaluation")
# define output (scaled loglikelihood)
logPrior = LogPrior(myLabels)

View File

@ -31,7 +31,7 @@ ndlCreateNetwork=[
featNorm = MeanVarNorm(features)
L1 = SBFF(featNorm,hiddenDim,featDim)
CE = SMBFF(L1,labelDim,hiddenDim,labels,tag="criterion")
Err = ErrorPrediction(labels,CE.BFF.FF.P,tag="evaluation")
Err = ClassificationError(labels,CE.BFF.FF.P,tag="evaluation")
# define output (scaled loglikelihood)
logPrior = LogPrior(labels)

View File

@ -102,7 +102,7 @@ ndlCreateNetwork=[
LSTMoutputW1 = Times(W1, LSTMoutput)
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag="criterion")
Err = ErrorPrediction(labels,LSTMoutputW1,tag="evaluation")
Err = ClassificationError(labels,LSTMoutputW1,tag="evaluation")
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW1,logPrior,tag="output")

View File

@ -51,7 +51,7 @@ L1 = SBFF2(featInput1, HiddenDim, FeatDim1, featInput2, FeatDim2)
L2 = SBFF(L1, HiddenDim, HiddenDim)
L3 = SBFF(L2, HiddenDim, HiddenDim)
CE = SMBFF(L3, LabelDim1, HiddenDim, labels,tag="criterion") # do I need a tag?
FER = ErrorPrediction(labels,CE.BFF.FF.P,tag="evaluation")
FER = ClassificationError(labels,CE.BFF.FF.P,tag="evaluation")
# outputNodes
Prior=Mean(labels)

View File

@ -41,12 +41,12 @@ L3 = SBFF(L2, HiddenDim, HiddenDim2)
# objective function 1
BFF1=BFF(L3,LabelDim1,HiddenDim)
CE1=CrossEntropyWithSoftmax(labels,BFF1.FF.P,tag="evaluation")
FER1 = ErrorPrediction(labels,BFF1.FF.P,tag="evaluation")
FER1 = ClassificationError(labels,BFF1.FF.P,tag="evaluation")
# objective function 2
BFF2=BFF(L3,LabelDim2,HiddenDim)
CE2=CrossEntropyWithSoftmax(regions,BFF2.FF.P,tag="evaluation")
FER2 = ErrorPrediction(regions,BFF2.FF.P,tag="evaluation")
FER2 = ClassificationError(regions,BFF2.FF.P,tag="evaluation")
# weighted final objective function
Alpha1=0.8

View File

@ -60,7 +60,7 @@ Train = [
outputs = W * LSTMoutput + b
cr = CrossEntropyWithSoftmax(labels, outputs)
errs = ErrorPrediction(labels, outputs)
errs = ClassificationError(labels, outputs)
criterionNodes = (cr)
evaluationNodes = (errs)
@ -191,7 +191,7 @@ Test = [
labels = Input($labelCount$, tag = "label")
modelAsTrained = BS.Network.Load ("$modelPath$")
final = Hardmax(modelAsTrained.outputs)
errorRate = ErrorPrediction(labels, final, tag='evaluation')
errorRate = ClassificationError(labels, final, tag='evaluation')
]
evalNodeNames = errorRate

View File

@ -1,169 +0,0 @@
# The configuration file to build language understanding model with ATIS corpus.
# An LSTM model is built to tag each word in sentences with its semantic label.
makeMode = false ; traceLevel = 1 ; deviceId = -1
WorkDir = Work
DataDir = Data
modelPath = $WorkDir$/slot.model
parallelTrain = true
#stderr = $WorkDir$/log
command = TrainATIS:RunATIS:EvalATIS
vocabSize = 943 # number of words
numLabels = 129 # number of slot labels
numIntents = 26 # number of intent labels
# The command to train the LSTM model
TrainATIS = [
action = "train"
BrainScriptNetworkBuilder = [
inputDim = $vocabSize$
labelDim = $numLabels$
embDim = 150
hiddenDim = 300
#hiddenDim = 150
model = Sequential (
Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) : # 3-word window
EmbeddingLayer {embDim} : # embedding
RecurrentLSTMLayer {hiddenDim} : # LSTM
#Parallel ((RecurrentLSTMLayer {hiddenDim} : RecurrentLSTMLayer {hiddenDim, goBackwards=true}), Splice) : # bidirectional LSTM
#Parallel ((RecurrentLSTMLayer {hiddenDim} : RecurrentLSTMLayer {hiddenDim, goBackwards=true}), Splice) : # bidirectional LSTM
DenseLayer {labelDim, initValueScale=7} # output layer
)
# features
query = Input {inputDim}
slotLabels = Input {labelDim}
# model application
z = model (query)
# loss and metric
ce = CrossEntropyWithSoftmax (slotLabels, z)
errs = ErrorPrediction (slotLabels, z)
featureNodes = (query)
labelNodes = (slotLabels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
]
# rename this to BrainScriptNetworkBuilder to switch to intent-classification task
Intent_BrainScriptNetworkBuilder = [
inputDim = $vocabSize$
labelDim = $numIntents$
embDim = 150
#hiddenDim = 300
hiddenDim = 150
model = Sequential (
Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) : # 3-word window
EmbeddingLayer {embDim} : # embedding
RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last : # LSTM state, final state
#Parallel ((Sequential (RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last):
Sequential (RecurrentLSTMLayer {hiddenDim, goBackwards=true} : BS.Sequences.First)), Splice) : # bidirectional LSTM
DenseLayer {labelDim, initValueScale=7} # output layer
)
# features
t = DynamicAxis{}
query = Input {inputDim, dynamicAxis=t}
intentLabels = Input {labelDim}
# model application
z = model (query)
# loss and metric
ce = CrossEntropyWithSoftmax (intentLabels, z)
errs = ErrorPrediction (intentLabels, z)
featureNodes = (query)
labelNodes = (intentLabels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
]
SGD = [
maxEpochs = 20 ; epochSize = 36000
minibatchSize = 70
learningRatesPerSample = 0.01*2:0.005*12:0.001
gradUpdateType = "FSAdaGrad"
gradientClippingWithTruncation = true ; clippingThresholdPerSample = 15.0
# number of minibatches to report progress
firstMBsToShowResult = 10 ; numMBsToShowResult = 100
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
parallelizationStartEpoch = 2
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
]
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/atis.train.ctf"
randomize = true
input = [
query = [ alias = "S0" ; dim = $vocabSize$ ; format = "sparse" ]
intentLabels = [ alias = "S1" ; dim = $numIntents$ ; format = "sparse" ]
slotLabels = [ alias = "S2" ; dim = $numLabels$ ; format = "sparse" ]
]
]
]
# Run the model to predict slot labels
RunATIS = [
action = "write"
BrainScriptNetworkBuilder = [
modelAsTrained = BS.Network.Load ("$modelPath$")
final = Hardmax (modelAsTrained.z) # make a decision
#labels = Pass (modelAsTrained.slotLabels)
# enable this for intent classification:
labels = Pass (modelAsTrained.intentLabels)
t = DynamicAxis()
]
outputPath = $WorkDir$/model.writeaction
outputNodeNames = intentLabels:slotLabels:final
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/atis.test.ctf"
randomize = false
input = [
query = [ alias = "S0" ; dim = $vocabSize$ ; format = "sparse" ]
intentLabels = [ alias = "S1" ; dim = $numIntents$ ; format = "sparse" ]
slotLabels = [ alias = "S2" ; dim = $numLabels$ ; format = "sparse" ]
]
]
]
# Evaluate the model's slot-tagging accuracy (as an error count)
EvalATIS = [
action = "eval"
modelPath = $modelPath$ # from outside
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/atis.test.ctf"
randomize = false
input = [
query = [ alias = "S0" ; dim = $vocabSize$ ; format = "sparse" ]
intentLabels = [ alias = "S1" ; dim = $numIntents$ ; format = "sparse" ]
slotLabels = [ alias = "S2" ; dim = $numLabels$ ; format = "sparse" ]
]
]
]

View File

View File

@ -34,7 +34,7 @@ Train = [
z = w * features + b
ce = CrossEntropyWithSoftmax (labels, z)
errs = ErrorPrediction (labels, z)
errs = ClassificationError (labels, z)
# root nodes
featureNodes = (features)

View File

@ -713,6 +713,9 @@ endif
########################################
ifdef OPENCV_PATH
ifdef BOOST_PATH
INCLUDEPATH += $(BOOST_PATH)/include
IMAGE_READER_LIBS += -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
@ -743,6 +746,7 @@ $(IMAGEREADER): $(IMAGEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) $(IMAGE_READER_LIBS)
endif
endif
########################################
# 1bit SGD setup
@ -841,7 +845,6 @@ UNITTEST_READER_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/HTKLMFReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ImageReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ReaderLibTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/UCIFastReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/stdafx.cpp \
$(SOURCEDIR)/Readers/CNTKTextFormatReader/Indexer.cpp \
$(SOURCEDIR)/Readers/CNTKTextFormatReader/TextParser.cpp \

View File

@ -39,6 +39,8 @@ void DoTrain(const ConfigRecordType& config);
template <typename ElemType>
void DoAdapt(const ConfigParameters& config);
template <typename ElemType>
void DoDumpNodes(const ConfigParameters& config);
template <typename ElemType>
void DoEdit(const ConfigParameters& config);
// evaluation (EvalActions.cpp)

View File

@ -154,6 +154,13 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
m_net->InitLearnableParameters(nodePtr, L"uniform", initValueScale, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initOnCPUOnly);
else if (EqualCI(initString, L"gaussian"))
m_net->InitLearnableParameters(nodePtr, L"gaussian", initValueScale, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initOnCPUOnly);
else if (EqualCI(initString, L"bilinear"))
{
const size_t kernelWidth = node->GetOptionalParameter("kernelWidth", "0");
const size_t kernelHeight = node->GetOptionalParameter("kernelHeight", "0");
assert(kernelWidth > 0 && kernelHeight > 0);
m_net->InitLearnableParametersWithBilinearFill<ElemType>(nodePtr, kernelWidth, kernelHeight);
}
else if (EqualCI(initString, L"fromFile"))
{
std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
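
For intuition about the new 'bilinear' initialization branch above, the sketch below builds the standard bilinear-interpolation kernel commonly used to initialize upsampling/deconvolution weights; whether it matches InitLearnableParametersWithBilinearFill exactly is an assumption, and the helper name bilinear_kernel is made up:

import numpy as np
def bilinear_kernel(width, height):
    # Weights fall off linearly from the kernel center (classic FCN-style
    # upsampling filter); returns a (height, width) array.
    def axis_weights(n):
        f = (n + 1) // 2       # interpolation factor (half-size)
        c = (n - 1) / 2.0      # center coordinate
        return 1.0 - np.abs(np.arange(n) - c) / f
    return np.outer(axis_weights(height), axis_weights(width))
print(bilinear_kernel(4, 4))  # 4x4 kernel peaking at the center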

View File

@ -158,6 +158,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(CRFNode), L"CRF")) ret = true;
#endif
else if (EqualInsensitive(nodeType, OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode), L"CBCEWithSM")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ClassificationErrorNode), L"ErrorPrediction")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(EqualNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(GreaterEqualNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(GreaterNode))) ret = true;
@ -177,7 +178,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(DropoutNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(DummyCriterionNode), L"DummyCriterion")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ElementTimesNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ErrorPredictionNode), L"ClassificationError")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ExpNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(FloorNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(FutureValueNode))) ret = true;

View File

@ -1751,10 +1751,10 @@ shared_ptr<ComputationNode<ElemType>> SimpleNetworkBuilder<ElemType>::AddTrainAn
// output = builder.SquareError(label, tinput, (evalNodeName == L"")?L"EvalSquareError":evalNodeName);
output = builder.Logistic(label, tinput, (evalNodeName == L"") ? L"Logistic" : evalNodeName);
break;
case EvalCriterion::ErrorPrediction:
case EvalCriterion::ClassificationError:
if (matrix != nullptr && tinput == input)
tinput = builder.Times(matrix, input);
output = builder.ErrorPrediction(label, tinput, (evalNodeName == L"") ? L"EvalErrorPrediction" : evalNodeName);
output = builder.ClassificationError(label, tinput, (evalNodeName == L"") ? L"EvalClassificationError" : evalNodeName);
break;
#ifdef COMING_SOON
case EvalCriterion::CRF:
@ -1785,23 +1785,26 @@ template class SimpleNetworkBuilder<double>;
TrainingCriterion ParseTrainingCriterionString(wstring s)
{
if (EqualCI(s, L"crossEntropyWithSoftmax")) return TrainingCriterion::CrossEntropyWithSoftmax;
else if (EqualCI(s, L"sequenceWithSoftmax")) return TrainingCriterion::SequenceWithSoftmax;
else if (EqualCI(s, L"squareError")) return TrainingCriterion::SquareError;
else if (EqualCI(s, L"logistic")) return TrainingCriterion::Logistic;
else if (EqualCI(s, L"noiseContrastiveEstimation")) return TrainingCriterion::NCECrossEntropyWithSoftmax;
// legacy/deprecated
else if (EqualCI(s, L"classCrossEntropyWithSoftmax")) return TrainingCriterion::ClassCrossEntropyWithSoftmax;
else if (EqualCI(s, L"sequenceWithSoftmax")) return TrainingCriterion::SequenceWithSoftmax;
else LogicError("trainingCriterion: Invalid trainingCriterion value. Valid values are (crossEntropyWithSoftmax | squareError | logistic | classCrossEntropyWithSoftmax| sequenceWithSoftmax)");
}
EvalCriterion ParseEvalCriterionString(wstring s)
{
if (EqualCI(s, L"errorPrediction")) return EvalCriterion::ErrorPrediction;
if (EqualCI(s, L"classificationError")) return EvalCriterion::ClassificationError;
else if (EqualCI(s, L"crossEntropyWithSoftmax")) return EvalCriterion::CrossEntropyWithSoftmax;
else if (EqualCI(s, L"sequenceWithSoftmax")) return EvalCriterion::SequenceWithSoftmax;
else if (EqualCI(s, L"classCrossEntropyWithSoftmax")) return EvalCriterion::ClassCrossEntropyWithSoftmax;
else if (EqualCI(s, L"logistic")) return EvalCriterion::Logistic;
else if (EqualCI(s, L"noiseContrastiveEstimation")) return EvalCriterion::NCECrossEntropyWithSoftmax;
else if (EqualCI(s, L"squareError")) return EvalCriterion::SquareError;
// legacy/deprecated
else if (EqualCI(s, L"classCrossEntropyWithSoftmax")) return EvalCriterion::ClassCrossEntropyWithSoftmax;
else if (EqualCI(s, L"sequenceWithSoftmax")) return EvalCriterion::SequenceWithSoftmax;
else if (EqualCI(s, L"errorPrediction")) return EvalCriterion::ClassificationError;
else LogicError("evalCriterion: Invalid trainingCriterion value. Valid values are (errorPrediction | crossEntropyWithSoftmax | squareError | logistic | sequenceWithSoftmax)");
}

View File

@ -66,7 +66,7 @@ enum class EvalCriterion : int
CrossEntropy,
SquareError,
Logistic,
ErrorPrediction,
ClassificationError,
ClassCrossEntropyWithSoftmax,
NCECrossEntropyWithSoftmax,
CRF,

View File

@ -191,6 +191,30 @@ void DoAdapt(const ConfigParameters& config)
template void DoAdapt<float>(const ConfigParameters& config);
template void DoAdapt<double>(const ConfigParameters& config);
// ===========================================================================
// DoDumpNodes() - implements CNTK "dumpNode" command
// ===========================================================================
template <typename ElemType>
void DoDumpNodes(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring nodeName = config(L"nodeName", L"__AllNodes__");
wstring nodeNameRegexStr = config(L"nodeNameRegex", L"");
wstring defOutFilePath = modelPath + L"." + nodeName + L".txt";
wstring outputFile = config(L"outputFile", defOutFilePath);
bool printValues = config(L"printValues", true);
bool printMetadata = config(L"printMetadata", true);
if (!printValues && !printMetadata)
InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");
ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
}
template void DoDumpNodes<float>(const ConfigParameters& config);
template void DoDumpNodes<double>(const ConfigParameters& config);
// ===========================================================================
// DoEdit() - implements CNTK "edit" command
// ===========================================================================

View File

@ -277,7 +277,7 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
if (rightVal.Is<Double>()) // ComputeNode * scalar
swap(leftVal, rightVal); // -> scalar * ComputeNode
if (leftVal.Is<Double>())
operationName = L"Scale"; // scalar * ComputeNode
operationName = L"ElementTimes"; // scalar * ComputeNode
else
operationName = L"Times"; // ComputeNode * ComputeNode (matrix produt)
}
@ -305,6 +305,8 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
config->Add(L"operation", MakeFailFn(e->location), ConfigValuePtr(make_shared<String>(operationName), MakeFailFn(e->location), exprPath));
let leftFailFn = leftVal.GetFailFn(); // report any error for this Constant object as belonging to the scalar factor's expression
vector<ConfigValuePtr> inputs;
#if 0 // BUGBUG: rows,cols is no longer right, we need a TensorShape here
// TODO: Solve this by directly constructing Constant() off a 'double' input in the ComputationNode constructor.
if (operationName == L"Scale")
{
// if we scale, the first operand is a Double, and we must convert that into a 1x1 Constant
@ -314,7 +316,7 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
let one = MakePrimitiveConfigValuePtr(1.0, leftFailFn, exprPath);
constantConfig->Add(L"rows", leftFailFn, one);
constantConfig->Add(L"cols", leftFailFn, one);
//constantConfig->Add(L"shape", leftFailFn, one); // BUGBUG: rows,cols is no longer right, we need a TensorShape here
//constantConfig->Add(L"shape", leftFailFn, one);
constantConfig->Add(L"value", leftFailFn, leftVal);
constantConfig->Add(L"learningRateMultiplier", leftFailFn, MakePrimitiveConfigValuePtr(0.0f, leftFailFn, exprPath));
let value = ConfigValuePtr(rtInfo->construct(constantConfig), leftFailFn, exprPath);
@ -323,6 +325,7 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
valueWithName->SetName(value.GetExpressionName());
leftVal = value; // and that's our actual left value
}
#endif
inputs.push_back(leftVal);
if (operationName != L"Negate") // Negate only has one input (rightVal is a nullptr)
inputs.push_back(rightVal);
@ -332,6 +335,8 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
{
let one = MakePrimitiveConfigValuePtr(1.0, leftFailFn, exprPath);
config->Add(L"outputRank", leftFailFn, one);
let minusOne = MakePrimitiveConfigValuePtr(-1.0, leftFailFn, exprPath);
config->Add(L"inferInputRankToMap", leftFailFn, minusOne);
}
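// (For illustration: a plain BrainScript 'W * x' thus becomes Times (W, x, outputRank=1, inferInputRankToMap=-1),
// while 'scalar * x' becomes ElementTimes, as selected above.)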
// instantiate the ComputationNode
let value = ConfigValuePtr(rtInfo->construct(config), MakeFailFn(e->location), exprPath);


@ -30,27 +30,38 @@
# LinearLayer -- create a fully-connected linear projection layer
# Note: outDim may describe a tensor as well.
LinearLayer {outDim, bias = true, init='uniform', initValueScale=1} =
LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=None, mapRank=None} =
{
W = ParameterTensor {_ConcatArrays (outDim, 0), init=init, initValueScale=initValueScale}
# inputRank given: number of zeroes to add to W (mapRank must not be given)
# mapRank given: expand W to leave exactly mapRank axes (inputRank must not be given)
# none given: expand W to all (same as mapRank=0)
inputShape =
if BS.Constants.IsNone (inputRank) then Inferred # not given: one Inferred, which will get expanded
else if !BS.Constants.IsNone (mapRank) then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
else Repeat (inputRank, Inferred)
W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
b = ParameterTensor {outDim, initValue=0}
outRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
inferInputRankToMap =
if !BS.Constants.IsNone (inputRank) then -1 # means not specified
else if BS.Constants.IsNone (mapRank) then 0 # default to 'use all input dims'
else mapRank
apply (x) =
if bias
then Times (W, x, outputRank = outRank) + b
else Times (W, x, outputRank = outRank)
then Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap) + b
else Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap)
}.apply
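# Illustration (hypothetical dims): with outDim=512,
#  - inputRank=2 creates W as [512 x Inferred x Inferred], i.e. W spans exactly the first two input axes
#  - mapRank=1   sets inferInputRankToMap=1, i.e. all input axes except the last one are absorbed into W
#  - neither     behaves like mapRank=0, i.e. W absorbs all input axes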
# DenseLayer -- create a fully-connected layer with optional non-linearity
DenseLayer{outDim, bias = true, activation=(x=>x), init='uniform', initValueScale=1} = Sequential ( LinearLayer{outDim, bias = bias, init = init, initValueScale = initValueScale} : activation )
DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale=1, inputRank=None, mapRank=None} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank, mapRank=mapRank} : activation )
# EmbeddingLayer -- create a linear embedding layer
EmbeddingLayer {outDim, # dimension of embedding
embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
{
shape = if transpose then (0 : outDim) else (outDim : 0)
shape = if transpose then (Inferred : outDim) else (outDim : Inferred)
E = if embeddingPath == ''
then ParameterTensor {shape, init='uniform'} # learnable
then ParameterTensor {shape, init='heNormal'} # learnable
else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
TimesOp = if transpose then TransposeTimes else Times
apply (x) = TimesOp (E, x) # x is expected to be sparse one-hot
@ -65,7 +76,7 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
filterShape, # e.g. (3:3)
bias = true,
activation = (x=>x),
init = "uniform",
init = "heNormal",
initValueScale = 1, # TODO: rename to initScale
#reductionRank = 1, # TODO: support this
stride = 1, pad = false,
@ -77,9 +88,9 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
outputChannelsShape = _AsArray (numOutputChannels)
outputRank = Length (outputChannelsShape)
filterRank = Length (filterShape)
kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, 0)) # kernel := filter plus reductionDims
#W = ParameterTensor{_ConcatArrays ( kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale} # [ W x H x C x K ]
W = ParameterTensor{(outputChannelsShape:0), init = init, initValueScale = initValueScale} # old-style for backwards-compatible random initialization
kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, Inferred)) # kernel := filter plus reductionDims
#W = ParameterTensor{_ConcatArrays ( kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale, initOutputRank = -1} # [ W x H x C x K ]
W = ParameterTensor{(outputChannelsShape : Inferred), init = init, initValueScale = initValueScale} # old-style for backwards-compatible random initialization
b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = 0) # [ 1 x 1 x K ]
sharing = true # TODO: support this
transpose = false # TODO: support this
@ -106,10 +117,11 @@ AveragePoolingLayer {filterShape, stride = 1, pad = false, lowerPad = 0, upperPa
RecurrentLSTMLayer {outputDim,
cellShape = BS.Constants.None, # if set then use a projection
goBackwards = false,
init='heNormal', initValueScale=1,
enableSelfStabilization = false} =
{
previousHook = if goBackwards then BS.RNNs.NextHC else BS.RNNs.PreviousHC
lstm = BS.RNNs.LSTMBlock {outputDim, cellShape = cellShape, enableSelfStabilization = enableSelfStabilization}
lstm = BS.RNNs.LSTMBlock {outputDim, cellShape = cellShape, enableSelfStabilization = enableSelfStabilization, init=init, initValueScale=initValueScale}
apply (x) = {
prevState = previousHook (lstmState) # recurrent memory. E.g. Previous or Next, with or without initial state, beam reordering etc.
@ -138,7 +150,7 @@ DelayLayer {T=1, defaultHiddenActivation=0} =
# BatchNormalizationLayer -- create a batch-normalization layer
BatchNormalizationLayer {spatialRank = 0, # reduce over these dims. E.g. 2 to reduce over (w,h) in a [W x H x C]-shaped input
initialScale = 1,
normalizationTimeConstant = 0, blendTimeConstant = 0,
normalizationTimeConstant = 0, blendTimeConstant = 0, # TODO: normTimeConst should be INF, not 0
epsilon = 0.00001, useCntkEngine = true} =
{
#normShape = _ConcatArrays (Repeat (spatialRank, 1), 0) # spatial dims get a dimension of 1 (broadcasting, while all others are inferred from input)
@ -151,18 +163,16 @@ BatchNormalizationLayer {spatialRank = 0, # reduce over these dims. E.g. 2 to r
}.apply
# LayerNormalizationLayer -- create a layer-normalization layer
LayerNormalizationLayer {dim = BS.Constants.None, initScale = 1, initBias = 0} = if BS.Constants.IsNone (dim) then Fail ("LayerNormalizationLayer: 'dim' parameter is currently required.") else
LayerNormalizationLayer {initScale = 1, initBias = 0} =
{
gain = ParameterTensor{(1), initValue = initScale}
bias = ParameterTensor{(1), initValue = initBias}
apply (x) = {
div = Constant (1.0 / dim)
# normalize w.r.t. actual sample statistics
mean = div .* ReduceSum (x)
mean = ReduceMean (x)
x0 = x - mean;
std = Sqrt (div .* ReduceSum (x0 .* x0))
std = Sqrt (ReduceMean (x0 .* x0))
xHat = ElementDivide (x0, std)
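# (in formula form: xHat = (x - mean(x)) / sqrt(mean((x - mean(x))^2)), computed per sample; the learned gain and bias rescale xHat below)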
# denormalize with learned parameters
@ -171,16 +181,22 @@ LayerNormalizationLayer {dim = BS.Constants.None, initScale = 1, initBias = 0} =
}.apply
# StabilizerLayer -- create a scalar stabilizer [J. Droppo, 2014 -- TODO: get the reference]
StabilizerLayer {} =
StabilizerLayer{} =
{
# BUGBUG: Calling f(x) twice will create a second set of parameters. Stabilize() needs to be refactored to fix this.
apply (x) = Stabilize (x)
apply (x) = BS.Parameters.Stabilize (x)
}.apply
# FeatureMVNLayer -- create a corpus-level feature-normalization layer
# This can only be applied to features. Statistics are not shared across invocations,
# which is semantically OK because the values are the same. However, it is not efficient.
FeatureMVNLayer {} = MeanVarNorm
FeatureMVNLayer{} = MeanVarNorm
# LogPriorLayer -- create a corpus-level label-prior layer
# This can only be applied to labels. Statistics are not shared across invocations,
# which is semantically OK because the values are the same. However, it is not efficient.
# TODO: document on Wiki
LogPriorLayer{} = LogPrior
# Layers that exist in other tools that we will not have:
# FlattenLayer{}: Not needed since DenseLayer() can handle tensors just fine.
@ -188,6 +204,10 @@ FeatureMVNLayer {} = MeanVarNorm
Identity(x) = x # sometimes helpful
None = BS.Constants.None # for use with some optional parameters; test with IsNone()
Inferred = 0 # denotes a dimension that is to be inferred
##############################################################################
# Composing layers or models into more complex models
##############################################################################
@ -234,7 +254,7 @@ CrossEntropyWithSoftmax = CNTK2.CrossEntropyWithSoftmax
Dropout = CNTK2.Dropout
ElementTimes = CNTK2.ElementTimes
ElementDivide = CNTK2.ElementDivide
ErrorPrediction = CNTK2.ErrorPrediction
ClassificationError = CNTK2.ClassificationError
Exp = CNTK2.Exp
Floor = CNTK2.Floor
Log = CNTK2.Log
@ -245,6 +265,7 @@ RectifiedLinear = CNTK2.ReLU # deprecated
ReLU = CNTK2.ReLU
ReduceSum = CNTK2.ReduceSum
ReduceLogSum = CNTK2.ReduceLogSum
ReduceMean = CNTK2.ReduceMean
ReduceMin = CNTK2.ReduceMin
ReduceMax = CNTK2.ReduceMax
@ -283,7 +304,7 @@ CNTK2 = [
// TODO: The API for Parameter is different in current 2.0 design, getting a constant as input for the initial values.
// This needs to be fixed to follow the way the Constant() is exposed in Python
// Making this an internal node with "_" until we agree on the final interface:
_Parameter(shape, value = 0, initValue = '', learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*shape */ ] /*plus the function args*/ ]
_Parameter(shape, value = 0, initValue = '', learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, initOutputRank = 1, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*shape */ ] /*plus the function args*/ ]
// 3. Shape operations
// Changes: NewReshape -> Reshape, input -> _, dims -> shape
@ -316,13 +337,14 @@ CNTK2 = [
// 4. Tensor operations
// Changes: Matrix -> Tensor. A -> x, B -> y. Data must come on y ("default parameter") hence not using _
Times(x, y, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( x : y ) /*plus the function args*/ ]
Times(x, y, outputRank=1, inferInputRankToMap=-1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( x : y ) /*plus the function args*/ ]
// 5. Elementwise operations.
// Changes: "Matrix" -> "Tensor"; left input -> _; Clip: move input to front. ElementDivide/Times: anotherTensor -> y
Abs(_, tag='') = new ComputationNode [ operation = 'Abs' ; inputs = _ /*plus the function args*/ ]
Ceil(_, tag='') = Negate(Floor(Negate(_)), tag=tag)
Clip(_, minValue, maxValue, tag='') = new ComputationNode [ operation = 'Clip' ; inputs = (minValue : maxValue : _) /* plus the function args*/ ]
# TODO: Make ElementDivide a proper operation
ElementDivide(_, y, tag='') = ElementTimes(_, Reciprocal(y), tag=tag)
ElementTimes(_, y, tag='') = new ComputationNode [ operation = 'ElementTimes' ; inputs = (_ : y) /*plus the function args*/ ]
Exp(_, tag='') = new ComputationNode [ operation = 'Exp' ; inputs = _ /*plus the function args*/ ]
@ -336,12 +358,12 @@ CNTK2 = [
Tanh(_, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = _ /*plus the function args*/ ]
// 6. Reductions
ReduceSum (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Sum" /*plus the function args*/ ]
ReduceLogSum(_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "LogSum" /*plus the function args*/ ]
ReduceMin (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Min" /*plus the function args*/ ]
ReduceMax (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Max" /*plus the function args*/ ]
#ReduceMean (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Mean" /*plus the function args*/ ]
ReduceSum (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Sum" /*plus the function args*/ ]}.r
ReduceLogSum(_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "LogSum" /*plus the function args*/ ]}.r
ReduceMean (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Mean" /*plus the function args*/ ]}.r
ReduceMin (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Min" /*plus the function args*/ ]}.r
ReduceMax (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Max" /*plus the function args*/ ]}.r
// 7. Control flow (if, composite etc.)
// None so far
@ -365,11 +387,12 @@ CNTK2 = [
// No changes here - we said the default input would be the label sequence here, against which the
// empirical sequence is compared. Keeping this for now.
CrossEntropyWithSoftmax(_, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = (_ : outProbVectorSequence) /*plus the function args*/ ]
ErrorPrediction(_, outVectorSequence, topN=1, tag='') = new ComputationNode [ operation = 'ErrorPrediction' ; inputs = if topN == 1 then (_ : outVectorSequence) else (_ : outVectorSequence : Constant (topN)) /*plus the function args*/ ]
ClassificationError(_, outVectorSequence, topN=1, tag='') = new ComputationNode [ operation = 'ClassificationError' ; inputs = if topN == 1 then (_ : outVectorSequence) else (_ : outVectorSequence : Constant (topN)) /*plus the function args*/ ]
ErrorPrediction = ClassificationError # legacy
# TODO: replace with this (need to deal with topN thing):
# (_new will be removed once the change is made)
CrossEntropyWithSoftmax_new (L, z, tag='') = Minus (ReduceLogSum (z), TransposeTimes (L, z), tag=tag)
ErrorPrediction_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)
ClassificationError_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)
// 12. Comparison nodes
Less(_, y, tag='') = new ComputationNode [ operation = 'Less' ; inputs = (_ : y) /*plus the function args*/ ]
@ -393,12 +416,12 @@ CNTK2 = [
# - initFromLiteral="..." (deprecated) --> parse a string literal (obsolete with value=array form)
# - init="fixedValue", value from 'value'
# Warning: Current config will behave unexpectedly if the user mistypes 'initValue' as 'value' (which will be ignored, defaulting to "uniform" init)
Parameter {outputDim, inputDim, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0/*deprecated*/, initValue = '', initFromFilePath = '', initFromLiteral = ''/*deprecated*/, initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
Parameter {outputDim, inputDim, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0/*deprecated*/, initValue = '', initFromFilePath = '', initFromLiteral = ''/*deprecated*/, initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; initOutputRank = 1 ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
LearnableParameter = Parameter // deprecated
# TODO: make Parameter take tensor dims?
ParameterTensor {dims, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0, initValue = '', initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
ParameterTensor {dims, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0, initValue = '', initOutputRank = 1, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, initFromLiteral = literal, learningRateMultiplier = 0.0)
# TODO: Deprecate ConstantFromString() in favor of Constant(array expression)
DynamicAxis(tag='') = new ComputationNode [ operation = 'DynamicAxis' ; /*plus the function args*/ ]
@ -452,7 +475,7 @@ MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, l
MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]
AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = input /*plus the function args*/ ]
ColumnwiseCrossProduct = KhatriRaoProduct // deprecated
ClassificationError = ErrorPrediction
ErrorPrediction = ClassificationError # legacy name
Delay = PastValue
BatchNormalization(input, scale, bias, runMean, runVariance, spatial, normalizationTimeConstant = 0, blendTimeConstant = 0, epsilon = 0.00001, useCntkEngine = true, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runVariance) /*plus the function args*/ ]
@ -551,16 +574,22 @@ IntDiv(x, y) = new NumericFunction [ what = 'IntDiv' ; args = (x:y) ]
# macros from NDL book
##############################################################################
# deprecated--use LinearLayer{} and DenseLayer{} instead
BFF(in, rows, cols) = [ B = Parameter(rows, 1, initValue = 0) ; W = Parameter(rows, cols) ; z = W*in+B ]
SBFF(in, rows, cols) = [ Eh = Sigmoid(BFF(in, rows, cols).z) ]
# deprecated--use FeatureMVNLayer{} instead
MeanVarNorm(feat) = PerDimMeanVarNormalization(feat, Mean(feat), InvStdDev(feat))
# deprecated--use LogPriorLayer{} instead
LogPrior(labels) = Log(Mean(labels))
# specify one of these two for initialization:
# - init = "uniform"|"gaussian"
# - embeddingFile = PATHNAME
# deprecated--use EmbeddingLayer{} instead
Embedding (embeddingDim, input, inputDim=input.dim, initFrom=''/*|fromFile|gaussian|uniform*/, embeddingPath = '', sparseInput = false, learningRateWeight = 0.0) = [
embedding = Transpose (LearnableParameter (inputDim, embeddingDim, learningRateMultiplier = learningRateWeight, init = initFrom, initFromFilePath = embeddingPath))
embedding = Transpose (Parameter (inputDim, embeddingDim, learningRateMultiplier = learningRateWeight, init = initFrom, initFromFilePath = embeddingPath))
lookup = if sparseInput then embedding * input
else GatherPacked (input, embedding)
].lookup
@ -802,7 +831,8 @@ Loop = [
Parameters =
[
WeightParam (outputDim, inputDim) = Parameter (outputDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1)
# TODO: These all have randomSeed set to 1!
WeightParam (outputDim, inputDim) = ParameterTensor ((outputDim : inputDim), init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1)
DiagWeightParam (outputDim) = ParameterTensor ((outputDim), init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1) # meant to be applied elementwise
BiasParam (dim) = ParameterTensor ((dim), initValue=0.0)
ScalarParam() = BiasParam (1)
@ -845,23 +875,25 @@ RNNs =
# This is the stateless version that takes the previous state as an input.
# It returns a dictionary with three members: h and c, and dim=h.dim for convenience. prevState must have h and c.
# This function also takes an optional auxiliary input, e.g. for supporting attention models.
LSTMBlock (outputDim, cellShape=Constants.None, enableSelfStabilization=false) =
[
LSTMBlock (outputDim, cellShape=None, init='heNormal', initValueScale=1, enableSelfStabilization=false) =
{
cellDim = if Constants.IsNone (cellShape) then outputDim else cellShape
// parameter macros
# note: each invocation comes with its own set of weights
B{} = Parameters.BiasParam {cellDim}
W{} = Parameters.WeightParam {cellDim, 0} // input
A{} = Parameters.WeightParam {cellDim, 0} // aux input
H{} = Parameters.WeightParam {cellDim, outputDim} // hidden-to-hidden
C{} = Parameters.DiagWeightParam {cellDim} // cell-to-hidden {note: applied elementwise}
Wmr = Parameters.WeightParam {outputDim, cellDim};
# parameter helpers
# note: invoked multiple times, each invocation comes with its own set of weights
B{} = ParameterTensor {(cellDim), initValue=0} # a bias
W{} = ParameterTensor {(cellDim : Inferred), init=init, initValueScale=initValueScale} # an input
A{} = ParameterTensor {(cellDim : Inferred), init=init, initValueScale=initValueScale} # an aux input
H{} = ParameterTensor {(cellDim : outputDim), init=init, initValueScale=initValueScale} # a hidden-to-hidden
C{} = ParameterTensor {(cellDim), init=init, initValueScale=initValueScale} # a cell-to-hidden {note: applied elementwise}
Wmr = ParameterTensor {(outputDim : cellDim), init=init, initValueScale=initValueScale}; # final projection
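# (note: the Inferred (=0) input dimensions of W{} and A{} are filled in from the actual input x during validation)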
S(x) = Parameters.Stabilize (x, enabled=enableSelfStabilization)
# BUGBUG: S() must not be a macro either, but should also be an object instance
apply (x, prevState, aux=Constants.None) = [
_ = [ // encapsulate the inner workings
apply (x, prevState, aux=None) = {
_ = { // encapsulate the inner workings
dh = prevState.h // previous values
dc = prevState.c
@ -872,20 +904,20 @@ RNNs =
# projected contribution from input(s) and bias
pin() = if Constants.IsNone (aux)
then B() + W() * x
else B() + W() * x + A() * aux
then B{} + W{} * x
else B{} + W{} * x + A{} * aux
it = Sigmoid (pin() + H() * dhs + C() .* dcs) // input gate(t)
bit = it .* Tanh (pin() + H() * dhs) // applied to tanh of input network
it = Sigmoid (pin() + H{} * dhs + C{} .* dcs) // input gate(t)
bit = it .* Tanh (pin() + H{} * dhs) // applied to tanh of input network
ft = Sigmoid (pin() + H() * dhs + C() .* dcs) // forget-me-not gate(t)
ft = Sigmoid (pin() + H{} * dhs + C{} .* dcs) // forget-me-not gate(t)
bft = ft .* dc // applied to cell(t-1)
ct = bft + bit // c(t) is sum of both
ot = Sigmoid (pin() + H() * dhs + C() .* S(ct)) // output gate(t)
ot = Sigmoid (pin() + H{} * dhs + C{} .* S(ct)) // output gate(t)
ht = ot .* Tanh (ct) // applied to tanh(cell(t))
]
}
# our return values
c = _.ct // cell value
@ -893,8 +925,8 @@ RNNs =
then Wmr * S(_.ht) // project
else _.ht // no projection
dim = outputDim
] // end of apply(x)
].apply
} // end of apply (x, prevState)
}.apply
# LSTMP -- LSTM function with projection and self-stabilization
# Projection is enabled by passing different values for outputDim and cellDim.


@ -216,13 +216,13 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
needPrior = true
// the following two belong into SGD, so they were removed here
//trainingCriterion = CrossEntropyWithSoftmax
//evalCriterion = ErrorPrediction
//evalCriterion = ClassificationError
// new: connect to input stream from source; and expose the output layer
input = source.features.data // these are also ComputeNodeRefs, exposed by the source
output = ComputeNodeRef [ dim = source.labels.dim ] // SimpleNetworkBuilder will put top layer affine transform output (input to softmax) here
// criteria are configurable here; these are ComputeNodes created here
trainingCriterion = CrossEntropyWithSoftmax (source.labels.data, output)
evalCriterion = ErrorPrediction (source.labels.data, output)
evalCriterion = ClassificationError (source.labels.data, output)
// new: (and half-baked) define Input nodes
myFeatures=Input(featDim) // reader stream will reference this
myLabels=Input(labelDim)
@ -245,7 +245,7 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
//L2 = SBFF(L1,hiddenDim,hiddenDim)
//L3 = SBFF(L2,hiddenDim,hiddenDim)
//CE = SMBFF(L3,labelDim,hiddenDim,myLabels,tag=Criteria)
//Err = ErrorPrediction(myLabels,CE.BFF.FF.P,tag=Eval)
//Err = ClassificationError(myLabels,CE.BFF.FF.P,tag=Eval)
//logPrior = LogPrior(myLabels)
//ScaledLogLikelihood=Minus(CE.BFF.FF.P,logPrior,tag=Output)
@ -279,7 +279,7 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
// define criterion nodes
CE = CrossEntropyWithSoftmax(myLabels, outZ)
Err = ErrorPrediction(myLabels, outZ)
Err = ClassificationError(myLabels, outZ)
// define output node for decoding
logPrior = LogPrior(myLabels)
@ -392,7 +392,7 @@ network = new NDL [
// define criterion nodes
CE = CrossEntropyWithSoftmax(myLabels, outZ)
Err = ErrorPrediction(myLabels, outZ)
Err = ClassificationError(myLabels, outZ)
// define output node for decoding
logPrior = LogPrior(myLabels)


@ -93,26 +93,6 @@ std::string WCharToString(const wchar_t* wst)
return s;
}
// TODO: This is an action, it should be moved into ActionsLib.
template <typename ElemType>
void DumpNodeInfo(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring nodeName = config(L"nodeName", L"__AllNodes__");
wstring nodeNameRegexStr = config(L"nodeNameRegex", L"");
wstring defOutFilePath = modelPath + L"." + nodeName + L".txt";
wstring outputFile = config(L"outputFile", defOutFilePath);
bool printValues = config(L"printValues", true);
bool printMetadata = config(L"printMetadata", true);
if (!printValues && !printMetadata)
{
InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");
}
ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
}
size_t GetMaxEpochs(const ConfigParameters& configParams)
{
ConfigParameters configSGD(configParams("SGD"));
@ -286,9 +266,9 @@ void DoCommands(const ConfigParameters& config, const shared_ptr<MPIWrapper>& mp
{
TestCn<ElemType>(config); // for "devtest" action pass the root config instead
}
else if (thisAction == "dumpNode" /*deprecated:*/|| thisAction == "dumpnode")
else if (thisAction == "dumpNodes" /*deprecated:*/ || thisAction == "dumpNode" || thisAction == "dumpnode")
{
DumpNodeInfo<ElemType>(commandParams);
DoDumpNodes<ElemType>(commandParams);
}
else if (thisAction == "convertdbn")
{
@ -682,28 +662,22 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
fprintf(stderr, "%*s%ls", i > 0 ? 2 : 0, "", argv[i]); // use 2 spaces for better visual separability
fprintf(stderr, "\n\n");
#if 1 //def _DEBUG
#ifdef _DEBUG
// This simply merges all the different config parameters specified (e.g., via config files or directly on the command line),
// and prints it.
fprintf(stderr, "\n\n");
LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n");
fprintf(stderr, "\nConfiguration, Raw:\n\n");
LOGPRINTF(stderr, "%s\n", rawConfigString.c_str());
LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<\n");
// Same as above, but with all variables resolved. If a parameter is set multiple times (e.g., set in the config and overridden on the command line),
// all of these assignments will appear, even though only the last assignment matters.
fprintf(stderr, "\n");
LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
fprintf(stderr, "\nConfiguration After Variable Resolution:\n\n");
LOGPRINTF(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
#endif
// This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
// value it is set to will appear).
fprintf(stderr, "\n");
LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
fprintf(stderr, "\nConfiguration After Processing and Variable Resolution:\n\n");
config.dumpWithResolvedVariables();
LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
#endif
LOGPRINTF(stderr, "Commands:");
for (int i = 0; i < command.size(); i++)


@ -23,7 +23,7 @@ m1=[
L2 = RBFF(L1, HDim, HDim)
L3 = RBFF(L2, HDim, HDim)
CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
Err=ErrorPrediction(labels, CE.BFF.FF.P, tag="evaluation")
Err=ClassificationError(labels, CE.BFF.FF.P, tag="evaluation")
# rootNodes defined here
OutputNodes=(CE.BFF.FF.P)


@ -218,7 +218,7 @@ namespace CNTK
std::swap(inputVars[0], inputVars[1]);
opType = PrimitiveOpType::CrossEntropyWithSoftmax;
}
else if (node->OperationName() == OperationNameOf(ErrorPredictionNode))
else if (node->OperationName() == OperationNameOf(ClassificationErrorNode))
{
std::swap(inputVars[0], inputVars[1]);
opType = PrimitiveOpType::ClassificationError;


@ -355,7 +355,7 @@ namespace CNTK
computationNodePtr = builder.CrossEntropyWithSoftmax(input1Node, input0Node, function->Name());
break;
case PrimitiveOpType::ClassificationError:
computationNodePtr = builder.ErrorPrediction(input1Node, input0Node, function->Name());
computationNodePtr = builder.ClassificationError(input1Node, input0Node, function->Name());
break;
case PrimitiveOpType::PastValue:
case PrimitiveOpType::FutureValue:


@ -426,6 +426,13 @@ void ComputationNetwork::RandomInitLearnableParameters(const ComputationNodeBase
InitLearnableParameters(node, uniformInit ? L"uniform" : L"gaussian", initValueScale, randomSeed, initOnCPUOnly);
}
template <class ElemType>
void ComputationNetwork::InitLearnableParametersWithBilinearFill(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight)
{
auto learnableParameterNode = dynamic_pointer_cast<LearnableParameter<ElemType>>(node);
learnableParameterNode->InitBilinear(kernelWidth, kernelHeight);
}
bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr)
{
// TODO: just use return!
@ -435,7 +442,7 @@ bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr)
nodePtr->OperationName() == OperationNameOf(SequenceWithSoftmaxNode) ||
nodePtr->OperationName() == OperationNameOf(CrossEntropyNode) ||
nodePtr->OperationName() == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode) ||
nodePtr->OperationName() == OperationNameOf(ErrorPredictionNode) ||
nodePtr->OperationName() == OperationNameOf(ClassificationErrorNode) ||
#ifdef COMING_SOON
nodePtr->OperationName() == OperationNameOf(CRFNode) ||
#endif
@ -1228,7 +1235,7 @@ void ComputationNetwork::SaveToDbnFile(ComputationNetworkPtr net, const std::wst
};
// Get output node
std::list<ComputationNodeBasePtr> outputNodes = net->GetNodesWithType(OperationNameOf(ErrorPredictionNode));
std::list<ComputationNodeBasePtr> outputNodes = net->GetNodesWithType(OperationNameOf(ClassificationErrorNode));
ComputationNodeBasePtr outputNode = GetFirstNodeWithDifferentType(outputNodes.front()->GetInputs(), OperationNameOf(InputValue));
if (outputNode == nullptr)
@ -1478,6 +1485,7 @@ void ComputationNetwork::SaveToDbnFile(ComputationNetworkPtr net, const std::wst
PutTag("EDBN");
}
template void ComputationNetwork::InitLearnableParametersWithBilinearFill<float>(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight);
template void ComputationNetwork::Read<float>(const wstring& fileName);
template void ComputationNetwork::ReadPersistableParameters<float>(File& fstream, bool create);
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
@ -1487,6 +1495,7 @@ template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net,
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);
template void ComputationNetwork::SaveToDbnFile<float>(ComputationNetworkPtr net, const std::wstring& fileName) const;
template void ComputationNetwork::InitLearnableParametersWithBilinearFill<double>(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight);
template void ComputationNetwork::Read<double>(const wstring& fileName);
template void ComputationNetwork::ReadPersistableParameters<double>(File& fstream, bool create);
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);


@ -349,6 +349,9 @@ public:
// Legacy version that is for random only.
void RandomInitLearnableParameters(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly = false) const;
template <class ElemType>
void InitLearnableParametersWithBilinearFill(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight);
template <typename N>
static shared_ptr<N> AsNodePtr(const ComputationNodeBasePtr& inode)
{


@ -40,13 +40,8 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
#endif
if (nodeType == OperationNameOf(AbsNode)) return New<AbsNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))return New<ClassBasedCrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ClassificationErrorNode)) return New<ClassificationErrorNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ClipNode)) return New<ClipNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EqualNode)) return New<EqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(GreaterEqualNode)) return New<GreaterEqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(GreaterNode)) return New<GreaterNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LessEqualNode)) return New<LessEqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LessNode)) return New<LessNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(NotEqualNode)) return New<NotEqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosDistanceNode)) return New<CosDistanceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosDistanceWithNegativeSamplesNode)) return New<CosDistanceWithNegativeSamplesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosineNode)) return New<CosineNode<ElemType>>(forward<_Types>(_Args)...);
@ -59,7 +54,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(DynamicAxisNode)) return New<DynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ElementTimesNode)) return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EnvironmentInputNode)) return New<EnvironmentInputNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ErrorPredictionNode)) return New<ErrorPredictionNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EqualNode)) return New<EqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ExpNode)) return New<ExpNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(FloorNode)) return New<FloorNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(FutureValueNode)) return New<FutureValueNode<ElemType>>(forward<_Types>(_Args)...);
@ -67,10 +62,14 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
#ifdef COMING_SOON
else if (nodeType == OperationNameOf(GMMLogLikelihoodNode)) return New<GMMLogLikelihoodNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(GreaterEqualNode)) return New<GreaterEqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(GreaterNode)) return New<GreaterNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(HardmaxNode)) return New<HardmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(IfNode)) return New<IfNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(InvStdDevNode)) return New<InvStdDevNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(KhatriRaoProductNode)) return New<KhatriRaoProductNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LessEqualNode)) return New<LessEqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LessNode)) return New<LessNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogNode)) return New<LogNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogPlusNode)) return New<LogPlusNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogSoftmaxNode)) return New<LogSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
@ -80,6 +79,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(MeanNode)) return New<MeanNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(MinusNode)) return New<MinusNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(NegateNode)) return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(NotEqualNode)) return New<NotEqualNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode)) return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PackedIndexNode)) return New<PackedIndexNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PastValueNode)) return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);
@ -119,6 +119,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(WhereNode)) return New<WhereNode<ElemType>>(forward<_Types>(_Args)...);
// legacy names we also support for back compat of model-files
else if (nodeType == L"ColumnElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"ErrorPrediction") return New<ClassificationErrorNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"Delay") return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);
// TODO: DiagTimes is also an alias of ElementTimes; current separate implementation is unnecessary.
else if (nodeType == L"PerDimMeanVarNormalizationNode") return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
@ -368,9 +369,9 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Avera
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ClassificationError(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<ErrorPredictionNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
return net.AddNodeToNetAndAttachInputs(New<ClassificationErrorNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
}
template <class ElemType>


@ -122,7 +122,7 @@ public:
ComputationNodePtr DummyCriterion(const ComputationNodePtr objectives, const ComputationNodePtr derivatives, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName = L"");
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr ClassificationError(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Exp(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Floor(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L"");


@ -447,7 +447,7 @@ ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<ComputationNetworkWithEd
// refWeight = 0.9
// kldLabels = labels * (1-refWeight) + Softmax (zRef) * refWeight # interpolate with ref output
// ce = CrossEntropyWithSoftmax (z, kldLabels)
// errs = ErrorPrediction (z, labels)
// errs = ClassificationError (z, labels)
// criterionNodes = (ce)
// evaluationNodes = (errs)
// ===================================================================


@ -32,16 +32,17 @@
#define CNTK_MODEL_VERSION_1 1
#define CNTK_MODEL_VERSION_2 2
#define CNTK_MODEL_VERSION_3 3
#define CNTK_MODEL_VERSION_4 4 // PastValue
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // Batch norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CNTK_MODEL_VERSION_9 9 // Transpose flag in ConvolutionNode to support deconvolution.
#define CNTK_MODEL_VERSION_10 10 // Learning rate multiplier for input nodes.
#define CNTK_MODEL_VERSION_11 11 // Dynamic axis name for where nodes.
#define CNTK_MODEL_VERSION_12 12 // Batch norm: switch running inverse std deviation -> variance, MB count -> samplesSeen; CuDNN v5
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_12
#define CNTK_MODEL_VERSION_4 4 // PastValue
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // batch-norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CNTK_MODEL_VERSION_9 9 // transpose flag in ConvolutionNode to support deconvolution
#define CNTK_MODEL_VERSION_10 10 // learning-rate multiplier for input nodes
#define CNTK_MODEL_VERSION_11 11 // dynamic axis name for where nodes
#define CNTK_MODEL_VERSION_12 12 // Times() m_inputRank to support parameter-rank inference
#define CNTK_MODEL_VERSION_13 13 // Batch norm: switch running inverse std deviation -> variance, MB count -> samplesSeen; CuDNN v5
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_13
extern bool g_shareNodeValueMatrices;


@ -166,4 +166,128 @@ public:
template class PerDimMeanVarNormalizationNode<float>;
template class PerDimMeanVarNormalizationNode<double>;
// -----------------------------------------------------------------------
// DiagTimesNode (vector representing the diagonal of a square matrix, data)
// Deprecated because can be implemented with ElementTimes.
// -----------------------------------------------------------------------
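// (Sketch of the equivalence: DiagTimes (d, x) scales row i of x by d[i], which is the same as an
// elementwise product with a column vector that broadcasts across columns, e.g. ElementTimes (d, x)
// in BrainScript with d of shape [N] and x of shape [N x T].)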
template <class ElemType>
class DiagTimesNode : public ComputationNode<ElemType>, public NumInputs<2>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"DiagTimes"; }
public:
DeclareConstructorFromConfigWithNumInputs(DiagTimesNode);
DiagTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
if (inputIndex == 0) // left derivative
{
Matrix<ElemType> sliceOutputGrad = MaskedGradientFor(fr); // use Masked- version since this is reducing over frames
Matrix<ElemType> sliceInput1Value = Input(1)->MaskedValueFor(fr);
m_innerproduct->AssignInnerProductOf(sliceOutputGrad, sliceInput1Value, false);
Input(0)->GradientAsMatrix() += *m_innerproduct;
}
else // right derivative
{
Matrix<ElemType> sliceOutputGrad = GradientFor(fr);
Matrix<ElemType> sliceInput1Grad = Input(1)->GradientFor(fr);
m_rightGradient->SetValue(sliceOutputGrad);
m_rightGradient->ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
sliceInput1Grad += *m_rightGradient;
}
}
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
// The DiagTimesNode does not require its output value for computing
// the gradients of its input nodes
return false;
}
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
sliceOutputValue.AssignValuesOf(sliceInput1Value);
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
size_t rows0 = Input(0)->GetAsMatrixNumRows();
size_t rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
// if a dimension is not specified, we assume the two operands' dimensions should match
Input(0)->ValidateInferInputDimsFrom(TensorShape(rows1));
if (Input(1)->HasMBLayout())
{
// infer rows1 as rows0
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0));
SetDims(TensorShape(rows0), true);
}
else // multiplying two straight matrices
{
size_t cols1 = Input(1)->GetAsMatrixNumCols();
// infer rows1 as rows0
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0, cols1));
SetDims(TensorShape(rows0, cols1), false);
}
// update after inference
rows0 = Input(0)->GetAsMatrixNumRows();
rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
if (isFinalValidationPass && rows0 != rows1)
InvalidArgument("The inner matrix dimension in the %ls %ls operation does not match (%d vs. %d).", NodeName().c_str(), OperationName().c_str(), (int) rows1, (int) rows0);
size_t cols0 = Input(0)->GetAsMatrixNumCols();
if (isFinalValidationPass && cols0 != 1)
InvalidArgument("The first matrix should be a column vector representing the diagonal of a square matrix in the DiagTimes operation.");
SetDims(Input(1));
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<DiagTimesNode<ElemType>>(nodeP);
node->m_innerproduct->SetValue(*m_innerproduct);
node->m_rightGradient->SetValue(*m_rightGradient);
}
}
// request matrices that are needed for gradient computation
virtual void RequestMatricesBeforeBackprop(MatrixPool& matrixPool)
{
Base::RequestMatricesBeforeBackprop(matrixPool);
RequestMatrixFromPool(m_innerproduct, matrixPool);
RequestMatrixFromPool(m_rightGradient, matrixPool);
}
// release gradient and temp matrices that are no longer needed after all the children's gradients are computed.
virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool)
{
Base::ReleaseMatricesAfterBackprop(matrixPool);
ReleaseMatrixToPool(m_innerproduct, matrixPool);
ReleaseMatrixToPool(m_rightGradient, matrixPool);
}
private:
shared_ptr<Matrix<ElemType>> m_innerproduct;
shared_ptr<Matrix<ElemType>> m_rightGradient;
};
template class DiagTimesNode<float>;
template class DiagTimesNode<double>;
}}}


@ -18,24 +18,20 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// ErrorPredictionNode (label, prediction) or ErrorPredictionNode (prediction, label)
// ClassificationErrorNode (label, prediction) or ClassificationErrorNode (prediction, label)
// Performs classification and error counting.
// Result is an error rate, lower = better.
// -----------------------------------------------------------------------
template <class ElemType>
class ErrorPredictionNode : public ComputationNodeNonLooping /*ComputationNode*/<ElemType>
class ClassificationErrorNode : public ComputationNodeNonLooping /*ComputationNode*/<ElemType>
{
typedef ComputationNodeNonLooping<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName()
{
return L"ErrorPrediction";
}
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"ClassificationError"; }
public:
DeclareConstructorFromConfig(ErrorPredictionNode);
ErrorPredictionNode(DEVICEID_TYPE deviceId, const wstring& name)
DeclareConstructorFromConfig(ClassificationErrorNode);
ClassificationErrorNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
}
@ -63,10 +59,10 @@ public:
MaskMissingColumnsToZero(*m_maxIndexes1, Input(1)->GetMBLayout(), fr);
Value().AssignNumOfDiff(*m_maxIndexes0, *m_maxIndexes1, m_topK > 1);
#if NANCHECK
Value().HasNan("ErrorPrediction");
Value().HasNan("ClassificationError");
#endif
#if DUMPOUTPUT
Value().Print("ErrorPredictionNode");
Value().Print("ClassificationErrorNode");
#endif
}
@ -100,7 +96,7 @@ public:
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<ErrorPredictionNode<ElemType>>(nodeP);
auto node = dynamic_pointer_cast<ClassificationErrorNode<ElemType>>(nodeP);
node->m_maxIndexes0->SetValue(*m_maxIndexes0);
node->m_maxIndexes1->SetValue(*m_maxIndexes1);
node->m_maxValues->SetValue(*m_maxValues);
@ -131,8 +127,8 @@ private:
int m_topK;
};
template class ErrorPredictionNode<float>;
template class ErrorPredictionNode<double>;
template class ClassificationErrorNode<float>;
template class ClassificationErrorNode<double>;
#ifdef COMING_SOON


@ -26,7 +26,7 @@ void LearnableParameter<ElemType>::InitShape(const TensorShape& shape)
Value().Invalidate();
}
static pair<bool/*uniform*/, double/*stddev or range*/> ParseRandomizationType(const std::wstring& type, size_t fanOut = 1, size_t fanIn = 1);
static pair<bool/*uniform*/, double/*stddev or range*/> ParseRandomizationType(const wstring& type, size_t fanOut = 1, size_t fanIn = 1);
// constructor from config
// Parameterization is a little wicked. An older version required specifying the type of initialization
@ -41,6 +41,11 @@ static pair<bool/*uniform*/, double/*stddev or range*/> ParseRandomizationType(c
// - init="fixedValue", value from 'value' --deprecated in favor of just specifying initValue
// - init="fromFile", value from 'initFromFilePath' --deprecated in favor of just specifying 'initFromFilePath'
// - init="fromLiteral", value from 'initFromLiteral' --deprecated in favor of initValue=array expression
// Random initialization takes an additional optional parameter initOutputRank, default 1.
// All dimensions that are not among the first 'initOutputRank' axes are considered input dimensions.
// This is necessary e.g. for convolution.
// 'initOutputRank' can also be negative to denote output dims on the right, to cater to the needs
// of convolution kernels, where the output dimension (K) is the right-most axis (initOutputRank=-1).
// The forms that infer the dimensions have different BrainScript names. TODO: need one for fromFile
// TODO: All forms that require specified dimensions but contain zeroes (to be updated by graph)
// will need to do deferred initialization, or have a way to repeat it.
@ -91,7 +96,8 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
int forcedRandomSeed = configp->Get(L"randomSeed"); // forcing a specific random seed is useful for testing to get repeatable initialization independent of evaluation order
m_randomSeed = forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed;
m_initValueScale = configp->Get(L"initValueScale");
m_initOnCPUOnly = configp->Get(L"initOnCPUOnly");
m_initOutputRank = configp->Get(L"initOutputRank");
m_initOnCPUOnly = configp->Get(L"initOnCPUOnly");
}
else if (initString == L"zero")
{
@ -114,6 +120,13 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
InitFromFile(initFromFilePath);
m_initString.clear();
}
else if (initString == L"bilinear")
{
const size_t kernelWidth = configp->Get(L"kernelWidth");
const size_t kernelHeight = configp->Get(L"kernelHeight");
InitBilinear(kernelWidth, kernelHeight);
m_initString.clear();
}
// legacy
else if (initString == L"fixedValue") // deprecated. Use initValue=... instead
{
@ -155,6 +168,7 @@ void LearnableParameter<ElemType>::PostInitParameters(const wstring& initString,
m_initString = initString;
m_randomSeed = randomSeed;
m_initValueScale = initValue;
m_initOutputRank = 1; // default. NDL (deprecated) cannot specify a different value.
m_initOnCPUOnly = initOnCPUOnly;
}
else if (initString == L"fixedValue") // from constant value
@ -182,7 +196,7 @@ void LearnableParameter<ElemType>::PostInitParameters(const wstring& initString,
// heNormal: sqrt(2 / fanin)
// heUniform: sqrt(6 / fanin)
// returns (*,0) for unrecognized string
static pair<bool/*uniform*/,double/*stddev or range*/> ParseRandomizationType(const std::wstring& type, size_t fanOut /* = 1*/, size_t fanIn /*= 1*/)
static pair<bool/*uniform*/,double/*stddev or range*/> ParseRandomizationType(const wstring& type, size_t fanOut /* = 1*/, size_t fanIn /*= 1*/)
{
if (type == L"uniform") return make_pair( true, 0.05f);
else if (type == L"gaussian") return make_pair(false, 0.2 / sqrt(fanIn));
@ -197,26 +211,33 @@ static pair<bool/*uniform*/,double/*stddev or range*/> ParseRandomizationType(co
// initialize with random numbers
// if 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing)
template <class ElemType>
void LearnableParameter<ElemType>::InitRandom(const std::wstring& type,
void LearnableParameter<ElemType>::InitRandom(const wstring& type,
const unsigned long randomSeed,
const ElemType initValueScale,
bool initOnCPUOnly)
const int initOutputRank,
const bool initOnCPUOnly)
{
// fprintf(stderr, "%d x %d: %d %ls\n", (int)GetNumRows(), (int)GetNumCols(), (int)randomSeed, NodeName().c_str());
let& sampleLayout = GetSampleLayout();
#if 1 // this more complex version is needed to repro test cases generated with an older version
auto& value = sampleLayout.GetRank() > 2 ? Value() : ValueAsMatrix();
#else
auto& value = Value();
#endif
let numElements = sampleLayout.GetNumElements();
if (numElements == 0)
return;
// We assume that the matrix row dimension is the output dimension. This is wrong in case of ND biases, convolution filters, and BatchNorm.
size_t fanIn = value.GetNumCols(); // fan-in
size_t fanOut = numElements / fanIn; // remaining dimensions
// determine fan-in and fan-out
// This is controlled by initOutputRank.
// For a normal matrix [I x J], fanOut = I, fanIn = J=inDim --> initOutputRank = +1
// For a convolution kernel [w x h x C x K], fanOut = K, fanIn = w*h*C. --> initOutputRank = -1, meaning count from back
if (abs(initOutputRank) > sampleLayout.GetRank())
InvalidArgument("InitRandom: initOutputRank=%d exceeds sampleLayout rank %d", initOutputRank, (int)sampleLayout.GetRank());
// fanIn is determined by multiplying a range of dimensions:
// - initOutputRank >= 0: [ initOutputRank, rank )
// - initOutputRank < 0: [ 0, rank-abs(initOutputRank) )
let inDimsBegin = (initOutputRank >= 0) ? (size_t)initOutputRank : 0;
let inDimsEnd = (initOutputRank >= 0) ? sampleLayout.GetRank() : (size_t)((int)sampleLayout.GetRank() + initOutputRank);
size_t fanIn = 1;
for (size_t k = inDimsBegin; k < inDimsEnd; k++)
fanIn *= sampleLayout[k];
let fanOut = numElements / fanIn; // remaining dimensions
let opts = ParseRandomizationType(type, fanOut, fanIn);
let isUniform = opts.first;
ElemType range = (ElemType)opts.second;
@ -224,18 +245,74 @@ void LearnableParameter<ElemType>::InitRandom(const std::wstring& type,
LogicError("InitRandom: Invalid initialization type '%ls'", type.c_str());
// the random seed offset is set via the "randomSeedOffset" parameter in config
fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, range=%f*%f, onCPU=%s).\n", NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(), (int)m_randomSeed, range, m_initValueScale, m_initOnCPUOnly ? "true" : "false");
fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, init dims=[%d x %d], range=%f*%f, onCPU=%s).\n",
NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(),
(int)m_randomSeed, (int)fanOut, (int)fanIn, range, m_initValueScale, m_initOnCPUOnly ? "true" : "false");
range *= initValueScale;
if (initOnCPUOnly)
Value().TransferToDeviceIfNotThere(CPUDEVICE, true);
if (isUniform)
value.SetUniformRandomValue(-range, range, randomSeed);
Value().SetUniformRandomValue(-range, range, randomSeed);
else
value.SetGaussianRandomValue(0, range, randomSeed);
Value().SetGaussianRandomValue(0, range, randomSeed);
if (initOnCPUOnly)
Value().TransferToDeviceIfNotThere(m_deviceId, true);
}
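
As a standalone illustration (the kernel shape [3 x 3 x 64 x 96] and initOutputRank = -1 below are made-up values, not taken from this change), the fan-in/fan-out split described above works out as follows:

#include <cstdio>
#include <cstdlib>
#include <vector>

int main()
{
    // Hypothetical convolution kernel [w x h x C x K]; K is the output dimension.
    std::vector<size_t> dims = { 3, 3, 64, 96 };
    int initOutputRank = -1; // negative: output dims are counted from the right

    size_t rank = dims.size();
    if ((size_t)std::abs(initOutputRank) > rank)
        return 1;

    size_t numElements = 1;
    for (size_t d : dims)
        numElements *= d;

    // fanIn multiplies [initOutputRank, rank) if initOutputRank >= 0, else [0, rank - |initOutputRank|).
    size_t inDimsBegin = (initOutputRank >= 0) ? (size_t)initOutputRank : 0;
    size_t inDimsEnd   = (initOutputRank >= 0) ? rank : rank - (size_t)(-initOutputRank);
    size_t fanIn = 1;
    for (size_t k = inDimsBegin; k < inDimsEnd; k++)
        fanIn *= dims[k];
    size_t fanOut = numElements / fanIn;

    std::printf("fanIn=%zu fanOut=%zu\n", fanIn, fanOut); // prints fanIn=576 fanOut=96
    return 0;
}
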
// Initialize with bilinear interpolation coefficients (useful for deconvolution layer).
template <class ElemType>
void LearnableParameter<ElemType>::InitBilinear(size_t kernelWidth, size_t kernelHeight)
{
if (kernelHeight != kernelWidth)
LogicError("Filter for bilinear interpolation must be square.");
// Transfer to CPU as GPU initialization is still not supported.
Value().TransferToDeviceIfNotThere(CPUDEVICE, true);
const SmallVector<size_t>& dims = GetSampleLayout().GetDims();
assert(dims.size() == 2);
const size_t kernelCount = dims[0];
const size_t kernelWeightCount = dims[1];
assert(kernelWeightCount % (kernelWidth * kernelHeight) == 0);
const size_t channels = kernelWeightCount / (kernelWidth * kernelHeight);
if (kernelCount != channels)
LogicError("Number of input and output channels of filter for bilinear interpolation must be equal.");
ElemType* data = Value().Data();
const size_t factor = (kernelWidth + 1) / 2;
const float center = (kernelWidth - 1) / 2.0f;
int count = 0;
// Filter dimensions are [W x H x C x K] or ARRAY[1..K] OF ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W], where:
// W = width, H = height, C = input channels, K = output channels.
// In deconvolution, the output channel should be an upsampled version of the corresponding input channel.
// 2D filter for bilinear interpolation where height=width=3 contains the following values:
// |0.25, 0.50, 0.25|
// |0.50, 1.00, 0.50|
// |0.25, 0.50, 0.25|
// So, an output kernel with dimensions [3 x 3 x C] will contain all zeros except for the channel which we want to
// upsample. For that channel it will contain the values above.
for (size_t kernel = 0; kernel < kernelCount; ++kernel)
{
for (size_t channel = 0; channel < channels; ++channel)
{
for (size_t h = 0; h < kernelHeight; ++h)
{
for (size_t w = 0; w < kernelWidth; ++w)
{
float val = 0;
if (kernel == channel)
{
val = (1 - fabs(w - center) / factor) * (1 - fabs(h - center) / factor);
}
data[count++] = val;
}
}
}
}
Value().TransferToDeviceIfNotThere(m_deviceId, true);
}
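
For reference, a small standalone sketch (assuming kernelWidth = kernelHeight = 3; the values are illustrative, not taken from this change) that prints the coefficients the loop above writes into the matching channel:

#include <cmath>
#include <cstddef>
#include <cstdio>

int main()
{
    const size_t kernelWidth = 3, kernelHeight = 3; // illustrative size
    const size_t factor = (kernelWidth + 1) / 2;    // 2
    const float center = (kernelWidth - 1) / 2.0f;  // 1.0
    for (size_t h = 0; h < kernelHeight; ++h)
    {
        for (size_t w = 0; w < kernelWidth; ++w)
        {
            float val = (1 - std::fabs(w - center) / factor) * (1 - std::fabs(h - center) / factor);
            std::printf("%.2f ", val);
        }
        std::printf("\n");
    }
    // Output:
    // 0.25 0.50 0.25
    // 0.50 1.00 0.50
    // 0.25 0.50 0.25
    return 0;
}
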
// initialize by reading a matrix from a text file
template <class ElemType>
void LearnableParameter<ElemType>::InitFromFile(const wstring& initFromFilePath)
@ -247,7 +324,7 @@ void LearnableParameter<ElemType>::InitFromFile(const wstring& initFromFilePath)
// initialize by reading a matrix from a text file
template <class ElemType>
void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols)
void LearnableParameter<ElemType>::InitFromArray(const vector<ElemType>& array, size_t numRows, size_t numCols)
{
// infer tensor dimensions from input file if not set
// Note: The mapping of dimensions of the input matrix to tensor dimensions are somewhat confusing.
@ -295,13 +372,13 @@ void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& ar
// TODO: Move this error check there, since this is called only from one place.
template <class ElemType>
void LearnableParameter<ElemType>::ReviseFromFile(const std::wstring& reviseFromFilePath)
void LearnableParameter<ElemType>::ReviseFromFile(const wstring& reviseFromFilePath)
{
try
{
InitFromFile(reviseFromFilePath);
}
catch (const std::exception & e)
catch (const exception & e)
{
RuntimeError("ReviseFromFile: Failed to reload %ls %ls operation from file %ls: %s", NodeName().c_str(), OperationName().c_str(), reviseFromFilePath.c_str(), e.what());
}
@ -356,7 +433,7 @@ void LearnableParameter<ElemType>::Load(File& fstream, size_t modelVersion) /*ov
}
template <class ElemType>
/*virtual*/ void LearnableParameter<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const /*override*/
/*virtual*/ void LearnableParameter<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const wstring& newName, const CopyNodeFlags flags) const /*override*/
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
@ -365,6 +442,7 @@ template <class ElemType>
node->m_initString = m_initString;
node->m_randomSeed = m_randomSeed;
node->m_initValueScale = m_initValueScale;
node->m_initOutputRank = m_initOutputRank;
node->m_initOnCPUOnly = m_initOnCPUOnly;
node->m_initValue = m_initValue;
}
@ -439,7 +517,7 @@ void LearnableParameter<ElemType>::LazyInitParameters()
}
else if (ParseRandomizationType(m_initString).second != 0)
{
InitRandom(m_initString, m_randomSeed, m_initValueScale, m_initOnCPUOnly);
InitRandom(m_initString, m_randomSeed, m_initValueScale, m_initOutputRank, m_initOnCPUOnly);
}
else
LogicError("LearnableParameter: Invalid value of m_initString '%ls' for deferred initialization for %ls.", m_initString.c_str(), NodeDescription().c_str());


@ -56,13 +56,16 @@ public:
unsigned long randomSeed = 0,
bool initOnCPUOnly = false);
// Initialize with bilinear interpolation coefficients (useful for deconvolution layer).
void InitBilinear(size_t kernelWidth, size_t kernelHeight);
// initialize by reading a matrix from a text file
void InitFromFile(const std::wstring& initFromFilePath);
private:
// initialize with random numbers
// If 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing).
void InitRandom(const std::wstring& type, const unsigned long randomSeed, const ElemType initValueScale, bool initOnCPUOnly);
void InitRandom(const std::wstring& type, const unsigned long randomSeed, const ElemType initValueScale, const int initOutputRank, const bool initOnCPUOnly);
// helper to initialize from a matrix read from a text file or a string literal
void InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols);
@ -103,6 +106,7 @@ private:
std::wstring m_initString; // if non-empty then deferred initialization is needed. Gets cleared upon completion of deferred init.
unsigned long m_randomSeed;
ElemType m_initValueScale;
int m_initOutputRank;
bool m_initOnCPUOnly;
ElemType m_initValue;
};


@ -238,8 +238,8 @@ class TimesNodeBase : public ComputationNode<ElemType>, public NumInputs<2>
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName; \
public:
TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
: Base(deviceId, name), m_outputRank(outputRank)
TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
: Base(deviceId, name), m_outputRank(outputRank), m_inferInputRankToMap(inferInputRankToMap)
{
}
@ -249,7 +249,8 @@ public:
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
node->m_outputRank = m_outputRank;
node->m_outputRank = m_outputRank;
node->m_inferInputRankToMap = m_inferInputRankToMap;
}
}
@ -257,6 +258,7 @@ public:
{
Base::Save(fstream);
fstream << m_outputRank;
fstream << m_inferInputRankToMap;
}
virtual void Load(File& fstream, size_t modelVersion) override
@ -266,6 +268,10 @@ public:
fstream >> m_outputRank;
else
m_outputRank = 1;
if (modelVersion >= CNTK_MODEL_VERSION_11)
fstream >> m_inferInputRankToMap;
else
m_inferInputRankToMap = -1;
}
private:
@ -420,19 +426,33 @@ public:
if (dimsA[k] == 0)
InvalidArgument("%ls %ls operation: The outputRank (%d) dimensions in left argument's shape [%s] must not be 0.", NodeName().c_str(), OperationName().c_str(), (int)m_outputRank, dimsAstring.c_str());
// if the last dimension of A is 0, then extend it to fully match B
// E.g. [I x 0] * [X x Y x Z x K] => infer as [I x X x Y x Z], not as [I x X].
// I.e. we cannot use inference to infer a matrix product on a part of an input tensor.
// We default to inferring the whole, as part of a tensor is a special use case.
assert (dimsA.size() == m_outputRank + numReductionDims);
while (numReductionDims < dimsB.size() && dimsA.back() == 0)
// infer rank of dimsA
// For purpose of dimension inference, Times() accepts an optional parameter inferInputRankToMap (default -1=unspecified).
// The last 'inferInputRankToMap' axes are considered those that the matrix product should keep (Times()
// is applied one by one, like a "map" operation) rather than reducing over.
// Specifically, inferInputRankToMap=0 means to reduce over all input axes, e.g. for an image input that
// should be flattened.
// Examples:
// [I x Inferred] * [J x K], inferInputRankToMap=n/a --> Inferred := J, result is [I x K]
// [I x Inferred] * [W x H x C], inferInputRankToMap=n/a --> Inferred := W, result is [I x H x C] (not desired)
// [I x Inferred x Inferred] * [W x H x C], inferInputRankToMap=n/a --> Inf x Inf := [W x H], result is [I x C]
// [I x Inferred] * [W x H x C], inferInputRankToMap=0 --> Inferred := W x H x C, result is [I] (desired)
// [I x Inferred] * [W x H x C x R], inferInputRankToMap=1 --> Inferred := W x H x C, result is [I x R] (desired)
// If W's shape is too short, it will be padded with 0 (i.e. inferred in a subsequent step).
if (m_inferInputRankToMap >= 0) // if given, we pad if needed
{
dimsA.push_back(0);
numReductionDims++;
if ((size_t)m_inferInputRankToMap >= dimsB.size() && isFinalValidationPass) // at least one axis must be left to reduce over
InvalidArgument("%ls %ls operation: 'inferInputRankToMap' argument %d must be less than rank of second operand [%s].", NodeName().c_str(), OperationName().c_str(), m_inferInputRankToMap, dimsBstring.c_str());
assert(dimsA.size() == m_outputRank + numReductionDims);
while (numReductionDims + (size_t)m_inferInputRankToMap < dimsB.size())
{
dimsA.push_back(0);
numReductionDims++;
}
}
// fill in the missing ones
// We fill in dimensions given as 0. The tensor rank is not inferred.
// We fill in dimensions given as 0. The tensor rank is not inferred here (that is done above).
for (size_t k = m_outputRank; k < dimsA.size(); k++)
{
auto& dimA = dimsA[k];
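
As a standalone illustration of the padding rule described above (the shapes and inferInputRankToMap value below are hypothetical, not taken from this change):

#include <cstdio>
#include <vector>

int main()
{
    std::vector<size_t> dimsA = { 128, 0 };       // left operand [I x Inferred]
    std::vector<size_t> dimsB = { 32, 32, 3, 7 }; // right operand [W x H x C x R]
    const size_t outputRank = 1;
    const int inferInputRankToMap = 1;            // keep the last axis (R) of B as a map axis

    size_t numReductionDims = dimsA.size() - outputRank;
    while (numReductionDims + (size_t)inferInputRankToMap < dimsB.size())
    {
        dimsA.push_back(0); // another axis to be filled in later (from W, H, C)
        numReductionDims++;
    }

    // dimsA is now [128 x 0 x 0 x 0]; after dimension inference the product result is [I x R].
    std::printf("padded rank of A: %zu\n", dimsA.size()); // prints 4
    return 0;
}
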
@ -478,6 +498,7 @@ public:
private:
size_t m_outputRank;
int m_inferInputRankToMap; // -1 (not specified) or says how to expand shape of W, to keep this many mapping dims
};
// -----------------------------------------------------------------------
@ -504,12 +525,12 @@ class TimesNode : public TimesNodeBase<ElemType, false>
static const std::wstring TypeName() { return L"Times"; }
public:
TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
: Base(deviceId, name, outputRank)
TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
: Base(deviceId, name, outputRank, inferInputRankToMap)
{
}
TimesNode(const ScriptableObjects::IConfigRecordPtr configp)
: TimesNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"outputRank"))
: TimesNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"outputRank"), configp->Get(L"inferInputRankToMap"))
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
}
@ -537,7 +558,7 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
public:
DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
: Base(deviceId, name, outputRank)
: Base(deviceId, name, outputRank, /*inferInputRankToMap=*/-1)
{
}
};
@ -545,134 +566,6 @@ public:
template class TransposeTimesNode<float>;
template class TransposeTimesNode<double>;
// -----------------------------------------------------------------------
// DiagTimesNode (vector representing the diagonal of a square matrix, data)
// TODO: This is redundant with ElementTimes and should be removed (with a compat stub).
// -----------------------------------------------------------------------
template <class ElemType>
class DiagTimesNode : public ComputationNode<ElemType>, public NumInputs<2>
{
typedef ComputationNode<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName()
{
return L"DiagTimes";
}
public:
DeclareConstructorFromConfigWithNumInputs(DiagTimesNode);
DiagTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
if (inputIndex == 0) // left derivative
{
Matrix<ElemType> sliceOutputGrad = MaskedGradientFor(fr); // use Masked- version since this is reducing over frames
Matrix<ElemType> sliceInput1Value = Input(1)->MaskedValueFor(fr);
m_innerproduct->AssignInnerProductOf(sliceOutputGrad, sliceInput1Value, false);
Input(0)->GradientAsMatrix() += *m_innerproduct;
}
else // right derivative
{
Matrix<ElemType> sliceOutputGrad = GradientFor(fr);
Matrix<ElemType> sliceInput1Grad = Input(1)->GradientFor(fr);
m_rightGradient->SetValue(sliceOutputGrad);
m_rightGradient->ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
sliceInput1Grad += *m_rightGradient;
}
}
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
// The DiagTimesNode does not require its output value for computing
// the gradients of its input nodes
return false;
}
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
sliceOutputValue.AssignValuesOf(sliceInput1Value);
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
size_t rows0 = Input(0)->GetAsMatrixNumRows();
size_t rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
// if dimension not specified we assume two operands' dimensions should match
Input(0)->ValidateInferInputDimsFrom(TensorShape(rows1));
if (Input(1)->HasMBLayout())
{
// infer rows1 as rows0
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0));
SetDims(TensorShape(rows0), true);
}
else // multiplying two straight matrices
{
size_t cols1 = Input(1)->GetAsMatrixNumCols();
// infer rows1 as rows0
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0, cols1));
SetDims(TensorShape(rows0, cols1), false);
}
// update after inference
rows0 = Input(0)->GetAsMatrixNumRows();
rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
if (isFinalValidationPass && rows0 != rows1)
InvalidArgument("The inner matrix dimension in the %ls %ls operation does not match (%d vs. %d).", NodeName().c_str(), OperationName().c_str(), (int) rows1, (int) rows0);
size_t cols0 = Input(0)->GetAsMatrixNumCols();
if (isFinalValidationPass && cols0 != 1)
InvalidArgument("The first matrix should be a column vector representing the diagonal of a square matrix in the DiagTimes operation.");
SetDims(Input(1));
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<DiagTimesNode<ElemType>>(nodeP);
node->m_innerproduct->SetValue(*m_innerproduct);
node->m_rightGradient->SetValue(*m_rightGradient);
}
}
// request matrices that are needed for gradient computation
virtual void RequestMatricesBeforeBackprop(MatrixPool& matrixPool)
{
Base::RequestMatricesBeforeBackprop(matrixPool);
RequestMatrixFromPool(m_innerproduct, matrixPool);
RequestMatrixFromPool(m_rightGradient, matrixPool);
}
// release gradient and temp matrices that no longer needed after all the children's gradients are computed.
virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool)
{
Base::ReleaseMatricesAfterBackprop(matrixPool);
ReleaseMatrixToPool(m_innerproduct, matrixPool);
ReleaseMatrixToPool(m_rightGradient, matrixPool);
}
private:
shared_ptr<Matrix<ElemType>> m_innerproduct;
shared_ptr<Matrix<ElemType>> m_rightGradient;
};
template class DiagTimesNode<float>;
template class DiagTimesNode<double>;
// -----------------------------------------------------------------------
// SumElementsNode (input)
// Sums up all elements in the input across all samples into a single scalar.


@ -37,6 +37,7 @@ template <class ElemType>
node->m_axis = m_axis;
node->m_operation = m_operation;
node->m_reductionOp = m_reductionOp;
node->m_scale = m_scale;
}
}
@ -64,8 +65,8 @@ template <class ElemType>
auto input = Input(0)->ValueTensorFor(rank, fr);
// the actual operation is a Copy with reduction, where the magic is in the reduction op
result.DoUnaryOpOf(0, input, 1, ElementWiseOperator::opCopy, m_reductionOp);
// note: we can implement "Mean" by passing 1/dim for alpha
// For "Mean", m_scale is 1/#elements, and 1 otherwise.
result.DoUnaryOpOf(0, input, m_scale, ElementWiseOperator::opCopy, m_reductionOp);
}
template <class ElemType>
@ -82,8 +83,9 @@ template <class ElemType>
switch (m_reductionOp)
{
case ElementWiseOperator::opSum:
// "Sum": broadcast the gradient
sliceInputGrad.AddCopyOf(sliceOutputGrad);
// "Sum": broadcast the gradient
// "Mean": same as "Sum" with scaling by 1/#dims
sliceInputGrad.AddCopyOf(sliceOutputGrad, m_scale);
break;
case ElementWiseOperator::opLogSum:
@ -95,7 +97,7 @@ template <class ElemType>
// df / dx = exp(x)/exp(f)
// = exp(x - f)
sliceInputGrad.AddElementwiseProductWithExpOfDiffOf(sliceOutputGrad, input, output);
}
}
break;
case ElementWiseOperator::opMin:
@ -120,12 +122,6 @@ template <class ElemType>
break;
// more coming
// "LogPlus": softmax
// f(x) = log(sum_i exp x_i), hence gradient is:
// df / dx_i = 1 / (sum_j exp x_j) * exp x_i = (Softmax(x))_i = exp(x_i - ReduceLogPlus(x))
// targetGradient = gradientFromTop .* Exp (inputValue - outputValue) --TODO: verify
// i.e. compute difference of input and output, then Exp in-place. No, would need temp memory. So needs its own opcode AddScaledExpOfDiff(). Ternary.
}
}
@ -164,6 +160,7 @@ void ReduceElementsNode<ElemType>::ValidateOp()
else
#endif
if (m_operation == L"Sum") m_reductionOp = ElementWiseOperator::opSum;
else if (m_operation == L"Mean") m_reductionOp = ElementWiseOperator::opSum;
else if (m_operation == L"LogSum") m_reductionOp = ElementWiseOperator::opLogSum;
else if (m_operation == L"Min") m_reductionOp = ElementWiseOperator::opMin;
else if (m_operation == L"Max") m_reductionOp = ElementWiseOperator::opMax;
@ -183,13 +180,26 @@ template <class ElemType>
let shape = Input(0)->GetSampleLayout();
auto dims = shape.GetDims();
size_t reducedDim = 0; // (init to keep compiler happy)
if (m_axis == 0)
{
reducedDim = shape.GetNumElements();
dims = { 1 }; // entire sample is reduced to a scalar
}
else if (m_axis - 1 >= 0 && m_axis - 1 < dims.size())
{
reducedDim = dims[m_axis - 1];
dims[m_axis - 1] = 1; // one axis is reduced to a scalar
}
else if (isFinalValidationPass)
InvalidArgument("The shape of %ls [%s] has no axis %d", NodeDescription().c_str(), string(shape).c_str(), m_axis);
// for "Mean", we must divide by #elements
if (isFinalValidationPass && m_operation == L"Mean")
m_scale = (ElemType)(1.0 / reducedDim);
else
m_scale = (ElemType)1;
SetDims(TensorShape(dims), Input(0)->HasMBLayout());
}
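
A tiny standalone sketch of the effect of m_scale (the values are illustrative): "Mean" reuses the "Sum" reduction with an extra factor of 1/#elements in both the forward pass and the gradient.

#include <cstdio>

int main()
{
    const float x[4] = { 1, 2, 3, 4 };
    const float scale = 1.0f / 4; // m_scale for "Mean"; stays 1 for "Sum"

    float sum = 0;
    for (float v : x)
        sum += v;
    float mean = scale * sum;     // forward: copy-with-reduction scaled by m_scale

    float outputGrad = 1.0f;
    float inputGrad = scale * outputGrad; // backprop: the broadcast gradient carries the same scale

    std::printf("mean=%.2f  d(mean)/dx_i=%.2f\n", mean, inputGrad); // 2.50  0.25
    return 0;
}
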


@ -176,10 +176,10 @@ template class ReshapeNode<double>;
// The optional axis can be 0 (meaning all elements) or a specific axis.
// Allowed operations:
// - "Sum"
// - "LogSum" --not implemented yet
// - "Mean" --not implemented yet
// - "Max" --not implemented yet
// - "Min" --not implemented yet
// - "LogSum"
// - "Mean"
// - "Max"
// - "Min"
// - "All" --not implemented yet
// - "Any" --not implemented yet
// TODO:
@ -196,7 +196,7 @@ class ReduceElementsNode : public ComputationNode<ElemType>, public NumInputs<1>
void ValidateOp();
public:
ReduceElementsNode(DEVICEID_TYPE deviceId, const wstring& name, const std::wstring& operation = std::wstring(), int axis = 0) :
Base(deviceId, name), m_operation(operation), m_axis(axis), m_reductionOp((ElementWiseOperator)-1/*invalid*/)
Base(deviceId, name), m_operation(operation), m_axis(axis), m_reductionOp((ElementWiseOperator)-1/*invalid*/), m_scale(0/*invalid*/)
{
if (!m_operation.empty()) // verify validity already here out of courtesy (would otherwise be caught in Validate())
ValidateOp();
@ -221,9 +221,13 @@ public:
int ReductionAxis() const { return m_axis; }
private:
// operation attributes
int m_axis;
std::wstring m_operation; // the operation as a string, e.g. "Sum", see ValidateOp()
std::wstring m_operation; // the operation as a string, e.g. "Sum", see ValidateOp()
// things cached during validation
ElementWiseOperator m_reductionOp; // the reduction operation mapped to our internal opCode
ElemType m_scale; // 1 or, for Mean, 1/number of elements we are reducing over
};
// -----------------------------------------------------------------------


@ -1629,7 +1629,7 @@ public:
fstream >> m_normTimeConst;
fstream >> m_blendTimeConst;
fstream >> m_imageLayoutKind;
if (modelVersion >= CNTK_MODEL_VERSION_12)
if (modelVersion >= CNTK_MODEL_VERSION_13)
fstream >> m_samplesSeen;
else
fstream >> mbCount; // converted below
@ -1677,7 +1677,7 @@ public:
}
}
if (modelVersion < CNTK_MODEL_VERSION_12)
if (modelVersion < CNTK_MODEL_VERSION_13)
{
// Prior to version 12, minibatch count was stored instead of samples seen.
// Approximate by assuming minibatch size 16, inform about that.
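
Roughly, the conversion described in the comment above amounts to the following (the concrete count is a hypothetical value):

#include <cstdint>
#include <cstdio>

int main()
{
    uint64_t mbCount = 1000;             // minibatch count read from an old-format model (hypothetical value)
    uint64_t samplesSeen = mbCount * 16; // approximation, assuming minibatch size 16 as stated above
    std::printf("samplesSeen ~= %llu\n", (unsigned long long)samplesSeen);
    return 0;
}
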
@ -1779,7 +1779,7 @@ public:
LogicError("%ls: Failed to convert running variance until forward prop", NodeName().c_str());
FrameRange fr(Input(0)->GetMBLayout());
Matrix<ElemType> sliceInputValue = Input(0)->ValueFor(fr);
Matrix<ElemType> sliceInputValue = Input(0)->MaskedValueFor(fr);
const Matrix<ElemType>& scale = Input(1)->Value();
const Matrix<ElemType>& bias = Input(2)->Value();
Matrix<ElemType>& runMean = Input(3)->Value();
@ -1828,10 +1828,10 @@ public:
if (inputIndex == 0) // derivative with respect to the input.
{
auto sliceOutputGrad = GradientFor(fr);
auto sliceInputValue = Input(0)->ValueFor(fr);
const Matrix<ElemType>& scale = Input(1)->Value();
const Matrix<ElemType>& bias = Input(2)->Value();
auto sliceOutputGrad = MaskedGradientFor(fr);
auto sliceInputValue = Input(0)->ValueFor(fr);
const Matrix<ElemType>& scale = Input(1)->Value();
const Matrix<ElemType>& bias = Input(2)->Value();
auto sliceInputGrad = Input(0)->GradientFor(fr);
m_dScale->Resize(scale); // gradients for scale and bias get stored here


@ -127,13 +127,13 @@ std::unique_ptr<BatchNormEngine<ElemType>> BatchNormEngine<ElemType>::Create(DEV
// Use CNTK as default batch norm engine.
if (HasFlag(enabledEngines, BatchNormEngineKind::Cntk))
{
fprintf(stderr, "\nUsing CNTK batch normalization engine.\n");
fprintf(stderr, "Using CNTK batch normalization engine.\n");
return std::make_unique<CntkBatchNormEngine<ElemType>>(deviceId, inOutT, spatial, imageLayout);
}
if (HasFlag(enabledEngines, BatchNormEngineKind::CuDnn))
{
fprintf(stderr, "\nUsing cuDNN batch normalization engine.\n");
fprintf(stderr, "Using cuDNN batch normalization engine.\n");
return CuDnnBatchNormEngineFactory<ElemType>::Create(deviceId, inOutT, spatial, imageLayout);
}


@ -81,12 +81,7 @@ public:
static cudacode void ComputeRangeStatColj(const ElemType* inMat, const ElemType* inResidual, long M, size_t j, size_t bits, ElemType& lower, ElemType& upper)
{
/*dummy reducers do nothing in linear CPU version*/
ComputeRangeStatColjSubset<ZeroThresholdFor1Bit>(inMat, inResidual, M, j, bits, lower, upper, 0, 1, [](ElemType&)
{
},
[](unsigned int&)
{
});
ComputeRangeStatColjSubset<ZeroThresholdFor1Bit>(inMat, inResidual, M, j, bits, lower, upper, 0, 1, [](ElemType&){}, [](unsigned int&){});
}
public:
@ -231,9 +226,9 @@ public:
// i.e.
// - do not symmetrize/pool the quantization values for 0 and 1
// - but hard-code the quantization threshold to be 0 instead of the mean of the two bounds
// This should give us the best of all--fast operation yet ability to be asymmetric within a column
// This should give us the best of all--fast operation yet ability to be asymmetric within a column.
ElemType mean = 0.0f;
if (!ZeroThresholdFor1Bit || (bits != 1))
if (!ZeroThresholdFor1Bit && (bits == 1))
{
ElemType meanacc = 0.0f;
// (subset: compute subset sum)
@ -320,7 +315,7 @@ public:
}
else
{
ElemType stddevs = 5.0f;
ElemType stddevs = 4.0f; // TODO: make this a parameter
// >1 bit:
// We linearly quantize between 'stddevs' standard deviations.
ElemType varacc = 0.0f;
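
A standalone sketch of the range computation this constant feeds (the column values are made up; the real code accumulates the statistics over a subset of rows):

#include <cmath>
#include <cstdio>

int main()
{
    const float col[5] = { 0.1f, -0.2f, 0.4f, 0.0f, -0.3f };
    const float stddevs = 4.0f; // was 5.0f before this change

    float mean = 0;
    for (float v : col)
        mean += v;
    mean /= 5;

    float var = 0;
    for (float v : col)
        var += (v - mean) * (v - mean);
    var /= 5;

    // For more than 1 bit, values are linearly quantized between 'stddevs' standard deviations around the mean.
    float lower = mean - stddevs * std::sqrt(var);
    float upper = mean + stddevs * std::sqrt(var);
    std::printf("quantization range: [%f, %f]\n", lower, upper);
    return 0;
}
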
@ -349,7 +344,6 @@ private:
template <typename T>
friend class QuantizedMatrix;
};
}
}
}
}}}
#endif


@ -866,7 +866,7 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
if (!isEnabled(ConvolutionEngineKind::Legacy))
RuntimeError("Trying to use Legacy convolution engine when it's disabled.");
// REVIEW alexeyk: should honor m_traceLevel here.
fprintf(stderr, "\n%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<LegacyConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
@ -874,19 +874,19 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
if (isEnabled(ConvolutionEngineKind::CuDnn) &&
CuDnnConvolutionEngineFactory<ElemType>::IsSupported(deviceId, geometry, poolKind))
{
fprintf(stderr, "\n%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return CuDnnConvolutionEngineFactory<ElemType>::Create(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (isEnabled(ConvolutionEngineKind::Gemm) && GemmConvolutionEngine<ElemType>::IsSupported(deviceId, geometry))
{
fprintf(stderr, "\n%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<GemmConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (!isEnabled(ConvolutionEngineKind::Reference))
RuntimeError("Reference convolution is disabled and no other engine supports such configuratin (or disabled).");
fprintf(stderr, "\n%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
fprintf(stderr, "%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<ReferenceConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}


@ -14,7 +14,9 @@ GPURNGHandle::GPURNGHandle(int deviceId, unsigned long seed)
: RNGHandle(deviceId)
{
unsigned long long cudaSeed = seed;
#ifdef _DEBUG
fprintf(stderr, "(GPU): creating curand object with seed %llu\n", cudaSeed);
#endif
CURAND_CALL(curandCreateGenerator(&m_generator, CURAND_RNG_PSEUDO_XORWOW));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(m_generator, cudaSeed));


@ -32,33 +32,33 @@ void MatrixQuantizerCPU<ElemType>::QuantizeAsync(const Matrix<ElemType>& inMatri
#else
for (size_t j = 0; j < nCol; j++)
#endif
{
auto& qcol = *(outQMatrix.GetQuantizedColumn(j));
if (zeroThresholdFor1Bit)
{
// Explicit use of 'template' keyword is needed to compile with GCC
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
}
else
{
// Explicit use of 'template' keyword is needed to compile with GCC
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
}
{
auto& qcol = *(outQMatrix.GetQuantizedColumn(j));
if (zeroThresholdFor1Bit)
{
// Explicit use of 'template' keyword is needed to compile with GCC
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
}
else
{
// Explicit use of 'template' keyword is needed to compile with GCC
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
}
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
if (zeroThresholdFor1Bit)
{
// Explicit use of 'template' keyword is needed to compile with GCC
q.template Quantize<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
}
else
{
// Explicit use of 'template' keyword is needed to compile with GCC
q.template Quantize<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
}
}
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
if (zeroThresholdFor1Bit)
{
// Explicit use of 'template' keyword is needed to compile with GCC
q.template Quantize<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
}
else
{
// Explicit use of 'template' keyword is needed to compile with GCC
q.template Quantize<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
}
}
#ifdef QUANTUSEPPL
);
);
#endif
}
@ -89,13 +89,13 @@ void MatrixQuantizerCPU<ElemType>::UnquantizeAsync(QuantizedMatrix<ElemType>& in
#else
for (size_t j = 0; j < nCol; j++)
#endif
{
const auto& qcol = *(inQMatrix.GetQuantizedColumn(j));
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
q.Unquantize(outMatrix.Data(), (long) nRow, j, qcol.bits, add);
}
{
const auto& qcol = *(inQMatrix.GetQuantizedColumn(j));
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
q.Unquantize(outMatrix.Data(), (long) nRow, j, qcol.bits, add);
}
#ifdef QUANTUSEPPL
);
);
#endif
}
@ -108,4 +108,5 @@ void MatrixQuantizerCPU<ElemType>::WaitUnquantizeAsyncDone()
//The explicit instantiation part will make the linker happy
template class MatrixQuantizerCPU<float>;
template class MatrixQuantizerCPU<double>;
} } }
}}}


@ -182,4 +182,5 @@ void QuantizedMatrix<ElemType>::Print(const char* matrixName, size_t rowStart, s
// Explicit instantiation
template class QuantizedMatrix<float>;
template class QuantizedMatrix<double>;
} } }
}}}


@ -119,4 +119,5 @@ private:
template <typename T>
friend class MatrixQuantizer;
};
} } }
}}}


@ -83,10 +83,12 @@ public:
}
else
{
// make the range asymmetrical, so we get a 0 slot
size_t usedrangeend = rangeend - (Nbits > 1); // TODO: make this a parameter
// precompute this for quantize() (see comment there)
qfactor = rangeend / (quantimax - quantimin);
qfactor = usedrangeend / (quantimax - quantimin);
// and for unquantize()
ufactor = (quantimax - quantimin) / rangeend;
ufactor = (quantimax - quantimin) / usedrangeend;
}
// set the quantization threshold for the special case of 1-bit
@ -127,6 +129,7 @@ public:
// unquantize one value
cudasharedcode ElemType Unquantize(QWordVal u) const
{
// special branch that does not quantize at all, for testing
if (Nbits == QWordNumBits)
{
return *(ElemType*) &u;


@ -311,10 +311,10 @@ public:
wstring key;
if (!labels.empty()) // empty means unsupervised mode (don't load any)
{
#ifdef _MSC_VER
#ifdef _WIN32
key = regex_replace((wstring) ppath, wregex(L"\\.[^\\.\\\\/:]*$"), wstring()); // delete extension (or not if none)
#else
key = removeExtension(basename(ppath));
key = removeExtension(ppath);
#endif
if (labels.find(key) == labels.end())
{
@ -630,9 +630,8 @@ public:
{
#ifdef _WIN32
key = regex_replace((wstring) ppath, wregex(L"\\.[^\\.\\\\/:]*$"), wstring()); // delete extension (or not if none)
#endif
#ifdef __unix__
key = removeExtension(basename(ppath));
#else
key = removeExtension(ppath);
#endif
if (labels[0].find(key) == labels[0].end())
{


@ -74,7 +74,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(OpenCvInclude);$(ZipInclude);$(SolutionDir)Source\Readers\ReaderLib</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(OpenCvInclude);$(ZipInclude);$(SolutionDir)Source\Readers\ReaderLib;$(BOOST_INCLUDE_PATH)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir);$(OpenCvLibPath);$(ZipLibPath)</AdditionalLibraryDirectories>
@ -127,10 +127,11 @@
<ClCompile Include="ZipByteReader.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<Target Name="Build" Condition="$(HasOpenCv)" Outputs="$(TargetPath)" DependsOnTargets="$(BuildDependsOn)" />
<Target Name="Build" Condition="$(HasOpenCv) And $(HasBoost)" Outputs="$(TargetPath)" DependsOnTargets="$(BuildDependsOn)" />
<ImportGroup Label="ExtensionTargets" />
<Target Name="CheckDependencies">
<Warning Condition="!$(HasBoost)" Text="ImageReader requires the Boost library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#boost for installation instructions." />
<Warning Condition="!$(HasOpenCv)" Text="ImageReader requires the OpenCV library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#opencv for installation instructions." />
<Warning Condition="!$(UseZip)" Text="zlib and libzip libraries were not found, ImageReader will be built without zip container support. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#libzip for installation instructions." />
</Target>
</Project>
</Project>

Some files were not shown because too many files have changed in this diff.