Merge remote-tracking branch 'origin/master' into mahilleb/CuDnn5Test
Conflicts:
	Source/ComputationNetworkLib/ComputationNode.h
	Source/ComputationNetworkLib/TrainingNodes.h
	Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv/baseline.linux.txt
	Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv/baseline.windows.txt
	Tests/UnitTests/MathTests/ConvolutionEngineTests.cpp
Commit 0285fa9a13
@@ -31,6 +31,9 @@
<HasOpenCv>false</HasOpenCv>
<HasOpenCv Condition="Exists('$(OPENCV_PATH)') Or Exists('$(OPENCV_PATH_V31)')">true</HasOpenCv>

<HasBoost>false</HasBoost>
<HasBoost Condition="Exists('$(BOOST_INCLUDE_PATH)') And Exists('$(BOOST_LIB_PATH)')">true</HasBoost>

<UseZip>false</UseZip>
<UseZip Condition="Exists('$(ZLIB_PATH)')">true</UseZip>
CNTK.sln | 76
|
@ -1156,6 +1156,72 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\U
|
|||
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tutorials", "Tutorials", "{8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ImageHandsOn", "ImageHandsOn", "{2230BF3D-4317-4A3F-A743-DDD6160503F8}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\ImageHandsOn\ImageHandsOn.cntk = Tutorials\ImageHandsOn\ImageHandsOn.cntk
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLUHandsOn", "SLUHandsOn", "{CC143D08-567D-4DAC-9E14-264749C19039}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\SLUHandsOn\SLUHandsOn.cntk = Tutorials\SLUHandsOn\SLUHandsOn.cntk
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Pretrained Models", "Pretrained Models", "{0ED2EE97-0A26-4865-871F-11033867BA34}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\ImageHandsOn\cifar10.pretrained.cmf = Tutorials\ImageHandsOn\cifar10.pretrained.cmf
|
||||
Tutorials\ImageHandsOn\cifar10.ResNet.cmf = Tutorials\ImageHandsOn\cifar10.ResNet.cmf
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solutions", "Solutions", "{A2A4893C-0D5B-42E2-BFAD-C123AE7FDAFD}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Solution1.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution1.cntk
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Solution2.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution2.cntk
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Solution3.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution3.cntk
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Solution4.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution4.cntk
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Solution5.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Solution5.cntk
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Task4_Start.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Task4_Start.cntk
|
||||
Tutorials\ImageHandsOn\ImageHandsOn_Task6.cntk = Tutorials\ImageHandsOn\ImageHandsOn_Task6.cntk
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{8CFBD0DB-5F16-48E6-984C-4401317FA10E}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\SLUHandsOn\atis.test.ctf = Tutorials\SLUHandsOn\atis.test.ctf
|
||||
Tutorials\SLUHandsOn\atis.train.ctf = Tutorials\SLUHandsOn\atis.train.ctf
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solutions", "Solutions", "{BD7FF8C0-EC3A-49CD-9D81-4A8A29B8AD8E}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\SLUHandsOn\SLUHandsOn_Solution1.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution1.cntk
|
||||
Tutorials\SLUHandsOn\SLUHandsOn_Solution2.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution2.cntk
|
||||
Tutorials\SLUHandsOn\SLUHandsOn_Solution3.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution3.cntk
|
||||
Tutorials\SLUHandsOn\SLUHandsOn_Solution4.cntk = Tutorials\SLUHandsOn\SLUHandsOn_Solution4.cntk
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Pretrained Models", "Pretrained Models", "{4727594B-A052-4834-B0E8-57DBB9ADEF13}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\SLUHandsOn\slu.forward.backward.cmf = Tutorials\SLUHandsOn\slu.forward.backward.cmf
|
||||
Tutorials\SLUHandsOn\slu.forward.cmf = Tutorials\SLUHandsOn\slu.forward.cmf
|
||||
Tutorials\SLUHandsOn\slu.forward.lookahead.cmf = Tutorials\SLUHandsOn\slu.forward.lookahead.cmf
|
||||
Tutorials\SLUHandsOn\slu.forward.nobn.cmf = Tutorials\SLUHandsOn\slu.forward.nobn.cmf
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{4A59163B-1EDE-4439-9E7D-40A30B82A3A0}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tutorials\ImageHandsOn\CifarConverter.py = Tutorials\ImageHandsOn\CifarConverter.py
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TIMIT", "TIMIT", "{B586AA4C-0BB9-4629-9EDA-25FF2618AC9F}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TrainSimpleNetwork", "TrainSimpleNetwork", "{C2102C39-BF5F-4B12-9C41-849D1ED35EE8}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.linux.txt = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.linux.txt
|
||||
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.windows.txt = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\baseline.windows.txt
|
||||
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\run-test = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\run-test
|
||||
Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\testcases.yml = Tests\EndToEndTests\Examples\Speech\TIMIT\TrainSimpleNetwork\testcases.yml
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
|
||||
|
@ -1601,5 +1667,15 @@ Global
|
|||
{1C6E6C53-1AA7-4B69-913E-B97BB5A872CF} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
|
||||
{CCC07E8E-F33A-4AF7-9F60-93E2AA61C75E} = {3385EBEA-5F97-4B2B-9F30-0E6D7F91B9CA}
|
||||
{9F999212-AFC5-4EAC-AA78-F7247D46C456} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
|
||||
{2230BF3D-4317-4A3F-A743-DDD6160503F8} = {8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}
|
||||
{CC143D08-567D-4DAC-9E14-264749C19039} = {8BE0642A-A3AA-4A64-95D0-C78FB285B2A4}
|
||||
{0ED2EE97-0A26-4865-871F-11033867BA34} = {2230BF3D-4317-4A3F-A743-DDD6160503F8}
|
||||
{A2A4893C-0D5B-42E2-BFAD-C123AE7FDAFD} = {2230BF3D-4317-4A3F-A743-DDD6160503F8}
|
||||
{8CFBD0DB-5F16-48E6-984C-4401317FA10E} = {CC143D08-567D-4DAC-9E14-264749C19039}
|
||||
{BD7FF8C0-EC3A-49CD-9D81-4A8A29B8AD8E} = {CC143D08-567D-4DAC-9E14-264749C19039}
|
||||
{4727594B-A052-4834-B0E8-57DBB9ADEF13} = {CC143D08-567D-4DAC-9E14-264749C19039}
|
||||
{4A59163B-1EDE-4439-9E7D-40A30B82A3A0} = {2230BF3D-4317-4A3F-A743-DDD6160503F8}
|
||||
{B586AA4C-0BB9-4629-9EDA-25FF2618AC9F} = {FB7AF7B9-6BEA-459F-94D9-94D53916D2B6}
|
||||
{C2102C39-BF5F-4B12-9C41-849D1ED35EE8} = {B586AA4C-0BB9-4629-9EDA-25FF2618AC9F}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
|
@ -259,7 +259,7 @@ CE=CrossEntropyWithSoftmax(labels, Plus2)
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
ErrPredict=ErrorPrediction(labels, Plus2)
|
||||
ErrPredict=ClassificationError(labels, Plus2)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
@ -616,7 +616,7 @@ CE=CrossEntropyWithSoftmax(labels, Plus2)
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
ErrPredict=ErrorPrediction(labels, Plus2)
|
||||
ErrPredict=ClassificationError(labels, Plus2)
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
@ -633,19 +633,19 @@ CrossEntropyWithSoftmax
|
|||
|
||||
\end_inset
|
||||
|
||||
() to compute the training criterion and the operator ErrorPrediction
|
||||
() to compute the training criterion and the operator ClassificationError
|
||||
\begin_inset Index idx
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
ErrorPrediction
|
||||
ClassificationError
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
() to compute the testing criterion.
|
||||
These operators are internally represented as computation nodes CrossEntropyWit
|
||||
hSoftmaxNode and ErrorPredictionNode with names CE and ErrPredict, respectively.
|
||||
hSoftmaxNode and ClassificationErrorNode with names CE and ErrPredict, respectively.
|
||||
\end_layout
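(For reference, a minimal NDL fragment using the renamed operator; it simply restates the CE/ErrPredict example shown earlier in this document, with labels and Plus2 as defined there:
CE=CrossEntropyWithSoftmax(labels, Plus2)
ErrPredict=ClassificationError(labels, Plus2)
The computation nodes behind these two lines are the CrossEntropyWithSoftmaxNode and ClassificationErrorNode mentioned above.)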
|
||||
|
||||
\begin_layout Subsubsection
|
||||
|
@ -740,7 +740,7 @@ status open
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
ErrPredict=ErrorPrediction(labels, Plus2) # classification error
|
||||
ErrPredict=ClassificationError(labels, Plus2) # classification error
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
@ -1025,7 +1025,7 @@ reference "sub:NDL-Basic-Concepts"
|
|||
but is much simpler and easier to understand because of the use of macros.
|
||||
One new feature shown in this network definition is the access to macro-region
|
||||
variables.
|
||||
ErrorPrediction() needs to access an intermediate result from SMBFF before
|
||||
ClassificationError() needs to access an intermediate result from SMBFF before
|
||||
the CrossEntropyWithSoftmax() is applied.
|
||||
Although the needed variable is local to the macro, it can be accessed
|
||||
via the
|
||||
|
@ -1107,7 +1107,7 @@ CE = SMBFF(L1, LDim, HDim, labels)
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
Err=ErrorPrediction(labels, CE.F)
|
||||
Err=ClassificationError(labels, CE.F)
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
@ -1280,7 +1280,7 @@ CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
Err=ErrorPrediction(labels, CE.F, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.F, tag="evaluation")
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
@ -2900,12 +2900,12 @@ classProbBeforeSoftmax - applying softmax on this matrix will result in
|
|||
\end_layout
|
||||
|
||||
\begin_layout Subsubsection
|
||||
ErrorPrediction
|
||||
ClassificationError
|
||||
\begin_inset Index idx
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
ErrorPrediction
|
||||
ClassificationError
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
@ -2941,7 +2941,7 @@ status open
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
ErrorPrediction(labels, m)
|
||||
ClassificationError(labels, m)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
@ -4059,7 +4059,7 @@ CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
Err=ErrorPrediction(labels, CE.F, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.F, tag="evaluation")
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
|
|
@ -290,7 +290,7 @@ cntkSpeech.dnn"
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
evalCriterion="ErrorPrediction"
|
||||
evalCriterion="ClassificationError"
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
@ -1610,7 +1610,7 @@ CE1=CrossEntropyWithSoftmax(labels,BFF1.FF.P,tag="evaluation")
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
FER1 = ErrorPrediction(labels,BFF1.FF.P,tag="evaluation")
|
||||
FER1 = ClassificationError(labels,BFF1.FF.P,tag="evaluation")
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
@ -1634,7 +1634,7 @@ CE2=CrossEntropyWithSoftmax(regions,BFF2.FF.P,tag="evaluation")
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
FER2 = ErrorPrediction(regions,BFF2.FF.P,tag="evaluation")
|
||||
FER2 = ClassificationError(regions,BFF2.FF.P,tag="evaluation")
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
|
|
@ -514,7 +514,7 @@ Simple_Demo=[
|
|||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
evalCriterion="ErrorPrediction"
|
||||
evalCriterion="ClassificationError"
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
|
|
@ -52,12 +52,12 @@ train = [
|
|||
z = DNNLayer (hiddenDim, labelDim, h1, 1)
|
||||
|
||||
ce = CrossEntropyWithSoftmax (labels, z)
|
||||
errs = ErrorPrediction (labels, z)
|
||||
errs = ClassificationError (labels, z)
|
||||
|
||||
# set top5Errs as an evaluation node to compute the top-5 error rate
|
||||
# This is not marked tag="evaluation" since expensive during training.
|
||||
# We explicitly select it as an output node in the "test" command.
|
||||
top5Errs = ErrorPrediction (labels, z, topN=5)
|
||||
top5Errs = ClassificationError (labels, z, topN=5)
|
||||
|
||||
# declare special nodes
|
||||
featureNodes = (features)
|
||||
|
|
|
@ -22,8 +22,8 @@ DNN = [
|
|||
ol = DNNLayer(hiddenDim, labelDim, h1, 1)
|
||||
|
||||
ce = CrossEntropyWithSoftmax(labels, ol)
|
||||
errs = ErrorPrediction(labels, ol)
|
||||
top5Errs = ErrorPrediction(labels, ol, Const(5), tag="eval") # only used in testing
|
||||
errs = ClassificationError(labels, ol)
|
||||
top5Errs = ClassificationError(labels, ol, Const(5), tag="eval") # only used in testing
|
||||
|
||||
# Special Nodes
|
||||
FeatureNodes = (features)
|
||||
|
|
|
@ -58,7 +58,7 @@ DNN=[
|
|||
ol = DNNLayer(h1Dim, labelDim, h1, 1)
|
||||
|
||||
ce = CrossEntropyWithSoftmax(labels, ol)
|
||||
errs = ErrorPrediction(labels, ol)
|
||||
errs = ClassificationError(labels, ol)
|
||||
|
||||
# Special Nodes
|
||||
FeatureNodes = (features)
|
||||
|
|
|
@ -64,7 +64,7 @@ DNN = [
|
|||
ol = DNNLayer(h1Dim, labelDim, h1, 1)
|
||||
|
||||
ce = CrossEntropyWithSoftmax(labels, ol)
|
||||
errs = ErrorPrediction(labels, ol)
|
||||
errs = ClassificationError(labels, ol)
|
||||
|
||||
# Special Nodes
|
||||
FeatureNodes = (features)
|
||||
|
|
|
@ -48,7 +48,7 @@ DNN=[
|
|||
deconv1 = DeconvReLULayer(unpool1, kW1, kH1, imageC, 25, cMap1, hStride1, vStride1, lpad1, upad1, wScale1, bValue1)
|
||||
|
||||
mse = SquareError(featScaled, deconv1)
|
||||
#err = ErrorPrediction(labels, ol)
|
||||
#err = ClassificationError(labels, ol)
|
||||
|
||||
# Special Nodes
|
||||
FeatureNodes = (features)
|
||||
|
|
|
@ -79,7 +79,7 @@ DNN=[
|
|||
ol = DNNLastLayer(hiddenDim, labelDim, h1_d, fc2WScale, fc2BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ DNN=[
|
|||
ol = DNNLastLayer(hiddenDim, labelDim, h1, fc2WScale, fc2BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ DNN=[
|
|||
ol = DnnLastLayer(cMap3, labelDim, pool, fc1WScale, fc1BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ DNN=[
|
|||
ol = DnnLastLayer(cMap3, labelDim, pool, fc1WScale, fc1BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ DNN=[
|
|||
ol = DnnImageLastLayer(7, 7, cMap4, labelDim, conv4, fc1WScale, fc1BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
import sys
|
||||
import urllib.request as ul
|
||||
import pickle as cp
|
||||
import tarfile
|
||||
import shutil
|
||||
import os
|
||||
import struct
|
||||
import numpy as np
|
||||
import getopt
|
||||
|
||||
ImgSize = 32
|
||||
NumFeat = ImgSize * ImgSize * 3
|
||||
|
||||
def readBatch(src, outFmt):
|
||||
with open(src, 'rb') as f:
|
||||
d = cp.load(f, encoding="latin1")
|
||||
# Note: most of the frameworks use spatial-major (aka NCHW) input format:
|
||||
# R0..RN,G0..GN,B0..BN
|
||||
# There are 2 possible options in CNTK:
|
||||
# 1. If CNTK is built with cuDNN then 'cudnn' (i.e. NCHW format) should be used.
|
||||
# 2. Otherwise, legacy CNTK 'NHWC' format should be used. As CIFAR-10 dataset comes in
|
||||
# NCHW format, it has to be converted to CNTK legacy format first.
|
||||
data = d['data']
|
||||
if outFmt == 'cudnn':
|
||||
feat = data
|
||||
elif outFmt == 'legacy':
|
||||
r = data[:, : ImgSize * ImgSize]
|
||||
g = data[:, ImgSize * ImgSize : 2 * ImgSize * ImgSize]
|
||||
b = data[:, 2 * ImgSize * ImgSize : 3 * ImgSize * ImgSize]
|
||||
feat = np.empty_like(data)
|
||||
feat[:, ::3] = r
|
||||
feat[:, 1::3] = g
|
||||
feat[:, 2::3] = b
|
||||
else:
|
||||
print ('Format not supported: ' + outFmt)
|
||||
usage()
|
||||
sys.exit(1)
|
||||
res = np.hstack((feat, np.reshape(d['labels'], (len(d['labels']), 1))))
|
||||
return res.astype(np.int)
|
||||
|
||||
def loadData(src, outFmt):
|
||||
print ('Downloading ' + src)
|
||||
fname, h = ul.urlretrieve(src, './delete.me')
|
||||
print ('Done.')
|
||||
try:
|
||||
print ('Extracting files...')
|
||||
with tarfile.open(fname) as tar:
|
||||
tar.extractall()
|
||||
print ('Done.')
|
||||
print ('Preparing train set...')
|
||||
trn = np.empty((0, NumFeat + 1), dtype=np.int)
|
||||
for i in range(5):
|
||||
batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
|
||||
trn = np.vstack((trn, readBatch(batchName, outFmt)))
|
||||
print ('Done.')
|
||||
print ('Preparing test set...')
|
||||
tst = readBatch('./cifar-10-batches-py/test_batch', outFmt)
|
||||
print ('Done.')
|
||||
finally:
|
||||
os.remove(fname)
|
||||
return (trn, tst)
|
||||
|
||||
def usage():
|
||||
print ('Usage: CIFAR_convert.py [-f <format>] \n where format can be either cudnn or legacy. Default is cudnn.')
|
||||
|
||||
def parseCmdOpt(argv):
|
||||
if len(argv) == 0:
|
||||
print ("Using cudnn output format.")
|
||||
return "cudnn"
|
||||
try:
|
||||
opts, args = getopt.getopt(argv, 'hf:', ['help', 'outFormat='])
|
||||
except getopt.GetoptError:
|
||||
usage()
|
||||
sys.exit(1)
|
||||
for opt, arg in opts:
|
||||
if opt in ('-h', '--help'):
|
||||
usage()
|
||||
sys.exit()
|
||||
elif opt in ('-f', '--outFormat'):
|
||||
fmt = arg
|
||||
if fmt != 'cudnn' and fmt != 'legacy':
|
||||
print ('Invalid output format option.')
|
||||
usage()
|
||||
sys.exit(1)
|
||||
return fmt
|
||||
|
||||
def savetxt(filename, ndarray):
|
||||
with open(filename, 'w') as f:
|
||||
labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
|
||||
for row in ndarray:
|
||||
row_str = row.astype(str)
|
||||
label_str = labels[row[-1]]
|
||||
feature_str = ' '.join(row_str[:-1])
|
||||
f.write('|labels {} |features {}\n'.format(label_str, feature_str))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
fmt = parseCmdOpt(sys.argv[1:])
|
||||
trn, tst = loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', fmt)
|
||||
print ('Writing train text file...')
|
||||
savetxt(r'./Train_cntk_text.txt', trn)
|
||||
print ('Done.')
|
||||
print ('Writing test text file...')
|
||||
savetxt(r'./Test_cntk_text.txt', tst)
|
||||
print ('Done.')
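# Usage sketch for the script added above (file name taken from its own usage() text; it needs
# Python 3, since it relies on urllib.request and pickle's encoding argument). Run it from the
# directory where the output files should land; it writes Train_cntk_text.txt and
# Test_cntk_text.txt there. Pass "-f legacy" only for CNTK builds without cuDNN:
#     python CIFAR_convert.py -f cudnn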
|
|
@ -0,0 +1,73 @@
|
|||
import os
|
||||
import sys
|
||||
import struct
|
||||
import pickle as cp
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import xml.etree.cElementTree as et
|
||||
import xml.dom.minidom
|
||||
|
||||
imgSize = 32
|
||||
|
||||
def saveImage(fname, data, label, mapFile, pad, **key_parms):
|
||||
# data in CIFAR-10 dataset is in CHW format.
|
||||
pixData = data.reshape((3, imgSize, imgSize))
|
||||
if ('mean' in key_parms):
|
||||
key_parms['mean'] += pixData
|
||||
|
||||
if pad > 0:
|
||||
pixData = np.pad(pixData, ((0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=128) # can also use mode='edge'
|
||||
|
||||
img = Image.new('RGB', (imgSize + 2 * pad, imgSize + 2 * pad))
|
||||
pixels = img.load()
|
||||
for x in range(img.size[0]):
|
||||
for y in range(img.size[1]):
|
||||
pixels[x, y] = (pixData[0][y][x], pixData[1][y][x], pixData[2][y][x])
|
||||
img.save(fname)
|
||||
mapFile.write("%s\t%d\n" % (fname, label))
|
||||
|
||||
def saveMean(fname, data):
|
||||
root = et.Element('opencv_storage')
|
||||
et.SubElement(root, 'Channel').text = '3'
|
||||
et.SubElement(root, 'Row').text = str(imgSize)
|
||||
et.SubElement(root, 'Col').text = str(imgSize)
|
||||
meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
|
||||
et.SubElement(meanImg, 'rows').text = '1'
|
||||
et.SubElement(meanImg, 'cols').text = str(imgSize * imgSize * 3)
|
||||
et.SubElement(meanImg, 'dt').text = 'f'
|
||||
et.SubElement(meanImg, 'data').text = ' '.join(['%e' % n for n in np.reshape(data, (imgSize * imgSize * 3))])
|
||||
|
||||
tree = et.ElementTree(root)
|
||||
tree.write(fname)
|
||||
x = xml.dom.minidom.parse(fname)
|
||||
with open(fname, 'w') as f:
|
||||
f.write(x.toprettyxml(indent = ' '))
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 2:
|
||||
print ("Usage: CifarConverter.py <path to CIFAR-10 dataset directory>\nCIFAR-10 dataset (Python version) can be downloaded from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
|
||||
sys.exit(1)
|
||||
rootDir = sys.argv[1]
|
||||
trainDir = os.path.join(rootDir, os.path.join('data', 'train'))
|
||||
if not os.path.exists(trainDir):
|
||||
os.makedirs(trainDir)
|
||||
testDir = os.path.join(rootDir, os.path.join('data', 'test'))
|
||||
if not os.path.exists(testDir):
|
||||
os.makedirs(testDir)
|
||||
data = {}
|
||||
dataMean = np.zeros((3, imgSize, imgSize)) # mean is in CHW format.
|
||||
with open(os.path.join(rootDir, 'train_map.txt'), 'w') as mapFile:
|
||||
for ifile in range(1, 6):
|
||||
with open(os.path.join(rootDir, 'data_batch_' + str(ifile)), 'rb') as f:
|
||||
data = cp.load(f, encoding='latin1')
|
||||
for i in range(10000):
|
||||
fname = os.path.join(trainDir, ('%05d.png' % (i + (ifile - 1) * 10000)))
|
||||
saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 4, mean=dataMean)
|
||||
dataMean = dataMean / (50 * 1000)
|
||||
saveMean(os.path.join(rootDir, 'CIFAR-10_mean.xml'), dataMean)
|
||||
with open(os.path.join(rootDir, 'test_map.txt'), 'w') as mapFile:
|
||||
with open(os.path.join(rootDir, 'test_batch'), 'rb') as f:
|
||||
data = cp.load(f, encoding='latin1')
|
||||
for i in range(10000):
|
||||
fname = os.path.join(testDir, ('%05d.png' % i))
|
||||
saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 0)
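# Usage sketch for this converter (name taken from its usage message; numpy and PIL/Pillow must
# be installed, and the extracted python-version CIFAR-10 batches must already be in the given
# directory):
#     python CifarConverter.py <path to CIFAR-10 dataset directory>
# As the code above shows, it writes PNGs under <path>/data/train and <path>/data/test plus
# train_map.txt, test_map.txt, and CIFAR-10_mean.xml.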
|
|
@ -41,8 +41,8 @@ TrainConvNet = [
|
|||
|
||||
# connect to system
|
||||
ce = CrossEntropyWithSoftmax (labels, z)
|
||||
errs = ErrorPrediction (labels, z)
|
||||
top5Errs = ErrorPrediction (labels, z, topN=5) # only used in Eval action
|
||||
errs = ClassificationError (labels, z)
|
||||
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
|
||||
|
||||
featureNodes = (features)
|
||||
labelNodes = (labels)
|
||||
|
@ -109,8 +109,8 @@ TrainConvNetWithBN = [
|
|||
|
||||
# connect to system
|
||||
ce = CrossEntropyWithSoftmax (labels, z)
|
||||
errs = ErrorPrediction (labels, z)
|
||||
top5Errs = ErrorPrediction (labels, z, topN=5)
|
||||
errs = ClassificationError (labels, z)
|
||||
top5Errs = ClassificationError (labels, z, topN=5)
|
||||
|
||||
featureNodes = (features)
|
||||
labelNodes = (labels)
|
||||
|
|
|
@ -1,229 +0,0 @@
|
|||
# Simple CIFAR-10 convnet, without and with BatchNormalization.
|
||||
|
||||
command = TrainConvNet:Eval
|
||||
#command = TrainConvNetWithBN:Eval
|
||||
|
||||
makeMode = false ; traceLevel = 0 ; deviceId = "auto"
|
||||
|
||||
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
|
||||
|
||||
modelPath = "$ModelDir$/cifar10.cmf"
|
||||
|
||||
# Training without BN
|
||||
TrainConvNet = {
|
||||
action = "train"
|
||||
|
||||
BrainScriptNetworkBuilder = {
|
||||
imageShape = 32:32:3
|
||||
labelDim = 10
|
||||
|
||||
# basic model
|
||||
model_basic (features) =
|
||||
{
|
||||
featNorm = features - Constant (128)
|
||||
l1 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
|
||||
init = "gaussian", initValueScale = 0.0043} (featNorm)
|
||||
p1 = MaxPoolingLayer {(3:3), stride = (2:2)} (l1)
|
||||
l2 = ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU,
|
||||
init = "gaussian", initValueScale = 1.414} (p1)
|
||||
p2 = MaxPoolingLayer {(3:3), stride = (2:2)} (l2)
|
||||
l3 = ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU,
|
||||
init = "gaussian", initValueScale = 1.414} (p2)
|
||||
p3 = MaxPoolingLayer {(3:3), stride = (2:2)} (l3)
|
||||
d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (p3)
|
||||
z = LinearLayer {10, init = "gaussian", initValueScale = 1.5} (d1)
|
||||
}.z
|
||||
|
||||
# with self-defined layer
|
||||
MyConvReLUPoolLayer {dim, initValueScale} =
|
||||
{
|
||||
C = ConvolutionalLayer {dim, (5:5), pad = true, activation = ReLU, init = "gaussian", initValueScale = initValueScale}
|
||||
P = MaxPoolingLayer {(3:3), stride = (2:2)}
|
||||
apply (x) = P(C(x))
|
||||
}.apply
|
||||
model_layers (features) =
|
||||
{
|
||||
featNorm = features - Constant (128)
|
||||
h1 = MyConvReLUPoolLayer {32, 0.0043} (featNorm)
|
||||
h2 = MyConvReLUPoolLayer {32, 1.414} (h1)
|
||||
h3 = MyConvReLUPoolLayer {64, 1.414} (h2)
|
||||
d1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (h3)
|
||||
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (d1)
|
||||
}.z
|
||||
|
||||
# model-composition style
|
||||
# ...TODO: test this again; last run was a little worse
|
||||
Subtract128 (x) = x - Constant (128)
|
||||
model_compositionStyle = Sequential (
|
||||
Subtract128 :
|
||||
MyConvReLUPoolLayer {32, 0.0043} :
|
||||
MyConvReLUPoolLayer {32, 1.414} :
|
||||
MyConvReLUPoolLayer {64, 1.414} :
|
||||
DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} :
|
||||
LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5}
|
||||
)
|
||||
#model_compositionStyle =
|
||||
# Subtract128 >>
|
||||
# LayerStack {3, i => MyConvReLUPoolLayer {dims[i], initValueScales[i]} } >>
|
||||
# MyConvReLUPoolLayer {32, 0.0043} >>
|
||||
# MyConvReLUPoolLayer {32, 1.414} >>
|
||||
# MyConvReLUPoolLayer {64, 1.414} >>
|
||||
# DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} >>
|
||||
# LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5}
|
||||
|
||||
// --- with BatchNorm
|
||||
MyConvBNReLUPoolLayer {dim, initValueScale} =
|
||||
{
|
||||
C = ConvolutionalLayer {dim, (5:5), pad = true, bias = false, init = "gaussian", initValueScale = initValueScale}
|
||||
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
|
||||
P = MaxPoolingLayer {(3:3), stride = (2:2)}
|
||||
apply (x) = P(ReLU(B(C(x))))
|
||||
}.apply
|
||||
MyDenseBNReLULayer {dim, initValueScale} =
|
||||
{
|
||||
D = DenseLayer {dim, bias = false, init = "gaussian", initValueScale = initValueScale}
|
||||
B = BatchNormalizationLayer {normalizationTimeConstant = 4096}
|
||||
apply (x) = ReLU(B(D(x)))
|
||||
}.apply
|
||||
model_withBatchNorm (features) =
|
||||
{
|
||||
featNorm = features - Constant (128)
|
||||
h1 = MyConvBNReLUPoolLayer {32, 0.0043} (featNorm)
|
||||
h2 = MyConvBNReLUPoolLayer {32, 1.414} (h1)
|
||||
h3 = MyConvBNReLUPoolLayer {64, 1.414} (h2)
|
||||
d1 = MyDenseBNReLULayer {64, 12} (h3)
|
||||
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (d1)
|
||||
}.z
|
||||
|
||||
// --- ResNet
|
||||
MyConvBNLayer {dim, initValueScale, stride} =
|
||||
{
|
||||
# note: (3:3), while the macro above is (5:5)
|
||||
C = ConvolutionalLayer {dim, (3:3), pad = true, stride = (stride:stride), bias = false, init = "gaussian", initValueScale = initValueScale}
|
||||
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
|
||||
apply (x) = B(C(x))
|
||||
}.apply
|
||||
ResNetNode {dim, initValueScale} =
|
||||
{
|
||||
C1 = MyConvBNLayer {dim, initValueScale, 1} # first convolution layer
|
||||
C2 = MyConvBNLayer {dim, initValueScale, 1} # second convolution layer
|
||||
#B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
|
||||
# ^^ Note: Adding an extra BN to 'x' trains slightly better.
|
||||
apply (x) = ReLU (x + C2(ReLU(C1(x)))) # ReLU between C1 and C2 and after summation
|
||||
}.apply
|
||||
ResNetIncNode {dim, initValueScale} =
|
||||
{
|
||||
# first branch. This doubles the #channels but halves the image size
|
||||
C1 = MyConvBNLayer {dim, initValueScale, 2} # first convolution layer, stride = 2
|
||||
C2 = MyConvBNLayer {dim, initValueScale, 1} # second convolution layer
|
||||
|
||||
# second branch:
|
||||
# sub-sample spatially by a factor of 2
|
||||
DownSamplingLayer {stride} = MaxPoolingLayer {(1:1), stride = stride}
|
||||
# append dim/2 zero output channels
|
||||
pad = ConstantTensor (0, (1:1:dim/2)) # the 1s will broadcast to image size
|
||||
P(x) = Splice ((DownSamplingLayer {(2:2)} (x) : pad), axis = 3)
|
||||
B = BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096}
|
||||
|
||||
# layer sums both branches and rectifies the result
|
||||
apply (x) = ReLU (B(P(x)) + C2(ReLU(C1(x)))) # ReLU between C1 and C2 and after summation
|
||||
}.apply
|
||||
model_resNet (features) =
|
||||
{
|
||||
conv1 = MyConvBNLayer {16, 0.26, 1} (features)
|
||||
rl1 = ReLU (conv1)
|
||||
rn1 = LayerStack {3, _ => ResNetNode {16, 7.07}} (rl1)
|
||||
|
||||
rn2_1 = ResNetIncNode {32, 7.07} (rn1)
|
||||
rn2 = LayerStack {2, _ => ResNetNode {32, 7.07}} (rn2_1)
|
||||
|
||||
rn3_1 = ResNetIncNode {64, 7.07} (rn2)
|
||||
rn3 = LayerStack {2, _ => ResNetNode {64, 7.07}} (rn3_1)
|
||||
|
||||
pool = AveragePoolingLayer {(8:8)} (rn3)
|
||||
|
||||
z = LinearLayer {labelDim, init = "gaussian", initValueScale = 0.4} (pool)
|
||||
}.z
|
||||
|
||||
# inputs
|
||||
features = Input {imageShape}
|
||||
labels = Input {labelDim}
|
||||
|
||||
# apply model to features
|
||||
z = model_basic (features)
|
||||
|
||||
# connect to system
|
||||
ce = CrossEntropyWithSoftmax (labels, z)
|
||||
errs = ErrorPrediction (labels, z)
|
||||
top5Errs = ErrorPrediction (labels, z, topN=5) # only used in Eval action
|
||||
|
||||
featureNodes = (features)
|
||||
labelNodes = (labels)
|
||||
criterionNodes = (ce)
|
||||
evaluationNodes = (errs) # top5Errs only used in Eval
|
||||
outputNodes = (z)
|
||||
}
|
||||
|
||||
SGD = {
|
||||
epochSize = 50000 # 49984 --TODO: why 16 less?
|
||||
|
||||
# without BatchNormalization:
|
||||
maxEpochs = 30 ; minibatchSize = 64
|
||||
learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
|
||||
momentumAsTimeConstant = 600*20:6400
|
||||
L2RegWeight = 0.03
|
||||
|
||||
# with BatchNormalization:
|
||||
#maxEpochs = 30 ; minibatchSize = 64
|
||||
#learningRatesPerSample = 0.00046875*7:0.00015625
|
||||
#momentumAsTimeConstant = 0
|
||||
#L2RegWeight = 0
|
||||
|
||||
# ResNet
|
||||
#maxEpochs = 160 ; minibatchSize = 128
|
||||
#learningRatesPerSample = 0.0078125*80:0.00078125*40:0.000078125
|
||||
#momentumAsTimeConstant = 1200
|
||||
#L2RegWeight = 0.0001
|
||||
|
||||
firstMBsToShowResult = 10 ; numMBsToShowResult = 500
|
||||
}
|
||||
|
||||
reader = {
|
||||
verbosity = 0
|
||||
randomize = true
|
||||
deserializers = ({
|
||||
type = "ImageDeserializer" ; module = "ImageReader"
|
||||
file = "$DataDir$/cifar-10-batches-py/train_map.txt"
|
||||
input = {
|
||||
features = { transforms = (
|
||||
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
|
||||
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
|
||||
{ type = "Transpose" }
|
||||
)}
|
||||
labels = { labelDim = 10 }
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
# Eval action
|
||||
Eval = {
|
||||
action = "eval"
|
||||
minibatchSize = 16
|
||||
evalNodeNames = errs:top5Errs # also test top-5 error rate
|
||||
reader = {
|
||||
verbosity = 0
|
||||
randomize = true
|
||||
deserializers = ({
|
||||
type = "ImageDeserializer" ; module = "ImageReader"
|
||||
file = "$DataDir$/cifar-10-batches-py/test_map.txt"
|
||||
input = {
|
||||
features = { transforms = (
|
||||
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
|
||||
{ type = "Transpose" }
|
||||
)}
|
||||
labels = { labelDim = 10 }
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
m1 = LoadModel("$curModel$", format="cntk")
|
||||
SetDefaultModel(m1)
|
||||
|
||||
errTop5 = ErrorPrediction(labels, outputNodes.z, Const(5), tag="eval")
|
||||
errTop5 = ClassificationError(labels, outputNodes.z, Const(5), tag="eval")
|
||||
|
||||
SaveModel(m1, "$newModel$", format="cntk")
|
||||
|
|
|
@ -103,6 +103,6 @@ DNN=[
|
|||
ol = DNNLastLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -2,6 +2,6 @@ m1=LoadModel($CurModel$, format=cntk)
|
|||
SetDefaultModel(m1)
|
||||
|
||||
# Add top-5 error prediction node.
|
||||
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
|
||||
ErrTop5 = ClassificationError(labels, OutputNodes.z, Const(5), tag = Eval)
|
||||
|
||||
SaveModel(m1, $NewModel$, format=cntk)
|
|
@ -111,6 +111,6 @@ DNN=[
|
|||
ol = DnnLayer(cMap6, labelDim, pool2, fcWScale, fcBValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -67,6 +67,6 @@ DNN=[
|
|||
ol = DnnLayer(cMap4, labelDim, pool5, fcWScale, fcBValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -74,6 +74,6 @@ DNN=[
|
|||
ol = DnnLayer(cMap4, labelDim, pool5, fcWScale, fcBValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -77,6 +77,6 @@ DNN=[
|
|||
ol = DnnLayer(cMap6, labelDim, pool2, fcWScale, fcBValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -2,6 +2,6 @@ m1=LoadModel($CurModel$, format=cntk)
|
|||
SetDefaultModel(m1)
|
||||
|
||||
# Add top-5 error prediction node.
|
||||
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = "eval")
|
||||
ErrTop5 = ClassificationError(labels, OutputNodes.z, Const(5), tag = "eval")
|
||||
|
||||
SaveModel(m1, $NewModel$, format=cntk)
|
|
@ -71,6 +71,6 @@ DNN=[
|
|||
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -79,6 +79,6 @@ DNN=[
|
|||
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -80,6 +80,6 @@ DNN=[
|
|||
ol = DnnLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
|
||||
|
||||
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
|
||||
Err = ErrorPrediction(labels, ol, tag = Eval)
|
||||
Err = ClassificationError(labels, ol, tag = Eval)
|
||||
OutputNodes = ol
|
||||
]
|
||||
|
|
|
@ -29,7 +29,7 @@ ndlTestCosDist=[
|
|||
CD = CosDistance(L4, labels);
|
||||
CDAll=SumElements(CD)
|
||||
NCD=Negate(CDALL, tag="criterion")
|
||||
Err=ErrorPrediction(labels, L4, tag="evaluation")
|
||||
Err=ClassificationError(labels, L4, tag="evaluation")
|
||||
|
||||
# rootNodes defined here
|
||||
OutputNodes=(L4)
|
||||
|
@ -129,7 +129,7 @@ ndlFull=[
|
|||
#SM=Softmax(Plus2)
|
||||
#CE=CrossEntropy(labels, SM)
|
||||
CE=CrossEntropyWithSoftmax(labels, Plus2)
|
||||
ErrPredict=ErrorPrediction(labels, Plus2)
|
||||
ErrPredict=ClassificationError(labels, Plus2)
|
||||
FeatureNodes=(features)
|
||||
LabelNodes=(labels)
|
||||
CriterionNodes=(CE)
|
||||
|
@ -233,7 +233,7 @@ ndlMacroUse2=[
|
|||
L2 = RBFF(L1, HDim, HDim)
|
||||
L3 = RBFF(L2, HDim, HDim)
|
||||
CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
|
||||
|
||||
# rootNodes defined here
|
||||
OutputNodes=(CE.BFF)
|
||||
|
@ -290,7 +290,7 @@ ndlMacroUseCNNSubSample2ZeroPadding=[
|
|||
HDim=128
|
||||
L1 = SBFF(mp, HDim, mpoutputSizePerSample)
|
||||
CE = SMBFF(L1, LDim, HDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
|
||||
|
||||
# rootNodes defined here
|
||||
OutputNodes=(CE.BFF)
|
||||
|
@ -349,7 +349,7 @@ ndlMacroUseCNNSubSample2=[
|
|||
HDim=128
|
||||
L1 = SBFF(mp, HDim, mpoutputSizePerSample)
|
||||
CE = SMBFF(L1, LDim, HDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
|
||||
|
||||
# rootNodes defined here
|
||||
OutputNodes=(CE.BFF)
|
||||
|
@ -399,7 +399,7 @@ ndlMacroUseCNN=[
|
|||
HDim=128
|
||||
L1 = SBFF(mp, HDim, 0)
|
||||
CE = SMBFF(L1, LDim, HDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
|
||||
|
||||
# rootNodes defined here
|
||||
OutputNodes=(CE.BFF)
|
||||
|
@ -430,7 +430,7 @@ ndlMacroUseNoBase=[
|
|||
L2 = RFFD(L1, HDim, HDim)
|
||||
L3 = RFFD(L2, HDim, HDim)
|
||||
CE = SMFF(L3, LDim, SDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
|
||||
# rootNodes defined here
|
||||
OutputNodes=(CE.BFF)
|
||||
]
|
||||
|
@ -463,7 +463,7 @@ ndlMacroUseMask=[
|
|||
L4=ElementTimes(L3, ML2)
|
||||
|
||||
CE = SMBFF(L4, LDim, HDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF, tag="evaluation")
|
||||
|
||||
# output nodes
|
||||
Prior=Mean(labels)
|
||||
|
|
|
@ -39,7 +39,7 @@ Multigpu_Demo_Train=[
|
|||
# 2 input, 2 50-element hidden, 2 output
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
evalCriterion = "ClassificationError"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
|
|
@ -32,7 +32,7 @@ Simple_Demo_Train = [
|
|||
# 2 input, 2 50-element hidden, 2 output
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
evalCriterion = "ClassificationError"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -137,6 +137,6 @@ Simple_Demo_Output=[
|
|||
# grep labels SimpleOutput.labels | awk '{print $1}' > L
|
||||
# diff L P | grep "<" | wc -l
|
||||
# wc -l P
|
||||
# The ratio of the two numbers gives the same error rate as ErrorPrediction/Sample in the log.
|
||||
# The ratio of the two numbers gives the same error rate as ClassificationError/Sample in the log.
|
||||
]
|
||||
]
|
||||
|
|
|
@ -29,7 +29,7 @@ speechTrain = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
evalCriterion = "ClassificationError"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
|
|
@ -199,7 +199,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3 = [
|
|||
LSTMoutputW = Plus(Times(W, LSTMoutput3), b);
|
||||
|
||||
ce = CrossEntropyWithSoftmax(labels, LSTMoutputW);
|
||||
err = ErrorPrediction(labels, LSTMoutputW);
|
||||
err = ClassificationError(labels, LSTMoutputW);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
scaledLogLikelihood = Minus(LSTMoutputW, logPrior)
|
||||
|
|
|
@ -172,7 +172,7 @@ ndlCreateNetwork=[
|
|||
criterion = Plus(Scale(cr2,criterion2), Scale(cr1,criterion1), tag=Criteria)
|
||||
|
||||
#CE = SMBFF(Dout,labelDim,hiddenDim,labels,tag=Criteria)
|
||||
Err = ErrorPrediction(labels,DNN_A_CE_BFF,tag=Eval)
|
||||
Err = ClassificationError(labels,DNN_A_CE_BFF,tag=Eval)
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ ndlCreateNetwork=[
|
|||
L2 = SBFF(L1,hiddenDim,hiddenDim)
|
||||
L3 = SBFF(L2,hiddenDim,hiddenDim)
|
||||
CE = SMBFF(L3,labelDim,hiddenDim,labels,tag=Criteria)
|
||||
Err = ErrorPrediction(labels,CE.BFF.FF.P,tag=Eval)
|
||||
Err = ClassificationError(labels,CE.BFF.FF.P,tag=Eval)
|
||||
|
||||
# define output (scaled loglikelihood)
|
||||
logPrior = LogPrior(labels)
|
||||
|
|
|
@ -122,7 +122,7 @@ ndlCreateNetwork=[
|
|||
L5 = SBFF(L4,hiddenDim,hiddenDim)
|
||||
L6 = SBFF(L5,hiddenDim,hiddenDim)
|
||||
CE = SMBFF(L6,labelDim,hiddenDim,labels,tag=Criteria)
|
||||
Err = ErrorPrediction(labels,CE.BFF.FF.P,tag=Eval)
|
||||
Err = ClassificationError(labels,CE.BFF.FF.P,tag=Eval)
|
||||
|
||||
# define output (scaled loglikelihood)
|
||||
logPrior = LogPrior(labels)
|
||||
|
|
|
@ -128,7 +128,7 @@ ndlCreateNetwork=[
|
|||
# same name as the corresponding node in the non-sequence training model.
|
||||
CE.BFF = BFF(L6, labelDim, hiddenDim)
|
||||
Cr = DummyCriterion(objectives, derivatives, CE.BFF.FF.P, tag=Criteria)
|
||||
Err = ErrorPrediction(labels, CE.BFF.FF.P, tag=Eval)
|
||||
Err = ClassificationError(labels, CE.BFF.FF.P, tag=Eval)
|
||||
|
||||
# define output (scaled loglikelihood)
|
||||
logPrior = LogPrior(labels)
|
||||
|
|
|
@ -106,7 +106,7 @@ ndlCreateNetwork=[
|
|||
LSTMoutputW1 = Times(W1, LSTMoutput3)
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Criteria)
|
||||
Err = ErrorPrediction(labels,LSTMoutputW1,tag=Eval)
|
||||
Err = ClassificationError(labels,LSTMoutputW1,tag=Eval)
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW1,logPrior,tag=Output)
|
||||
|
|
|
@ -142,7 +142,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
#LSTMoutputW = Plus(Times(W, LSTMoutput3), b);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -184,7 +184,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -182,7 +182,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -111,7 +111,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
LSTMoutputW = Plus(Times(W, LSTMoutput3), b);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -112,7 +112,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -187,7 +187,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -116,7 +116,7 @@ ndlCreateNetwork_LSTMP_c1024_p256_x3=[
|
|||
LSTMoutputW = Plus(Times(W, LSTMoutput8), b);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
|
||||
Err = ClassificationError(labels,LSTMoutputW,tag=Eval);
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
|
||||
|
|
|
@ -27,7 +27,7 @@ TimitTrainSimple=[
|
|||
SimpleNetworkBuilder=[
|
||||
layerSizes=792:512*3:183
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
evalCriterion=ErrorPrediction
|
||||
evalCriterion=ClassificationError
|
||||
layerTypes=Sigmoid
|
||||
initValueScale=1.0
|
||||
applyMeanVarNorm=true
|
||||
|
|
|
@ -163,7 +163,7 @@ ndlCreateNetwork=[
|
|||
criterion2 = CrossEntropyWithSoftmax(statelabels, DNN_B_CE_BFF)
|
||||
criterion = Plus(Scale(cr2,criterion2), Scale(cr1,criterion1), tag="criterion")
|
||||
|
||||
Err = ErrorPrediction(labels,DNN_A_CE_BFF,tag="evaluation")
|
||||
Err = ClassificationError(labels,DNN_A_CE_BFF,tag="evaluation")
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ TIMIT_TrainAdaptLR=[
|
|||
SimpleNetworkBuilder=[
|
||||
layerSizes=792:512*3:183
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
evalCriterion=ErrorPrediction
|
||||
evalCriterion=ClassificationError
|
||||
layerTypes=Sigmoid
|
||||
initValueScale=1.0
|
||||
applyMeanVarNorm=true
|
||||
|
|
|
@ -24,7 +24,7 @@ TIMIT_TrainSimple=[
|
|||
SimpleNetworkBuilder=[
|
||||
layerSizes=792:512*3:183
|
||||
trainingCriterion=CrossEntropyWithSoftmax
|
||||
evalCriterion=ErrorPrediction
|
||||
evalCriterion=ClassificationError
|
||||
layerTypes=Sigmoid
|
||||
initValueScale=1.0
|
||||
applyMeanVarNorm=true
|
||||
|
|
|
@ -33,7 +33,7 @@ ndlCreateNetwork=[
|
|||
L2 = SBFF(L1,hiddenDim,hiddenDim)
|
||||
L3 = SBFF(L2,hiddenDim,hiddenDim)
|
||||
CE = SMBFF(L3,labelDim,hiddenDim,myLabels,tag="criterion")
|
||||
Err = ErrorPrediction(myLabels,CE.BFF.FF.P,tag="evaluation")
|
||||
Err = ClassificationError(myLabels,CE.BFF.FF.P,tag="evaluation")
|
||||
|
||||
# define output (scaled loglikelihood)
|
||||
logPrior = LogPrior(myLabels)
|
||||
|
|
|
@ -31,7 +31,7 @@ ndlCreateNetwork=[
|
|||
featNorm = MeanVarNorm(features)
|
||||
L1 = SBFF(featNorm,hiddenDim,featDim)
|
||||
CE = SMBFF(L1,labelDim,hiddenDim,labels,tag="criterion")
|
||||
Err = ErrorPrediction(labels,CE.BFF.FF.P,tag="evaluation")
|
||||
Err = ClassificationError(labels,CE.BFF.FF.P,tag="evaluation")
|
||||
|
||||
# define output (scaled loglikelihood)
|
||||
logPrior = LogPrior(labels)
|
||||
|
|
|
@ -102,7 +102,7 @@ ndlCreateNetwork=[
|
|||
LSTMoutputW1 = Times(W1, LSTMoutput)
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag="criterion")
|
||||
Err = ErrorPrediction(labels,LSTMoutputW1,tag="evaluation")
|
||||
Err = ClassificationError(labels,LSTMoutputW1,tag="evaluation")
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW1,logPrior,tag="output")
|
||||
|
|
|
@ -51,7 +51,7 @@ L1 = SBFF2(featInput1, HiddenDim, FeatDim1, featInput2, FeatDim2)
|
|||
L2 = SBFF(L1, HiddenDim, HiddenDim)
|
||||
L3 = SBFF(L2, HiddenDim, HiddenDim)
|
||||
CE = SMBFF(L3, LabelDim1, HiddenDim, labels,tag="criterion") # do I need a tag?
|
||||
FER = ErrorPrediction(labels,CE.BFF.FF.P,tag="evaluation")
|
||||
FER = ClassificationError(labels,CE.BFF.FF.P,tag="evaluation")
|
||||
|
||||
# outputNodes
|
||||
Prior=Mean(labels)
|
||||
|
|
|
@ -41,12 +41,12 @@ L3 = SBFF(L2, HiddenDim, HiddenDim2)
|
|||
# objective function 1
|
||||
BFF1=BFF(L3,LabelDim1,HiddenDim)
|
||||
CE1=CrossEntropyWithSoftmax(labels,BFF1.FF.P,tag="evaluation")
|
||||
FER1 = ErrorPrediction(labels,BFF1.FF.P,tag="evaluation")
|
||||
FER1 = ClassificationError(labels,BFF1.FF.P,tag="evaluation")
|
||||
|
||||
# objective function 2
|
||||
BFF2=BFF(L3,LabelDim2,HiddenDim)
|
||||
CE2=CrossEntropyWithSoftmax(regions,BFF2.FF.P,tag="evaluation")
|
||||
FER2 = ErrorPrediction(regions,BFF2.FF.P,tag="evaluation")
|
||||
FER2 = ClassificationError(regions,BFF2.FF.P,tag="evaluation")
|
||||
|
||||
# weighted final objective function
|
||||
Alpha1=0.8
|
||||
|
|
|
@ -60,7 +60,7 @@ Train = [
|
|||
outputs = W * LSTMoutput + b
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, outputs)
|
||||
errs = ErrorPrediction(labels, outputs)
|
||||
errs = ClassificationError(labels, outputs)
|
||||
|
||||
criterionNodes = (cr)
|
||||
evaluationNodes = (errs)
|
||||
|
@ -191,7 +191,7 @@ Test = [
|
|||
labels = Input($labelCount$, tag = "label")
|
||||
modelAsTrained = BS.Network.Load ("$modelPath$")
|
||||
final = Hardmax(modelAsTrained.outputs)
|
||||
errorRate = ErrorPrediction(labels, final, tag='evaluation')
|
||||
errorRate = ClassificationError(labels, final, tag='evaluation')
|
||||
]
|
||||
|
||||
evalNodeNames = errorRate
|
||||
|
|
|
@ -1,169 +0,0 @@
|
|||
# The configuration file to build language understanding model with ATIS corpus.
|
||||
# An LSTM model is built to tag each word in sentences with its semantic label.
|
||||
|
||||
makeMode = false ; traceLevel = 1 ; deviceId = -1
|
||||
|
||||
WorkDir = Work
|
||||
DataDir = Data
|
||||
|
||||
modelPath = $WorkDir$/slot.model
|
||||
parallelTrain = true
|
||||
|
||||
#stderr = $WorkDir$/log
|
||||
|
||||
command = TrainATIS:RunATIS:EvalATIS
|
||||
|
||||
vocabSize = 943 # number of words
|
||||
numLabels = 129 # number of slot labels
|
||||
numIntents = 26 # number of intent labels
|
||||
|
||||
# The command to train the LSTM model
|
||||
TrainATIS = [
|
||||
action = "train"
|
||||
BrainScriptNetworkBuilder = [
|
||||
inputDim = $vocabSize$
|
||||
labelDim = $numLabels$
|
||||
embDim = 150
|
||||
hiddenDim = 300
|
||||
#hiddenDim = 150
|
||||
|
||||
model = Sequential (
|
||||
Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) : # 3-word window
|
||||
EmbeddingLayer {embDim} : # embedding
|
||||
RecurrentLSTMLayer {hiddenDim} : # LSTM
|
||||
#Parallel ((RecurrentLSTMLayer {hiddenDim} : RecurrentLSTMLayer {hiddenDim, goBackwards=true}), Splice) : # bidirectional LSTM
|
||||
#Parallel ((RecurrentLSTMLayer {hiddenDim} : RecurrentLSTMLayer {hiddenDim, goBackwards=true}), Splice) : # bidirectional LSTM
|
||||
DenseLayer {labelDim, initValueScale=7} # output layer
|
||||
)
|
||||
|
||||
# features
|
||||
query = Input {inputDim}
|
||||
slotLabels = Input {labelDim}
|
||||
|
||||
# model application
|
||||
z = model (query)
|
||||
|
||||
# loss and metric
|
||||
ce = CrossEntropyWithSoftmax (slotLabels, z)
|
||||
errs = ErrorPrediction (slotLabels, z)
|
||||
|
||||
featureNodes = (query)
|
||||
labelNodes = (slotLabels)
|
||||
criterionNodes = (ce)
|
||||
evaluationNodes = (errs)
|
||||
outputNodes = (z)
|
||||
]
|
||||
# rename this to BrainScriptNetworkBuilder to switch to intent-classification task
|
||||
Intent_BrainScriptNetworkBuilder = [
|
||||
inputDim = $vocabSize$
|
||||
labelDim = $numIntents$
|
||||
embDim = 150
|
||||
#hiddenDim = 300
|
||||
hiddenDim = 150
|
||||
|
||||
model = Sequential (
|
||||
Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) : # 3-word window
|
||||
EmbeddingLayer {embDim} : # embedding
|
||||
RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last : # LSTM state, final state
|
||||
#Parallel ((Sequential (RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last):
|
||||
Sequential (RecurrentLSTMLayer {hiddenDim, goBackwards=true} : BS.Sequences.First)), Splice) : # bidirectional LSTM
|
||||
DenseLayer {labelDim, initValueScale=7} # output layer
|
||||
)
|
||||
|
||||
# features
|
||||
t = DynamicAxis{}
|
||||
query = Input {inputDim, dynamicAxis=t}
|
||||
intentLabels = Input {labelDim}
|
||||
|
||||
# model application
|
||||
z = model (query)
|
||||
|
||||
# loss and metric
|
||||
ce = CrossEntropyWithSoftmax (intentLabels, z)
|
||||
errs = ErrorPrediction (intentLabels, z)
|
||||
|
||||
featureNodes = (query)
|
||||
labelNodes = (intentLabels)
|
||||
criterionNodes = (ce)
|
||||
evaluationNodes = (errs)
|
||||
outputNodes = (z)
|
||||
]
|
||||
|
||||
SGD = [
|
||||
maxEpochs = 20 ; epochSize = 36000
|
||||
|
||||
minibatchSize = 70
|
||||
|
||||
learningRatesPerSample = 0.01*2:0.005*12:0.001
|
||||
|
||||
gradUpdateType = "FSAdaGrad"
|
||||
|
||||
gradientClippingWithTruncation = true ; clippingThresholdPerSample = 15.0
|
||||
|
||||
# number of minibatches to report progress
|
||||
firstMBsToShowResult = 10 ; numMBsToShowResult = 100
|
||||
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
parallelizationStartEpoch = 2
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/atis.train.ctf"
|
||||
randomize = true
|
||||
input = [
|
||||
query = [ alias = "S0" ; dim = $vocabSize$ ; format = "sparse" ]
|
||||
intentLabels = [ alias = "S1" ; dim = $numIntents$ ; format = "sparse" ]
|
||||
slotLabels = [ alias = "S2" ; dim = $numLabels$ ; format = "sparse" ]
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
# Run the model to predict slot labels
|
||||
RunATIS = [
|
||||
action = "write"
|
||||
BrainScriptNetworkBuilder = [
|
||||
modelAsTrained = BS.Network.Load ("$modelPath$")
|
||||
final = Hardmax (modelAsTrained.z) # make a decision
|
||||
#labels = Pass (modelAsTrained.slotLabels)
|
||||
# enable this for intent classification:
|
||||
labels = Pass (modelAsTrained.intentLabels)
|
||||
t = DynamicAxis()
|
||||
]
|
||||
|
||||
outputPath = $WorkDir$/model.writeaction
|
||||
outputNodeNames = intentLabels:slotLabels:final
|
||||
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/atis.test.ctf"
|
||||
randomize = false
|
||||
input = [
|
||||
query = [ alias = "S0" ; dim = $vocabSize$ ; format = "sparse" ]
|
||||
intentLabels = [ alias = "S1" ; dim = $numIntents$ ; format = "sparse" ]
|
||||
slotLabels = [ alias = "S2" ; dim = $numLabels$ ; format = "sparse" ]
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
# Evaluate the model's slot-tagging accuracy (as an error count)
|
||||
EvalATIS = [
|
||||
action = "eval"
|
||||
modelPath = $modelPath$ # from outside
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/atis.test.ctf"
|
||||
randomize = false
|
||||
input = [
|
||||
query = [ alias = "S0" ; dim = $vocabSize$ ; format = "sparse" ]
|
||||
intentLabels = [ alias = "S1" ; dim = $numIntents$ ; format = "sparse" ]
|
||||
slotLabels = [ alias = "S2" ; dim = $numLabels$ ; format = "sparse" ]
|
||||
]
|
||||
]
|
||||
]
|
|
@ -34,7 +34,7 @@ Train = [
|
|||
z = w * features + b
|
||||
|
||||
ce = CrossEntropyWithSoftmax (labels, z)
|
||||
errs = ErrorPrediction (labels, z)
|
||||
errs = ClassificationError (labels, z)
|
||||
|
||||
# root nodes
|
||||
featureNodes = (features)
|
||||
|
|
Makefile | 5
|
@ -713,6 +713,9 @@ endif
|
|||
########################################
|
||||
|
||||
ifdef OPENCV_PATH
|
||||
ifdef BOOST_PATH
|
||||
|
||||
INCLUDEPATH += $(BOOST_PATH)/include
|
||||
|
||||
IMAGE_READER_LIBS += -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
|
||||
|
||||
|
@ -743,6 +746,7 @@ $(IMAGEREADER): $(IMAGEREADER_OBJ) | $(CNTKMATH_LIB)
|
|||
@echo $(SEPARATOR)
|
||||
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH) $(IMAGE_READER_LIBS)
|
||||
endif
|
||||
endif
|
||||
|
||||
########################################
|
||||
# 1bit SGD setup
|
||||
|
@ -841,7 +845,6 @@ UNITTEST_READER_SRC = \
|
|||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/HTKLMFReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ImageReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ReaderLibTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/UCIFastReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/stdafx.cpp \
|
||||
$(SOURCEDIR)/Readers/CNTKTextFormatReader/Indexer.cpp \
|
||||
$(SOURCEDIR)/Readers/CNTKTextFormatReader/TextParser.cpp \
|
||||
|
|
|
@ -39,6 +39,8 @@ void DoTrain(const ConfigRecordType& config);
|
|||
template <typename ElemType>
|
||||
void DoAdapt(const ConfigParameters& config);
|
||||
template <typename ElemType>
|
||||
void DoDumpNodes(const ConfigParameters& config);
|
||||
template <typename ElemType>
|
||||
void DoEdit(const ConfigParameters& config);
|
||||
|
||||
// evaluation (EvalActions.cpp)
|
||||
|
|
|
@ -154,6 +154,13 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
|
|||
m_net->InitLearnableParameters(nodePtr, L"uniform", initValueScale, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initOnCPUOnly);
|
||||
else if (EqualCI(initString, L"gaussian"))
|
||||
m_net->InitLearnableParameters(nodePtr, L"gaussian", initValueScale, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initOnCPUOnly);
|
||||
else if (EqualCI(initString, L"bilinear"))
|
||||
{
|
||||
const size_t kernelWidth = node->GetOptionalParameter("kernelWidth", "0");
|
||||
const size_t kernelHeight = node->GetOptionalParameter("kernelHeight", "0");
|
||||
assert(kernelWidth > 0 && kernelHeight > 0);
|
||||
m_net->InitLearnableParametersWithBilinearFill<ElemType>(nodePtr, kernelWidth, kernelHeight);
|
||||
}
|
||||
else if (EqualCI(initString, L"fromFile"))
|
||||
{
|
||||
std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
|
||||
|
|
|
@ -158,6 +158,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
else if (EqualInsensitive(nodeType, OperationNameOf(CRFNode), L"CRF")) ret = true;
|
||||
#endif
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode), L"CBCEWithSM")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ClassificationErrorNode), L"ErrorPrediction")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(EqualNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(GreaterEqualNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(GreaterNode))) ret = true;
|
||||
|
@ -177,7 +178,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
else if (EqualInsensitive(nodeType, OperationNameOf(DropoutNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(DummyCriterionNode), L"DummyCriterion")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ElementTimesNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ErrorPredictionNode), L"ClassificationError")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(ExpNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(FloorNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(FutureValueNode))) ret = true;
|
||||
|
|
|
@ -1751,10 +1751,10 @@ shared_ptr<ComputationNode<ElemType>> SimpleNetworkBuilder<ElemType>::AddTrainAn
|
|||
// output = builder.SquareError(label, tinput, (evalNodeName == L"")?L"EvalSquareError":evalNodeName);
|
||||
output = builder.Logistic(label, tinput, (evalNodeName == L"") ? L"Logistic" : evalNodeName);
|
||||
break;
|
||||
case EvalCriterion::ErrorPrediction:
|
||||
case EvalCriterion::ClassificationError:
|
||||
if (matrix != nullptr && tinput == input)
|
||||
tinput = builder.Times(matrix, input);
|
||||
output = builder.ErrorPrediction(label, tinput, (evalNodeName == L"") ? L"EvalErrorPrediction" : evalNodeName);
|
||||
output = builder.ClassificationError(label, tinput, (evalNodeName == L"") ? L"EvalClassificationError" : evalNodeName);
|
||||
break;
|
||||
#ifdef COMING_SOON
|
||||
case EvalCriterion::CRF:
|
||||
|
@ -1785,23 +1785,26 @@ template class SimpleNetworkBuilder<double>;
|
|||
TrainingCriterion ParseTrainingCriterionString(wstring s)
|
||||
{
|
||||
if (EqualCI(s, L"crossEntropyWithSoftmax")) return TrainingCriterion::CrossEntropyWithSoftmax;
|
||||
else if (EqualCI(s, L"sequenceWithSoftmax")) return TrainingCriterion::SequenceWithSoftmax;
|
||||
else if (EqualCI(s, L"squareError")) return TrainingCriterion::SquareError;
|
||||
else if (EqualCI(s, L"logistic")) return TrainingCriterion::Logistic;
|
||||
else if (EqualCI(s, L"noiseContrastiveEstimation")) return TrainingCriterion::NCECrossEntropyWithSoftmax;
|
||||
// legacy/deprecated
|
||||
else if (EqualCI(s, L"classCrossEntropyWithSoftmax")) return TrainingCriterion::ClassCrossEntropyWithSoftmax;
|
||||
else if (EqualCI(s, L"sequenceWithSoftmax")) return TrainingCriterion::SequenceWithSoftmax;
|
||||
else LogicError("trainingCriterion: Invalid trainingCriterion value. Valid values are (crossEntropyWithSoftmax | squareError | logistic | classCrossEntropyWithSoftmax| sequenceWithSoftmax)");
|
||||
}
|
||||
|
||||
EvalCriterion ParseEvalCriterionString(wstring s)
|
||||
{
|
||||
if (EqualCI(s, L"errorPrediction")) return EvalCriterion::ErrorPrediction;
|
||||
if (EqualCI(s, L"classificationError")) return EvalCriterion::ClassificationError;
|
||||
else if (EqualCI(s, L"crossEntropyWithSoftmax")) return EvalCriterion::CrossEntropyWithSoftmax;
|
||||
else if (EqualCI(s, L"sequenceWithSoftmax")) return EvalCriterion::SequenceWithSoftmax;
|
||||
else if (EqualCI(s, L"classCrossEntropyWithSoftmax")) return EvalCriterion::ClassCrossEntropyWithSoftmax;
|
||||
else if (EqualCI(s, L"logistic")) return EvalCriterion::Logistic;
|
||||
else if (EqualCI(s, L"noiseContrastiveEstimation")) return EvalCriterion::NCECrossEntropyWithSoftmax;
|
||||
else if (EqualCI(s, L"squareError")) return EvalCriterion::SquareError;
|
||||
// legacy/deprecated
|
||||
else if (EqualCI(s, L"classCrossEntropyWithSoftmax")) return EvalCriterion::ClassCrossEntropyWithSoftmax;
|
||||
else if (EqualCI(s, L"sequenceWithSoftmax")) return EvalCriterion::SequenceWithSoftmax;
|
||||
else if (EqualCI(s, L"errorPrediction")) return EvalCriterion::ClassificationError;
|
||||
else LogicError("evalCriterion: Invalid trainingCriterion value. Valid values are (errorPrediction | crossEntropyWithSoftmax | squareError | logistic | sequenceWithSoftmax)");
|
||||
}
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ enum class EvalCriterion : int
|
|||
CrossEntropy,
|
||||
SquareError,
|
||||
Logistic,
|
||||
ErrorPrediction,
|
||||
ClassificationError,
|
||||
ClassCrossEntropyWithSoftmax,
|
||||
NCECrossEntropyWithSoftmax,
|
||||
CRF,
|
||||
|
|
|
@ -191,6 +191,30 @@ void DoAdapt(const ConfigParameters& config)
|
|||
template void DoAdapt<float>(const ConfigParameters& config);
|
||||
template void DoAdapt<double>(const ConfigParameters& config);
|
||||
|
||||
// ===========================================================================
|
||||
// DoDumpNodes() - implements CNTK "dumpNode" command
|
||||
// ===========================================================================
|
||||
|
||||
template <typename ElemType>
|
||||
void DoDumpNodes(const ConfigParameters& config)
|
||||
{
|
||||
wstring modelPath = config(L"modelPath");
|
||||
wstring nodeName = config(L"nodeName", L"__AllNodes__");
|
||||
wstring nodeNameRegexStr = config(L"nodeNameRegex", L"");
|
||||
wstring defOutFilePath = modelPath + L"." + nodeName + L".txt";
|
||||
wstring outputFile = config(L"outputFile", defOutFilePath);
|
||||
bool printValues = config(L"printValues", true);
|
||||
bool printMetadata = config(L"printMetadata", true);
|
||||
if (!printValues && !printMetadata)
|
||||
InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");
|
||||
|
||||
ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
|
||||
net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
|
||||
}
|
||||
|
||||
template void DoDumpNodes<float>(const ConfigParameters& config);
|
||||
template void DoDumpNodes<double>(const ConfigParameters& config);
|
||||
|
||||
// ===========================================================================
|
||||
// DoEdit() - implements CNTK "edit" command
|
||||
// ===========================================================================
|
||||
|
|
|
@ -277,7 +277,7 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
|
|||
if (rightVal.Is<Double>()) // ComputeNode * scalar
|
||||
swap(leftVal, rightVal); // -> scalar * ComputeNode
|
||||
if (leftVal.Is<Double>())
|
||||
operationName = L"Scale"; // scalar * ComputeNode
|
||||
operationName = L"ElementTimes"; // scalar * ComputeNode
|
||||
else
|
||||
operationName = L"Times"; // ComputeNode * ComputeNode (matrix produt)
|
||||
}
|
||||
|
@ -305,6 +305,8 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
|
|||
config->Add(L"operation", MakeFailFn(e->location), ConfigValuePtr(make_shared<String>(operationName), MakeFailFn(e->location), exprPath));
|
||||
let leftFailFn = leftVal.GetFailFn(); // report any error for this Constant object as belonging to the scalar factor's expression
|
||||
vector<ConfigValuePtr> inputs;
|
||||
#if 0 // BUGBUG: rows,cols is no longer right, we need a TensorShape here
|
||||
// TODO: Solve this by directly constructing Constant() off a 'double' input in the ComputationNode constructor.
|
||||
if (operationName == L"Scale")
|
||||
{
|
||||
// if we scale, the first operand is a Double, and we must convert that into a 1x1 Constant
|
||||
|
@ -314,7 +316,7 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
|
|||
let one = MakePrimitiveConfigValuePtr(1.0, leftFailFn, exprPath);
|
||||
constantConfig->Add(L"rows", leftFailFn, one);
|
||||
constantConfig->Add(L"cols", leftFailFn, one);
|
||||
//constantConfig->Add(L"shape", leftFailFn, one); // BUGBUG: rows,cols is no longer right, we need a TensorShape here
|
||||
//constantConfig->Add(L"shape", leftFailFn, one);
|
||||
constantConfig->Add(L"value", leftFailFn, leftVal);
|
||||
constantConfig->Add(L"learningRateMultiplier", leftFailFn, MakePrimitiveConfigValuePtr(0.0f, leftFailFn, exprPath));
|
||||
let value = ConfigValuePtr(rtInfo->construct(constantConfig), leftFailFn, exprPath);
|
||||
|
@ -323,6 +325,7 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
|
|||
valueWithName->SetName(value.GetExpressionName());
|
||||
leftVal = value; // and that's our actual left value
|
||||
}
|
||||
#endif
|
||||
inputs.push_back(leftVal);
|
||||
if (operationName != L"Negate") // Negate only has one input (rightVal is a nullptr)
|
||||
inputs.push_back(rightVal);
|
||||
|
@ -332,6 +335,8 @@ static ConfigValuePtr NodeOp(const ExpressionPtr &e, ConfigValuePtr leftVal, Con
|
|||
{
|
||||
let one = MakePrimitiveConfigValuePtr(1.0, leftFailFn, exprPath);
|
||||
config->Add(L"outputRank", leftFailFn, one);
|
||||
let minusOne = MakePrimitiveConfigValuePtr(-1.0, leftFailFn, exprPath);
|
||||
config->Add(L"inferInputRankToMap", leftFailFn, minusOne);
|
||||
}
|
||||
// instantiate the ComputationNode
|
||||
let value = ConfigValuePtr(rtInfo->construct(config), MakeFailFn(e->location), exprPath);
|
||||
|
|
|
@ -30,27 +30,38 @@
|
|||
|
||||
# LinearLayer -- create a fully-connected linear projection layer
|
||||
# Note: outDim may describe a tensor as well.
|
||||
LinearLayer {outDim, bias = true, init='uniform', initValueScale=1} =
|
||||
LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=None, mapRank=None} =
|
||||
{
|
||||
W = ParameterTensor {_ConcatArrays (outDim, 0), init=init, initValueScale=initValueScale}
|
||||
# inputRank given: number of zeroes to add to W (mapRank must not be given)
|
||||
# mapRank given: expand W to leave exactly mapRank axes (inputRank must not be given)
|
||||
# none given: expand W to all (same as mapRank=0)
|
||||
inputShape =
|
||||
if BS.Constants.IsNone (inputRank) then Inferred # not given: one Inferred, which will get expanded
|
||||
else if !BS.Constants.IsNone (mapRank) then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
|
||||
else Repeat (inputRank, Inferred)
|
||||
W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
|
||||
b = ParameterTensor {outDim, initValue=0}
|
||||
outRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
|
||||
outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
|
||||
inferInputRankToMap =
|
||||
if !BS.Constants.IsNone (inputRank) then -1 # means not specified
|
||||
else if BS.Constants.IsNone (mapRank) then 0 # default to 'use all input dims'
|
||||
else mapRank
|
||||
apply (x) =
|
||||
if bias
|
||||
then Times (W, x, outputRank = outRank) + b
|
||||
else Times (W, x, outputRank = outRank)
|
||||
then Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap) + b
|
||||
else Times (W, x, outputRank=outputRank, inferInputRankToMap=inferInputRankToMap)
|
||||
}.apply
|
||||
|
||||
# DenseLayer -- create a fully-connected layer with optional non-linearity
|
||||
DenseLayer{outDim, bias = true, activation=(x=>x), init='uniform', initValueScale=1} = Sequential ( LinearLayer{outDim, bias = bias, init = init, initValueScale = initValueScale} : activation )
|
||||
DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale=1, inputRank=None, mapRank=None} = Sequential ( LinearLayer{outDim, bias=bias, init=init, initValueScale=initValueScale, inputRank=inputRank, mapRank=mapRank} : activation )
|
||||
|
||||
# EmbeddingLayer -- create a linear embedding layer
|
||||
EmbeddingLayer {outDim, # dimension of embedding
|
||||
embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
|
||||
{
|
||||
shape = if transpose then (0 : outDim) else (outDim : 0)
|
||||
shape = if transpose then (Inferred : outDim) else (outDim : Inferred)
|
||||
E = if embeddingPath == ''
|
||||
then ParameterTensor {shape, init='uniform'} # learnable
|
||||
then ParameterTensor {shape, init='heNormal'} # learnable
|
||||
else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
|
||||
TimesOp = if transpose then TransposeTimes else Times
|
||||
apply (x) = TimesOp (E, x) # x is expected to be sparse one-hot
|
||||
|
@ -65,7 +76,7 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
|
|||
filterShape, # e.g. (3:3)
|
||||
bias = true,
|
||||
activation = (x=>x),
|
||||
init = "uniform",
|
||||
init = "heNormal",
|
||||
initValueScale = 1, # TODO: rename to initScale
|
||||
#reductionRank = 1, # TODO: support this
|
||||
stride = 1, pad = false,
|
||||
|
@ -77,9 +88,9 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
|
|||
outputChannelsShape = _AsArray (numOutputChannels)
|
||||
outputRank = Length (outputChannelsShape)
|
||||
filterRank = Length (filterShape)
|
||||
kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, 0)) # kernel := filter plus reductionDims
|
||||
#W = ParameterTensor{_ConcatArrays ( kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale} # [ W x H x C x K ]
|
||||
W = ParameterTensor{(outputChannelsShape:0), init = init, initValueScale = initValueScale} # old-style for backwards-compatible random initialization
|
||||
kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, Inferred)) # kernel := filter plus reductionDims
|
||||
#W = ParameterTensor{_ConcatArrays ( kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale, initOutputRank = -1} # [ W x H x C x K ]
|
||||
W = ParameterTensor{(outputChannelsShape : Inferred), init = init, initValueScale = initValueScale} # old-style for backwards-compatible random initialization
|
||||
b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = 0) # [ 1 x 1 x K ]
|
||||
sharing = true # TODO: support this
|
||||
transpose = false # TODO: support this
|
||||
|
@ -106,10 +117,11 @@ AveragePoolingLayer {filterShape, stride = 1, pad = false, lowerPad = 0, upperPa
|
|||
RecurrentLSTMLayer {outputDim,
|
||||
cellShape = BS.Constants.None, # if set then use a projection
|
||||
goBackwards = false,
|
||||
init='heNormal', initValueScale=1,
|
||||
enableSelfStabilization = false} =
|
||||
{
|
||||
previousHook = if goBackwards then BS.RNNs.NextHC else BS.RNNs.PreviousHC
|
||||
lstm = BS.RNNs.LSTMBlock {outputDim, cellShape = cellShape, enableSelfStabilization = enableSelfStabilization}
|
||||
lstm = BS.RNNs.LSTMBlock {outputDim, cellShape = cellShape, enableSelfStabilization = enableSelfStabilization, init=init, initValueScale=initValueScale}
|
||||
apply (x) = {
|
||||
prevState = previousHook (lstmState) # recurrent memory. E.g. Previous or Next, with or without initial state, beam reordering etc.
|
||||
|
||||
|
@ -138,7 +150,7 @@ DelayLayer {T=1, defaultHiddenActivation=0} =
|
|||
# BatchNormalizationLayer -- create a batch-normalization layer
|
||||
BatchNormalizationLayer {spatialRank = 0, # reduce over these dims. E.g. 2 to reduce over (w,h) in a [W x H x C]-shaped input
|
||||
initialScale = 1,
|
||||
normalizationTimeConstant = 0, blendTimeConstant = 0,
|
||||
normalizationTimeConstant = 0, blendTimeConstant = 0, # TODO: normTimeConst should be INF, not 0
|
||||
epsilon = 0.00001, useCntkEngine = true} =
|
||||
{
|
||||
#normShape = _ConcatArrays (Repeat (spatialRank, 1), 0) # spatial dims get a dimension of 1 (broadcasting, while all others are inferred from input)
|
||||
|
@ -151,18 +163,16 @@ BatchNormalizationLayer {spatialRank = 0, # reduce over these dims. E.g. 2 to r
|
|||
}.apply
|
||||
|
||||
# LayerNormalizationLayer -- create a layer-normalization layer
|
||||
LayerNormalizationLayer {dim = BS.Constants.None, initScale = 1, initBias = 0} = if BS.Constants.IsNone (dim) then Fail ("LayerNormalizationLayer: 'dim' parameter is currently required.") else
|
||||
LayerNormalizationLayer {initScale = 1, initBias = 0} =
|
||||
{
|
||||
gain = ParameterTensor{(1), initValue = initScale}
|
||||
bias = ParameterTensor{(1), initValue = initBias}
|
||||
|
||||
apply (x) = {
|
||||
div = Constant (1.0 / dim)
|
||||
|
||||
# normalize w.r.t. actual sample statistics
|
||||
mean = div .* ReduceSum (x)
|
||||
mean = ReduceMean (x)
|
||||
x0 = x - mean;
|
||||
std = Sqrt (div .* ReduceSum (x0 .* x0))
|
||||
std = Sqrt (ReduceMean (x0 .* x0))
|
||||
xHat = ElementDivide (x0, std)
|
||||
|
||||
# denormalize with learned parameters
|
||||
|
@ -171,16 +181,22 @@ LayerNormalizationLayer {dim = BS.Constants.None, initScale = 1, initBias = 0} =
|
|||
}.apply
|
||||
|
||||
# StabilizerLayer -- create a scalar stabilizer [J. Droppo, 2014 -- TODO: get the reference]
|
||||
StabilizerLayer {} =
|
||||
StabilizerLayer{} =
|
||||
{
|
||||
# BUGBUG: Calling f(x) twice will create a second set of parameters. Needs to refactor Stabilize() for this.
|
||||
apply (x) = Stabilize (x)
|
||||
apply (x) = BS.Parameters.Stabilize (x)
|
||||
}.apply
|
||||
|
||||
# FeatureMVNLayer -- create a corpus-level feature-normalization layer
|
||||
# This can only be applied to features. Statistics are not shared across invocations,
|
||||
# which is semantically OK because the values are the same. However, it is not efficient.
|
||||
FeatureMVNLayer {} = MeanVarNorm
|
||||
FeatureMVNLayer{} = MeanVarNorm
|
||||
|
||||
# LogPriorLayer -- create a corpus-level label-prior layer
|
||||
# This can only be applied to labels. Statistics are not shared across invocations,
|
||||
# which is semantically OK because the values are the same. However, it is not efficient.
|
||||
# TODO: document on Wiki
|
||||
LogPriorLayer{} = LogPrior
|
||||
|
||||
# Layers that exist in other tools that we will not have:
|
||||
# FlattenLayer{}: Not needed since DenseLayer() can handle tensors just fine.
|
||||
|
@ -188,6 +204,10 @@ FeatureMVNLayer {} = MeanVarNorm
|
|||
|
||||
Identity(x) = x # sometimes helpful
|
||||
|
||||
None = BS.Constants.None # for use with some optional parameters; test with IsNone()
|
||||
|
||||
Inferred = 0 # denotes a dimension that is to be inferred
|
||||
|
||||
##############################################################################
|
||||
# Composing layers or models into more more complex models
|
||||
##############################################################################
|
||||
|
@ -234,7 +254,7 @@ CrossEntropyWithSoftmax = CNTK2.CrossEntropyWithSoftmax
|
|||
Dropout = CNTK2.Dropout
|
||||
ElementTimes = CNTK2.ElementTimes
|
||||
ElementDivide = CNTK2.ElementDivide
|
||||
ErrorPrediction = CNTK2.ErrorPrediction
|
||||
ClassificationError = CNTK2.ClassificationError
|
||||
Exp = CNTK2.Exp
|
||||
Floor = CNTK2.Floor
|
||||
Log = CNTK2.Log
|
||||
|
@ -245,6 +265,7 @@ RectifiedLinear = CNTK2.ReLU # deprecated
|
|||
ReLU = CNTK2.ReLU
|
||||
ReduceSum = CNTK2.ReduceSum
|
||||
ReduceLogSum = CNTK2.ReduceLogSum
|
||||
ReduceMean = CNTK2.ReduceMean
|
||||
ReduceMin = CNTK2.ReduceMin
|
||||
ReduceMax = CNTK2.ReduceMax
|
||||
|
||||
|
@ -283,7 +304,7 @@ CNTK2 = [
|
|||
// TODO: The API for Parameter is different in current 2.0 design, getting a constant as input for the initial values.
|
||||
// This needs to be fixed to follow the way the Constant() is exposed in Python
|
||||
// Making this an internal node with "_" until we agree on the final interface:
|
||||
_Parameter(shape, value = 0, initValue = '', learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*shape */ ] /*plus the function args*/ ]
|
||||
_Parameter(shape, value = 0, initValue = '', learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, initOutputRank = 1, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*shape */ ] /*plus the function args*/ ]
|
||||
|
||||
// 3. Shape operations
|
||||
// Changes: NewReshape -> Reshape, input -> _, dims -> shape
|
||||
|
@ -316,13 +337,14 @@ CNTK2 = [
|
|||
|
||||
// 4. Tensor operations
|
||||
// Changes: Matrix -> Tensor. A -> x, B -> y. Data must come on y ("default parameter") hence not using _
|
||||
Times(x, y, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( x : y ) /*plus the function args*/ ]
|
||||
Times(x, y, outputRank=1, inferInputRankToMap=-1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( x : y ) /*plus the function args*/ ]
|
||||
|
||||
// 5. Elementwise operations.
|
||||
// Changes: "Matrix" -> "Tensor"; left input -> _; Clip: move input to front. ElementDivide/Times: anotherTensor -> y
|
||||
Abs(_, tag='') = new ComputationNode [ operation = 'Abs' ; inputs = _ /*plus the function args*/ ]
|
||||
Ceil(_, tag='') = Negate(Floor(Negate(_)), tag=tag)
|
||||
Clip(_, minValue, maxValue, tag='') = new ComputationNode [ operation = 'Clip' ; inputs = (minValue : maxValue : _) /* plus the function args*/ ]
|
||||
# TODO: Make ElementDivide a proper operation
|
||||
ElementDivide(_, y, tag='') = ElementTimes(_, Reciprocal(y), tag=tag)
|
||||
ElementTimes(_, y, tag='') = new ComputationNode [ operation = 'ElementTimes' ; inputs = (_ : y) /*plus the function args*/ ]
|
||||
Exp(_, tag='') = new ComputationNode [ operation = 'Exp' ; inputs = _ /*plus the function args*/ ]
|
||||
|
@ -336,12 +358,12 @@ CNTK2 = [
|
|||
Tanh(_, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = _ /*plus the function args*/ ]
|
||||
|
||||
// 6. Reductions
|
||||
ReduceSum (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Sum" /*plus the function args*/ ]
|
||||
ReduceLogSum(_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "LogSum" /*plus the function args*/ ]
|
||||
ReduceMin (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Min" /*plus the function args*/ ]
|
||||
ReduceMax (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Max" /*plus the function args*/ ]
|
||||
#ReduceMean (_, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; reductionOp = "Mean" /*plus the function args*/ ]
|
||||
|
||||
ReduceSum (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Sum" /*plus the function args*/ ]}.r
|
||||
ReduceLogSum(_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "LogSum" /*plus the function args*/ ]}.r
|
||||
ReduceMean (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Mean" /*plus the function args*/ ]}.r
|
||||
ReduceMin (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Min" /*plus the function args*/ ]}.r
|
||||
ReduceMax (_, axis=None, tag='') = { axis1 = if BS.Constants.IsNone (axis) then 0 else axis ; r = new ComputationNode [ operation = 'ReduceElements' ; inputs = _ ; axis = axis1 ; reductionOp = "Max" /*plus the function args*/ ]}.r
|
||||
|
||||
// 7. Control flow (if, composite etc.)
|
||||
// None so far
|
||||
|
||||
|
@ -365,11 +387,12 @@ CNTK2 = [
|
|||
// No changes here - we said the default input would be the label sequence here, against which the
|
||||
// empirical sequence is compared to. Keeping this for now.
|
||||
CrossEntropyWithSoftmax(_, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = (_ : outProbVectorSequence) /*plus the function args*/ ]
|
||||
ErrorPrediction(_, outVectorSequence, topN=1, tag='') = new ComputationNode [ operation = 'ErrorPrediction' ; inputs = if topN == 1 then (_ : outVectorSequence) else (_ : outVectorSequence : Constant (topN)) /*plus the function args*/ ]
|
||||
ClassificationError(_, outVectorSequence, topN=1, tag='') = new ComputationNode [ operation = 'ClassificationError' ; inputs = if topN == 1 then (_ : outVectorSequence) else (_ : outVectorSequence : Constant (topN)) /*plus the function args*/ ]
|
||||
ErrorPrediction = ClassificationError # legacy
|
||||
# TODO: replace with this (need to deal with topN thing):
|
||||
# (_new will be removed once the change is made)
|
||||
CrossEntropyWithSoftmax_new (L, z, tag='') = Minus (ReduceLogSum (z), TransposeTimes (L, z), tag=tag)
|
||||
ErrorPrediction_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)
|
||||
ClassificationError_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)
|
||||
|
||||
// 12. Comparison nodes
|
||||
Less(_, y, tag='') = new ComputationNode [ operation = 'Less' ; inputs = (_ : y) /*plus the function args*/ ]
|
||||
|
@ -393,12 +416,12 @@ CNTK2 = [
|
|||
# - initFromLiteral="..." (deprecated) --> parse a string literal (obsolete with value=array form)
|
||||
# - init="fixedValue", value from 'value'
|
||||
# Warning: Current config will behave unexpected if user mistypes 'initValue' as 'value' (which will be ignored, defaulting to "uniform" init)
|
||||
Parameter {outputDim, inputDim, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0/*deprecated*/, initValue = '', initFromFilePath = '', initFromLiteral = ''/*deprecated*/, initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
|
||||
Parameter {outputDim, inputDim, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0/*deprecated*/, initValue = '', initFromFilePath = '', initFromLiteral = ''/*deprecated*/, initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; initOutputRank = 1 ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
|
||||
|
||||
LearnableParameter = Parameter // deprecated
|
||||
|
||||
# TODO: make Parameter take tensor dims?
|
||||
ParameterTensor {dims, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0, initValue = '', initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
|
||||
ParameterTensor {dims, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0, initValue = '', initOutputRank = 1, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
|
||||
ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, initFromLiteral = literal, learningRateMultiplier = 0.0)
|
||||
# TODO: Deprecate ConstantFromString() in favor of Constant(array expression)
|
||||
DynamicAxis(tag='') = new ComputationNode [ operation = 'DynamicAxis' ; /*plus the function args*/ ]
|
||||
|
@ -452,7 +475,7 @@ MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, l
|
|||
MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]
|
||||
AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = input /*plus the function args*/ ]
|
||||
ColumnwiseCrossProduct = KhatriRaoProduct // deprecated
|
||||
ClassificationError = ErrorPrediction
|
||||
ErrorPrediction = ClassificationError # legacy name
|
||||
Delay = PastValue
|
||||
|
||||
BatchNormalization(input, scale, bias, runMean, runVariance, spatial, normalizationTimeConstant = 0, blendTimeConstant = 0, epsilon = 0.00001, useCntkEngine = true, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runVariance) /*plus the function args*/ ]
|
||||
|
@ -551,16 +574,22 @@ IntDiv(x, y) = new NumericFunction [ what = 'IntDiv' ; args = (x:y) ]
|
|||
# macros from NDL book
|
||||
##############################################################################
|
||||
|
||||
# deprecated--use LinearLayer{} and DenseLayer{} instead
|
||||
BFF(in, rows, cols) = [ B = Parameter(rows, 1, initValue = 0) ; W = Parameter(rows, cols) ; z = W*in+B ]
|
||||
SBFF(in, rows, cols) = [ Eh = Sigmoid(BFF(in, rows, cols).z) ]
|
||||
|
||||
# deprecated--use FeatureMVNLayer{} instead
|
||||
MeanVarNorm(feat) = PerDimMeanVarNormalization(feat, Mean(feat), InvStdDev(feat))
|
||||
|
||||
# deprecated--use LogPriorLayer{} instead
|
||||
LogPrior(labels) = Log(Mean(labels))
|
||||
|
||||
# specify one of these two for initialization:
|
||||
# - init = "uniform"|"gaussian"
|
||||
# - embeddingFile = PATHNAME
|
||||
# deprecated--use EmbeddingLayer{} instead
|
||||
Embedding (embeddingDim, input, inputDim=input.dim, initFrom=''/*|fromFile|gaussian|uniform*/, embeddingPath = '', sparseInput = false, learningRateWeight = 0.0) = [
|
||||
embedding = Transpose (LearnableParameter (inputDim, embeddingDim, learningRateMultiplier = learningRateWeight, init = initFrom, initFromFilePath = embeddingPath))
|
||||
embedding = Transpose (Parameter (inputDim, embeddingDim, learningRateMultiplier = learningRateWeight, init = initFrom, initFromFilePath = embeddingPath))
|
||||
lookup = if sparseInput then embedding * input
|
||||
else GatherPacked (input, embedding)
|
||||
].lookup
|
||||
|
@ -802,7 +831,8 @@ Loop = [
|
|||
|
||||
Parameters =
|
||||
[
|
||||
WeightParam (outputDim, inputDim) = Parameter (outputDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1)
|
||||
# TODO: These all have randomSeed set to 1!
|
||||
WeightParam (outputDim, inputDim) = ParameterTensor ((outputDim : inputDim), init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1)
|
||||
DiagWeightParam (outputDim) = ParameterTensor ((outputDim), init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1) # meant to be applied elementwise
|
||||
BiasParam (dim) = ParameterTensor ((dim), initValue=0.0)
|
||||
ScalarParam() = BiasParam (1)
|
||||
|
@ -845,23 +875,25 @@ RNNs =
|
|||
# This is the stateless version that takes the previous state as an input.
|
||||
# It returns a dictionary with three members: h and c, and dim=h.dim for convenience. prevState must have h and c.
|
||||
# This function also takes an optional auxiliary input, e.g. for suporting attention models.
|
||||
LSTMBlock (outputDim, cellShape=Constants.None, enableSelfStabilization=false) =
|
||||
[
|
||||
LSTMBlock (outputDim, cellShape=None, init='heNormal', initValueScale=1, enableSelfStabilization=false) =
|
||||
{
|
||||
cellDim = if Constants.IsNone (cellShape) then outputDim else cellShape
|
||||
// parameter macros
|
||||
# note: each invocation comes with its own set of weights
|
||||
B{} = Parameters.BiasParam {cellDim}
|
||||
W{} = Parameters.WeightParam {cellDim, 0} // input
|
||||
A{} = Parameters.WeightParam {cellDim, 0} // aux input
|
||||
H{} = Parameters.WeightParam {cellDim, outputDim} // hidden-to-hidden
|
||||
C{} = Parameters.DiagWeightParam {cellDim} // cell-to-hiddden {note: applied elementwise}
|
||||
Wmr = Parameters.WeightParam {outputDim, cellDim};
|
||||
|
||||
# parameter helpers
|
||||
# note: invoked multiple times, each invocation comes with its own set of weights
|
||||
B{} = ParameterTensor {(cellDim), initValue=0} # a bias
|
||||
W{} = ParameterTensor {(cellDim : Inferred), init=init, initValueScale=initValueScale} # a input
|
||||
A{} = ParameterTensor {(cellDim : Inferred), init=init, initValueScale=initValueScale} # an aux input
|
||||
H{} = ParameterTensor {(cellDim : outputDim), init=init, initValueScale=initValueScale} # a hidden-to-hidden
|
||||
C{} = ParameterTensor {(cellDim), init=init, initValueScale=initValueScale} # a cell-to-hiddden {note: applied elementwise}
|
||||
|
||||
Wmr = ParameterTensor {(outputDim : cellDim), init=init, initValueScale=initValueScale}; # final projection
|
||||
|
||||
S(x) = Parameters.Stabilize (x, enabled=enableSelfStabilization)
|
||||
# BUGBUG: S() must not be a macro either, but also an object instance
|
||||
|
||||
apply (x, prevState, aux=Constants.None) = [
|
||||
_ = [ // encapsulate the inner workings
|
||||
apply (x, prevState, aux=None) = {
|
||||
_ = { // encapsulate the inner workings
|
||||
|
||||
dh = prevState.h // previous values
|
||||
dc = prevState.c
|
||||
|
@ -872,20 +904,20 @@ RNNs =
|
|||
|
||||
# projected contribution from input(s) and bias
|
||||
pin() = if Constants.IsNone (aux)
|
||||
then B() + W() * x
|
||||
else B() + W() * x + A() * aux
|
||||
then B{} + W{} * x
|
||||
else B{} + W{} * x + A{} * aux
|
||||
|
||||
it = Sigmoid (pin() + H() * dhs + C() .* dcs) // input gate(t)
|
||||
bit = it .* Tanh (pin() + H() * dhs) // applied to tanh of input network
|
||||
it = Sigmoid (pin() + H{} * dhs + C{} .* dcs) // input gate(t)
|
||||
bit = it .* Tanh (pin() + H{} * dhs) // applied to tanh of input network
|
||||
|
||||
ft = Sigmoid (pin() + H() * dhs + C() .* dcs) // forget-me-not gate(t)
|
||||
ft = Sigmoid (pin() + H{} * dhs + C{} .* dcs) // forget-me-not gate(t)
|
||||
bft = ft .* dc // applied to cell(t-1)
|
||||
|
||||
ct = bft + bit // c(t) is sum of both
|
||||
|
||||
ot = Sigmoid (pin() + H() * dhs + C() .* S(ct)) // output gate(t)
|
||||
ot = Sigmoid (pin() + H{} * dhs + C{} .* S(ct)) // output gate(t)
|
||||
ht = ot .* Tanh (ct) // applied to tanh(cell(t))
|
||||
]
|
||||
}
|
||||
|
||||
# our return values
|
||||
c = _.ct // cell value
|
||||
|
@ -893,8 +925,8 @@ RNNs =
|
|||
then Wmr * S(_.ht) // project
|
||||
else _.ht // no projection
|
||||
dim = outputDim
|
||||
] // end of apply(x)
|
||||
].apply
|
||||
} // end of apply (x, prevState)
|
||||
}.apply
|
||||
|
||||
# LSTMP -- LSTM function with projection and self-stabilization
|
||||
# Projection is enabled by passing different values for outputDim and cellDim.
|
||||
|
|
|
@ -216,13 +216,13 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
|
|||
needPrior = true
|
||||
// the following two belong into SGD, so they were removed here
|
||||
//trainingCriterion = CrossEntropyWithSoftmax
|
||||
//evalCriterion = ErrorPrediction
|
||||
//evalCriterion = ClassificationError
|
||||
// new: connect to input stream from source; and expose the output layer
|
||||
input = source.features.data // these are also ComputeNodeRefs, exposed by the source
|
||||
output = ComputeNodeRef [ dim = source.labels.dim ] // SimpleNetworkBuilder will put top layer affine transform output (input to softmax) here
|
||||
// criteria are configurable here; these are ComputeNodes created here
|
||||
trainingCriterion = CrossEntropyWithSoftmax (source.labels.data, output)
|
||||
evalCriterion = ErrorPrediction (source.labels.data, output)
|
||||
evalCriterion = ClassificationError (source.labels.data, output)
|
||||
// new: (and half-baked) define Input nodes
|
||||
myFeatures=Input(featDim) // reader stream will reference this
|
||||
myLabels=Input(labelDim)
|
||||
|
@ -245,7 +245,7 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
|
|||
//L2 = SBFF(L1,hiddenDim,hiddenDim)
|
||||
//L3 = SBFF(L2,hiddenDim,hiddenDim)
|
||||
//CE = SMBFF(L3,labelDim,hiddenDim,myLabels,tag=Criteria)
|
||||
//Err = ErrorPrediction(myLabels,CE.BFF.FF.P,tag=Eval)
|
||||
//Err = ClassificationError(myLabels,CE.BFF.FF.P,tag=Eval)
|
||||
//logPrior = LogPrior(myLabels)
|
||||
//ScaledLogLikelihood=Minus(CE.BFF.FF.P,logPrior,tag=Output)
|
||||
|
||||
|
@ -279,7 +279,7 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
|
|||
|
||||
// define criterion nodes
|
||||
CE = CrossEntropyWithSoftmax(myLabels, outZ)
|
||||
Err = ErrorPrediction(myLabels, outZ)
|
||||
Err = ClassificationError(myLabels, outZ)
|
||||
|
||||
// define output node for decoding
|
||||
logPrior = LogPrior(myLabels)
|
||||
|
@ -392,7 +392,7 @@ network = new NDL [
|
|||
|
||||
// define criterion nodes
|
||||
CE = CrossEntropyWithSoftmax(myLabels, outZ)
|
||||
Err = ErrorPrediction(myLabels, outZ)
|
||||
Err = ClassificationError(myLabels, outZ)
|
||||
|
||||
// define output node for decoding
|
||||
logPrior = LogPrior(myLabels)
|
||||
|
|
|
@ -93,26 +93,6 @@ std::string WCharToString(const wchar_t* wst)
|
|||
return s;
|
||||
}
|
||||
|
||||
// TODO: This is an action, it should be moved into ActionsLib.
|
||||
template <typename ElemType>
|
||||
void DumpNodeInfo(const ConfigParameters& config)
|
||||
{
|
||||
wstring modelPath = config(L"modelPath");
|
||||
wstring nodeName = config(L"nodeName", L"__AllNodes__");
|
||||
wstring nodeNameRegexStr = config(L"nodeNameRegex", L"");
|
||||
wstring defOutFilePath = modelPath + L"." + nodeName + L".txt";
|
||||
wstring outputFile = config(L"outputFile", defOutFilePath);
|
||||
bool printValues = config(L"printValues", true);
|
||||
bool printMetadata = config(L"printMetadata", true);
|
||||
if (!printValues && !printMetadata)
|
||||
{
|
||||
InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");
|
||||
}
|
||||
|
||||
ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
|
||||
net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
|
||||
}
|
||||
|
||||
size_t GetMaxEpochs(const ConfigParameters& configParams)
|
||||
{
|
||||
ConfigParameters configSGD(configParams("SGD"));
|
||||
|
@ -286,9 +266,9 @@ void DoCommands(const ConfigParameters& config, const shared_ptr<MPIWrapper>& mp
|
|||
{
|
||||
TestCn<ElemType>(config); // for "devtest" action pass the root config instead
|
||||
}
|
||||
else if (thisAction == "dumpNode" /*deprecated:*/|| thisAction == "dumpnode")
|
||||
else if (thisAction == "dumpNodes" /*deprecated:*/ || thisAction == "dumpNode" || thisAction == "dumpnode")
|
||||
{
|
||||
DumpNodeInfo<ElemType>(commandParams);
|
||||
DoDumpNodes<ElemType>(commandParams);
|
||||
}
|
||||
else if (thisAction == "convertdbn")
|
||||
{
|
||||
|
@ -682,28 +662,22 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
|
|||
fprintf(stderr, "%*s%ls", i > 0 ? 2 : 0, "", argv[i]); // use 2 spaces for better visual separability
|
||||
fprintf(stderr, "\n\n");
|
||||
|
||||
#if 1 //def _DEBUG
|
||||
#ifdef _DEBUG
|
||||
// This simply merges all the different config parameters specified (eg, via config files or via command line directly),
|
||||
// and prints it.
|
||||
fprintf(stderr, "\n\n");
|
||||
LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n");
|
||||
fprintf(stderr, "\nConfiguration, Raw:\n\n");
|
||||
LOGPRINTF(stderr, "%s\n", rawConfigString.c_str());
|
||||
LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<\n");
|
||||
|
||||
// Same as above, but all variables are resolved. If a parameter is set multiple times (eg, set in config, overridden at command line),
|
||||
// All of these assignments will appear, even though only the last assignment matters.
|
||||
fprintf(stderr, "\n");
|
||||
LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
|
||||
fprintf(stderr, "\nConfiguration After Variable Resolution:\n\n");
|
||||
LOGPRINTF(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
|
||||
LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
|
||||
|
||||
#endif
|
||||
// This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
|
||||
// value it is set to will appear).
|
||||
fprintf(stderr, "\n");
|
||||
LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
|
||||
fprintf(stderr, "\nConfiguration After Processing and Variable Resolution:\n\n");
|
||||
config.dumpWithResolvedVariables();
|
||||
LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
|
||||
#endif
|
||||
|
||||
LOGPRINTF(stderr, "Commands:");
|
||||
for (int i = 0; i < command.size(); i++)
|
||||
|
|
|
@ -23,7 +23,7 @@ m1=[
|
|||
L2 = RBFF(L1, HDim, HDim)
|
||||
L3 = RBFF(L2, HDim, HDim)
|
||||
CE = SMBFF(L3, LDim, HDim, labels, tag="criterion")
|
||||
Err=ErrorPrediction(labels, CE.BFF.FF.P, tag="evaluation")
|
||||
Err=ClassificationError(labels, CE.BFF.FF.P, tag="evaluation")
|
||||
|
||||
# rootNodes defined here
|
||||
OutputNodes=(CE.BFF.FF.P)
|
||||
|
|
|
@ -218,7 +218,7 @@ namespace CNTK
|
|||
std::swap(inputVars[0], inputVars[1]);
|
||||
opType = PrimitiveOpType::CrossEntropyWithSoftmax;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(ErrorPredictionNode))
|
||||
else if (node->OperationName() == OperationNameOf(ClassificationErrorNode))
|
||||
{
|
||||
std::swap(inputVars[0], inputVars[1]);
|
||||
opType = PrimitiveOpType::ClassificationError;
|
||||
|
|
|
@ -355,7 +355,7 @@ namespace CNTK
|
|||
computationNodePtr = builder.CrossEntropyWithSoftmax(input1Node, input0Node, function->Name());
|
||||
break;
|
||||
case PrimitiveOpType::ClassificationError:
|
||||
computationNodePtr = builder.ErrorPrediction(input1Node, input0Node, function->Name());
|
||||
computationNodePtr = builder.ClassificationError(input1Node, input0Node, function->Name());
|
||||
break;
|
||||
case PrimitiveOpType::PastValue:
|
||||
case PrimitiveOpType::FutureValue:
|
||||
|
|
|
@ -426,6 +426,13 @@ void ComputationNetwork::RandomInitLearnableParameters(const ComputationNodeBase
|
|||
InitLearnableParameters(node, uniformInit ? L"uniform" : L"gaussian", initValueScale, randomSeed, initOnCPUOnly);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void ComputationNetwork::InitLearnableParametersWithBilinearFill(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight)
|
||||
{
|
||||
auto learnableParameterNode = dynamic_pointer_cast<LearnableParameter<ElemType>>(node);
|
||||
learnableParameterNode->InitBilinear(kernelWidth, kernelHeight);
|
||||
}
|
||||
|
||||
bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr)
|
||||
{
|
||||
// TODO: just use return!
|
||||
|
@ -435,7 +442,7 @@ bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr)
|
|||
nodePtr->OperationName() == OperationNameOf(SequenceWithSoftmaxNode) ||
|
||||
nodePtr->OperationName() == OperationNameOf(CrossEntropyNode) ||
|
||||
nodePtr->OperationName() == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode) ||
|
||||
nodePtr->OperationName() == OperationNameOf(ErrorPredictionNode) ||
|
||||
nodePtr->OperationName() == OperationNameOf(ClassificationErrorNode) ||
|
||||
#ifdef COMING_SOON
|
||||
nodePtr->OperationName() == OperationNameOf(CRFNode) ||
|
||||
#endif
|
||||
|
@ -1228,7 +1235,7 @@ void ComputationNetwork::SaveToDbnFile(ComputationNetworkPtr net, const std::wst
|
|||
};
|
||||
|
||||
// Get output node
|
||||
std::list<ComputationNodeBasePtr> outputNodes = net->GetNodesWithType(OperationNameOf(ErrorPredictionNode));
|
||||
std::list<ComputationNodeBasePtr> outputNodes = net->GetNodesWithType(OperationNameOf(ClassificationErrorNode));
|
||||
ComputationNodeBasePtr outputNode = GetFirstNodeWithDifferentType(outputNodes.front()->GetInputs(), OperationNameOf(InputValue));
|
||||
|
||||
if (outputNode == nullptr)
|
||||
|
@ -1478,6 +1485,7 @@ void ComputationNetwork::SaveToDbnFile(ComputationNetworkPtr net, const std::wst
|
|||
PutTag("EDBN");
|
||||
}
|
||||
|
||||
template void ComputationNetwork::InitLearnableParametersWithBilinearFill<float>(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight);
|
||||
template void ComputationNetwork::Read<float>(const wstring& fileName);
|
||||
template void ComputationNetwork::ReadPersistableParameters<float>(File& fstream, bool create);
|
||||
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
|
||||
|
@ -1487,6 +1495,7 @@ template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net,
|
|||
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);
|
||||
template void ComputationNetwork::SaveToDbnFile<float>(ComputationNetworkPtr net, const std::wstring& fileName) const;
|
||||
|
||||
template void ComputationNetwork::InitLearnableParametersWithBilinearFill<double>(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight);
|
||||
template void ComputationNetwork::Read<double>(const wstring& fileName);
|
||||
template void ComputationNetwork::ReadPersistableParameters<double>(File& fstream, bool create);
|
||||
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
|
||||
|
|
|
@ -349,6 +349,9 @@ public:
|
|||
// Legacy version that is for random only.
|
||||
void RandomInitLearnableParameters(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly = false) const;
|
||||
|
||||
template <class ElemType>
|
||||
void InitLearnableParametersWithBilinearFill(const ComputationNodeBasePtr& node, size_t kernelWidth, size_t kernelHeight);
|
||||
|
||||
template <typename N>
|
||||
static shared_ptr<N> AsNodePtr(const ComputationNodeBasePtr& inode)
|
||||
{
|
||||
|
|
|
@ -40,13 +40,8 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
#endif
|
||||
if (nodeType == OperationNameOf(AbsNode)) return New<AbsNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))return New<ClassBasedCrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ClassificationErrorNode)) return New<ClassificationErrorNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ClipNode)) return New<ClipNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(EqualNode)) return New<EqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(GreaterEqualNode)) return New<GreaterEqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(GreaterNode)) return New<GreaterNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LessEqualNode)) return New<LessEqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LessNode)) return New<LessNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(NotEqualNode)) return New<NotEqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(CosDistanceNode)) return New<CosDistanceNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(CosDistanceWithNegativeSamplesNode)) return New<CosDistanceWithNegativeSamplesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(CosineNode)) return New<CosineNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -59,7 +54,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(DynamicAxisNode)) return New<DynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ElementTimesNode)) return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(EnvironmentInputNode)) return New<EnvironmentInputNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ErrorPredictionNode)) return New<ErrorPredictionNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(EqualNode)) return New<EqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(ExpNode)) return New<ExpNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(FloorNode)) return New<FloorNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(FutureValueNode)) return New<FutureValueNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -67,10 +62,14 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
#ifdef COMING_SOON
|
||||
else if (nodeType == OperationNameOf(GMMLogLikelihoodNode)) return New<GMMLogLikelihoodNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
#endif
|
||||
else if (nodeType == OperationNameOf(GreaterEqualNode)) return New<GreaterEqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(GreaterNode)) return New<GreaterNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(HardmaxNode)) return New<HardmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(IfNode)) return New<IfNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(InvStdDevNode)) return New<InvStdDevNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(KhatriRaoProductNode)) return New<KhatriRaoProductNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LessEqualNode)) return New<LessEqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LessNode)) return New<LessNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LogNode)) return New<LogNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LogPlusNode)) return New<LogPlusNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LogSoftmaxNode)) return New<LogSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -80,6 +79,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(MeanNode)) return New<MeanNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(MinusNode)) return New<MinusNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(NegateNode)) return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(NotEqualNode)) return New<NotEqualNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode)) return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(PackedIndexNode)) return New<PackedIndexNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(PastValueNode)) return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -119,6 +119,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(WhereNode)) return New<WhereNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
// legacy names we also support for back compat of model-files
|
||||
else if (nodeType == L"ColumnElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == L"ErrorPrediction") return New<ClassificationErrorNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == L"Delay") return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
// TODO: DiagTimes is also an alias of ElementTimes; current separate implementation is unnecessary.
|
||||
else if (nodeType == L"PerDimMeanVarNormalizationNode") return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -368,9 +369,9 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Avera
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ClassificationError(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
|
||||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<ErrorPredictionNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
|
||||
return net.AddNodeToNetAndAttachInputs(New<ClassificationErrorNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
|
|
@@ -122,7 +122,7 @@ public:
ComputationNodePtr DummyCriterion(const ComputationNodePtr objectives, const ComputationNodePtr derivatives, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName = L"");
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr ClassificationError(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Exp(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Floor(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L"");
@@ -447,7 +447,7 @@ ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<ComputationNetworkWithEd
// refWeight = 0.9
// kldLabels = labels * (1-refWeight) + Softmax (zRef) * refWeight # interpolate with ref output
// ce = CrossEntropyWithSoftmax (z, kldLabels)
// errs = ErrorPrediction (z, labels)
// errs = ClassificationError (z, labels)
// criterionNodes = (ce)
// evaluationNodes = (errs)
// ===================================================================
@@ -32,16 +32,17 @@
#define CNTK_MODEL_VERSION_1 1
#define CNTK_MODEL_VERSION_2 2
#define CNTK_MODEL_VERSION_3 3
#define CNTK_MODEL_VERSION_4 4 // PastValue
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // Batch norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CNTK_MODEL_VERSION_9 9 // Transpose flag in ConvolutionNode to support deconvolution.
#define CNTK_MODEL_VERSION_10 10 // Learning rate multiplier for input nodes.
#define CNTK_MODEL_VERSION_11 11 // Dynamic axis name for where nodes.
#define CNTK_MODEL_VERSION_12 12 // Batch norm: switch running inverse std deviation -> variance, MB count -> samplesSeen; CuDNN v5
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_12
#define CNTK_MODEL_VERSION_4 4 // PastValue
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // batch-norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CNTK_MODEL_VERSION_9 9 // transpose flag in ConvolutionNode to support deconvolution
#define CNTK_MODEL_VERSION_10 10 // learning-rate multiplier for input nodes
#define CNTK_MODEL_VERSION_11 11 // dynamic axis name for where nodes
#define CNTK_MODEL_VERSION_12 12 // Times() m_inputRank to support parameter-rank inference
#define CNTK_MODEL_VERSION_13 13 // Batch norm: switch running inverse std deviation -> variance, MB count -> samplesSeen; CuDNN v5
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_13

extern bool g_shareNodeValueMatrices;
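As a side illustration of the version gate introduced above: fields are read conditionally on the model version that introduced them, so files written before the bump still load. A minimal, self-contained sketch of that pattern, with a made-up FakeModelStream standing in for CNTK's file I/O and using the minibatch-size-16 approximation mentioned later in this commit:

// Hypothetical, self-contained sketch of version-gated loading; not CNTK's actual File API.
#include <cstdio>
#include <cstdint>

#define SKETCH_MODEL_VERSION_12 12
#define SKETCH_MODEL_VERSION_13 13

struct FakeModelStream { uint64_t nextValue; uint64_t Read() { return nextValue; } };

// Version 13 stores samplesSeen directly; older files store a minibatch count,
// which is converted to an approximate sample count (assumed ~16 samples per minibatch).
uint64_t LoadSamplesSeen(FakeModelStream& f, int modelVersion)
{
    if (modelVersion >= SKETCH_MODEL_VERSION_13)
        return f.Read();        // stored as samples seen
    else
        return f.Read() * 16;   // legacy: minibatch count, approximate conversion
}

int main()
{
    FakeModelStream oldFile{100}; // an old file that stored a minibatch count of 100
    std::printf("approx. samples seen: %llu\n",
                (unsigned long long)LoadSamplesSeen(oldFile, SKETCH_MODEL_VERSION_12));
}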
@ -166,4 +166,128 @@ public:
|
|||
template class PerDimMeanVarNormalizationNode<float>;
|
||||
template class PerDimMeanVarNormalizationNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DiagTimesNode (vector representing the diagonal of a square matrix, data)
|
||||
// Deprecated because can be implemented with ElementTimes.
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
class DiagTimesNode : public ComputationNode<ElemType>, public NumInputs<2>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
|
||||
static const std::wstring TypeName() { return L"DiagTimes"; }
|
||||
|
||||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(DiagTimesNode);
|
||||
DiagTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
|
||||
{
|
||||
if (inputIndex == 0) // left derivative
|
||||
{
|
||||
Matrix<ElemType> sliceOutputGrad = MaskedGradientFor(fr); // use Masked- version since this is reducing over frames
|
||||
Matrix<ElemType> sliceInput1Value = Input(1)->MaskedValueFor(fr);
|
||||
m_innerproduct->AssignInnerProductOf(sliceOutputGrad, sliceInput1Value, false);
|
||||
Input(0)->GradientAsMatrix() += *m_innerproduct;
|
||||
}
|
||||
else // right derivative
|
||||
{
|
||||
Matrix<ElemType> sliceOutputGrad = GradientFor(fr);
|
||||
Matrix<ElemType> sliceInput1Grad = Input(1)->GradientFor(fr);
|
||||
m_rightGradient->SetValue(sliceOutputGrad);
|
||||
m_rightGradient->ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
|
||||
sliceInput1Grad += *m_rightGradient;
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override
|
||||
{
|
||||
// The DiagTimesNode does not require its output value for computing
|
||||
// the gradients of its input nodes
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
|
||||
{
|
||||
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
|
||||
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
|
||||
|
||||
sliceOutputValue.AssignValuesOf(sliceInput1Value);
|
||||
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
Base::Validate(isFinalValidationPass);
|
||||
InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
|
||||
|
||||
size_t rows0 = Input(0)->GetAsMatrixNumRows();
|
||||
size_t rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
|
||||
|
||||
// if dimension not specified we assume two operands' dimensions should match
|
||||
Input(0)->ValidateInferInputDimsFrom(TensorShape(rows1));
|
||||
|
||||
if (Input(1)->HasMBLayout())
|
||||
{
|
||||
// infer rows1 as rows0
|
||||
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0));
|
||||
SetDims(TensorShape(rows0), true);
|
||||
}
|
||||
else // multiplying two straight matrices
|
||||
{
|
||||
size_t cols1 = Input(1)->GetAsMatrixNumCols();
|
||||
// infer rows1 as rows0
|
||||
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0, cols1));
|
||||
SetDims(TensorShape(rows0, cols1), false);
|
||||
}
|
||||
|
||||
// update after inference
|
||||
rows0 = Input(0)->GetAsMatrixNumRows();
|
||||
rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
|
||||
if (isFinalValidationPass && rows0 != rows1)
|
||||
InvalidArgument("The inner matrix dimension in the %ls %ls operation does not match (%d vs. %d).", NodeName().c_str(), OperationName().c_str(), (int) rows1, (int) rows0);
|
||||
size_t cols0 = Input(0)->GetAsMatrixNumCols();
|
||||
if (isFinalValidationPass && cols0 != 1)
|
||||
InvalidArgument("The first matrix should be a column vector representing the diagonal of a square matrix in the DiagTimes operation.");
|
||||
|
||||
SetDims(Input(1));
|
||||
}
|
||||
|
||||
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
{
|
||||
Base::CopyTo(nodeP, newName, flags);
|
||||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
{
|
||||
auto node = dynamic_pointer_cast<DiagTimesNode<ElemType>>(nodeP);
|
||||
node->m_innerproduct->SetValue(*m_innerproduct);
|
||||
node->m_rightGradient->SetValue(*m_rightGradient);
|
||||
}
|
||||
}
|
||||
// request matrices that are needed for gradient computation
|
||||
virtual void RequestMatricesBeforeBackprop(MatrixPool& matrixPool)
|
||||
{
|
||||
Base::RequestMatricesBeforeBackprop(matrixPool);
|
||||
RequestMatrixFromPool(m_innerproduct, matrixPool);
|
||||
RequestMatrixFromPool(m_rightGradient, matrixPool);
|
||||
}
|
||||
|
||||
// release gradient and temp matrices that no longer needed after all the children's gradients are computed.
|
||||
virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool)
|
||||
{
|
||||
Base::ReleaseMatricesAfterBackprop(matrixPool);
|
||||
ReleaseMatrixToPool(m_innerproduct, matrixPool);
|
||||
ReleaseMatrixToPool(m_rightGradient, matrixPool);
|
||||
}
|
||||
|
||||
private:
|
||||
shared_ptr<Matrix<ElemType>> m_innerproduct;
|
||||
shared_ptr<Matrix<ElemType>> m_rightGradient;
|
||||
};
|
||||
|
||||
template class DiagTimesNode<float>;
|
||||
template class DiagTimesNode<double>;
|
||||
|
||||
}}}
|
||||
|
|
|
@@ -18,24 +18,20 @@
namespace Microsoft { namespace MSR { namespace CNTK {

// -----------------------------------------------------------------------
// ErrorPredictionNode (label, prediction) or ErrorPredictionNode (prediction, label)
// ClassificationErrorNode (label, prediction) or ClassificationErrorNode (prediction, label)
// Performs classification and error counting.
// Result is an error rate, lower = better.
// -----------------------------------------------------------------------

template <class ElemType>
class ErrorPredictionNode : public ComputationNodeNonLooping /*ComputationNode*/<ElemType>
class ClassificationErrorNode : public ComputationNodeNonLooping /*ComputationNode*/<ElemType>
{
typedef ComputationNodeNonLooping<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName()
{
return L"ErrorPrediction";
}
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"ClassificationError"; }

public:
DeclareConstructorFromConfig(ErrorPredictionNode);
ErrorPredictionNode(DEVICEID_TYPE deviceId, const wstring& name)
DeclareConstructorFromConfig(ClassificationErrorNode);
ClassificationErrorNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
}

@@ -63,10 +59,10 @@ public:
MaskMissingColumnsToZero(*m_maxIndexes1, Input(1)->GetMBLayout(), fr);
Value().AssignNumOfDiff(*m_maxIndexes0, *m_maxIndexes1, m_topK > 1);
#if NANCHECK
Value().HasNan("ErrorPrediction");
Value().HasNan("ClassificationError");
#endif
#if DUMPOUTPUT
Value().Print("ErrorPredictionNode");
Value().Print("ClassificationErrorNode");
#endif
}

@@ -100,7 +96,7 @@ public:
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<ErrorPredictionNode<ElemType>>(nodeP);
auto node = dynamic_pointer_cast<ClassificationErrorNode<ElemType>>(nodeP);
node->m_maxIndexes0->SetValue(*m_maxIndexes0);
node->m_maxIndexes1->SetValue(*m_maxIndexes1);
node->m_maxValues->SetValue(*m_maxValues);

@@ -131,8 +127,8 @@ private:
int m_topK;
};

template class ErrorPredictionNode<float>;
template class ErrorPredictionNode<double>;
template class ClassificationErrorNode<float>;
template class ClassificationErrorNode<double>;

#ifdef COMING_SOON
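For orientation, a standalone sketch of what the renamed node computes in the top-1 case: the fraction of samples whose argmax prediction disagrees with the argmax label. The node itself accumulates the raw mismatch count via AssignNumOfDiff; the rate shown here is simply that count divided by the number of samples, and the sketch is independent of the ComputationNode machinery:

// Standalone sketch of top-1 classification error (illustrative only).
#include <cstdio>
#include <vector>
#include <algorithm>

// Each inner vector is one column (one sample) of class scores or one-hot labels.
static size_t ArgMax(const std::vector<float>& col)
{
    return std::max_element(col.begin(), col.end()) - col.begin();
}

float ClassificationError(const std::vector<std::vector<float>>& labels,
                          const std::vector<std::vector<float>>& predictions)
{
    size_t errors = 0;
    for (size_t j = 0; j < labels.size(); j++)
        if (ArgMax(labels[j]) != ArgMax(predictions[j]))
            errors++; // count samples where the predicted class differs from the labeled class
    return labels.empty() ? 0.0f : (float)errors / labels.size(); // error rate, lower = better
}

int main()
{
    std::vector<std::vector<float>> labels      = {{1, 0, 0}, {0, 1, 0}};
    std::vector<std::vector<float>> predictions = {{0.7f, 0.2f, 0.1f}, {0.6f, 0.3f, 0.1f}};
    std::printf("error rate: %.2f\n", ClassificationError(labels, predictions)); // prints 0.50
}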
@ -26,7 +26,7 @@ void LearnableParameter<ElemType>::InitShape(const TensorShape& shape)
|
|||
Value().Invalidate();
|
||||
}
|
||||
|
||||
static pair<bool/*uniform*/, double/*stddev or range*/> ParseRandomizationType(const std::wstring& type, size_t fanOut = 1, size_t fanIn = 1);
|
||||
static pair<bool/*uniform*/, double/*stddev or range*/> ParseRandomizationType(const wstring& type, size_t fanOut = 1, size_t fanIn = 1);
|
||||
|
||||
// constructor from config
|
||||
// Parameterization is a little wicked. An older version required to specify the type of initialization
|
||||
|
@ -41,6 +41,11 @@ static pair<bool/*uniform*/, double/*stddev or range*/> ParseRandomizationType(c
|
|||
// - init="fixedValue", value from 'value' --deprecated in favor of just specifying initValue
|
||||
// - init="fromFile", value from 'initFromFilePath' --deprecated in favor of just specifying 'initFromFilePath'
|
||||
// - init="fromLiteral", value from 'initFromLiteral' --deprecated in favor of initValue=array expression
|
||||
// Random initialization takes an additional optional parameter initOutputRank, default 1.
|
||||
// All dimensions that are not amongst the first 'initOutputRank' are considered inputs.
|
||||
// This is necessary e.g. for convolution.
|
||||
// 'initOutputRank' can also be negative to denote output dims on the right, to cater to the needs
|
||||
// of convolution kernels where the output rank is the right-most axis (initOutputRank=-1).
|
||||
// The forms that infer the dimensions have different BrainScript names. TODO: need one for fromFile
|
||||
// TODO: All forms that require specified dimensions but contain zeroes (to be updated by graph)
|
||||
// will need to do deferred initialization, or have a way to repeat it.
|
||||
|
@ -91,7 +96,8 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
|
|||
int forcedRandomSeed = configp->Get(L"randomSeed"); // forcing a specific random seed is useful for testing to get repeatable initialization independent of evaluation order
|
||||
m_randomSeed = forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed;
|
||||
m_initValueScale = configp->Get(L"initValueScale");
|
||||
m_initOnCPUOnly = configp->Get(L"initOnCPUOnly");
|
||||
m_initOutputRank = configp->Get(L"initOutputRank");
|
||||
m_initOnCPUOnly = configp->Get(L"initOnCPUOnly");
|
||||
}
|
||||
else if (initString == L"zero")
|
||||
{
|
||||
|
@ -114,6 +120,13 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
|
|||
InitFromFile(initFromFilePath);
|
||||
m_initString.clear();
|
||||
}
|
||||
else if (initString == L"bilinear")
|
||||
{
|
||||
const size_t kernelWidth = configp->Get(L"kernelWidth");
|
||||
const size_t kernelHeight = configp->Get(L"kernelHeight");
|
||||
InitBilinear(kernelWidth, kernelHeight);
|
||||
m_initString.clear();
|
||||
}
|
||||
// legacy
|
||||
else if (initString == L"fixedValue") // deprecated. Use initValue=... instead
|
||||
{
|
||||
|
@ -155,6 +168,7 @@ void LearnableParameter<ElemType>::PostInitParameters(const wstring& initString,
|
|||
m_initString = initString;
|
||||
m_randomSeed = randomSeed;
|
||||
m_initValueScale = initValue;
|
||||
m_initOutputRank = 1; // default. NDL (deprecated) cannot specify a different value.
|
||||
m_initOnCPUOnly = initOnCPUOnly;
|
||||
}
|
||||
else if (initString == L"fixedValue") // from constant value
|
||||
|
@ -182,7 +196,7 @@ void LearnableParameter<ElemType>::PostInitParameters(const wstring& initString,
|
|||
// heNormal: sqrt(2 / fanin)
|
||||
// heUniform: sqrt(6 / fanin)
|
||||
// returns (*,0) for unrecognized string
|
||||
static pair<bool/*uniform*/,double/*stddev or range*/> ParseRandomizationType(const std::wstring& type, size_t fanOut /* = 1*/, size_t fanIn /*= 1*/)
|
||||
static pair<bool/*uniform*/,double/*stddev or range*/> ParseRandomizationType(const wstring& type, size_t fanOut /* = 1*/, size_t fanIn /*= 1*/)
|
||||
{
|
||||
if (type == L"uniform") return make_pair( true, 0.05f);
|
||||
else if (type == L"gaussian") return make_pair(false, 0.2 / sqrt(fanIn));
|
||||
|
@ -197,26 +211,33 @@ static pair<bool/*uniform*/,double/*stddev or range*/> ParseRandomizationType(co
|
|||
// initialize with random numbers
|
||||
// if 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing)
|
||||
template <class ElemType>
|
||||
void LearnableParameter<ElemType>::InitRandom(const std::wstring& type,
|
||||
void LearnableParameter<ElemType>::InitRandom(const wstring& type,
|
||||
const unsigned long randomSeed,
|
||||
const ElemType initValueScale,
|
||||
bool initOnCPUOnly)
|
||||
const int initOutputRank,
|
||||
const bool initOnCPUOnly)
|
||||
{
|
||||
// fprintf(stderr, "%d x %d: %d %ls\n", (int)GetNumRows(), (int)GetNumCols(), (int)randomSeed, NodeName().c_str());
|
||||
|
||||
let& sampleLayout = GetSampleLayout();
|
||||
#if 1 // this more complex version is needed to repro test cases generated with an older version
|
||||
auto& value = sampleLayout.GetRank() > 2 ? Value() : ValueAsMatrix();
|
||||
#else
|
||||
auto& value = Value();
|
||||
#endif
|
||||
|
||||
let numElements = sampleLayout.GetNumElements();
|
||||
if (numElements == 0)
|
||||
return;
|
||||
// We assume that the matrix row dimension is the output dimension. This is wrong in case of ND biases, convolution filters, and BatchNorm.
|
||||
size_t fanIn = value.GetNumCols(); // fan-in
|
||||
size_t fanOut = numElements / fanIn; // remaining dimensions
|
||||
// determine fan-in and fan-out
|
||||
// This is controlled by initOutputRank.
|
||||
// For a normal matrix [I x J], fanOut = I, fanIn = J=inDim --> initOutputRank = +1
|
||||
// For a convolution kernel [w x h x C x K], fanOut = K, fanIn = w*h*C. --> initOutputRank = -1, meaning count from back
|
||||
if (abs(initOutputRank) > sampleLayout.GetRank())
|
||||
InvalidArgument("InitRandom: initOutputRank=%d exceeds sampleLayout rank %d", initOutputRank, (int)sampleLayout.GetRank());
|
||||
// fanIn is determined by multiplying a range of dimensions:
|
||||
// - initOutputRank >= 0: [ initOutputRank, rank )
|
||||
// - initOutputRank < 0: [ 0, rank-abs(initOutputRank) )
|
||||
let inDimsBegin = (initOutputRank >= 0) ? (size_t)initOutputRank : 0;
|
||||
let inDimsEnd = (initOutputRank >= 0) ? sampleLayout.GetRank() : (size_t)((int)sampleLayout.GetRank() + initOutputRank);
|
||||
size_t fanIn = 1;
|
||||
for (size_t k = inDimsBegin; k < inDimsEnd; k++)
|
||||
fanIn *= sampleLayout[k];
|
||||
let fanOut = numElements / fanIn; // remaining dimensions
|
||||
let opts = ParseRandomizationType(type, fanOut, fanIn);
|
||||
let isUniform = opts.first;
|
||||
ElemType range = (ElemType)opts.second;
|
||||
|
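As a sketch of the fan-in/fan-out split that initOutputRank controls in the comments above (assumed semantics, plain vectors instead of TensorShape): the first initOutputRank axes, or the last abs(initOutputRank) axes when negative, are treated as outputs, and the product of the remaining axes is the fan-in.

// Standalone sketch of the fan-in/fan-out split described above; not the actual TensorShape API.
#include <cstdio>
#include <cstdlib>
#include <vector>

void SplitFan(const std::vector<size_t>& dims, int initOutputRank, size_t& fanIn, size_t& fanOut)
{
    size_t rank = dims.size();
    if ((size_t)std::abs(initOutputRank) > rank)
    {
        std::fprintf(stderr, "initOutputRank exceeds rank\n");
        std::exit(1);
    }
    // fan-in axes: [initOutputRank, rank) for non-negative, [0, rank - |initOutputRank|) for negative
    size_t inBegin = initOutputRank >= 0 ? (size_t)initOutputRank : 0;
    size_t inEnd   = initOutputRank >= 0 ? rank : rank - (size_t)(-initOutputRank);
    fanIn = 1;
    for (size_t k = inBegin; k < inEnd; k++)
        fanIn *= dims[k];
    size_t numElements = 1;
    for (size_t d : dims)
        numElements *= d;
    fanOut = numElements / fanIn; // remaining dimensions
}

int main()
{
    size_t fanIn, fanOut;
    SplitFan({128, 512}, 1, fanIn, fanOut);     // dense [I x J]: fanOut = I = 128, fanIn = J = 512
    std::printf("dense:  fanOut=%zu fanIn=%zu\n", fanOut, fanIn);
    SplitFan({5, 5, 3, 64}, -1, fanIn, fanOut); // kernel [w x h x C x K]: fanOut = K = 64, fanIn = w*h*C = 75
    std::printf("kernel: fanOut=%zu fanIn=%zu\n", fanOut, fanIn);
}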
@ -224,18 +245,74 @@ void LearnableParameter<ElemType>::InitRandom(const std::wstring& type,
|
|||
LogicError("InitRandom: Invalid initialization type '%ls'", type.c_str());
|
||||
|
||||
// the random seed offset is set via the "randomSeedOffset" parameter in config
|
||||
fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, range=%f*%f, onCPU=%s).\n", NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(), (int)m_randomSeed, range, m_initValueScale, m_initOnCPUOnly ? "true" : "false");
|
||||
fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, init dims=[%d x %d], range=%f*%f, onCPU=%s).\n",
|
||||
NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(),
|
||||
(int)m_randomSeed, (int)fanOut, (int)fanIn, range, m_initValueScale, m_initOnCPUOnly ? "true" : "false");
|
||||
range *= initValueScale;
|
||||
if (initOnCPUOnly)
|
||||
Value().TransferToDeviceIfNotThere(CPUDEVICE, true);
|
||||
if (isUniform)
|
||||
value.SetUniformRandomValue(-range, range, randomSeed);
|
||||
Value().SetUniformRandomValue(-range, range, randomSeed);
|
||||
else
|
||||
value.SetGaussianRandomValue(0, range, randomSeed);
|
||||
Value().SetGaussianRandomValue(0, range, randomSeed);
|
||||
if (initOnCPUOnly)
|
||||
Value().TransferToDeviceIfNotThere(m_deviceId, true);
|
||||
}
|
||||
|
||||
// Initialize with bilinear interpolation coefficients (useful for deconvolution layer).
|
||||
template <class ElemType>
|
||||
void LearnableParameter<ElemType>::InitBilinear(size_t kernelWidth, size_t kernelHeight)
|
||||
{
|
||||
if (kernelHeight != kernelWidth)
|
||||
LogicError("Filter for bilinear interpolation must be square.");
|
||||
|
||||
// Transfer to CPU as GPU initialization is still not supported.
|
||||
Value().TransferToDeviceIfNotThere(CPUDEVICE, true);
|
||||
|
||||
const SmallVector<size_t>& dims = GetSampleLayout().GetDims();
|
||||
assert(dims.size() == 2);
|
||||
const size_t kernelCount = dims[0];
|
||||
const size_t kernelWeightCount = dims[1];
|
||||
assert(kernelWeightCount % (kernelWidth * kernelHeight) == 0);
|
||||
const size_t channels = kernelWeightCount / (kernelWidth * kernelHeight);
|
||||
if (kernelCount != channels)
|
||||
LogicError("Number of input and output channels of filter for bilinear interpolation must be equal.");
|
||||
|
||||
ElemType* data = Value().Data();
|
||||
const size_t factor = (kernelWidth + 1) / 2;
|
||||
const float center = (kernelWidth - 1) / 2.0f;
|
||||
int count = 0;
|
||||
// Filter dimensions are [W x H x C x K] or ARRAY[1..K] OF ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W], where:
|
||||
// W = width, H = height, C = input channels, K = output channels.
|
||||
// In deconvolution, output channel should be upsampled version of corresponding input channel.
|
||||
// 2D filter for bilinear interpolation where height=width=3 contains the following values:
|
||||
// |0.25, 0.50, 0.25|
|
||||
// |0.50, 1.00, 0.50|
|
||||
// |0.25, 0.50, 0.25|
|
||||
// So, output kernel with dimensions [3 x 3 x C] will contain all zeros except for the channel which we want to
|
||||
// upsample. For that channel it will contain values above.
|
||||
for (size_t kernel = 0; kernel < kernelCount; ++kernel)
|
||||
{
|
||||
for (size_t channel = 0; channel < channels; ++channel)
|
||||
{
|
||||
for (size_t h = 0; h < kernelHeight; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < kernelWidth; ++w)
|
||||
{
|
||||
float val = 0;
|
||||
if (kernel == channel)
|
||||
{
|
||||
val = (1 - fabs(w - center) / factor) * (1 - fabs(h - center) / factor);
|
||||
}
|
||||
data[count++] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Value().TransferToDeviceIfNotThere(m_deviceId, true);
|
||||
}
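A small standalone check of the per-weight formula used above: for a 3 x 3 kernel it reproduces the 0.25 / 0.50 / 1.00 pattern quoted in the comment (illustrative only, independent of LearnableParameter).

// Prints the kernelWidth x kernelWidth bilinear filter from the formula above.
#include <cstdio>
#include <cmath>

int main()
{
    const int kernelWidth = 3;                     // square kernel; 3x3 matches the comment above
    const int factor = (kernelWidth + 1) / 2;      // 2
    const float center = (kernelWidth - 1) / 2.0f; // 1.0
    for (int h = 0; h < kernelWidth; ++h)
    {
        for (int w = 0; w < kernelWidth; ++w)
        {
            float val = (1 - std::fabs(w - center) / factor) * (1 - std::fabs(h - center) / factor);
            std::printf("%.2f ", val);             // 0.25 0.50 0.25 / 0.50 1.00 0.50 / 0.25 0.50 0.25
        }
        std::printf("\n");
    }
}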
|
||||
|
||||
// initialize by reading a matrix from a text file
|
||||
template <class ElemType>
|
||||
void LearnableParameter<ElemType>::InitFromFile(const wstring& initFromFilePath)
|
||||
|
@ -247,7 +324,7 @@ void LearnableParameter<ElemType>::InitFromFile(const wstring& initFromFilePath)
|
|||
|
||||
// initialize by reading a matrix from a text file
|
||||
template <class ElemType>
|
||||
void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols)
|
||||
void LearnableParameter<ElemType>::InitFromArray(const vector<ElemType>& array, size_t numRows, size_t numCols)
|
||||
{
|
||||
// infer tensor dimensions from input file if not set
|
||||
// Note: The mapping of dimensions of the input matrix to tensor dimensions are somewhat confusing.
|
||||
|
@ -295,13 +372,13 @@ void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& ar
|
|||
|
||||
// TODO: Move this error check there, since this is called only from one place.
|
||||
template <class ElemType>
|
||||
void LearnableParameter<ElemType>::ReviseFromFile(const std::wstring& reviseFromFilePath)
|
||||
void LearnableParameter<ElemType>::ReviseFromFile(const wstring& reviseFromFilePath)
|
||||
{
|
||||
try
|
||||
{
|
||||
InitFromFile(reviseFromFilePath);
|
||||
}
|
||||
catch (const std::exception & e)
|
||||
catch (const exception & e)
|
||||
{
|
||||
RuntimeError("ReviseFromFile: Failed to reload %ls %ls operation from file %ls: %s", NodeName().c_str(), OperationName().c_str(), reviseFromFilePath.c_str(), e.what());
|
||||
}
|
||||
|
@ -356,7 +433,7 @@ void LearnableParameter<ElemType>::Load(File& fstream, size_t modelVersion) /*ov
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
/*virtual*/ void LearnableParameter<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const /*override*/
|
||||
/*virtual*/ void LearnableParameter<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const wstring& newName, const CopyNodeFlags flags) const /*override*/
|
||||
{
|
||||
Base::CopyTo(nodeP, newName, flags);
|
||||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
|
@ -365,6 +442,7 @@ template <class ElemType>
|
|||
node->m_initString = m_initString;
|
||||
node->m_randomSeed = m_randomSeed;
|
||||
node->m_initValueScale = m_initValueScale;
|
||||
node->m_initOutputRank = m_initOutputRank;
|
||||
node->m_initOnCPUOnly = m_initOnCPUOnly;
|
||||
node->m_initValue = m_initValue;
|
||||
}
|
||||
|
@ -439,7 +517,7 @@ void LearnableParameter<ElemType>::LazyInitParameters()
|
|||
}
|
||||
else if (ParseRandomizationType(m_initString).second != 0)
|
||||
{
|
||||
InitRandom(m_initString, m_randomSeed, m_initValueScale, m_initOnCPUOnly);
|
||||
InitRandom(m_initString, m_randomSeed, m_initValueScale, m_initOutputRank, m_initOnCPUOnly);
|
||||
}
|
||||
else
|
||||
LogicError("LearnableParameter: Invalid value of m_initString '%ls' for deferred initialization for %ls.", m_initString.c_str(), NodeDescription().c_str());
|
||||
|
|
|
@ -56,13 +56,16 @@ public:
|
|||
unsigned long randomSeed = 0,
|
||||
bool initOnCPUOnly = false);
|
||||
|
||||
// Initialize with bilinear interpolation coefficients (useful for deconvolution layer).
|
||||
void InitBilinear(size_t kernelWidth, size_t kernelHeight);
|
||||
|
||||
// initialize by reading a matrix from a text file
|
||||
void InitFromFile(const std::wstring& initFromFilePath);
|
||||
|
||||
private:
|
||||
// initialize with random numbers
|
||||
// If 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing).
|
||||
void InitRandom(const std::wstring& type, const unsigned long randomSeed, const ElemType initValueScale, bool initOnCPUOnly);
|
||||
void InitRandom(const std::wstring& type, const unsigned long randomSeed, const ElemType initValueScale, const int initOutputRank, const bool initOnCPUOnly);
|
||||
|
||||
// helper to initialize from a matrix read from a text file or a string literal
|
||||
void InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols);
|
||||
|
@ -103,6 +106,7 @@ private:
|
|||
std::wstring m_initString; // if non-empty then deferred initialization is needed. Gets cleared upon completion of deferred init.
|
||||
unsigned long m_randomSeed;
|
||||
ElemType m_initValueScale;
|
||||
int m_initOutputRank;
|
||||
bool m_initOnCPUOnly;
|
||||
ElemType m_initValue;
|
||||
};
|
||||
|
|
|
@ -238,8 +238,8 @@ class TimesNodeBase : public ComputationNode<ElemType>, public NumInputs<2>
|
|||
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName; \
|
||||
|
||||
public:
|
||||
TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
|
||||
: Base(deviceId, name), m_outputRank(outputRank)
|
||||
TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
|
||||
: Base(deviceId, name), m_outputRank(outputRank), m_inferInputRankToMap(inferInputRankToMap)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -249,7 +249,8 @@ public:
|
|||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
{
|
||||
auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
|
||||
node->m_outputRank = m_outputRank;
|
||||
node->m_outputRank = m_outputRank;
|
||||
node->m_inferInputRankToMap = m_inferInputRankToMap;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -257,6 +258,7 @@ public:
|
|||
{
|
||||
Base::Save(fstream);
|
||||
fstream << m_outputRank;
|
||||
fstream << m_inferInputRankToMap;
|
||||
}
|
||||
|
||||
virtual void Load(File& fstream, size_t modelVersion) override
|
||||
|
@ -266,6 +268,10 @@ public:
|
|||
fstream >> m_outputRank;
|
||||
else
|
||||
m_outputRank = 1;
|
||||
if (modelVersion >= CNTK_MODEL_VERSION_11)
|
||||
fstream >> m_inferInputRankToMap;
|
||||
else
|
||||
m_inferInputRankToMap = -1;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -420,19 +426,33 @@ public:
|
|||
if (dimsA[k] == 0)
|
||||
InvalidArgument("%ls %ls operation: The outputRank (%d) dimensions in left argument's shape [%s] must not be 0.", NodeName().c_str(), OperationName().c_str(), (int)m_outputRank, dimsAstring.c_str());
|
||||
|
||||
// if the last dimension of A is 0, then extend it to fully match B
|
||||
// E.g. [I x 0] * [X x Y x Z x K] => infer as [I x X x Y x Z], not as [I x X].
|
||||
// I.e. we cannot use inference to infer a matrix product on a part of an input tensor.
|
||||
// We default to inferring the whole, as part of a tensor is a special use case.
|
||||
assert (dimsA.size() == m_outputRank + numReductionDims);
|
||||
while (numReductionDims < dimsB.size() && dimsA.back() == 0)
|
||||
// infer rank of dimsA
|
||||
// For purpose of dimension inference, Times() accepts an optional parameter inferInputRankToMap (default -1=unspecified).
|
||||
// The last 'inferInputRankToMap' axes are considered those that the matrix product should keep (Times()
|
||||
// is applied one by one, like a "map" operation) rather than reducing over.
|
||||
// Specifically, inferInputRankToMap=0 means to reduce over all input axes, e.g. for an image input that
|
||||
// should be flattened.
|
||||
// Examples:
|
||||
// [I x Inferred] * [J x K], inferInputRankToMap=n/a --> Inferred := J, result is [I x K]
|
||||
// [I x Inferred] * [W x H x C], inferInputRankToMap=n/a --> Inferred := W, result is [I x H x C] (not desired)
|
||||
// [I x Inferred x Inferred] * [W x H x C], inferInputRankToMap=n/a --> Inf x Inf := [W x H], result is [I x C]
|
||||
// [I x Inferred] * [W x H x C], inferInputRankToMap=0 --> Inferred := W x H x C, result is [I] (desired)
|
||||
// [I x Inferred] * [W x H x C x R], inferInputRankToMap=1 --> Inferred := W x H x C, result is [I x R] (desired)
|
||||
// If W's shape is too short, it will be padded with 0 (i.e. inferred in a subsequent step).
|
||||
if (m_inferInputRankToMap >= 0) // if given, we pad if needed
|
||||
{
|
||||
dimsA.push_back(0);
|
||||
numReductionDims++;
|
||||
if ((size_t)m_inferInputRankToMap >= dimsB.size() && isFinalValidationPass) // at least one axis must be left to reduce over
|
||||
InvalidArgument("%ls %ls operation: 'inferInputRankToMap' argument %d must be less than rank of second operand [%s].", NodeName().c_str(), OperationName().c_str(), m_inferInputRankToMap, dimsBstring.c_str());
|
||||
assert(dimsA.size() == m_outputRank + numReductionDims);
|
||||
while (numReductionDims + (size_t)m_inferInputRankToMap < dimsB.size())
|
||||
{
|
||||
dimsA.push_back(0);
|
||||
numReductionDims++;
|
||||
}
|
||||
}
|
||||
|
||||
// fill in the missing ones
|
||||
// We fill in dimensions given as 0. The tensor rank is not inferred.
|
||||
// We fill in dimensions given as 0. The tensor rank is not inferred here (that is done above).
|
||||
for (size_t k = m_outputRank; k < dimsA.size(); k++)
|
||||
{
|
||||
auto& dimA = dimsA[k];
|
||||
|
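As a sketch of the padding rule described in the comments above (plain vectors standing in for TensorShape, semantics assumed from those comments): trailing zero axes are appended to the left operand until only inferInputRankToMap axes of the right operand remain un-reduced.

// Standalone sketch of the shape padding described above.
#include <cstdio>
#include <vector>

// dimsA: left operand; its first outputRank axes are outputs, trailing zeros are inferred later.
// dimsB: right operand; inferInputRankToMap of its trailing axes are kept ("mapped"), the rest reduced.
std::vector<size_t> PadLeftOperand(std::vector<size_t> dimsA, const std::vector<size_t>& dimsB,
                                   size_t outputRank, int inferInputRankToMap)
{
    size_t numReductionDims = dimsA.size() - outputRank;
    if (inferInputRankToMap >= 0) // only pad when the parameter was given
        while (numReductionDims + (size_t)inferInputRankToMap < dimsB.size())
        {
            dimsA.push_back(0); // 0 = "infer this dimension from B in a later step"
            numReductionDims++;
        }
    return dimsA;
}

int main()
{
    // [I x Inferred] * [W x H x C x R] with inferInputRankToMap=1:
    // A is padded to [I x 0 x 0 x 0], so W, H, C are reduced over and R is kept -> result [I x R].
    auto padded = PadLeftOperand({128, 0}, {7, 7, 3, 10}, /*outputRank=*/1, /*inferInputRankToMap=*/1);
    for (size_t d : padded)
        std::printf("%zu ", d); // prints: 128 0 0 0
    std::printf("\n");
}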
@ -478,6 +498,7 @@ public:
|
|||
|
||||
private:
|
||||
size_t m_outputRank;
|
||||
int m_inferInputRankToMap; // -1 (not specified) or says how to expand shape of W, to keep this many mapping dims
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -504,12 +525,12 @@ class TimesNode : public TimesNodeBase<ElemType, false>
|
|||
static const std::wstring TypeName() { return L"Times"; }
|
||||
|
||||
public:
|
||||
TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
|
||||
: Base(deviceId, name, outputRank)
|
||||
TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
|
||||
: Base(deviceId, name, outputRank, inferInputRankToMap)
|
||||
{
|
||||
}
|
||||
TimesNode(const ScriptableObjects::IConfigRecordPtr configp)
|
||||
: TimesNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"outputRank"))
|
||||
: TimesNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"outputRank"), configp->Get(L"inferInputRankToMap"))
|
||||
{
|
||||
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
|
||||
}
|
||||
|
@ -537,7 +558,7 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
|
|||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
|
||||
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
|
||||
: Base(deviceId, name, outputRank)
|
||||
: Base(deviceId, name, outputRank, /*inferInputRankToMap=*/-1)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
@ -545,134 +566,6 @@ public:
|
|||
template class TransposeTimesNode<float>;
|
||||
template class TransposeTimesNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DiagTimesNode (vector representing the diagonal of a square matrix, data)
|
||||
// TODO: This is redundant with ElementTimes and should be removed (with a compat stub).
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
class DiagTimesNode : public ComputationNode<ElemType>, public NumInputs<2>
|
||||
{
|
||||
typedef ComputationNode<ElemType> Base;
|
||||
UsingComputationNodeMembersBoilerplate;
|
||||
static const std::wstring TypeName()
|
||||
{
|
||||
return L"DiagTimes";
|
||||
}
|
||||
|
||||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(DiagTimesNode);
|
||||
DiagTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
|
||||
{
|
||||
if (inputIndex == 0) // left derivative
|
||||
{
|
||||
Matrix<ElemType> sliceOutputGrad = MaskedGradientFor(fr); // use Masked- version since this is reducing over frames
|
||||
Matrix<ElemType> sliceInput1Value = Input(1)->MaskedValueFor(fr);
|
||||
m_innerproduct->AssignInnerProductOf(sliceOutputGrad, sliceInput1Value, false);
|
||||
Input(0)->GradientAsMatrix() += *m_innerproduct;
|
||||
}
|
||||
else // right derivative
|
||||
{
|
||||
Matrix<ElemType> sliceOutputGrad = GradientFor(fr);
|
||||
Matrix<ElemType> sliceInput1Grad = Input(1)->GradientFor(fr);
|
||||
m_rightGradient->SetValue(sliceOutputGrad);
|
||||
m_rightGradient->ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
|
||||
sliceInput1Grad += *m_rightGradient;
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override
|
||||
{
|
||||
// The DiagTimesNode does not require its output value for computing
|
||||
// the gradients of its input nodes
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
|
||||
{
|
||||
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
|
||||
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
|
||||
|
||||
sliceOutputValue.AssignValuesOf(sliceInput1Value);
|
||||
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
Base::Validate(isFinalValidationPass);
|
||||
InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
|
||||
|
||||
size_t rows0 = Input(0)->GetAsMatrixNumRows();
|
||||
size_t rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
|
||||
|
||||
// if dimension not specified we assume two operands' dimensions should match
|
||||
Input(0)->ValidateInferInputDimsFrom(TensorShape(rows1));
|
||||
|
||||
if (Input(1)->HasMBLayout())
|
||||
{
|
||||
// infer rows1 as rows0
|
||||
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0));
|
||||
SetDims(TensorShape(rows0), true);
|
||||
}
|
||||
else // multiplying two straight matrices
|
||||
{
|
||||
size_t cols1 = Input(1)->GetAsMatrixNumCols();
|
||||
// infer rows1 as rows0
|
||||
Input(1)->ValidateInferInputDimsFrom(TensorShape(rows0, cols1));
|
||||
SetDims(TensorShape(rows0, cols1), false);
|
||||
}
|
||||
|
||||
// update after inference
|
||||
rows0 = Input(0)->GetAsMatrixNumRows();
|
||||
rows1 = Input(1)->HasMBLayout() ? Input(1)->GetSampleMatrixNumRows() : Input(1)->GetAsMatrixNumRows();
|
||||
if (isFinalValidationPass && rows0 != rows1)
|
||||
InvalidArgument("The inner matrix dimension in the %ls %ls operation does not match (%d vs. %d).", NodeName().c_str(), OperationName().c_str(), (int) rows1, (int) rows0);
|
||||
size_t cols0 = Input(0)->GetAsMatrixNumCols();
|
||||
if (isFinalValidationPass && cols0 != 1)
|
||||
InvalidArgument("The first matrix should be a column vector representing the diagonal of a square matrix in the DiagTimes operation.");
|
||||
|
||||
SetDims(Input(1));
|
||||
}
|
||||
|
||||
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
|
||||
{
|
||||
Base::CopyTo(nodeP, newName, flags);
|
||||
if (flags & CopyNodeFlags::copyNodeValue)
|
||||
{
|
||||
auto node = dynamic_pointer_cast<DiagTimesNode<ElemType>>(nodeP);
|
||||
node->m_innerproduct->SetValue(*m_innerproduct);
|
||||
node->m_rightGradient->SetValue(*m_rightGradient);
|
||||
}
|
||||
}
|
||||
// request matrices that are needed for gradient computation
|
||||
virtual void RequestMatricesBeforeBackprop(MatrixPool& matrixPool)
|
||||
{
|
||||
Base::RequestMatricesBeforeBackprop(matrixPool);
|
||||
RequestMatrixFromPool(m_innerproduct, matrixPool);
|
||||
RequestMatrixFromPool(m_rightGradient, matrixPool);
|
||||
}
|
||||
|
||||
// release gradient and temp matrices that no longer needed after all the children's gradients are computed.
|
||||
virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool)
|
||||
{
|
||||
Base::ReleaseMatricesAfterBackprop(matrixPool);
|
||||
ReleaseMatrixToPool(m_innerproduct, matrixPool);
|
||||
ReleaseMatrixToPool(m_rightGradient, matrixPool);
|
||||
}
|
||||
|
||||
private:
|
||||
shared_ptr<Matrix<ElemType>> m_innerproduct;
|
||||
shared_ptr<Matrix<ElemType>> m_rightGradient;
|
||||
};
|
||||
|
||||
template class DiagTimesNode<float>;
|
||||
template class DiagTimesNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// SumElementsNode (input)
|
||||
// Sums up all elements in the input across all samples into a single scalar.
|
||||
|
|
|
@ -37,6 +37,7 @@ template <class ElemType>
|
|||
node->m_axis = m_axis;
|
||||
node->m_operation = m_operation;
|
||||
node->m_reductionOp = m_reductionOp;
|
||||
node->m_scale = m_scale;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,8 +65,8 @@ template <class ElemType>
|
|||
auto input = Input(0)->ValueTensorFor(rank, fr);
|
||||
|
||||
// the actual operation is a Copy with reduction, where the magic is in the reduction op
|
||||
result.DoUnaryOpOf(0, input, 1, ElementWiseOperator::opCopy, m_reductionOp);
|
||||
// note: we can implement "Mean" by passing 1/dim for alpha
|
||||
// For "Mean", m_scale is 1/#elements, and 1 otherwise.
|
||||
result.DoUnaryOpOf(0, input, m_scale, ElementWiseOperator::opCopy, m_reductionOp);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -82,8 +83,9 @@ template <class ElemType>
|
|||
switch (m_reductionOp)
|
||||
{
|
||||
case ElementWiseOperator::opSum:
|
||||
// "Sum": broadcast the gradient
|
||||
sliceInputGrad.AddCopyOf(sliceOutputGrad);
|
||||
// "Sum": broadcast the gradient
|
||||
// "Mean": same as "Sum" with scaling by 1/#dims
|
||||
sliceInputGrad.AddCopyOf(sliceOutputGrad, m_scale);
|
||||
break;
|
||||
|
||||
case ElementWiseOperator::opLogSum:
|
||||
|
@ -95,7 +97,7 @@ template <class ElemType>
|
|||
// df / dx = exp(x)/exp(f)
|
||||
// = exp(x – f)
|
||||
sliceInputGrad.AddElementwiseProductWithExpOfDiffOf(sliceOutputGrad, input, output);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ElementWiseOperator::opMin:
|
||||
|
@ -120,12 +122,6 @@ template <class ElemType>
|
|||
break;
|
||||
|
||||
// more coming
|
||||
|
||||
// "LogPlus": softmax
|
||||
// f(x) = log(sum_i exp x_i), hence gradient is:
|
||||
// df / dx_i = 1 / (sum_j exp x_j) * exp x_i = (Softmax(x))_i = exp(x_i - ReduceLogPlus(x))
|
||||
// targetGradient = gradientFromTop .* Exp (inputValue - outputValue) --TODO: verify
|
||||
// i.e. compute dfference if input and output, then Exp in-place. No, would need temp memory. So needs its own opcode AddScaledExpOfDiff(). Ternary.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -164,6 +160,7 @@ void ReduceElementsNode<ElemType>::ValidateOp()
|
|||
else
|
||||
#endif
|
||||
if (m_operation == L"Sum") m_reductionOp = ElementWiseOperator::opSum;
|
||||
else if (m_operation == L"Mean") m_reductionOp = ElementWiseOperator::opSum;
|
||||
else if (m_operation == L"LogSum") m_reductionOp = ElementWiseOperator::opLogSum;
|
||||
else if (m_operation == L"Min") m_reductionOp = ElementWiseOperator::opMin;
|
||||
else if (m_operation == L"Max") m_reductionOp = ElementWiseOperator::opMax;
|
||||
|
@ -183,13 +180,26 @@ template <class ElemType>
|
|||
|
||||
let shape = Input(0)->GetSampleLayout();
|
||||
auto dims = shape.GetDims();
|
||||
size_t reducedDim = 0; // (init to keep compiler happy)
|
||||
if (m_axis == 0)
|
||||
{
|
||||
reducedDim = shape.GetNumElements();
|
||||
dims = { 1 }; // entire sample is reduced to a scalar
|
||||
}
|
||||
else if (m_axis - 1 >= 0 && m_axis - 1 < dims.size())
|
||||
{
|
||||
reducedDim = dims[m_axis - 1];
|
||||
dims[m_axis - 1] = 1; // one axis is reduced to a scalar
|
||||
}
|
||||
else if (isFinalValidationPass)
|
||||
InvalidArgument("The shape of %ls [%s] has no axis %d", NodeDescription().c_str(), string(shape).c_str(), m_axis);
|
||||
|
||||
// for "Mean", we must divide by #elements
|
||||
if (isFinalValidationPass && m_operation == L"Mean")
|
||||
m_scale = (ElemType)(1.0 / reducedDim);
|
||||
else
|
||||
m_scale = (ElemType)1;
|
||||
|
||||
SetDims(TensorShape(dims), Input(0)->HasMBLayout());
|
||||
}
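A tiny standalone illustration of the scale derived above: with m_scale = 1/reducedDim, the same copy-with-reduction kernel that implements "Sum" yields a mean in the forward pass, and the backward pass scales the broadcast gradient by the same factor (illustrative only).

// "Mean" = "Sum" with scale = 1/#reduced elements, forward and backward.
#include <cstdio>

int main()
{
    const int reducedDim = 4;
    const float x[reducedDim] = {1.0f, 2.0f, 3.0f, 6.0f};
    const float scale = 1.0f / reducedDim; // m_scale for "Mean"; 1 for "Sum"

    float sum = 0;
    for (float v : x)
        sum += v;
    std::printf("mean = %.2f\n", scale * sum); // 3.00: forward is a scaled copy-with-reduction

    // backward: every input element receives the output gradient times the same scale
    float outputGrad = 1.0f;
    std::printf("dMean/dx_i = %.2f\n", scale * outputGrad); // 0.25 for each element
}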
|
||||
|
||||
|
|
|
@ -176,10 +176,10 @@ template class ReshapeNode<double>;
|
|||
// The optional axis can be 0 (meaning all elements) or a specific axis.
|
||||
// Allowed operations:
|
||||
// - "Sum"
|
||||
// - "LogSum" --not implemented yet
|
||||
// - "Mean" --not implemented yet
|
||||
// - "Max" --not implemented yet
|
||||
// - "Min" --not implemented yet
|
||||
// - "LogSum"
|
||||
// - "Mean"
|
||||
// - "Max"
|
||||
// - "Min"
|
||||
// - "All" --not implemented yet
|
||||
// - "Any" --not implemented yet
|
||||
// TODO:
|
||||
|
@ -196,7 +196,7 @@ class ReduceElementsNode : public ComputationNode<ElemType>, public NumInputs<1>
|
|||
void ValidateOp();
|
||||
public:
|
||||
ReduceElementsNode(DEVICEID_TYPE deviceId, const wstring& name, const std::wstring& operation = std::wstring(), int axis = 0) :
|
||||
Base(deviceId, name), m_operation(operation), m_axis(axis), m_reductionOp((ElementWiseOperator)-1/*invalid*/)
|
||||
Base(deviceId, name), m_operation(operation), m_axis(axis), m_reductionOp((ElementWiseOperator)-1/*invalid*/), m_scale(0/*invalid*/)
|
||||
{
|
||||
if (!m_operation.empty()) // verify validity already here out of courtesy (would otherwise be caught in Validate())
|
||||
ValidateOp();
|
||||
|
@ -221,9 +221,13 @@ public:
|
|||
int ReductionAxis() const { return m_axis; }
|
||||
|
||||
private:
|
||||
// operation attributes
|
||||
int m_axis;
|
||||
std::wstring m_operation; // the operation as a string, e.g. "Sum", see ValidateOp()
|
||||
std::wstring m_operation; // the operation as a string, e.g. "Sum", see ValidateOp()
|
||||
|
||||
// things cached during validation
|
||||
ElementWiseOperator m_reductionOp; // the reduction operation mapped to our internal opCode
|
||||
ElemType m_scale; // 1 or, for Mean, 1/number of elements we are reducing over
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
@ -1629,7 +1629,7 @@ public:
|
|||
fstream >> m_normTimeConst;
|
||||
fstream >> m_blendTimeConst;
|
||||
fstream >> m_imageLayoutKind;
|
||||
if (modelVersion >= CNTK_MODEL_VERSION_12)
|
||||
if (modelVersion >= CNTK_MODEL_VERSION_13)
|
||||
fstream >> m_samplesSeen;
|
||||
else
|
||||
fstream >> mbCount; // converted below
|
||||
|
@ -1677,7 +1677,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
if (modelVersion < CNTK_MODEL_VERSION_12)
|
||||
if (modelVersion < CNTK_MODEL_VERSION_13)
|
||||
{
|
||||
// Prior to version 12, minibatch count was stored instead of samples seen.
|
||||
// Approximate by assuming minibatch size 16, inform about that.
|
||||
|
@ -1779,7 +1779,7 @@ public:
|
|||
LogicError("%ls: Failed to convert running variance until forward prop", NodeName().c_str());
|
||||
FrameRange fr(Input(0)->GetMBLayout());
|
||||
|
||||
Matrix<ElemType> sliceInputValue = Input(0)->ValueFor(fr);
|
||||
Matrix<ElemType> sliceInputValue = Input(0)->MaskedValueFor(fr);
|
||||
const Matrix<ElemType>& scale = Input(1)->Value();
|
||||
const Matrix<ElemType>& bias = Input(2)->Value();
|
||||
Matrix<ElemType>& runMean = Input(3)->Value();
|
||||
|
@ -1828,10 +1828,10 @@ public:
|
|||
|
||||
if (inputIndex == 0) // derivative with respect to the input.
|
||||
{
|
||||
auto sliceOutputGrad = GradientFor(fr);
|
||||
auto sliceInputValue = Input(0)->ValueFor(fr);
|
||||
const Matrix<ElemType>& scale = Input(1)->Value();
|
||||
const Matrix<ElemType>& bias = Input(2)->Value();
|
||||
auto sliceOutputGrad = MaskedGradientFor(fr);
|
||||
auto sliceInputValue = Input(0)->ValueFor(fr);
|
||||
const Matrix<ElemType>& scale = Input(1)->Value();
|
||||
const Matrix<ElemType>& bias = Input(2)->Value();
|
||||
|
||||
auto sliceInputGrad = Input(0)->GradientFor(fr);
|
||||
m_dScale->Resize(scale); // gradients for scale and bias get stored here
|
||||
|
|
|
@ -127,13 +127,13 @@ std::unique_ptr<BatchNormEngine<ElemType>> BatchNormEngine<ElemType>::Create(DEV
|
|||
// Use CNTK as default batch norm engine.
|
||||
if (HasFlag(enabledEngines, BatchNormEngineKind::Cntk))
|
||||
{
|
||||
fprintf(stderr, "\nUsing CNTK batch normalization engine.\n");
|
||||
fprintf(stderr, "Using CNTK batch normalization engine.\n");
|
||||
return std::make_unique<CntkBatchNormEngine<ElemType>>(deviceId, inOutT, spatial, imageLayout);
|
||||
}
|
||||
|
||||
if (HasFlag(enabledEngines, BatchNormEngineKind::CuDnn))
|
||||
{
|
||||
fprintf(stderr, "\nUsing cuDNN batch normalization engine.\n");
|
||||
fprintf(stderr, "Using cuDNN batch normalization engine.\n");
|
||||
return CuDnnBatchNormEngineFactory<ElemType>::Create(deviceId, inOutT, spatial, imageLayout);
|
||||
}
|
||||
|
||||
|
|
|
@ -81,12 +81,7 @@ public:
|
|||
static cudacode void ComputeRangeStatColj(const ElemType* inMat, const ElemType* inResidual, long M, size_t j, size_t bits, ElemType& lower, ElemType& upper)
|
||||
{
|
||||
/*dummy reducers do nothing in linear CPU version*/
|
||||
ComputeRangeStatColjSubset<ZeroThresholdFor1Bit>(inMat, inResidual, M, j, bits, lower, upper, 0, 1, [](ElemType&)
|
||||
{
|
||||
},
|
||||
[](unsigned int&)
|
||||
{
|
||||
});
|
||||
ComputeRangeStatColjSubset<ZeroThresholdFor1Bit>(inMat, inResidual, M, j, bits, lower, upper, 0, 1, [](ElemType&){}, [](unsigned int&){});
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -231,9 +226,9 @@ public:
|
|||
// i.e.
|
||||
// - do not symmetrize/pool the quantization values for 0 and 1
|
||||
// - but hard-code the quantization threshold to be 0 instead of the mean of the two bounds
|
||||
// This should give us the best of all--fast operation yet ability to be asymmetric within a column
|
||||
// This should give us the best of all--fast operation yet ability to be asymmetric within a column.
|
||||
ElemType mean = 0.0f;
|
||||
if (!ZeroThresholdFor1Bit || (bits != 1))
|
||||
if (!ZeroThresholdFor1Bit && (bits == 1))
|
||||
{
|
||||
ElemType meanacc = 0.0f;
|
||||
// (subset: compute subset sum)
|
||||
|
@ -320,7 +315,7 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
ElemType stddevs = 5.0f;
|
||||
ElemType stddevs = 4.0f; // TODO: make this a parameter
|
||||
// >1 bit:
|
||||
// We linearly quantize between 'stddevs' standard deviations.
|
||||
ElemType varacc = 0.0f;
|
||||
|
@ -349,7 +344,6 @@ private:
|
|||
template <typename T>
|
||||
friend class QuantizedMatrix;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}}}
|
||||
#endif
|
||||
|
|
|
@ -866,7 +866,7 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
|
|||
if (!isEnabled(ConvolutionEngineKind::Legacy))
|
||||
RuntimeError("Trying to use Legacy convolution engine when it's disabled.");
|
||||
// REVIEW alexeyk: should honor m_traceLevel here.
|
||||
fprintf(stderr, "\n%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
fprintf(stderr, "%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
return std::make_unique<LegacyConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
|
||||
}
|
||||
|
||||
|
@ -874,19 +874,19 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
|
|||
if (isEnabled(ConvolutionEngineKind::CuDnn) &&
|
||||
CuDnnConvolutionEngineFactory<ElemType>::IsSupported(deviceId, geometry, poolKind))
|
||||
{
|
||||
fprintf(stderr, "\n%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
fprintf(stderr, "%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
return CuDnnConvolutionEngineFactory<ElemType>::Create(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
|
||||
}
|
||||
|
||||
if (isEnabled(ConvolutionEngineKind::Gemm) && GemmConvolutionEngine<ElemType>::IsSupported(deviceId, geometry))
|
||||
{
|
||||
fprintf(stderr, "\n%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
fprintf(stderr, "%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
return std::make_unique<GemmConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
|
||||
}
|
||||
|
||||
if (!isEnabled(ConvolutionEngineKind::Reference))
|
||||
RuntimeError("Reference convolution is disabled and no other engine supports such configuratin (or disabled).");
|
||||
fprintf(stderr, "\n%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
fprintf(stderr, "%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
|
||||
return std::make_unique<ReferenceConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,9 @@ GPURNGHandle::GPURNGHandle(int deviceId, unsigned long seed)
|
|||
: RNGHandle(deviceId)
|
||||
{
|
||||
unsigned long long cudaSeed = seed;
|
||||
#ifdef _DEBUG
|
||||
fprintf(stderr, "(GPU): creating curand object with seed %llu\n", cudaSeed);
|
||||
#endif
|
||||
|
||||
CURAND_CALL(curandCreateGenerator(&m_generator, CURAND_RNG_PSEUDO_XORWOW));
|
||||
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(m_generator, cudaSeed));
|
||||
|
|
|
@ -32,33 +32,33 @@ void MatrixQuantizerCPU<ElemType>::QuantizeAsync(const Matrix<ElemType>& inMatri
|
|||
#else
|
||||
for (size_t j = 0; j < nCol; j++)
|
||||
#endif
|
||||
{
|
||||
auto& qcol = *(outQMatrix.GetQuantizedColumn(j));
|
||||
if (zeroThresholdFor1Bit)
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
|
||||
}
|
||||
{
|
||||
auto& qcol = *(outQMatrix.GetQuantizedColumn(j));
|
||||
if (zeroThresholdFor1Bit)
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
ColumnQuantizer<ElemType>::template ComputeRangeStatColj<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, nBits, qcol.lower, qcol.upper);
|
||||
}
|
||||
|
||||
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
|
||||
if (zeroThresholdFor1Bit)
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
q.template Quantize<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
q.template Quantize<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
|
||||
}
|
||||
}
|
||||
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
|
||||
if (zeroThresholdFor1Bit)
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
q.template Quantize<true>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Explicit use of 'template' keyword is needed to compile with GCC
|
||||
q.template Quantize<false>(inMatrix.Data(), inResidual.Data(), (long) nRow, j, qcol.bits, outResidual.Data());
|
||||
}
|
||||
}
|
||||
#ifdef QUANTUSEPPL
|
||||
);
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -89,13 +89,13 @@ void MatrixQuantizerCPU<ElemType>::UnquantizeAsync(QuantizedMatrix<ElemType>& in
|
|||
#else
|
||||
for (size_t j = 0; j < nCol; j++)
|
||||
#endif
|
||||
{
|
||||
const auto& qcol = *(inQMatrix.GetQuantizedColumn(j));
|
||||
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
|
||||
q.Unquantize(outMatrix.Data(), (long) nRow, j, qcol.bits, add);
|
||||
}
|
||||
{
|
||||
const auto& qcol = *(inQMatrix.GetQuantizedColumn(j));
|
||||
ColumnQuantizer<ElemType> q(ldNbits, qcol.lower, qcol.upper);
|
||||
q.Unquantize(outMatrix.Data(), (long) nRow, j, qcol.bits, add);
|
||||
}
|
||||
#ifdef QUANTUSEPPL
|
||||
);
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -108,4 +108,5 @@ void MatrixQuantizerCPU<ElemType>::WaitUnquantizeAsyncDone()
|
|||
//The explicit instantiation part will make the linker happy
|
||||
template class MatrixQuantizerCPU<float>;
|
||||
template class MatrixQuantizerCPU<double>;
|
||||
} } }
|
||||
|
||||
}}}
|
||||
|
|
|
@ -182,4 +182,5 @@ void QuantizedMatrix<ElemType>::Print(const char* matrixName, size_t rowStart, s
|
|||
// Explicit instantiation
|
||||
template class QuantizedMatrix<float>;
|
||||
template class QuantizedMatrix<double>;
|
||||
} } }
|
||||
|
||||
}}}
|
||||
|
|
|
@ -119,4 +119,5 @@ private:
|
|||
template <typename T>
|
||||
friend class MatrixQuantizer;
|
||||
};
|
||||
} } }
|
||||
|
||||
}}}
|
||||
|
|
|
@ -83,10 +83,12 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
// make the range asymmetrical, so we get a 0 slot
|
||||
size_t usedrangeend = rangeend - (Nbits > 1); // TODO: make this a parameter
|
||||
// precompute this for quantize() (see comment there)
|
||||
qfactor = rangeend / (quantimax - quantimin);
|
||||
qfactor = usedrangeend / (quantimax - quantimin);
|
||||
// and for unquantize()
|
||||
ufactor = (quantimax - quantimin) / rangeend;
|
||||
ufactor = (quantimax - quantimin) / usedrangeend;
|
||||
}
|
||||
|
||||
// set the quantization threshold for the special case of 1-bit
|
||||
|
@ -127,6 +129,7 @@ public:
|
|||
// unquantize one value
|
||||
cudasharedcode ElemType Unquantize(QWordVal u) const
|
||||
{
|
||||
// special branch that does not quantize at all, for testing
|
||||
if (Nbits == QWordNumBits)
|
||||
{
|
||||
return *(ElemType*) &u;
|
||||
|
|
|
@@ -311,10 +311,10 @@ public:
wstring key;
if (!labels.empty()) // empty means unsupervised mode (don't load any)
{
#ifdef _MSC_VER
#ifdef _WIN32
key = regex_replace((wstring) ppath, wregex(L"\\.[^\\.\\\\/:]*$"), wstring()); // delete extension (or not if none)
#else
key = removeExtension(basename(ppath));
key = removeExtension(ppath);
#endif
if (labels.find(key) == labels.end())
{

@@ -630,9 +630,8 @@ public:
{
#ifdef _WIN32
key = regex_replace((wstring) ppath, wregex(L"\\.[^\\.\\\\/:]*$"), wstring()); // delete extension (or not if none)
#endif
#ifdef __unix__
key = removeExtension(basename(ppath));
#else
key = removeExtension(ppath);
#endif
if (labels[0].find(key) == labels[0].end())
{
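Both branches above derive the label-lookup key by stripping only the extension from the path. StripExtension below is an illustrative stand-in for that behavior (not the actual removeExtension helper), matching the regex's rule that a dot inside a directory name does not count as an extension:

// Illustrative sketch of the key derivation above.
#include <cstdio>
#include <string>

std::wstring StripExtension(const std::wstring& path)
{
    size_t dot = path.find_last_of(L'.');
    size_t sep = path.find_last_of(L"/\\");
    if (dot == std::wstring::npos || (sep != std::wstring::npos && dot < sep))
        return path;              // no extension after the last path separator
    return path.substr(0, dot);   // drop ".ext"
}

int main()
{
    std::wprintf(L"%ls\n", StripExtension(L"data/speech/utt0001.mfc").c_str());  // data/speech/utt0001
    std::wprintf(L"%ls\n", StripExtension(L"data/archive.v2/utt0002").c_str());  // unchanged (no extension)
}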
@@ -74,7 +74,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(OpenCvInclude);$(ZipInclude);$(SolutionDir)Source\Readers\ReaderLib</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(OpenCvInclude);$(ZipInclude);$(SolutionDir)Source\Readers\ReaderLib;$(BOOST_INCLUDE_PATH)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir);$(OpenCvLibPath);$(ZipLibPath)</AdditionalLibraryDirectories>

@@ -127,10 +127,11 @@
<ClCompile Include="ZipByteReader.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<Target Name="Build" Condition="$(HasOpenCv)" Outputs="$(TargetPath)" DependsOnTargets="$(BuildDependsOn)" />
<Target Name="Build" Condition="$(HasOpenCv) And $(HasBoost)" Outputs="$(TargetPath)" DependsOnTargets="$(BuildDependsOn)" />
<ImportGroup Label="ExtensionTargets" />
<Target Name="CheckDependencies">
<Warning Condition="!$(HasBoost)" Text="ImageReader requires the Boost library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#boost for installation instructions." />
<Warning Condition="!$(HasOpenCv)" Text="ImageReader requires the OpenCV library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#opencv for installation instructions." />
<Warning Condition="!$(UseZip)" Text="zlib and libzip libraries were not found, ImageReader will be built without zip container support. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#libzip for installation instructions." />
</Target>
</Project>
</Project>