Added RowElementTimes and ColumnElementTimes nodes.

Reverted the ElementTimes node to plain element-wise multiplication, since the previous implementation of column element-wise multiplication there was incorrect.
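For context, the dimension rules that the new nodes' Validate() methods enforce reduce to the three products sketched below. This is a minimal standalone illustration (not CNTK code), assuming a dense row-major matrix type; all names in it are hypothetical.

#include <cassert>
#include <cstdio>
#include <vector>

using Mat = std::vector<std::vector<double>>; // dense row-major matrix, illustration only

// ElementTimes after the revert: both operands must have identical dimensions.
Mat ElementTimes(const Mat& a, const Mat& b)
{
    assert(a.size() == b.size() && a[0].size() == b[0].size());
    Mat c = a;
    for (size_t i = 0; i < a.size(); i++)
        for (size_t j = 0; j < a[0].size(); j++)
            c[i][j] = a[i][j] * b[i][j];
    return c;
}

// RowElementTimes: input 1 is a 1 x cols row vector; column j of the matrix is scaled by v[j].
Mat RowElementTimes(const Mat& a, const std::vector<double>& v)
{
    assert(a[0].size() == v.size());
    Mat c = a;
    for (size_t i = 0; i < a.size(); i++)
        for (size_t j = 0; j < a[0].size(); j++)
            c[i][j] = a[i][j] * v[j];
    return c;
}

// ColumnElementTimes: input 1 is a rows x 1 column vector; row i of the matrix is scaled by v[i].
Mat ColumnElementTimes(const Mat& a, const std::vector<double>& v)
{
    assert(a.size() == v.size());
    Mat c = a;
    for (size_t i = 0; i < a.size(); i++)
        for (size_t j = 0; j < a[0].size(); j++)
            c[i][j] = a[i][j] * v[i];
    return c;
}

int main()
{
    Mat a = { { 1, 2, 3 }, { 4, 5, 6 } };
    Mat m = ElementTimes(a, a);                    // {{1,4,9},{16,25,36}}
    Mat r = RowElementTimes(a, { 10, 100, 1000 }); // {{10,200,3000},{40,500,6000}}
    Mat c = ColumnElementTimes(a, { 10, 100 });    // {{10,20,30},{400,500,600}}
    std::printf("%g %g %g\n", m[1][2], r[1][2], c[1][2]); // prints 36 6000 600
    return 0;
}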
This commit is contained in:
Dong Yu 2015-08-05 15:13:11 -07:00
Parent a0567f66ba
Commit d62e5db3f1
55 changed files with 724 additions and 969 deletions

View file

@ -1,605 +0,0 @@
running on KAISHENGLP1 at 2014/08/28 16:50:28
command line options:
configFile=C:\dev\cntk3\CheckInSuites\SLU\globals.config+C:\dev\cntk3\CheckInSuites\SLU\rnnlu.config
>>>>>>>>>>>>>>>>>>>> config >>>>>>>>>>>>>>>>>>>>
configparameters: rnnlu.config:command=LSTM:LSTMTest
configparameters: rnnlu.config:ConfigDir=$WorkDir$\config
configparameters: rnnlu.config:DataDir=$WorkDir$
configparameters: rnnlu.config:DeviceNumber=-1
configparameters: rnnlu.config:ExpDir=c:\temp\exp\atis
configparameters: rnnlu.config:LSTM=[
action=train
makeMode=true
minibatchSize=10
traceLevel=1
deviceId=-1
epochSize=4430000
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
defaultHiddenActivity=0.1
recurrentLayer=2
initValueScale=6.0
layerSizes=2832:50:300:127
rnnType=LSTM
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
SGD=[
learningRatesPerSample=0.1
momentumPerMB=0.90
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=3
gradientcheck=false
numMBsToShowResult=1000
modelPath=$ExpDir$\cntkdebug.dnn
loadBestModel=true
AutoAdjust=[
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
readerType=LUSequenceReader
wordContext=0:1:2
randomize=None
nbruttsineachrecurrentiter=10
wfile=$ExpDir$\sequenceSentence.bin
wsize=256
wrecords=1000
windowSize=10000
unk="<unk>"
wordmap=$DataDir$\inputmap.txt
file=$DataDir$\atis.train.apos.pred.pos.head.IOB.simple
features=[
dim=0
sectionType=data
]
sequence=[
dim=1
wrecords=2
sectionType=data
]
labelIn=[
dim=1
usewordmap=true
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
token=$DataDir$\input.txt
elementSize=4
sectionType=labels
mapping=[
wrecords=11
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
sectionType=categoryLabels
]
]
labels=[
dim=1
labelType=Category
beginSequence="O"
endSequence="O"
token=$DataDir$\output.txt
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
cvReader=[
readerType=LUSequenceReader
randomize=None
wordContext=0:1:2
wfile=$ExpDir$\sequenceSentence.valid.bin
wsize=256
wrecords=1000
windowSize=10000
unk="<unk>"
wordmap=$DataDir$\inputmap.txt
file=$DataDir$\atis.dev.IOB.simple
features=[
dim=0
sectionType=data
]
sequence=[
dim=1
wrecords=2
sectionType=data
]
labelIn=[
dim=1
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.in.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
token=$DataDir$\input.txt
elementSize=4
sectionType=labels
mapping=[
wrecords=11
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
sectionType=categoryLabels
]
]
labels=[
dim=1
labelType=Category
beginSequence="O"
endSequence="O"
token=$DataDir$\output.txt
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
elementSize=4
sectionType=labels
mapping=[
wrecords=3
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
sectionType=categoryLabels
]
]
]
]
configparameters: rnnlu.config:LSTMTest=[
action=write
minibatchSize=1
traceLevel=1
deviceId=-1
epochSize=4430000
defaultHiddenActivity=0.1
modelPath=$ExpDir$\cntkdebug.dnn
outputNodeNames=outputs
reader=[
readerType=LUSequenceReader
randomize=None
wordContext=0:1:2
unk="<unk>"
wordmap=$DataDir$\inputmap.txt
file=$DataDir$\atis.test.apos.pred.pos.head.IOB.simple
wfile=$ExpDir$\sequenceSentence.bin
wsize=256
wrecords=1000
windowSize=10000
features=[
dim=0
sectionType=data
]
sequence=[
dim=1
wrecords=2
sectionType=data
]
labelIn=[
dim=1
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
token=$DataDir$\input.txt
elementSize=4
sectionType=labels
mapping=[
wrecords=11
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
sectionType=categoryLabels
]
]
labels=[
dim=1
labelType=Category
beginSequence="BOS"
endSequence="EOS"
token=$DataDir$\output.txt
labelDim=127
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
elementSize=4
sectionType=labels
mapping=[
wrecords=3
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
sectionType=categoryLabels
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\output.txt
]
]
]
configparameters: rnnlu.config:NdlDir=$ConfigDir$
configparameters: rnnlu.config:OutDir=$ExpDir$
configparameters: rnnlu.config:stderr=$ExpDir$\ATIS\log
configparameters: rnnlu.config:type=double
configparameters: rnnlu.config:WorkDir=.
<<<<<<<<<<<<<<<<<<<< config <<<<<<<<<<<<<<<<<<<<
command: LSTM LSTMTest
precision = double
SimpleNetworkBuilder Using CPU
reading sequence file .\atis.train.apos.pred.pos.head.IOB.simple
reading sequence file .\atis.dev.IOB.simple
GetTrainCriterionNodes ...
GetEvalCriterionNodes ...
nodes in the recurrent loops :
AutoName37 AutoName4 AutoName15 AutoName16 AutoName18 AutoName19 AutoName5 AutoName8 AutoName1 AutoName9 AutoName12 AutoName13 AutoName14 AutoName20 AutoName7 AutoName6 AutoName21 AutoName2 AutoName22 AutoName25 AutoName26 AutoName27 AutoName28 AutoName29 AutoName30 AutoName3 AutoName31 AutoName34 AutoName35 AutoName36 AutoName38
Validating node CrossEntropyWithSoftmax
Validating --> labels = InputValue
Validating --> W2 = LearnableParameter
Validating --> WXO0 = LearnableParameter
Validating --> E0 = LearnableParameter
Validating --> features = InputValue
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
Validating --> bo0 = LearnableParameter
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
Validating --> WHO0 = LearnableParameter
Validating --> WCO0 = LearnableParameter
Validating --> WXF0 = LearnableParameter
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
Validating --> bf0 = LearnableParameter
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
Validating --> WHF0 = LearnableParameter
Validating --> WCF0 = LearnableParameter
Validating --> WXI0 = LearnableParameter
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
Validating --> bi0 = LearnableParameter
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
Validating --> WHI0 = LearnableParameter
Validating --> WCI0 = LearnableParameter
Validating --> WXC0 = LearnableParameter
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
Validating --> WHC0 = LearnableParameter
Validating --> bc0 = LearnableParameter
Validating --> AutoName3 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
Validating --> AutoName2 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
Validating --> AutoName6 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName7 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName1 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
Validating --> AutoName5 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName4 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName39 = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[127, 1], AutoName39[127, 1])
No PreCompute nodes found, skipping PreCompute step
Set Max Temp Mem Size For Convolution Nodes to 0 samples.
WARNING: there is no convolution node.
Finished Epoch[1]: [Training Set] Train Loss Per Sample = 4.7967326 EvalErr Per Sample = 4.7967326 Ave Learn Rate Per Sample = 0.1000000015 Epoch Time=0.177
Final Results: Minibatch[1-11]: Samples Seen = 81 CrossEntropyWithSoftmax/Sample = 4.6260059 CrossEntropyWithSoftmax/Sample = 4.6260059
Finished Epoch[1]: [Validation Set] Train Loss Per Sample = 4.6260059 EvalErr Per Sample = 4.6260059
Finished Epoch[2]: [Training Set] Train Loss Per Sample = 4.4580467 EvalErr Per Sample = 4.4580467 Ave Learn Rate Per Sample = 0.1000000015 Epoch Time=0.178
Final Results: Minibatch[1-11]: Samples Seen = 81 CrossEntropyWithSoftmax/Sample = 4.0801723 CrossEntropyWithSoftmax/Sample = 4.0801723
Finished Epoch[2]: [Validation Set] Train Loss Per Sample = 4.0801723 EvalErr Per Sample = 4.0801723
Finished Epoch[3]: [Training Set] Train Loss Per Sample = 3.6568716 EvalErr Per Sample = 3.6568716 Ave Learn Rate Per Sample = 0.1000000015 Epoch Time=0.171
Final Results: Minibatch[1-11]: Samples Seen = 81 CrossEntropyWithSoftmax/Sample = 2.6959986 CrossEntropyWithSoftmax/Sample = 2.6959986
Finished Epoch[3]: [Validation Set] Train Loss Per Sample = 2.6959986 EvalErr Per Sample = 2.6959986
reading sequence file .\atis.test.apos.pred.pos.head.IOB.simple
nodes in the recurrent loops :
AutoName37 AutoName4 AutoName15 AutoName16 AutoName18 AutoName19 AutoName5 AutoName8 AutoName1 AutoName9 AutoName12 AutoName13 AutoName14 AutoName20 AutoName7 AutoName6 AutoName21 AutoName2 AutoName22 AutoName25 AutoName26 AutoName27 AutoName28 AutoName29 AutoName30 AutoName3 AutoName31 AutoName34 AutoName35 AutoName36 AutoName38
Validating node CrossEntropyWithSoftmax
Validating --> labels = InputValue
Validating --> W2 = LearnableParameter
Validating --> WXO0 = LearnableParameter
Validating --> E0 = LearnableParameter
Validating --> features = InputValue
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
Validating --> bo0 = LearnableParameter
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
Validating --> WHO0 = LearnableParameter
Validating --> WCO0 = LearnableParameter
Validating --> WXF0 = LearnableParameter
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
Validating --> bf0 = LearnableParameter
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
Validating --> WHF0 = LearnableParameter
Validating --> WCF0 = LearnableParameter
Validating --> WXI0 = LearnableParameter
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
Validating --> bi0 = LearnableParameter
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
Validating --> WHI0 = LearnableParameter
Validating --> WCI0 = LearnableParameter
Validating --> WXC0 = LearnableParameter
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
Validating --> WHC0 = LearnableParameter
Validating --> bc0 = LearnableParameter
Validating --> AutoName3 = Delay(AutoName38[0, 0])
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300, 1])
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
Validating --> AutoName2 = Delay(AutoName38[0, 0])
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300, 1])
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
Validating --> AutoName6 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName7 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName1 = Delay(AutoName38[0, 0])
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300, 1])
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
Validating --> AutoName5 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName4 = Delay(AutoName38[0, 0])
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300, 1])
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName39 = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[127, 1], AutoName39[127, 1])
Validating node outputs
Validating --> W2 = LearnableParameter
Validating --> WXO0 = LearnableParameter
Validating --> E0 = LearnableParameter
Validating --> features = InputValue
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
Validating --> bo0 = LearnableParameter
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
Validating --> WHO0 = LearnableParameter
Validating --> AutoName3 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
Validating --> WCO0 = LearnableParameter
Validating --> WXF0 = LearnableParameter
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
Validating --> bf0 = LearnableParameter
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
Validating --> WHF0 = LearnableParameter
Validating --> AutoName2 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
Validating --> WCF0 = LearnableParameter
Validating --> AutoName6 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName7 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
Validating --> WXI0 = LearnableParameter
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
Validating --> bi0 = LearnableParameter
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
Validating --> WHI0 = LearnableParameter
Validating --> AutoName1 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
Validating --> WCI0 = LearnableParameter
Validating --> AutoName5 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
Validating --> WXC0 = LearnableParameter
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
Validating --> WHC0 = LearnableParameter
Validating --> AutoName4 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
Validating --> bc0 = LearnableParameter
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
Validating --> outputs = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
Validating node CrossEntropyWithSoftmax
Validating --> labels = InputValue
Validating --> W2 = LearnableParameter
Validating --> WXO0 = LearnableParameter
Validating --> E0 = LearnableParameter
Validating --> features = InputValue
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
Validating --> bo0 = LearnableParameter
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
Validating --> WHO0 = LearnableParameter
Validating --> WCO0 = LearnableParameter
Validating --> WXF0 = LearnableParameter
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
Validating --> bf0 = LearnableParameter
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
Validating --> WHF0 = LearnableParameter
Validating --> WCF0 = LearnableParameter
Validating --> WXI0 = LearnableParameter
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
Validating --> bi0 = LearnableParameter
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
Validating --> WHI0 = LearnableParameter
Validating --> WCI0 = LearnableParameter
Validating --> WXC0 = LearnableParameter
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
Validating --> WHC0 = LearnableParameter
Validating --> bc0 = LearnableParameter
Validating --> AutoName3 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
Validating --> AutoName2 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
Validating --> AutoName6 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName7 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName1 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
Validating --> AutoName5 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName4 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName39 = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[127, 1], AutoName39[127, 1])
nodes in the recurrent loops :
AutoName37 AutoName4 AutoName15 AutoName16 AutoName18 AutoName19 AutoName5 AutoName8 AutoName1 AutoName9 AutoName12 AutoName13 AutoName14 AutoName20 AutoName7 AutoName6 AutoName21 AutoName2 AutoName22 AutoName25 AutoName26 AutoName27 AutoName28 AutoName29 AutoName30 AutoName3 AutoName31 AutoName34 AutoName35 AutoName36 AutoName38
Validating node outputs
Validating --> W2 = LearnableParameter
Validating --> WXO0 = LearnableParameter
Validating --> E0 = LearnableParameter
Validating --> features = InputValue
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
Validating --> bo0 = LearnableParameter
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
Validating --> WHO0 = LearnableParameter
Validating --> WCO0 = LearnableParameter
Validating --> WXF0 = LearnableParameter
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
Validating --> bf0 = LearnableParameter
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
Validating --> WHF0 = LearnableParameter
Validating --> WCF0 = LearnableParameter
Validating --> WXI0 = LearnableParameter
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
Validating --> bi0 = LearnableParameter
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
Validating --> WHI0 = LearnableParameter
Validating --> WCI0 = LearnableParameter
Validating --> WXC0 = LearnableParameter
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
Validating --> WHC0 = LearnableParameter
Validating --> bc0 = LearnableParameter
Validating --> AutoName3 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
Validating --> AutoName2 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
Validating --> AutoName6 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName7 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName1 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
Validating --> AutoName5 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName4 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
Validating --> outputs = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
Total Samples Evaluated = 91

View file

@ -1268,14 +1268,22 @@ public:
{
newNode = new TransposeTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == StrideTimesNode<ElemType>::TypeName())
{
newNode = new StrideTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == ElementTimesNode<ElemType>::TypeName())
{
newNode = new ElementTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == RowElementTimesNode<ElemType>::TypeName())
{
newNode = new RowElementTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == ColumnElementTimesNode<ElemType>::TypeName())
{
newNode = new ColumnElementTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == DiagTimesNode<ElemType>::TypeName())
{
newNode = new DiagTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
@ -1606,14 +1614,22 @@ public:
{
newNode = new TransposeTimesNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == StrideTimesNode<ElemType>::TypeName())
{
newNode = new StrideTimesNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == ElementTimesNode<ElemType>::TypeName())
{
newNode = new ElementTimesNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == RowElementTimesNode<ElemType>::TypeName())
{
newNode = new RowElementTimesNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == ColumnElementTimesNode<ElemType>::TypeName())
{
newNode = new ColumnElementTimesNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == DiagTimesNode<ElemType>::TypeName())
{
newNode = new DiagTimesNode<ElemType>(m_deviceId, nodeName);
@ -2110,7 +2126,26 @@ public:
return newNode;
}
ComputationNodePtr RowElementTimes(const ComputationNodePtr a,
const ComputationNodePtr b,
const std::wstring nodeName = L"")
{
ComputationNodePtr newNode(new RowElementTimesNode<ElemType>(m_deviceId, nodeName));
newNode->AttachInputs(a, b);
AddNodeToNet(newNode);
return newNode;
}
ComputationNodePtr ColumnElementTimes(const ComputationNodePtr a,
const ComputationNodePtr b,
const std::wstring nodeName = L"")
{
ComputationNodePtr newNode(new ColumnElementTimesNode<ElemType>(m_deviceId, nodeName));
newNode->AttachInputs(a, b);
AddNodeToNet(newNode);
return newNode;
}
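// Hypothetical usage of the two factory methods above (sketch only, not part of this commit):
//   ComputationNodePtr m  = ...;   // a [rows x cols] node
//   ComputationNodePtr rv = ...;   // a [1 x cols] parameter
//   ComputationNodePtr cv = ...;   // a [rows x 1] parameter
//   ComputationNodePtr r  = net.RowElementTimes(m, rv, L"scaleColumns");
//   ComputationNodePtr c  = net.ColumnElementTimes(m, cv, L"scaleRows");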
ComputationNodePtr StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"")
{
ComputationNodePtr newNode(new StrideTimesNode<ElemType>(m_deviceId, nodeName));
newNode->AttachInputs(a, b, c);

View file

@ -1269,18 +1269,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// inputIndex == 1 (right) - inputGradientValues[1], inputFunctionValues[0]
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
size_t gradCol = gradientValues.GetNumCols();
size_t inputCol = inputFunctionValues.GetNumCols();
inputGradientValues.AddElementProductOf(gradientValues, inputFunctionValues);
if (gradCol != inputCol && inputCol == 1)
{
inputGradientValues.SetValue(gradientValues);
inputGradientValues.ColumnElementMultiplyWith(inputFunctionValues);
}
else
{
inputGradientValues.AddElementProductOf(gradientValues, inputFunctionValues);
}
#if NANCHECK
inputGradientValues.HasNan("ElementTimes");
#endif
@ -1303,30 +1293,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
{
size_t rows0 = input0.GetNumRows(), cols0 = input0.GetNumCols();
size_t rows1 = input1.GetNumRows(), cols1 = input1.GetNumCols();
if (rows0 == rows1 && cols0 == cols1)
{
functionValues.AssignElementProductOf(input0, input1);
}
else if ((cols0 == 1 || cols1 == 1) && rows1 == rows0) // col vec with matching rows
{
Matrix<ElemType> tmpMat;
if (cols0 == 1)
{
functionValues.SetValue(input1);
functionValues.ColumnElementMultiplyWith(input0);
}
else if (cols1 == 1)
{
functionValues.SetValue(input0);
functionValues.ColumnElementMultiplyWith(input1);
}
}
else
{
throw std::logic_error("The Matrix<ElemType> dimension in the ElementTimes operation does not match.");
}
functionValues.AssignElementProductOf(input0, input1);
#if NANCHECK
functionValues.HasNan("ElementTimes");
#endif
@ -1339,29 +1307,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_children.size() != 2)
throw std::logic_error("ElementTimes operation requires two inputs.");
size_t index = 0;
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
//derive number of rows if possible
for (size_t index = 0; index < 2; index++)
{
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0? Inputs(1-index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0? Inputs(1-index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
Inputs(index)->FunctionValues().Resize(rows, cols);
}
index = 1;
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
{
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0? Inputs(1-index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0? Inputs(1-index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
Inputs(index)->FunctionValues().Resize(rows, cols);
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
{
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0 ? Inputs(1 - index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0 ? Inputs(1 - index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
Inputs(index)->FunctionValues().Resize(rows, cols);
}
}
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("ElementTimes operation: one of the operants has 0 element.");
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
if (rows0 != rows1 || (cols0 != cols1 && cols0 != 1 && cols1 != 1))
if (Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumRows() ||
Inputs(1)->FunctionValues().GetNumCols() != Inputs(0)->FunctionValues().GetNumCols())
throw std::logic_error("The Matrix<ElemType> dimension in the ElementTimes operation does not match.");
FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
@ -1387,6 +1348,364 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class ElementTimesNode<float>;
template class ElementTimesNode<double>;
template<class ElemType>
class RowElementTimesNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;
public:
RowElementTimesNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}
RowElementTimesNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}
// copy constructor
RowElementTimesNode(const RowElementTimesNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId), m_tempMatrix(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;
ComputationNodePtr node = new RowElementTimesNode<ElemType>(this, name, flags);
return node;
}
virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"RowElementTimes"; }
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex > 1)
throw std::invalid_argument("RowElementTimes operation only takes two inputs.");
if (inputIndex == 0)
{
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), m_tempMatrix);
}
else
{
ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), m_tempMatrix);
}
}
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex > 1)
throw std::invalid_argument("RowElementTimes operation only takes two inputs.");
Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceInput1Value = Inputs(1 - inputIndex)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
if (inputIndex == 0)
{
ComputeInputPartialLeftS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
}
else
{
ComputeInputPartialRightS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
}
}
//left (input 0) is a matrix
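//   y(i,j) = x(i,j) * v(j)  =>  dL/dx(i,j) = grad(i,j) * v(j):
//   copy the incoming gradient and scale each of its columns by the row vector.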
static void WINAPI ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
Matrix<ElemType>& input0GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
{
tempMatrix.SetValue(gradientValues);
tempMatrix.RowElementMultiplyWith(input1FunctionValues);
input0GradientValues += tempMatrix;
#if NANCHECK
input0GradientValues.HasNan("RowElementTimes");
#endif
}
//right (input 1) is a row vector
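//   dL/dv(j) = sum_i grad(i,j) * x(i,j): the column-wise inner products of the
//   incoming gradient and input 0 form the 1 x cols gradient for the row vector.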
static void WINAPI ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
Matrix<ElemType>& input1GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
{
tempMatrix.AssignInnerProductOf(gradientValues, input0FunctionValues, true);
input1GradientValues += tempMatrix;
#if NANCHECK
input1GradientValues.HasNan("RowElementTimes");
#endif
}
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
}
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
}
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
{
functionValues.SetValue(input0);
functionValues.RowElementMultiplyWith(input1);
#if NANCHECK
functionValues.HasNan("RowElementTimes");
#endif
}
virtual void Validate()
{
PrintSelfBeforeValidation();
if (m_children.size() != 2)
throw std::logic_error("RowElementTimes operation requires two inputs.");
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("RowElementTimes operation: one of the operants has 0 element.");
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
if (cols0 != cols1 || rows1 != 1)
throw std::logic_error("RowElementTimes: Either the second operand is not a row vector or the number of columns of operands does not match.");
FunctionValues().Resize(rows0, cols0);
InferImageDimsFromInputs();
}
virtual void InferImageDimsFromInputs()
{
//input 0 is the matrix and input 1 is a row vector
InferImageDimsFromInput(0);
}
virtual void AttachInputs(const ComputationNodePtr leftNode, const ComputationNodePtr rightNode)
{
m_children.resize(2);
m_children[0] = leftNode;
m_children[1] = rightNode;
}
virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId)
{
ComputationNode<ElemType>::MoveMatricesToDevice(deviceId);
if (deviceId != AUTOPLACEMATRIX)
{
if (m_tempMatrix.GetDeviceId() != deviceId)
m_tempMatrix.TransferFromDeviceToDevice(m_tempMatrix.GetDeviceId(), deviceId);
}
}
private:
Matrix<ElemType> m_tempMatrix;
};
template class RowElementTimesNode<float>;
template class RowElementTimesNode<double>;
template<class ElemType>
class ColumnElementTimesNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;
public:
ColumnElementTimesNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}
ColumnElementTimesNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}
// copy constructor
ColumnElementTimesNode(const ColumnElementTimesNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId), m_tempMatrix(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;
ComputationNodePtr node = new ColumnElementTimesNode<ElemType>(this, name, flags);
return node;
}
virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"ColumnElementTimes"; }
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex > 1)
throw std::invalid_argument("ColumnElementTimes operation only takes two inputs.");
if (inputIndex == 0)
{
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), m_tempMatrix);
}
else
{
ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), m_tempMatrix);
}
}
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex > 1)
throw std::invalid_argument("ColumnElementTimes operation only takes two inputs.");
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
if (inputIndex == 0)
{
Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
}
else
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix);
}
}
//left (input 0) is a matrix
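//   y(i,j) = x(i,j) * v(i)  =>  dL/dx(i,j) = grad(i,j) * v(i):
//   copy the incoming gradient and scale each of its rows by the column vector.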
static void WINAPI ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
Matrix<ElemType>& input0GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
{
tempMatrix.SetValue(gradientValues);
tempMatrix.ColumnElementMultiplyWith(input1FunctionValues);
input0GradientValues += tempMatrix;
#if NANCHECK
input0GradientValues.HasNan("ColumnElementTimes");
#endif
}
//right (input 1) is a col vector
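//   dL/dv(i) = sum_j grad(i,j) * x(i,j): the row-wise inner products of the
//   incoming gradient and input 0 form the rows x 1 gradient for the column vector.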
static void WINAPI ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
Matrix<ElemType>& input1GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
{
tempMatrix.AssignInnerProductOf(gradientValues, input0FunctionValues, false);
input1GradientValues += tempMatrix;
#if NANCHECK
input1GradientValues.HasNan("ColumnElementTimes");
#endif
}
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
}
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
}
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
{
functionValues.SetValue(input0);
functionValues.ColumnElementMultiplyWith(input1);
#if NANCHECK
functionValues.HasNan("ColumnElementTimes");
#endif
}
virtual void Validate()
{
PrintSelfBeforeValidation();
if (m_children.size() != 2)
throw std::logic_error("ColumnElementTimes operation requires two inputs.");
//derive number of rows if possible
for (size_t index = 0; index < 2; index++)
{
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
{
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0 ? Inputs(1 - index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0 ? Inputs(1 - index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
Inputs(index)->FunctionValues().Resize(rows, cols);
}
}
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("ColumnElementTimes operation: one of the operants has 0 element.");
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
if (rows0 != rows1 || cols1 != 1)
throw std::logic_error("ColumnElementTimes: Either the second operand is not a column vector or the number of rows of operands does not match.");
FunctionValues().Resize(rows0, cols0);
InferImageDimsFromInputs();
}
virtual void InferImageDimsFromInputs()
{
//input 0 is the matrix and input 1 is a column vector
InferImageDimsFromInput(0);
}
virtual void AttachInputs(const ComputationNodePtr leftNode, const ComputationNodePtr rightNode)
{
m_children.resize(2);
m_children[0] = leftNode;
m_children[1] = rightNode;
}
virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId)
{
ComputationNode<ElemType>::MoveMatricesToDevice(deviceId);
if (deviceId != AUTOPLACEMATRIX)
{
if (m_tempMatrix.GetDeviceId() != deviceId)
m_tempMatrix.TransferFromDeviceToDevice(m_tempMatrix.GetDeviceId(), deviceId);
}
}
private:
Matrix<ElemType> m_tempMatrix;
};
template class ColumnElementTimesNode<float>;
template class ColumnElementTimesNode<double>;
template<class ElemType>
class PlusNode : public ComputationNode<ElemType>
{
@ -3021,14 +3340,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
UsingComputationNodeMembers;
size_t mStrideDim; /// the dimension index on which stride works
size_t mStride; /// the stride
size_t m_StrideDim; /// the dimension index on which stride works
size_t m_Stride; /// the stride
private:
void UpdateStride(const Matrix<ElemType>& input1)
{
mStride = input1.GetNumCols();
m_Stride = input1.GetNumCols();
}
public:
@ -3037,21 +3356,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
mStride = 1;
m_Stride = 1;
InitRecurrentNode();
}
StrideTimesNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
mStride = 1;
m_Stride = 1;
LoadFromFile(fstream, modelVersion, deviceId);
}
// copy constructor
StrideTimesNode(const StrideTimesNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId)
{
mStride = 1;
m_Stride = 1;
node->CopyTo(this, newName, flags);
}
@ -3078,7 +3397,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
if (mStrideDim == 1) /// column stride
if (m_StrideDim == 1) /// column stride
{
if (inputIndex == 0) //left derivative
{
@ -3133,7 +3452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
}
else if (mStrideDim == 0) /// row stride
else if (m_StrideDim == 0) /// row stride
{
if (inputIndex == 0) //left derivative
{
@ -3226,12 +3545,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
UpdateStride(Inputs(1)->FunctionValues());
if (mStrideDim == 0)
if (m_StrideDim == 0)
FunctionValues().Resize(rows0 / m_samplesInRecurrentStep, cols1);
if (mStrideDim == 1)
if (m_StrideDim == 1)
FunctionValues().Resize(rows0, cols1);
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), mStride, mStrideDim);
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_Stride, m_StrideDim);
#ifdef DEBUG_DECODER
fprintf(stderr, "Times node %ls output norm = %.8e, input(0) norm = %.8e, input(1) norm = %.8e\n", this->NodeName().c_str(), FunctionValues().FrobeniusNorm(),
Inputs(0)->FunctionValues().FrobeniusNorm(), Inputs(1)->FunctionValues().FrobeniusNorm());
@ -3244,13 +3563,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
UpdateStride(sliceInput1Value);
if (mStrideDim == 0)
if (m_StrideDim == 0)
FunctionValues().Resize(rows0 / m_samplesInRecurrentStep, cols1);
if (mStrideDim == 1)
if (m_StrideDim == 1)
FunctionValues().Resize(rows0, cols1);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, mStride, mStrideDim);
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_Stride, m_StrideDim);
}
/**
@ -3344,30 +3663,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (Inputs(2)->FunctionValues().GetNumElements() != 1)
LogicError("StrideTimes : input(2) should be a single element matrix");
mStrideDim = (size_t) Inputs(2)->FunctionValues().Get00Element();
m_StrideDim = (size_t) Inputs(2)->FunctionValues().Get00Element();
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
if (mStrideDim != 0 && mStrideDim != 1)
if (m_StrideDim != 0 && m_StrideDim != 1)
LogicError("StrideTimes : stride dim must be either 0 (row) or 1 (column)");
if (Inputs(2)->NeedGradient())
LogicError("StrideTImes : no gradient update should be on input(2)");
//cols0 and rows1 may have been changed so don't use them in the following check
if (mStrideDim == 0)
if (m_StrideDim == 0)
{
if (rows1 != cols0)
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and rows %d in B.", mStrideDim, cols0, rows1);
size_t T1 = rows0 / mStride;
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and rows %d in B.", m_StrideDim, cols0, rows1);
size_t T1 = rows0 / m_Stride;
FunctionValues().Resize(T1, cols1);
}
//cols0 and rows1 may have been changed so don't use them in the following check
if (mStrideDim == 1)
if (m_StrideDim == 1)
{
if (cols0/mStride != rows1)
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and row number %d in B.", mStrideDim, cols0, rows1);
if (cols0/m_Stride != rows1)
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and row number %d in B.", m_StrideDim, cols0, rows1);
FunctionValues().Resize(rows0, cols1);
}

View file

@ -1,284 +1,290 @@
//
// <copyright file="NetworkDescriptionLanguage.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// NetworkDescriptionLanguage.cpp : Code used to interpret the Network Description Language.
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include "NetworkDescriptionLanguage.h"
#include "SynchronousExecutionEngine.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// DuplicateNode - Duplicate a node in a macro as needed (it might already exist)
// node - node we are duplicating
// return - the new duplicated node if it didn't exist, or the previously duplicated node if it already did
template <typename ElemType>
NDLNode<ElemType>* NDLScript<ElemType>::DuplicateNode(NDLNode<ElemType>* node)
{
NDLNode<ElemType>* newNode = node->Copy();
m_children.push_back(newNode);
newNode->SetParentScript(this);
return newNode;
}
template <typename ElemType>
NDLScript<ElemType>::NDLScript(const NDLScript& copyMe) : ConfigParser(copyMe)
{
m_baseName = copyMe.m_baseName;
m_scriptString = copyMe.m_scriptString;
m_macroNode = copyMe.m_macroNode;
m_noDefinitions = copyMe.m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = false; // not defining when expanding macros (the only reason to call this method)
m_cn = copyMe.m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
// script lines in parsed node order
for (NDLNode<ElemType>* node : copyMe.m_script)
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(node);
AddSymbol(newNode->GetName(), newNode);
// now get the parameters to the functions added
ConfigValue value = newNode->GetParamString();
ParseParameters(newNode, value, true /*createNew*/);
// add it to the new script
m_script.push_back(newNode);
}
// now search the symbol table for other symbols that haven't been copied yet
// this happens for constants defined in macros and such
for (std::pair<std::string, NDLNode<ElemType>*> pair : copyMe.m_symbols)
{
// if we can't find the symbol in the copied symbol table, copy it here
if (m_symbols.find(pair.first) == end(m_symbols))
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(pair.second);
AddSymbol(pair.first, newNode);
// anything that takes parameters should be evaluated in the script loop
assert(newNode->GetParamString().empty());
}
}
// NOTE: the child nodes get populated as the nodes are duplicated in the loop above
// we shouldn't try to duplicate them separately
}
// copy constructor, creates a new disconnected copy of this node
// doesn't copy everything, so use for macro expansion only (it's private)
// copyMe - node to copy
template <typename ElemType>
NDLNode<ElemType>::NDLNode(const NDLNode<ElemType>& copyMe)
{
m_name = copyMe.m_name; // value on the left of the equals
m_value = copyMe.m_value; // value on the right of the equals (CN node name, or value)
m_parent = copyMe.m_parent; // parent script
m_type = copyMe.m_type; //type of node
m_paramString = copyMe.m_paramString; // parameter of a function/array
m_paramMacro = copyMe.m_paramMacro; // parameter of a macro (the variables used in the macro definition)
// don't copy over the parameters, they will be reparsed after the copy
//m_parameters = copyMe.m_parameters; // copy over the parameters straight
m_eval = nullptr; // pointer to an arbitrary eval structure
// script for macro calls, need to expand the macro for each call
// if it's not expanded the evalValue will be overwritten on multiple calls to a macro
m_script = (copyMe.m_script) ? new NDLScript<ElemType>(*copyMe.m_script) : nullptr;
}
template <typename ElemType>
NDLScript<ElemType>::NDLScript(const NDLScript&& moveMe) : ConfigParser(move(moveMe))
{
m_baseName = move(moveMe.m_baseName);
m_scriptString = move(moveMe.m_scriptString);
m_script = move(moveMe.m_script); // script lines in parsed node order, macros will have definition followed by body
m_symbols = move(moveMe.m_symbols); // symbol table
m_macroNode = move(moveMe.m_macroNode); // set when interpretting a macro definition
m_noDefinitions = move(moveMe.m_noDefinitions); // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = move(moveMe.m_definingMacro);
m_children = move(moveMe.m_children); // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
m_cn = move(moveMe.m_cn); // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
}
// EqualInsensitive - check to see if two nodes are equal
// string1 - [in,out] string to compare, if comparision is equal insensitive but not sensitive, will replace with sensitive version
// string2 - second string to compare
// alternate - alternate naming of the string
// return - true if strings are equal insensitive and modifies string1 to sensitive version if different
bool EqualInsensitive(std::wstring& string1, const std::wstring& string2, const wchar_t* alternate/*=NULL*/)
{
bool equal = !_wcsnicmp(string1.c_str(), string2.c_str(), string1.size()) && string1.size()==string2.size();
if (!equal && alternate != NULL)
equal = !_wcsnicmp(string1.c_str(), alternate, string1.size()) && string1.size()==wcslen(alternate);
if (equal)
string1 = string2;
return equal;
}
// ++ operator for this enum, so loops work
NDLPass &operator++(NDLPass &ndlPass) {
assert(ndlPass != ndlPassMax);
ndlPass = static_cast<NDLPass>(ndlPass + 1);
return ndlPass;
}
// CheckFunction - check to see if we match a function name
// string1 - [in,out] string to compare, if comparision is equal and at least half the full node name will replace with full node name
// allowUndeterminedVariable - [out] set to true if undetermined variables (symbols yet to be defined) are allowed here
// return - true if function name found
template <typename ElemType>
bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
{
std::wstring nodeType = msra::strfun::utf16(p_nodeType);
bool ret = false;
if (allowUndeterminedVariable)
*allowUndeterminedVariable = true; // be default we allow undetermined variables
if (EqualInsensitive(nodeType, InputValue<ElemType>::TypeName(), L"Input"))
ret = true;
else if (EqualInsensitive(nodeType, InputValue<ElemType>::SparseTypeName(), L"SparseInput"))
ret = true;
else if (EqualInsensitive(nodeType, LearnableParameter<ElemType>::TypeName(), L"Parameter"))
ret = true;
//else if (EqualInsensitive(nodeType, SparseLearnableParameter<ElemType>::TypeName(), L"SparseParameter"))
// ret = true;
else if (EqualInsensitive(nodeType, L"Constant", L"Const"))
ret = true;
else if (EqualInsensitive(nodeType, L"ImageInput", L"Image"))
ret = true;
else if (EqualInsensitive(nodeType, L"SparseImageInput", L"SparseImage"))
ret = true;
else if (EqualInsensitive(nodeType, SumElementsNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, SumColumnElementsNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ScaleNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TransposeNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TransposeTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ElementTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, DiagTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, CosDistanceNode<ElemType>::TypeName(), L"CosDist"))
ret = true;
else if (EqualInsensitive(nodeType, KhatriRaoProductNode<ElemType>::TypeName(), L"ColumnwiseCrossProduct"))
ret = true;
else if (EqualInsensitive(nodeType, PlusNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, MinusNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, NegateNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RectifiedLinearNode<ElemType>::TypeName(), L"ReLU"))
ret = true;
else if (EqualInsensitive(nodeType, SigmoidNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TanhNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ExpNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LogNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, CosineNode<ElemType>::TypeName(), L"Cos"))
ret = true;
else if (EqualInsensitive(nodeType, SoftmaxNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LogSoftmaxNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, SquareErrorNode<ElemType>::TypeName(), L"SE"))
ret = true;
else if (EqualInsensitive(nodeType, CrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CEWithSM"))
ret = true;
else if (EqualInsensitive(nodeType, CrossEntropyNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ClassBasedCrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CBCEWithSM"))
ret = true;
else if (EqualInsensitive(nodeType, MatrixL1RegNode<ElemType>::TypeName(), L"L1Reg"))
ret = true;
else if (EqualInsensitive(nodeType, MatrixL2RegNode<ElemType>::TypeName(), L"L2Reg"))
ret = true;
else if (EqualInsensitive(nodeType, PerDimMeanVarNormalizationNode<ElemType>::TypeName(),L"PerDimMVNorm"))
ret = true;
else if (EqualInsensitive(nodeType, PerDimMeanVarDeNormalizationNode<ElemType>::TypeName(),L"PerDimMVDeNorm"))
ret = true;
else if (EqualInsensitive(nodeType, ErrorPredictionNode<ElemType>::TypeName(), L"ClassificationError"))
ret = true;
else if (EqualInsensitive(nodeType, DropoutNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ReshapeNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowRepeatNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, MeanNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, InvStdDevNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ConvolutionNode<ElemType>::TypeName(), L"Convolve"))
ret = true;
else if (EqualInsensitive(nodeType, MaxPoolingNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, AveragePoolingNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, PastValueNode<ElemType>::TypeName(), L"Delay"))
ret = true;
else if (EqualInsensitive(nodeType, FutureValueNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))
ret = true;
else if (EqualInsensitive(nodeType, CosDistanceWithNegativeSamplesNode<ElemType>::TypeName(), L"CosWithNegSamples"))
ret = true;
else if (EqualInsensitive(nodeType, TimeReverseNode<ElemType>::TypeName(), L"TimeReverse"))
ret = true;
else if (EqualInsensitive(nodeType, CRFNode<ElemType>::TypeName(), L"CRF"))
ret = true;
else if (EqualInsensitive(nodeType, DummyCriterionNode<ElemType>::TypeName(), L"DummyCriterion"))
ret = true;
else if (EqualInsensitive(nodeType, ParallelNode<ElemType>::TypeName(), L"Parallel"))
ret = true;
else if (EqualInsensitive(nodeType, LSTMNode<ElemType>::TypeName(), L"LSTM"))
ret = true;
//
// <copyright file="NetworkDescriptionLanguage.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// NetworkDescriptionLanguage.cpp : Code used to interpret the Network Description Language.
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include "NetworkDescriptionLanguage.h"
#include "SynchronousExecutionEngine.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// DuplicateNode - Duplicate a node in a macro as needed (it might already exist)
// node - node we are duplicating
// return - the new duplicated node if it didn't exist, or the previously duplicated node if it already did
template <typename ElemType>
NDLNode<ElemType>* NDLScript<ElemType>::DuplicateNode(NDLNode<ElemType>* node)
{
NDLNode<ElemType>* newNode = node->Copy();
m_children.push_back(newNode);
newNode->SetParentScript(this);
return newNode;
}
template <typename ElemType>
NDLScript<ElemType>::NDLScript(const NDLScript& copyMe) : ConfigParser(copyMe)
{
m_baseName = copyMe.m_baseName;
m_scriptString = copyMe.m_scriptString;
m_macroNode = copyMe.m_macroNode;
m_noDefinitions = copyMe.m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = false; // not defining when expanding macros (expanding a macro is the only reason to call this constructor)
m_cn = copyMe.m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
// script lines in parsed node order
for (NDLNode<ElemType>* node : copyMe.m_script)
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(node);
AddSymbol(newNode->GetName(), newNode);
// now get the parameters to the functions added
ConfigValue value = newNode->GetParamString();
ParseParameters(newNode, value, true /*createNew*/);
// add it to the new script
m_script.push_back(newNode);
}
// now search the symbol table for other symbols that haven't been copied yet
// this happens for constants defined in macros and such
for (std::pair<std::string, NDLNode<ElemType>*> pair : copyMe.m_symbols)
{
// if we can't find the symbol in the copied symbol table, copy it here
if (m_symbols.find(pair.first) == end(m_symbols))
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(pair.second);
AddSymbol(pair.first, newNode);
// anything that takes parameters should be evaluated in the script loop
assert(newNode->GetParamString().empty());
}
}
// NOTE: the child nodes get populated as the nodes are duplicated in the loop above
// we shouldn't try to duplicate them separately
}
// copy constructor, creates a new disconnected copy of this node
// doesn't copy everything, so use for macro expansion only (it's private)
// copyMe - node to copy
template <typename ElemType>
NDLNode<ElemType>::NDLNode(const NDLNode<ElemType>& copyMe)
{
m_name = copyMe.m_name; // value on the left of the equals
m_value = copyMe.m_value; // value on the right of the equals (CN node name, or value)
m_parent = copyMe.m_parent; // parent script
m_type = copyMe.m_type; //type of node
m_paramString = copyMe.m_paramString; // parameter of a function/array
m_paramMacro = copyMe.m_paramMacro; // parameter of a macro (the variables used in the macro definition)
// don't copy over the parameters, they will be reparsed after the copy
//m_parameters = copyMe.m_parameters; // copy over the parameters straight
m_eval = nullptr; // pointer to an arbitrary eval structure
// script for macro calls, need to expand the macro for each call
// if it's not expanded, the evalValue will be overwritten on multiple calls to the macro
m_script = (copyMe.m_script) ? new NDLScript<ElemType>(*copyMe.m_script) : nullptr;
}
template <typename ElemType>
NDLScript<ElemType>::NDLScript(const NDLScript&& moveMe) : ConfigParser(move(moveMe))
{
m_baseName = move(moveMe.m_baseName);
m_scriptString = move(moveMe.m_scriptString);
m_script = move(moveMe.m_script); // script lines in parsed node order, macros will have definition followed by body
m_symbols = move(moveMe.m_symbols); // symbol table
m_macroNode = move(moveMe.m_macroNode); // set when interpreting a macro definition
m_noDefinitions = move(moveMe.m_noDefinitions); // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = move(moveMe.m_definingMacro);
m_children = move(moveMe.m_children); // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
m_cn = move(moveMe.m_cn); // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
}
// EqualInsensitive - check whether two strings are equal, ignoring case
// string1 - [in,out] string to compare; if the strings match case-insensitively but not case-sensitively, string1 is replaced with the case-sensitive version
// string2 - second string to compare
// alternate - alternate name for the string
// return - true if the strings are equal ignoring case; string1 is updated to the canonical spelling when it differs
bool EqualInsensitive(std::wstring& string1, const std::wstring& string2, const wchar_t* alternate/*=NULL*/)
{
bool equal = !_wcsnicmp(string1.c_str(), string2.c_str(), string1.size()) && string1.size()==string2.size();
if (!equal && alternate != NULL)
equal = !_wcsnicmp(string1.c_str(), alternate, string1.size()) && string1.size()==wcslen(alternate);
if (equal)
string1 = string2;
return equal;
}
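// Editor's sketch (not part of the original source): typical use canonicalizes a user-typed name.
// The string literals below are illustrative values only.
//   std::wstring typed = L"input";
//   if (EqualInsensitive(typed, L"InputValue", L"Input"))
//   {
//       // typed now holds L"InputValue", the canonical spelling
//   }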
// ++ operator for this enum, so loops work
NDLPass &operator++(NDLPass &ndlPass) {
assert(ndlPass != ndlPassMax);
ndlPass = static_cast<NDLPass>(ndlPass + 1);
return ndlPass;
}
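// Editor's sketch (assumed enumerator names; they may differ from the actual header): the increment
// operator exists so the passes can be walked in a plain for loop, e.g.
//   for (NDLPass pass = ndlPassInitial; pass != ndlPassMax; ++pass)
//   {
//       // evaluate the script for this pass
//   }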
// CheckFunction - check whether the given name matches a known function/node type
// p_nodeType - [in,out] node type name to check; replaced with the canonical node type name when a match is found
// allowUndeterminedVariable - [out] set to true if undetermined variables (symbols yet to be defined) are allowed here
// return - true if a matching function/node type name was found
template <typename ElemType>
bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
{
std::wstring nodeType = msra::strfun::utf16(p_nodeType);
bool ret = false;
if (allowUndeterminedVariable)
*allowUndeterminedVariable = true; // by default we allow undetermined variables
if (EqualInsensitive(nodeType, InputValue<ElemType>::TypeName(), L"Input"))
ret = true;
else if (EqualInsensitive(nodeType, InputValue<ElemType>::SparseTypeName(), L"SparseInput"))
ret = true;
else if (EqualInsensitive(nodeType, LearnableParameter<ElemType>::TypeName(), L"Parameter"))
ret = true;
//else if (EqualInsensitive(nodeType, SparseLearnableParameter<ElemType>::TypeName(), L"SparseParameter"))
// ret = true;
else if (EqualInsensitive(nodeType, L"Constant", L"Const"))
ret = true;
else if (EqualInsensitive(nodeType, L"ImageInput", L"Image"))
ret = true;
else if (EqualInsensitive(nodeType, L"SparseImageInput", L"SparseImage"))
ret = true;
else if (EqualInsensitive(nodeType, SumElementsNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, SumColumnElementsNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ScaleNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TransposeNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TransposeTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, StrideTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ElementTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowElementTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ColumnElementTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, DiagTimesNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, CosDistanceNode<ElemType>::TypeName(), L"CosDist"))
ret = true;
else if (EqualInsensitive(nodeType, KhatriRaoProductNode<ElemType>::TypeName(), L"ColumnwiseCrossProduct"))
ret = true;
else if (EqualInsensitive(nodeType, PlusNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, MinusNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, NegateNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RectifiedLinearNode<ElemType>::TypeName(), L"ReLU"))
ret = true;
else if (EqualInsensitive(nodeType, SigmoidNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TanhNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ExpNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LogNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, CosineNode<ElemType>::TypeName(), L"Cos"))
ret = true;
else if (EqualInsensitive(nodeType, SoftmaxNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LogSoftmaxNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, SquareErrorNode<ElemType>::TypeName(), L"SE"))
ret = true;
else if (EqualInsensitive(nodeType, CrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CEWithSM"))
ret = true;
else if (EqualInsensitive(nodeType, CrossEntropyNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ClassBasedCrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CBCEWithSM"))
ret = true;
else if (EqualInsensitive(nodeType, MatrixL1RegNode<ElemType>::TypeName(), L"L1Reg"))
ret = true;
else if (EqualInsensitive(nodeType, MatrixL2RegNode<ElemType>::TypeName(), L"L2Reg"))
ret = true;
else if (EqualInsensitive(nodeType, PerDimMeanVarNormalizationNode<ElemType>::TypeName(),L"PerDimMVNorm"))
ret = true;
else if (EqualInsensitive(nodeType, PerDimMeanVarDeNormalizationNode<ElemType>::TypeName(),L"PerDimMVDeNorm"))
ret = true;
else if (EqualInsensitive(nodeType, ErrorPredictionNode<ElemType>::TypeName(), L"ClassificationError"))
ret = true;
else if (EqualInsensitive(nodeType, DropoutNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ReshapeNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowRepeatNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, MeanNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, InvStdDevNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ConvolutionNode<ElemType>::TypeName(), L"Convolve"))
ret = true;
else if (EqualInsensitive(nodeType, MaxPoolingNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, AveragePoolingNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, PastValueNode<ElemType>::TypeName(), L"Delay"))
ret = true;
else if (EqualInsensitive(nodeType, FutureValueNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))
ret = true;
else if (EqualInsensitive(nodeType, CosDistanceWithNegativeSamplesNode<ElemType>::TypeName(), L"CosWithNegSamples"))
ret = true;
else if (EqualInsensitive(nodeType, TimeReverseNode<ElemType>::TypeName(), L"TimeReverse"))
ret = true;
else if (EqualInsensitive(nodeType, CRFNode<ElemType>::TypeName(), L"CRF"))
ret = true;
else if (EqualInsensitive(nodeType, DummyCriterionNode<ElemType>::TypeName(), L"DummyCriterion"))
ret = true;
else if (EqualInsensitive(nodeType, ParallelNode<ElemType>::TypeName(), L"Parallel"))
ret = true;
else if (EqualInsensitive(nodeType, LSTMNode<ElemType>::TypeName(), L"LSTM"))
ret = true;
else if (EqualInsensitive(nodeType, PairNetworkNode<ElemType>::TypeName(), L"PairNetwork"))
ret = true;
else if (EqualInsensitive(nodeType, StrideTimesNode<ElemType>::TypeName(), L"StrideTimes"))
ret = true;
// return the actual node name in the parameter if we found something
if (ret)
{
p_nodeType = msra::strfun::utf8(nodeType);
}
return ret;
}
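// Editor's sketch (illustrative only): CheckFunction both validates and canonicalizes a node type
// name taken from an NDL script; the exact canonical spelling comes from the node's TypeName().
//   std::string op = "relu";
//   bool allowUndetermined;
//   if (CheckFunction<float>(op, &allowUndetermined))
//   {
//       // op now holds the full type name, e.g. "RectifiedLinear"
//   }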
template <typename ElemType>
NDLScript<ElemType> NDLScript<ElemType>::s_global("global");
// declare the static variables from the classes
template<> NDLScript<float> NDLScript<float>::s_global{};
template<> NDLScript<double> NDLScript<double>::s_global{};
template<> int NDLNode<float>::s_nameCounter = 0;
template<> int NDLNode<double>::s_nameCounter = 0;
template class NDLNode<float>;
template class NDLNode<double>;
template class NDLScript<float>;
template class NDLScript<double>;
}}}

Просмотреть файл

@ -247,7 +247,7 @@ public:
if (doGradientCheck && sizeof(ElemType) != sizeof(double))
{
LogicError("Gradient check needs to use type = double");
LogicError("Gradient check needs to use precision = double");
}
m_doUnitTest = configSGD("unittest", "false");
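// Editor's note (hedged, not part of the diff): the reworded message reflects that gradient checking
// is only meaningful when the network runs in double precision; the key names below are assumptions.
//   precision=double
//   SGD=[ gradientcheck=true ... ]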