Added RowElementTimes and ColumnElementTimes nodes.
Reverted the ElementTimes node to plain element-wise multiplication, since the implementation of column element-wise multiplication there was incorrect.
This commit is contained in:
Родитель
a0567f66ba
Коммит
d62e5db3f1
|
@ -1,605 +0,0 @@
|
|||
running on KAISHENGLP1 at 2014/08/28 16:50:28
|
||||
command line options:
|
||||
configFile=C:\dev\cntk3\CheckInSuites\SLU\globals.config+C:\dev\cntk3\CheckInSuites\SLU\rnnlu.config
|
||||
|
||||
>>>>>>>>>>>>>>>>>>>> config >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: rnnlu.config:command=LSTM:LSTMTest
|
||||
configparameters: rnnlu.config:ConfigDir=$WorkDir$\config
|
||||
configparameters: rnnlu.config:DataDir=$WorkDir$
|
||||
configparameters: rnnlu.config:DeviceNumber=-1
|
||||
configparameters: rnnlu.config:ExpDir=c:\temp\exp\atis
|
||||
configparameters: rnnlu.config:LSTM=[
|
||||
action=train
|
||||
makeMode=true
|
||||
minibatchSize=10
|
||||
traceLevel=1
|
||||
deviceId=-1
|
||||
epochSize=4430000
|
||||
SimpleNetworkBuilder=[
|
||||
trainingCriterion=crossentropywithsoftmax
|
||||
evalCriterion=crossentropywithsoftmax
|
||||
defaultHiddenActivity=0.1
|
||||
recurrentLayer=2
|
||||
initValueScale=6.0
|
||||
layerSizes=2832:50:300:127
|
||||
rnnType=LSTM
|
||||
lookupTableOrder=3
|
||||
addPrior=false
|
||||
addDropoutNodes=false
|
||||
applyMeanVarNorm=false
|
||||
uniformInit=true
|
||||
]
|
||||
SGD=[
|
||||
learningRatesPerSample=0.1
|
||||
momentumPerMB=0.90
|
||||
gradientClippingWithTruncation=true
|
||||
clippingThresholdPerSample=15.0
|
||||
maxEpochs=3
|
||||
gradientcheck=false
|
||||
numMBsToShowResult=1000
|
||||
modelPath=$ExpDir$\cntkdebug.dnn
|
||||
loadBestModel=true
|
||||
AutoAdjust=[
|
||||
autoAdjustLR=adjustafterepoch
|
||||
reduceLearnRateIfImproveLessThan=0
|
||||
increaseLearnRateIfImproveMoreThan=1000000000
|
||||
learnRateDecreaseFactor=0.5
|
||||
learnRateIncreaseFactor=1.382
|
||||
numMiniBatch4LRSearch=100
|
||||
numPrevLearnRates=5
|
||||
numBestSearchEpoch=1
|
||||
]
|
||||
dropoutRate=0
|
||||
]
|
||||
reader=[
|
||||
readerType=LUSequenceReader
|
||||
wordContext=0:1:2
|
||||
randomize=None
|
||||
nbruttsineachrecurrentiter=10
|
||||
wfile=$ExpDir$\sequenceSentence.bin
|
||||
wsize=256
|
||||
wrecords=1000
|
||||
windowSize=10000
|
||||
unk="<unk>"
|
||||
wordmap=$DataDir$\inputmap.txt
|
||||
file=$DataDir$\atis.train.apos.pred.pos.head.IOB.simple
|
||||
features=[
|
||||
dim=0
|
||||
sectionType=data
|
||||
]
|
||||
sequence=[
|
||||
dim=1
|
||||
wrecords=2
|
||||
sectionType=data
|
||||
]
|
||||
labelIn=[
|
||||
dim=1
|
||||
usewordmap=true
|
||||
labelDim=10000
|
||||
labelMappingFile=$ExpDir$\sentenceLabels.txt
|
||||
labelType=Category
|
||||
beginSequence="BOS"
|
||||
endSequence="EOS"
|
||||
usewordmap=true
|
||||
token=$DataDir$\input.txt
|
||||
elementSize=4
|
||||
sectionType=labels
|
||||
mapping=[
|
||||
wrecords=11
|
||||
elementSize=10
|
||||
sectionType=labelMapping
|
||||
]
|
||||
category=[
|
||||
dim=11
|
||||
sectionType=categoryLabels
|
||||
]
|
||||
]
|
||||
labels=[
|
||||
dim=1
|
||||
labelType=Category
|
||||
beginSequence="O"
|
||||
endSequence="O"
|
||||
token=$DataDir$\output.txt
|
||||
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
|
||||
sectionType=labels
|
||||
mapping=[
|
||||
sectionType=labelMapping
|
||||
]
|
||||
category=[
|
||||
sectionType=categoryLabels
|
||||
]
|
||||
]
|
||||
]
|
||||
cvReader=[
|
||||
readerType=LUSequenceReader
|
||||
randomize=None
|
||||
wordContext=0:1:2
|
||||
wfile=$ExpDir$\sequenceSentence.valid.bin
|
||||
wsize=256
|
||||
wrecords=1000
|
||||
windowSize=10000
|
||||
unk="<unk>"
|
||||
wordmap=$DataDir$\inputmap.txt
|
||||
file=$DataDir$\atis.dev.IOB.simple
|
||||
features=[
|
||||
dim=0
|
||||
sectionType=data
|
||||
]
|
||||
sequence=[
|
||||
dim=1
|
||||
wrecords=2
|
||||
sectionType=data
|
||||
]
|
||||
labelIn=[
|
||||
dim=1
|
||||
labelDim=10000
|
||||
labelMappingFile=$ExpDir$\sentenceLabels.in.txt
|
||||
labelType=Category
|
||||
beginSequence="BOS"
|
||||
endSequence="EOS"
|
||||
usewordmap=true
|
||||
token=$DataDir$\input.txt
|
||||
elementSize=4
|
||||
sectionType=labels
|
||||
mapping=[
|
||||
wrecords=11
|
||||
elementSize=10
|
||||
sectionType=labelMapping
|
||||
]
|
||||
category=[
|
||||
dim=11
|
||||
sectionType=categoryLabels
|
||||
]
|
||||
]
|
||||
labels=[
|
||||
dim=1
|
||||
labelType=Category
|
||||
beginSequence="O"
|
||||
endSequence="O"
|
||||
token=$DataDir$\output.txt
|
||||
labelDim=10000
|
||||
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
|
||||
elementSize=4
|
||||
sectionType=labels
|
||||
mapping=[
|
||||
wrecords=3
|
||||
elementSize=10
|
||||
sectionType=labelMapping
|
||||
]
|
||||
category=[
|
||||
dim=3
|
||||
sectionType=categoryLabels
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
configparameters: rnnlu.config:LSTMTest=[
|
||||
action=write
|
||||
minibatchSize=1
|
||||
traceLevel=1
|
||||
deviceId=-1
|
||||
epochSize=4430000
|
||||
defaultHiddenActivity=0.1
|
||||
modelPath=$ExpDir$\cntkdebug.dnn
|
||||
outputNodeNames=outputs
|
||||
reader=[
|
||||
readerType=LUSequenceReader
|
||||
randomize=None
|
||||
wordContext=0:1:2
|
||||
unk="<unk>"
|
||||
wordmap=$DataDir$\inputmap.txt
|
||||
file=$DataDir$\atis.test.apos.pred.pos.head.IOB.simple
|
||||
wfile=$ExpDir$\sequenceSentence.bin
|
||||
wsize=256
|
||||
wrecords=1000
|
||||
windowSize=10000
|
||||
features=[
|
||||
dim=0
|
||||
sectionType=data
|
||||
]
|
||||
sequence=[
|
||||
dim=1
|
||||
wrecords=2
|
||||
sectionType=data
|
||||
]
|
||||
labelIn=[
|
||||
dim=1
|
||||
labelDim=10000
|
||||
labelMappingFile=$ExpDir$\sentenceLabels.txt
|
||||
labelType=Category
|
||||
beginSequence="BOS"
|
||||
endSequence="EOS"
|
||||
usewordmap=true
|
||||
token=$DataDir$\input.txt
|
||||
elementSize=4
|
||||
sectionType=labels
|
||||
mapping=[
|
||||
wrecords=11
|
||||
elementSize=10
|
||||
sectionType=labelMapping
|
||||
]
|
||||
category=[
|
||||
dim=11
|
||||
sectionType=categoryLabels
|
||||
]
|
||||
]
|
||||
labels=[
|
||||
dim=1
|
||||
labelType=Category
|
||||
beginSequence="BOS"
|
||||
endSequence="EOS"
|
||||
token=$DataDir$\output.txt
|
||||
labelDim=127
|
||||
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
|
||||
elementSize=4
|
||||
sectionType=labels
|
||||
mapping=[
|
||||
wrecords=3
|
||||
elementSize=10
|
||||
sectionType=labelMapping
|
||||
]
|
||||
category=[
|
||||
dim=3
|
||||
sectionType=categoryLabels
|
||||
]
|
||||
]
|
||||
]
|
||||
writer=[
|
||||
writerType=LUSequenceWriter
|
||||
outputs=[
|
||||
file=$OutDir$\output.rec.txt
|
||||
token=$DataDir$\output.txt
|
||||
]
|
||||
]
|
||||
]
|
||||
configparameters: rnnlu.config:NdlDir=$ConfigDir$
|
||||
configparameters: rnnlu.config:OutDir=$ExpDir$
|
||||
configparameters: rnnlu.config:stderr=$ExpDir$\ATIS\log
|
||||
configparameters: rnnlu.config:type=double
|
||||
configparameters: rnnlu.config:WorkDir=.
|
||||
<<<<<<<<<<<<<<<<<<<< config <<<<<<<<<<<<<<<<<<<<
|
||||
command: LSTM LSTMTest
|
||||
precision = double
|
||||
SimpleNetworkBuilder Using CPU
|
||||
reading sequence file .\atis.train.apos.pred.pos.head.IOB.simple
|
||||
reading sequence file .\atis.dev.IOB.simple
|
||||
GetTrainCriterionNodes ...
|
||||
GetEvalCriterionNodes ...
|
||||
nodes in the recurrent loops :
|
||||
AutoName37 AutoName4 AutoName15 AutoName16 AutoName18 AutoName19 AutoName5 AutoName8 AutoName1 AutoName9 AutoName12 AutoName13 AutoName14 AutoName20 AutoName7 AutoName6 AutoName21 AutoName2 AutoName22 AutoName25 AutoName26 AutoName27 AutoName28 AutoName29 AutoName30 AutoName3 AutoName31 AutoName34 AutoName35 AutoName36 AutoName38
|
||||
|
||||
Validating node CrossEntropyWithSoftmax
|
||||
|
||||
Validating --> labels = InputValue
|
||||
Validating --> W2 = LearnableParameter
|
||||
Validating --> WXO0 = LearnableParameter
|
||||
Validating --> E0 = LearnableParameter
|
||||
Validating --> features = InputValue
|
||||
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
|
||||
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bo0 = LearnableParameter
|
||||
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
|
||||
Validating --> WHO0 = LearnableParameter
|
||||
Validating --> WCO0 = LearnableParameter
|
||||
Validating --> WXF0 = LearnableParameter
|
||||
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bf0 = LearnableParameter
|
||||
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
|
||||
Validating --> WHF0 = LearnableParameter
|
||||
Validating --> WCF0 = LearnableParameter
|
||||
Validating --> WXI0 = LearnableParameter
|
||||
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bi0 = LearnableParameter
|
||||
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
|
||||
Validating --> WHI0 = LearnableParameter
|
||||
Validating --> WCI0 = LearnableParameter
|
||||
Validating --> WXC0 = LearnableParameter
|
||||
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
|
||||
Validating --> WHC0 = LearnableParameter
|
||||
Validating --> bc0 = LearnableParameter
|
||||
Validating --> AutoName3 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
|
||||
Validating --> AutoName2 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
|
||||
Validating --> AutoName6 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName7 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName1 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
|
||||
Validating --> AutoName5 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName4 = Delay(AutoName38[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
|
||||
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
|
||||
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
|
||||
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
|
||||
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName39 = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[127, 1], AutoName39[127, 1])
|
||||
|
||||
No PreCompute nodes found, skipping PreCompute step
|
||||
Set Max Temp Mem Size For Convolution Nodes to 0 samples.
|
||||
WARNING: there is no convolution node.
|
||||
Finished Epoch[1]: [Training Set] Train Loss Per Sample = 4.7967326 EvalErr Per Sample = 4.7967326 Ave Learn Rate Per Sample = 0.1000000015 Epoch Time=0.177
|
||||
Final Results: Minibatch[1-11]: Samples Seen = 81 CrossEntropyWithSoftmax/Sample = 4.6260059 CrossEntropyWithSoftmax/Sample = 4.6260059
|
||||
Finished Epoch[1]: [Validation Set] Train Loss Per Sample = 4.6260059 EvalErr Per Sample = 4.6260059
|
||||
Finished Epoch[2]: [Training Set] Train Loss Per Sample = 4.4580467 EvalErr Per Sample = 4.4580467 Ave Learn Rate Per Sample = 0.1000000015 Epoch Time=0.178
|
||||
Final Results: Minibatch[1-11]: Samples Seen = 81 CrossEntropyWithSoftmax/Sample = 4.0801723 CrossEntropyWithSoftmax/Sample = 4.0801723
|
||||
Finished Epoch[2]: [Validation Set] Train Loss Per Sample = 4.0801723 EvalErr Per Sample = 4.0801723
|
||||
Finished Epoch[3]: [Training Set] Train Loss Per Sample = 3.6568716 EvalErr Per Sample = 3.6568716 Ave Learn Rate Per Sample = 0.1000000015 Epoch Time=0.171
|
||||
Final Results: Minibatch[1-11]: Samples Seen = 81 CrossEntropyWithSoftmax/Sample = 2.6959986 CrossEntropyWithSoftmax/Sample = 2.6959986
|
||||
Finished Epoch[3]: [Validation Set] Train Loss Per Sample = 2.6959986 EvalErr Per Sample = 2.6959986
|
||||
reading sequence file .\atis.test.apos.pred.pos.head.IOB.simple
|
||||
nodes in the recurrent loops :
|
||||
AutoName37 AutoName4 AutoName15 AutoName16 AutoName18 AutoName19 AutoName5 AutoName8 AutoName1 AutoName9 AutoName12 AutoName13 AutoName14 AutoName20 AutoName7 AutoName6 AutoName21 AutoName2 AutoName22 AutoName25 AutoName26 AutoName27 AutoName28 AutoName29 AutoName30 AutoName3 AutoName31 AutoName34 AutoName35 AutoName36 AutoName38
|
||||
|
||||
Validating node CrossEntropyWithSoftmax
|
||||
|
||||
Validating --> labels = InputValue
|
||||
Validating --> W2 = LearnableParameter
|
||||
Validating --> WXO0 = LearnableParameter
|
||||
Validating --> E0 = LearnableParameter
|
||||
Validating --> features = InputValue
|
||||
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
|
||||
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bo0 = LearnableParameter
|
||||
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
|
||||
Validating --> WHO0 = LearnableParameter
|
||||
Validating --> WCO0 = LearnableParameter
|
||||
Validating --> WXF0 = LearnableParameter
|
||||
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bf0 = LearnableParameter
|
||||
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
|
||||
Validating --> WHF0 = LearnableParameter
|
||||
Validating --> WCF0 = LearnableParameter
|
||||
Validating --> WXI0 = LearnableParameter
|
||||
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bi0 = LearnableParameter
|
||||
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
|
||||
Validating --> WHI0 = LearnableParameter
|
||||
Validating --> WCI0 = LearnableParameter
|
||||
Validating --> WXC0 = LearnableParameter
|
||||
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
|
||||
Validating --> WHC0 = LearnableParameter
|
||||
Validating --> bc0 = LearnableParameter
|
||||
Validating --> AutoName3 = Delay(AutoName38[0, 0])
|
||||
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300, 1])
|
||||
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
|
||||
Validating --> AutoName2 = Delay(AutoName38[0, 0])
|
||||
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300, 1])
|
||||
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
|
||||
Validating --> AutoName6 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName7 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName1 = Delay(AutoName38[0, 0])
|
||||
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300, 1])
|
||||
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
|
||||
Validating --> AutoName5 = Delay(AutoName29[0 {W=0, H=0, C=0}, 0])
|
||||
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName4 = Delay(AutoName38[0, 0])
|
||||
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300, 1])
|
||||
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
|
||||
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
|
||||
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
|
||||
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
|
||||
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName39 = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[127, 1], AutoName39[127, 1])
|
||||
|
||||
|
||||
|
||||
Validating node outputs
|
||||
|
||||
Validating --> W2 = LearnableParameter
|
||||
Validating --> WXO0 = LearnableParameter
|
||||
Validating --> E0 = LearnableParameter
|
||||
Validating --> features = InputValue
|
||||
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
|
||||
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bo0 = LearnableParameter
|
||||
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
|
||||
Validating --> WHO0 = LearnableParameter
|
||||
Validating --> AutoName3 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
|
||||
Validating --> WCO0 = LearnableParameter
|
||||
Validating --> WXF0 = LearnableParameter
|
||||
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bf0 = LearnableParameter
|
||||
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
|
||||
Validating --> WHF0 = LearnableParameter
|
||||
Validating --> AutoName2 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
|
||||
Validating --> WCF0 = LearnableParameter
|
||||
Validating --> AutoName6 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName7 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> WXI0 = LearnableParameter
|
||||
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bi0 = LearnableParameter
|
||||
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
|
||||
Validating --> WHI0 = LearnableParameter
|
||||
Validating --> AutoName1 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
|
||||
Validating --> WCI0 = LearnableParameter
|
||||
Validating --> AutoName5 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> WXC0 = LearnableParameter
|
||||
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
|
||||
Validating --> WHC0 = LearnableParameter
|
||||
Validating --> AutoName4 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> bc0 = LearnableParameter
|
||||
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
|
||||
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
|
||||
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
|
||||
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
|
||||
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> outputs = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
|
||||
|
||||
|
||||
Validating node CrossEntropyWithSoftmax
|
||||
|
||||
Validating --> labels = InputValue
|
||||
Validating --> W2 = LearnableParameter
|
||||
Validating --> WXO0 = LearnableParameter
|
||||
Validating --> E0 = LearnableParameter
|
||||
Validating --> features = InputValue
|
||||
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
|
||||
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bo0 = LearnableParameter
|
||||
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
|
||||
Validating --> WHO0 = LearnableParameter
|
||||
Validating --> WCO0 = LearnableParameter
|
||||
Validating --> WXF0 = LearnableParameter
|
||||
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bf0 = LearnableParameter
|
||||
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
|
||||
Validating --> WHF0 = LearnableParameter
|
||||
Validating --> WCF0 = LearnableParameter
|
||||
Validating --> WXI0 = LearnableParameter
|
||||
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bi0 = LearnableParameter
|
||||
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
|
||||
Validating --> WHI0 = LearnableParameter
|
||||
Validating --> WCI0 = LearnableParameter
|
||||
Validating --> WXC0 = LearnableParameter
|
||||
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
|
||||
Validating --> WHC0 = LearnableParameter
|
||||
Validating --> bc0 = LearnableParameter
|
||||
Validating --> AutoName3 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
|
||||
Validating --> AutoName2 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
|
||||
Validating --> AutoName6 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName7 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName1 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
|
||||
Validating --> AutoName5 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName4 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
|
||||
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
|
||||
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
|
||||
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
|
||||
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName39 = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[127, 1], AutoName39[127, 1])
|
||||
|
||||
nodes in the recurrent loops :
|
||||
AutoName37 AutoName4 AutoName15 AutoName16 AutoName18 AutoName19 AutoName5 AutoName8 AutoName1 AutoName9 AutoName12 AutoName13 AutoName14 AutoName20 AutoName7 AutoName6 AutoName21 AutoName2 AutoName22 AutoName25 AutoName26 AutoName27 AutoName28 AutoName29 AutoName30 AutoName3 AutoName31 AutoName34 AutoName35 AutoName36 AutoName38
|
||||
|
||||
Validating node outputs
|
||||
|
||||
Validating --> W2 = LearnableParameter
|
||||
Validating --> WXO0 = LearnableParameter
|
||||
Validating --> E0 = LearnableParameter
|
||||
Validating --> features = InputValue
|
||||
Validating --> LookupTable = LookupTable(E0[50, 944], features[2832, 1])
|
||||
Validating --> AutoName32 = Times(WXO0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bo0 = LearnableParameter
|
||||
Validating --> AutoName33 = Plus(AutoName32[300, 1], bo0[300, 1])
|
||||
Validating --> WHO0 = LearnableParameter
|
||||
Validating --> WCO0 = LearnableParameter
|
||||
Validating --> WXF0 = LearnableParameter
|
||||
Validating --> AutoName23 = Times(WXF0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bf0 = LearnableParameter
|
||||
Validating --> AutoName24 = Plus(AutoName23[300, 1], bf0[300, 1])
|
||||
Validating --> WHF0 = LearnableParameter
|
||||
Validating --> WCF0 = LearnableParameter
|
||||
Validating --> WXI0 = LearnableParameter
|
||||
Validating --> AutoName10 = Times(WXI0[300, 150], LookupTable[150, 1])
|
||||
Validating --> bi0 = LearnableParameter
|
||||
Validating --> AutoName11 = Plus(AutoName10[300, 1], bi0[300, 1])
|
||||
Validating --> WHI0 = LearnableParameter
|
||||
Validating --> WCI0 = LearnableParameter
|
||||
Validating --> WXC0 = LearnableParameter
|
||||
Validating --> AutoName17 = Times(WXC0[300, 150], LookupTable[150, 1])
|
||||
Validating --> WHC0 = LearnableParameter
|
||||
Validating --> bc0 = LearnableParameter
|
||||
Validating --> AutoName3 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName31 = Times(WHO0[300, 300], AutoName3[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName34 = Plus(AutoName33[300, 1], AutoName31[300, 1])
|
||||
Validating --> AutoName2 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName22 = Times(WHF0[300, 300], AutoName2[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName25 = Plus(AutoName24[300, 1], AutoName22[300, 1])
|
||||
Validating --> AutoName6 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName21 = DiagTimes(WCF0[300, 1], AutoName6[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName26 = Plus(AutoName25[300, 1], AutoName21[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName27 = Sigmoid(AutoName26[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName7 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName28 = ElementTimes(AutoName27[300 {W=0, H=0, C=0}, 1], AutoName7[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName1 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName9 = Times(WHI0[300, 300], AutoName1[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName12 = Plus(AutoName11[300, 1], AutoName9[300, 1])
|
||||
Validating --> AutoName5 = Delay(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName8 = DiagTimes(WCI0[300, 1], AutoName5[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName13 = Plus(AutoName12[300, 1], AutoName8[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName14 = Sigmoid(AutoName13[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName4 = Delay(AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName15 = Times(WHC0[300, 300], AutoName4[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName16 = Plus(AutoName15[300, 1], bc0[300, 1])
|
||||
Validating --> AutoName18 = Plus(AutoName17[300, 1], AutoName16[300, 1])
|
||||
Validating --> AutoName19 = Tanh(AutoName18[300, 1])
|
||||
Validating --> AutoName20 = ElementTimes(AutoName14[300 {W=0, H=0, C=0}, 1], AutoName19[300, 1])
|
||||
Validating --> AutoName29 = Plus(AutoName28[300 {W=0, H=0, C=0}, 1], AutoName20[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName30 = DiagTimes(WCO0[300, 1], AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName35 = Plus(AutoName34[300, 1], AutoName30[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName36 = Sigmoid(AutoName35[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName37 = Tanh(AutoName29[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> AutoName38 = ElementTimes(AutoName36[300 {W=0, H=0, C=0}, 1], AutoName37[300 {W=0, H=0, C=0}, 1])
|
||||
Validating --> outputs = Times(W2[127, 300], AutoName38[300 {W=0, H=0, C=0}, 1])
|
||||
|
||||
Total Samples Evaluated = 91
|
|
@ -1268,14 +1268,22 @@ public:
|
|||
{
|
||||
newNode = new TransposeTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == StrideTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new StrideTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == StrideTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new StrideTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == ElementTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new ElementTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == RowElementTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new RowElementTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == ColumnElementTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new ColumnElementTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == DiagTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new DiagTimesNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
|
||||
|
@ -1606,14 +1614,22 @@ public:
|
|||
{
|
||||
newNode = new TransposeTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == StrideTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new StrideTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == StrideTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new StrideTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == ElementTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new ElementTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == RowElementTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new RowElementTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == ColumnElementTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new ColumnElementTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
}
|
||||
else if (nodeType == DiagTimesNode<ElemType>::TypeName())
|
||||
{
|
||||
newNode = new DiagTimesNode<ElemType>(m_deviceId, nodeName);
|
||||
|
@ -2110,7 +2126,26 @@ public:
|
|||
return newNode;
|
||||
}
|
||||
|
||||
ComputationNodePtr StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"")
|
||||
// Builds a RowElementTimesNode on this network's device, wires `a` and `b` in as its
// two inputs (in that order), registers the node with the network and returns it.
// a        - first input (the node's own validation expects a matrix here)
// b        - second input (the node's own validation expects a row vector here)
// nodeName - name handed to the node constructor; defaults to the empty string
ComputationNodePtr RowElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"")
{
    ComputationNodePtr rowTimesNode(new RowElementTimesNode<ElemType>(m_deviceId, nodeName));

    rowTimesNode->AttachInputs(a, b);
    AddNodeToNet(rowTimesNode);

    return rowTimesNode;
}
|
||||
|
||||
// Builds a ColumnElementTimesNode on this network's device, wires `a` and `b` in as its
// two inputs (in that order), registers the node with the network and returns it.
// a        - first input (the node's own validation expects a matrix here)
// b        - second input (the node's own validation expects a column vector here)
// nodeName - name handed to the node constructor; defaults to the empty string
ComputationNodePtr ColumnElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"")
{
    ComputationNodePtr columnTimesNode(new ColumnElementTimesNode<ElemType>(m_deviceId, nodeName));

    columnTimesNode->AttachInputs(a, b);
    AddNodeToNet(columnTimesNode);

    return columnTimesNode;
}
|
||||
ComputationNodePtr StrideTimes(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"")
|
||||
{
|
||||
ComputationNodePtr newNode(new StrideTimesNode<ElemType>(m_deviceId, nodeName));
|
||||
newNode->AttachInputs(a, b, c);
|
||||
|
|
|
@ -1269,18 +1269,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// inputIndex == 1 (right) - inputGradientValues[1], inputFunctionValues[0]
|
||||
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
|
||||
{
|
||||
size_t gradCol = gradientValues.GetNumCols();
|
||||
size_t inputCol = inputFunctionValues.GetNumCols();
|
||||
inputGradientValues.AddElementProductOf(gradientValues, inputFunctionValues);
|
||||
|
||||
if (gradCol != inputCol && inputCol == 1)
|
||||
{
|
||||
inputGradientValues.SetValue(gradientValues);
|
||||
inputGradientValues.ColumnElementMultiplyWith(inputFunctionValues);
|
||||
}
|
||||
else
|
||||
{
|
||||
inputGradientValues.AddElementProductOf(gradientValues, inputFunctionValues);
|
||||
}
|
||||
#if NANCHECK
|
||||
inputGradientValues.HasNan("ElementTimes");
|
||||
#endif
|
||||
|
@ -1303,30 +1293,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
|
||||
{
|
||||
size_t rows0 = input0.GetNumRows(), cols0 = input0.GetNumCols();
|
||||
size_t rows1 = input1.GetNumRows(), cols1 = input1.GetNumCols();
|
||||
if (rows0 == rows1 && cols0 == cols1)
|
||||
{
|
||||
functionValues.AssignElementProductOf(input0, input1);
|
||||
}
|
||||
else if ((cols0 == 1 || cols1 == 1) && rows1 == rows0) // col vec with matching rows
|
||||
{
|
||||
Matrix<ElemType> tmpMat;
|
||||
if (cols0 == 1)
|
||||
{
|
||||
functionValues.SetValue(input1);
|
||||
functionValues.ColumnElementMultiplyWith(input0);
|
||||
}
|
||||
else if (cols1 == 1)
|
||||
{
|
||||
functionValues.SetValue(input0);
|
||||
functionValues.ColumnElementMultiplyWith(input1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::logic_error("The Matrix<ElemType> dimension in the ElementTimes operation does not match.");
|
||||
}
|
||||
functionValues.AssignElementProductOf(input0, input1);
|
||||
|
||||
#if NANCHECK
|
||||
functionValues.HasNan("ElementTimes");
|
||||
#endif
|
||||
|
@ -1339,29 +1307,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (m_children.size() != 2)
|
||||
throw std::logic_error("ElementTimes operation requires two inputs.");
|
||||
|
||||
size_t index = 0;
|
||||
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
|
||||
//derive number of rows if possible
|
||||
for (size_t index = 0; index < 2; index++)
|
||||
{
|
||||
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0? Inputs(1-index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
|
||||
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0? Inputs(1-index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
|
||||
Inputs(index)->FunctionValues().Resize(rows, cols);
|
||||
}
|
||||
|
||||
index = 1;
|
||||
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
|
||||
{
|
||||
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0? Inputs(1-index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
|
||||
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0? Inputs(1-index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
|
||||
Inputs(index)->FunctionValues().Resize(rows, cols);
|
||||
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
|
||||
{
|
||||
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0 ? Inputs(1 - index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
|
||||
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0 ? Inputs(1 - index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
|
||||
Inputs(index)->FunctionValues().Resize(rows, cols);
|
||||
}
|
||||
}
|
||||
|
||||
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
|
||||
throw std::logic_error("ElementTimes operation: one of the operants has 0 element.");
|
||||
|
||||
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
|
||||
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
|
||||
|
||||
if (rows0 != rows1 || (cols0 != cols1 && cols0 != 1 && cols1 != 1))
|
||||
if (Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumRows() ||
|
||||
Inputs(1)->FunctionValues().GetNumCols() != Inputs(0)->FunctionValues().GetNumCols())
|
||||
throw std::logic_error("The Matrix<ElemType> dimension in the ElementTimes operation does not match.");
|
||||
|
||||
FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
|
||||
|
@ -1387,6 +1348,364 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template class ElementTimesNode<float>;
|
||||
template class ElementTimesNode<double>;
|
||||
|
||||
template<class ElemType>
|
||||
class RowElementTimesNode : public ComputationNode<ElemType>
|
||||
{
|
||||
UsingComputationNodeMembers;
|
||||
public:
|
||||
RowElementTimesNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
m_deviceId = deviceId;
|
||||
MoveMatricesToDevice(deviceId);
|
||||
InitRecurrentNode();
|
||||
}
|
||||
|
||||
RowElementTimesNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
LoadFromFile(fstream, modelVersion, deviceId);
|
||||
}
|
||||
|
||||
// copy constructor
|
||||
RowElementTimesNode(const RowElementTimesNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId), m_tempMatrix(node->m_deviceId)
|
||||
{
|
||||
node->CopyTo(this, newName, flags);
|
||||
}
|
||||
|
||||
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
|
||||
{
|
||||
const std::wstring& name = (newName == L"") ? NodeName() : newName;
|
||||
|
||||
ComputationNodePtr node = new RowElementTimesNode<ElemType>(this, name, flags);
|
||||
return node;
|
||||
}
|
||||
|
||||
virtual const std::wstring OperationName() const { return TypeName(); }
|
||||
static const std::wstring TypeName() { return L"RowElementTimes"; }
|
||||
|
||||
virtual void ComputeInputPartial(const size_t inputIndex)
|
||||
{
|
||||
if (inputIndex > 1)
|
||||
throw std::invalid_argument("RowElementTimes operation only takes two inputs.");
|
||||
|
||||
if (inputIndex == 0)
|
||||
{
|
||||
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), m_tempMatrix);
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), m_tempMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
|
||||
{
|
||||
if (inputIndex > 1)
|
||||
throw std::invalid_argument("RowElementTimes operation only takes two inputs.");
|
||||
|
||||
Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
Matrix<ElemType> sliceInput1Value = Inputs(1 - inputIndex)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
if (inputIndex == 0)
|
||||
{
|
||||
ComputeInputPartialLeftS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputeInputPartialRightS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
//left (input 0) is a matrix
|
||||
static void WINAPI ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
|
||||
Matrix<ElemType>& input0GradientValues,
|
||||
const Matrix<ElemType>& gradientValues,
|
||||
Matrix<ElemType>& tempMatrix)
|
||||
{
|
||||
tempMatrix.SetValue(gradientValues);
|
||||
tempMatrix.RowElementMultiplyWith(input1FunctionValues);
|
||||
input0GradientValues += tempMatrix;
|
||||
|
||||
#if NANCHECK
|
||||
input0GradientValues.HasNan("RowElementTimes");
|
||||
#endif
|
||||
}
|
||||
|
||||
//right (input 1) is a row vector
|
||||
static void WINAPI ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
|
||||
Matrix<ElemType>& input1GradientValues,
|
||||
const Matrix<ElemType>& gradientValues,
|
||||
Matrix<ElemType>& tempMatrix)
|
||||
{
|
||||
tempMatrix.AssignInnerProductOf(gradientValues, input0FunctionValues, true);
|
||||
input1GradientValues += tempMatrix;
|
||||
|
||||
#if NANCHECK
|
||||
input1GradientValues.HasNan("RowElementTimes");
|
||||
#endif
|
||||
}
|
||||
virtual void EvaluateThisNode()
|
||||
{
|
||||
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
|
||||
}
|
||||
|
||||
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
|
||||
{
|
||||
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
|
||||
}
|
||||
|
||||
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
|
||||
{
|
||||
functionValues.SetValue(input0);
|
||||
functionValues.RowElementMultiplyWith(input1);
|
||||
|
||||
#if NANCHECK
|
||||
functionValues.HasNan("RowElementTimes");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void Validate()
|
||||
{
|
||||
PrintSelfBeforeValidation();
|
||||
|
||||
if (m_children.size() != 2)
|
||||
throw std::logic_error("RowElementTimes operation requires two inputs.");
|
||||
|
||||
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
|
||||
throw std::logic_error("RowElementTimes operation: one of the operants has 0 element.");
|
||||
|
||||
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
|
||||
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
|
||||
|
||||
if (cols0 != cols1 || rows1 != 1)
|
||||
throw std::logic_error("RowElementTimes: Either the second operand is not a row vector or the number of columns of operands does not match.");
|
||||
|
||||
FunctionValues().Resize(rows0, cols0);
|
||||
InferImageDimsFromInputs();
|
||||
}
|
||||
|
||||
virtual void InferImageDimsFromInputs()
|
||||
{
|
||||
//input 0 is the matrix and input 1 is a row vector
|
||||
InferImageDimsFromInput(0);
|
||||
}
|
||||
|
||||
virtual void AttachInputs(const ComputationNodePtr leftNode, const ComputationNodePtr rightNode)
|
||||
{
|
||||
m_children.resize(2);
|
||||
m_children[0] = leftNode;
|
||||
m_children[1] = rightNode;
|
||||
}
|
||||
|
||||
virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId)
|
||||
{
|
||||
ComputationNode<ElemType>::MoveMatricesToDevice(deviceId);
|
||||
|
||||
if (deviceId != AUTOPLACEMATRIX)
|
||||
{
|
||||
if (m_tempMatrix.GetDeviceId() != deviceId)
|
||||
m_tempMatrix.TransferFromDeviceToDevice(m_tempMatrix.GetDeviceId(), deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Matrix<ElemType> m_tempMatrix;
|
||||
};
|
||||
|
||||
template class RowElementTimesNode<float>;
|
||||
template class RowElementTimesNode<double>;
|
||||
|
||||
template<class ElemType>
|
||||
class ColumnElementTimesNode : public ComputationNode<ElemType>
|
||||
{
|
||||
UsingComputationNodeMembers;
|
||||
public:
|
||||
ColumnElementTimesNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
m_deviceId = deviceId;
|
||||
MoveMatricesToDevice(deviceId);
|
||||
InitRecurrentNode();
|
||||
}
|
||||
|
||||
ColumnElementTimesNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId), m_tempMatrix(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
LoadFromFile(fstream, modelVersion, deviceId);
|
||||
}
|
||||
|
||||
// copy constructor
|
||||
ColumnElementTimesNode(const ColumnElementTimesNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId), m_tempMatrix(node->m_deviceId)
|
||||
{
|
||||
node->CopyTo(this, newName, flags);
|
||||
}
|
||||
|
||||
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
|
||||
{
|
||||
const std::wstring& name = (newName == L"") ? NodeName() : newName;
|
||||
|
||||
ComputationNodePtr node = new ColumnElementTimesNode<ElemType>(this, name, flags);
|
||||
return node;
|
||||
}
|
||||
|
||||
virtual const std::wstring OperationName() const { return TypeName(); }
|
||||
static const std::wstring TypeName() { return L"ColumnElementTimes"; }
|
||||
|
||||
virtual void ComputeInputPartial(const size_t inputIndex)
|
||||
{
|
||||
if (inputIndex > 1)
|
||||
throw std::invalid_argument("ColumnElementTimes operation only takes two inputs.");
|
||||
|
||||
if (inputIndex == 0)
|
||||
{
|
||||
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), m_tempMatrix);
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), m_tempMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
|
||||
{
|
||||
if (inputIndex > 1)
|
||||
throw std::invalid_argument("ColumnElementTimes operation only takes two inputs.");
|
||||
|
||||
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
if (inputIndex == 0)
|
||||
{
|
||||
Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
|
||||
}
|
||||
else
|
||||
{
|
||||
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
//left (input 0) is a matrix
|
||||
static void WINAPI ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
|
||||
Matrix<ElemType>& input0GradientValues,
|
||||
const Matrix<ElemType>& gradientValues,
|
||||
Matrix<ElemType>& tempMatrix)
|
||||
{
|
||||
tempMatrix.SetValue(gradientValues);
|
||||
tempMatrix.ColumnElementMultiplyWith(input1FunctionValues);
|
||||
input0GradientValues += tempMatrix;
|
||||
|
||||
#if NANCHECK
|
||||
input0GradientValues.HasNan("ColumnElementTimes");
|
||||
#endif
|
||||
}
|
||||
|
||||
//right (input 1) is a col vector
|
||||
static void WINAPI ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
|
||||
Matrix<ElemType>& input1GradientValues,
|
||||
const Matrix<ElemType>& gradientValues,
|
||||
Matrix<ElemType>& tempMatrix)
|
||||
{
|
||||
tempMatrix.AssignInnerProductOf(gradientValues, input0FunctionValues, false);
|
||||
input1GradientValues += tempMatrix;
|
||||
|
||||
#if NANCHECK
|
||||
input1GradientValues.HasNan("ColumnElementTimes");
|
||||
#endif
|
||||
}
|
||||
virtual void EvaluateThisNode()
|
||||
{
|
||||
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
|
||||
}
|
||||
|
||||
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
|
||||
{
|
||||
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
|
||||
}
|
||||
|
||||
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
|
||||
{
|
||||
functionValues.SetValue(input0);
|
||||
functionValues.ColumnElementMultiplyWith(input1);
|
||||
|
||||
#if NANCHECK
|
||||
functionValues.HasNan("ColumnElementTimes");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void Validate()
|
||||
{
|
||||
PrintSelfBeforeValidation();
|
||||
|
||||
if (m_children.size() != 2)
|
||||
throw std::logic_error("ColumnElementTimes operation requires two inputs.");
|
||||
|
||||
//derive number of rows if possible
|
||||
for (size_t index = 0; index < 2; index++)
|
||||
{
|
||||
if (Inputs(index)->OperationName() == LearnableParameter<ElemType>::TypeName())
|
||||
{
|
||||
size_t rows = Inputs(index)->FunctionValues().GetNumRows() == 0 ? Inputs(1 - index)->FunctionValues().GetNumRows() : Inputs(index)->FunctionValues().GetNumRows();
|
||||
size_t cols = Inputs(index)->FunctionValues().GetNumCols() == 0 ? Inputs(1 - index)->FunctionValues().GetNumCols() : Inputs(index)->FunctionValues().GetNumCols();
|
||||
Inputs(index)->FunctionValues().Resize(rows, cols);
|
||||
}
|
||||
}
|
||||
|
||||
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
|
||||
throw std::logic_error("ColumnElementTimes operation: one of the operants has 0 element.");
|
||||
|
||||
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
|
||||
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
|
||||
|
||||
if (rows0 != rows1 || cols1 != 1)
|
||||
throw std::logic_error("ColumnElementTimes: Either the second operand is not a column vector or the number of rows of operands does not match.");
|
||||
|
||||
FunctionValues().Resize(rows0, cols0);
|
||||
InferImageDimsFromInputs();
|
||||
}
|
||||
|
||||
virtual void InferImageDimsFromInputs()
|
||||
{
|
||||
//input 0 is the matrix and input 1 is a column vector
|
||||
InferImageDimsFromInput(0);
|
||||
}
|
||||
|
||||
virtual void AttachInputs(const ComputationNodePtr leftNode, const ComputationNodePtr rightNode)
|
||||
{
|
||||
m_children.resize(2);
|
||||
m_children[0] = leftNode;
|
||||
m_children[1] = rightNode;
|
||||
}
|
||||
|
||||
virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId)
|
||||
{
|
||||
ComputationNode<ElemType>::MoveMatricesToDevice(deviceId);
|
||||
|
||||
if (deviceId != AUTOPLACEMATRIX)
|
||||
{
|
||||
if (m_tempMatrix.GetDeviceId() != deviceId)
|
||||
m_tempMatrix.TransferFromDeviceToDevice(m_tempMatrix.GetDeviceId(), deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Matrix<ElemType> m_tempMatrix;
|
||||
};
|
||||
|
||||
template class ColumnElementTimesNode<float>;
|
||||
template class ColumnElementTimesNode<double>;
|
||||
|
||||
template<class ElemType>
|
||||
class PlusNode : public ComputationNode<ElemType>
|
||||
{
|
||||
|
@ -3021,14 +3340,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
UsingComputationNodeMembers;
|
||||
|
||||
size_t mStrideDim; /// the dimension index on which stride works
|
||||
size_t mStride; /// the stride
|
||||
size_t m_StrideDim; /// the dimension index on which stride works
|
||||
size_t m_Stride; /// the stride
|
||||
|
||||
private:
|
||||
|
||||
void UpdateStride(const Matrix<ElemType>& input1)
|
||||
{
|
||||
mStride = input1.GetNumCols();
|
||||
m_Stride = input1.GetNumCols();
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -3037,21 +3356,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
m_deviceId = deviceId;
|
||||
MoveMatricesToDevice(deviceId);
|
||||
mStride = 1;
|
||||
m_Stride = 1;
|
||||
InitRecurrentNode();
|
||||
}
|
||||
|
||||
StrideTimesNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
|
||||
{
|
||||
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
|
||||
mStride = 1;
|
||||
m_Stride = 1;
|
||||
LoadFromFile(fstream, modelVersion, deviceId);
|
||||
}
|
||||
|
||||
// copy constructor
|
||||
StrideTimesNode(const StrideTimesNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId)
|
||||
{
|
||||
mStride = 1;
|
||||
m_Stride = 1;
|
||||
node->CopyTo(this, newName, flags);
|
||||
}
|
||||
|
||||
|
@ -3078,7 +3397,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
if (mStrideDim == 1) /// column stride
|
||||
if (m_StrideDim == 1) /// column stride
|
||||
{
|
||||
if (inputIndex == 0) //left derivative
|
||||
{
|
||||
|
@ -3133,7 +3452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (mStrideDim == 0) /// row stride
|
||||
else if (m_StrideDim == 0) /// row stride
|
||||
{
|
||||
if (inputIndex == 0) //left derivative
|
||||
{
|
||||
|
@ -3226,12 +3545,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
|
||||
UpdateStride(Inputs(1)->FunctionValues());
|
||||
if (mStrideDim == 0)
|
||||
if (m_StrideDim == 0)
|
||||
FunctionValues().Resize(rows0 / m_samplesInRecurrentStep, cols1);
|
||||
if (mStrideDim == 1)
|
||||
if (m_StrideDim == 1)
|
||||
FunctionValues().Resize(rows0, cols1);
|
||||
|
||||
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), mStride, mStrideDim);
|
||||
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_Stride, m_StrideDim);
|
||||
#ifdef DEBUG_DECODER
|
||||
fprintf(stderr, "Times node %ls output norm = %.8e, input(0) norm = %.8e, input(1) norm = %.8e\n", this->NodeName().c_str(), FunctionValues().FrobeniusNorm(),
|
||||
Inputs(0)->FunctionValues().FrobeniusNorm(), Inputs(1)->FunctionValues().FrobeniusNorm());
|
||||
|
@ -3244,13 +3563,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
UpdateStride(sliceInput1Value);
|
||||
if (mStrideDim == 0)
|
||||
if (m_StrideDim == 0)
|
||||
FunctionValues().Resize(rows0 / m_samplesInRecurrentStep, cols1);
|
||||
if (mStrideDim == 1)
|
||||
if (m_StrideDim == 1)
|
||||
FunctionValues().Resize(rows0, cols1);
|
||||
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
|
||||
|
||||
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, mStride, mStrideDim);
|
||||
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_Stride, m_StrideDim);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3344,30 +3663,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (Inputs(2)->FunctionValues().GetNumElements() != 1)
|
||||
LogicError("StrideTimes : input(2) should be a single element matrix");
|
||||
|
||||
mStrideDim = (size_t) Inputs(2)->FunctionValues().Get00Element();
|
||||
m_StrideDim = (size_t) Inputs(2)->FunctionValues().Get00Element();
|
||||
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
|
||||
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
|
||||
|
||||
if (mStrideDim != 0 && mStrideDim != 1)
|
||||
if (m_StrideDim != 0 && m_StrideDim != 1)
|
||||
LogicError("StrideTimes : stride dim must be either 0 (row) or 1 (column)");
|
||||
|
||||
if (Inputs(2)->NeedGradient())
|
||||
LogicError("StrideTImes : no gradient update should be on input(2)");
|
||||
|
||||
//cols0 and rows1 may have been changed so don't use them in the following check
|
||||
if (mStrideDim == 0)
|
||||
if (m_StrideDim == 0)
|
||||
{
|
||||
if (rows1 != cols0)
|
||||
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and rows %d in B.", mStrideDim, cols0, rows1);
|
||||
size_t T1 = rows0 / mStride;
|
||||
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and rows %d in B.", m_StrideDim, cols0, rows1);
|
||||
size_t T1 = rows0 / m_Stride;
|
||||
FunctionValues().Resize(T1, cols1);
|
||||
}
|
||||
|
||||
//cols0 and rows1 may have been changed so don't use them in the following check
|
||||
if (mStrideDim == 1)
|
||||
if (m_StrideDim == 1)
|
||||
{
|
||||
if (cols0/mStride != rows1)
|
||||
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and row number %d in B.", mStrideDim, cols0, rows1);
|
||||
if (cols0/m_Stride != rows1)
|
||||
LogicError("The Matrix dimension in the StrideTimes operation in dim %d does not match for cols %d in A and row number %d in B.", m_StrideDim, cols0, rows1);
|
||||
FunctionValues().Resize(rows0, cols1);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,284 +1,290 @@
|
|||
//
|
||||
// <copyright file="NetworkDescriptionLanguage.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
// NetworkDescriptionLanguage.cpp : Code used to interpret the Network Description Language.
|
||||
//
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
|
||||
|
||||
#include "NetworkDescriptionLanguage.h"
|
||||
#include "SynchronousExecutionEngine.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// DuplicateNode - Duplicate a node in a macro as needed (it might already exist)
|
||||
// node - node we are duplicating
|
||||
// return - the new duplicated node if it didn't exist, or the previously duplicated node if it already did
|
||||
template <typename ElemType>
|
||||
NDLNode<ElemType>* NDLScript<ElemType>::DuplicateNode(NDLNode<ElemType>* node)
|
||||
{
|
||||
NDLNode<ElemType>* newNode = node->Copy();
|
||||
m_children.push_back(newNode);
|
||||
newNode->SetParentScript(this);
|
||||
return newNode;
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
NDLScript<ElemType>::NDLScript(const NDLScript& copyMe) : ConfigParser(copyMe)
|
||||
{
|
||||
m_baseName = copyMe.m_baseName;
|
||||
m_scriptString = copyMe.m_scriptString;
|
||||
m_macroNode = copyMe.m_macroNode;
|
||||
m_noDefinitions = copyMe.m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
|
||||
m_definingMacro = false; // not defining when expanding macros (only reason to call this method
|
||||
m_cn = copyMe.m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
|
||||
|
||||
// script lines in parsed node order
|
||||
for (NDLNode<ElemType>* node : copyMe.m_script)
|
||||
{
|
||||
// duplicate this node
|
||||
NDLNode<ElemType>* newNode = DuplicateNode(node);
|
||||
AddSymbol(newNode->GetName(), newNode);
|
||||
|
||||
// now get the parameters to the functions added
|
||||
ConfigValue value = newNode->GetParamString();
|
||||
ParseParameters(newNode, value, true /*createNew*/);
|
||||
|
||||
// add it to the new script
|
||||
m_script.push_back(newNode);
|
||||
}
|
||||
|
||||
// now search the symbol table for other symbols that haven't been copied yet
|
||||
// this happens for constants defined in macros and such
|
||||
for (std::pair<std::string, NDLNode<ElemType>*> pair : copyMe.m_symbols)
|
||||
{
|
||||
// if we can't find the symbol in the copied symbol table, copy it here
|
||||
if (m_symbols.find(pair.first) == end(m_symbols))
|
||||
{
|
||||
// duplicate this node
|
||||
NDLNode<ElemType>* newNode = DuplicateNode(pair.second);
|
||||
AddSymbol(pair.first, newNode);
|
||||
// anything that takes parameters should be evaluated in the script loop
|
||||
assert(newNode->GetParamString().empty());
|
||||
}
|
||||
}
|
||||
// NOTE: the child nodes get populated as the nodes are duplicated in the loop above
|
||||
// we shouldn't try to duplicate them separately
|
||||
}
|
||||
|
||||
// copy constructor, creates a new disconnected copy of this node
|
||||
// doesn't copy everything, so use for macro expansion only (it's private)
|
||||
// copyMe - node to copy
|
||||
template <typename ElemType>
|
||||
NDLNode<ElemType>::NDLNode(const NDLNode<ElemType>& copyMe)
|
||||
{
|
||||
m_name = copyMe.m_name; // value on the left of the equals
|
||||
m_value = copyMe.m_value; // value on the right of the equals (CN node name, or value)
|
||||
m_parent = copyMe.m_parent; // parent script
|
||||
m_type = copyMe.m_type; //type of node
|
||||
m_paramString = copyMe.m_paramString; // parameter of a function/array
|
||||
m_paramMacro = copyMe.m_paramMacro; // parameter of a macro (the variables used in the macro definition)
|
||||
// don't copy over the parameters, they will be reparsed after the copy
|
||||
//m_parameters = copyMe.m_parameters; // copy over the parameters straight
|
||||
|
||||
m_eval = nullptr; // pointer to an arbitrary eval structure
|
||||
// script for macro calls, need to expand the macro for each call
|
||||
// if it's not expanded the evalValue will be overwitten on multiple calls to a macro
|
||||
m_script = (copyMe.m_script) ? new NDLScript<ElemType>(*copyMe.m_script) : nullptr;
|
||||
}
|
||||
template <typename ElemType>
|
||||
NDLScript<ElemType>::NDLScript(const NDLScript&& moveMe) : ConfigParser(move(moveMe))
|
||||
{
|
||||
m_baseName = move(moveMe.m_baseName);
|
||||
m_scriptString = move(moveMe.m_scriptString);
|
||||
m_script = move(moveMe.m_script); // script lines in parsed node order, macros will have definition followed by body
|
||||
m_symbols = move(moveMe.m_symbols); // symbol table
|
||||
m_macroNode = move(moveMe.m_macroNode); // set when interpretting a macro definition
|
||||
m_noDefinitions = move(moveMe.m_noDefinitions); // no definitions can be made in this script, interpret all macro/function names as calls
|
||||
m_definingMacro = move(moveMe.m_definingMacro);
|
||||
m_children = move(moveMe.m_children); // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
|
||||
m_cn = move(moveMe.m_cn); // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
|
||||
}
|
||||
|
||||
// EqualInsensitive - check whether a name equals a reference name (or its alternate), ignoring case
// string1 - [in,out] string to compare; on a match it is overwritten with string2, i.e. the
//           reference spelling, even when the match was made through the alternate name
// string2 - reference string to compare against
// alternate - alternate naming of the string, may be null when there is none
// return - true if string1 matches string2 or the alternate (same length, equal ignoring case)
// The comparison uses std::towlower instead of the Microsoft-specific _wcsnicmp, so it only
// depends on the standard library.
bool EqualInsensitive(std::wstring& string1, const std::wstring& string2, const wchar_t* alternate/*=NULL*/)
{
    // case-insensitive equality over exactly `length` characters of both operands
    const auto sameIgnoringCase = [](const wchar_t* lhs, const wchar_t* rhs, size_t length) -> bool
    {
        for (size_t i = 0; i < length; ++i)
        {
            if (std::towlower(lhs[i]) != std::towlower(rhs[i]))
                return false;
        }
        return true;
    };

    const size_t length = string1.size();

    // lengths are checked first, so the character loop never runs past either operand
    bool equal = length == string2.size() && sameIgnoringCase(string1.c_str(), string2.c_str(), length);

    if (!equal && alternate != nullptr)
        equal = length == std::wcslen(alternate) && sameIgnoringCase(string1.c_str(), alternate, length);

    // hand back the reference spelling
    if (equal)
        string1 = string2;

    return equal;
}
|
||||
|
||||
// ++ operator for this enum, so loops work
|
||||
NDLPass &operator++(NDLPass &ndlPass) {
|
||||
assert(ndlPass != ndlPassMax);
|
||||
ndlPass = static_cast<NDLPass>(ndlPass + 1);
|
||||
return ndlPass;
|
||||
}
|
||||
|
||||
// CheckFunction - check to see if we match a function name
|
||||
// string1 - [in,out] string to compare, if comparision is equal and at least half the full node name will replace with full node name
|
||||
// allowUndeterminedVariable - [out] set to true if undetermined variables (symbols yet to be defined) are allowed here
|
||||
// return - true if function name found
|
||||
template <typename ElemType>
|
||||
bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
||||
{
|
||||
std::wstring nodeType = msra::strfun::utf16(p_nodeType);
|
||||
bool ret = false;
|
||||
if (allowUndeterminedVariable)
|
||||
*allowUndeterminedVariable = true; // be default we allow undetermined variables
|
||||
if (EqualInsensitive(nodeType, InputValue<ElemType>::TypeName(), L"Input"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, InputValue<ElemType>::SparseTypeName(), L"SparseInput"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LearnableParameter<ElemType>::TypeName(), L"Parameter"))
|
||||
ret = true;
|
||||
//else if (EqualInsensitive(nodeType, SparseLearnableParameter<ElemType>::TypeName(), L"SparseParameter"))
|
||||
// ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"Constant", L"Const"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"ImageInput", L"Image"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"SparseImageInput", L"SparseImage"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SumElementsNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SumColumnElementsNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ScaleNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TransposeNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TransposeTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ElementTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, DiagTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CosDistanceNode<ElemType>::TypeName(), L"CosDist"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, KhatriRaoProductNode<ElemType>::TypeName(), L"ColumnwiseCrossProduct"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PlusNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MinusNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, NegateNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RectifiedLinearNode<ElemType>::TypeName(), L"ReLU"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SigmoidNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TanhNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ExpNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LogNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CosineNode<ElemType>::TypeName(), L"Cos"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SoftmaxNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LogSoftmaxNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SquareErrorNode<ElemType>::TypeName(), L"SE"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CEWithSM"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CrossEntropyNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ClassBasedCrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CBCEWithSM"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MatrixL1RegNode<ElemType>::TypeName(), L"L1Reg"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MatrixL2RegNode<ElemType>::TypeName(), L"L2Reg"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PerDimMeanVarNormalizationNode<ElemType>::TypeName(),L"PerDimMVNorm"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PerDimMeanVarDeNormalizationNode<ElemType>::TypeName(),L"PerDimMVDeNorm"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ErrorPredictionNode<ElemType>::TypeName(), L"ClassificationError"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, DropoutNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ReshapeNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowRepeatNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MeanNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, InvStdDevNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ConvolutionNode<ElemType>::TypeName(), L"Convolve"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MaxPoolingNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, AveragePoolingNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PastValueNode<ElemType>::TypeName(), L"Delay"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, FutureValueNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CosDistanceWithNegativeSamplesNode<ElemType>::TypeName(), L"CosWithNegSamples"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TimeReverseNode<ElemType>::TypeName(), L"TimeReverse"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CRFNode<ElemType>::TypeName(), L"CRF"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, DummyCriterionNode<ElemType>::TypeName(), L"DummyCriterion"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ParallelNode<ElemType>::TypeName(), L"Parallel"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LSTMNode<ElemType>::TypeName(), L"LSTM"))
|
||||
ret = true;
|
||||
//
|
||||
// <copyright file="NetworkDescriptionLanguage.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
// NetworkDescriptionLanguage.cpp : Code used to interpret the Network Description Language.
|
||||
//
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
|
||||
|
||||
#include "NetworkDescriptionLanguage.h"
|
||||
#include "SynchronousExecutionEngine.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// DuplicateNode - Duplicate a node in a macro as needed (it might already exist)
|
||||
// node - node we are duplicating
|
||||
// return - the new duplicated node if it didn't exist, or the previously duplicated node if it already did
|
||||
template <typename ElemType>
|
||||
NDLNode<ElemType>* NDLScript<ElemType>::DuplicateNode(NDLNode<ElemType>* node)
|
||||
{
|
||||
NDLNode<ElemType>* newNode = node->Copy();
|
||||
m_children.push_back(newNode);
|
||||
newNode->SetParentScript(this);
|
||||
return newNode;
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
NDLScript<ElemType>::NDLScript(const NDLScript& copyMe) : ConfigParser(copyMe)
|
||||
{
|
||||
m_baseName = copyMe.m_baseName;
|
||||
m_scriptString = copyMe.m_scriptString;
|
||||
m_macroNode = copyMe.m_macroNode;
|
||||
m_noDefinitions = copyMe.m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
|
||||
m_definingMacro = false; // not defining when expanding macros (only reason to call this method
|
||||
m_cn = copyMe.m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
|
||||
|
||||
// script lines in parsed node order
|
||||
for (NDLNode<ElemType>* node : copyMe.m_script)
|
||||
{
|
||||
// duplicate this node
|
||||
NDLNode<ElemType>* newNode = DuplicateNode(node);
|
||||
AddSymbol(newNode->GetName(), newNode);
|
||||
|
||||
// now get the parameters to the functions added
|
||||
ConfigValue value = newNode->GetParamString();
|
||||
ParseParameters(newNode, value, true /*createNew*/);
|
||||
|
||||
// add it to the new script
|
||||
m_script.push_back(newNode);
|
||||
}
|
||||
|
||||
// now search the symbol table for other symbols that haven't been copied yet
|
||||
// this happens for constants defined in macros and such
|
||||
for (std::pair<std::string, NDLNode<ElemType>*> pair : copyMe.m_symbols)
|
||||
{
|
||||
// if we can't find the symbol in the copied symbol table, copy it here
|
||||
if (m_symbols.find(pair.first) == end(m_symbols))
|
||||
{
|
||||
// duplicate this node
|
||||
NDLNode<ElemType>* newNode = DuplicateNode(pair.second);
|
||||
AddSymbol(pair.first, newNode);
|
||||
// anything that takes parameters should be evaluated in the script loop
|
||||
assert(newNode->GetParamString().empty());
|
||||
}
|
||||
}
|
||||
// NOTE: the child nodes get populated as the nodes are duplicated in the loop above
|
||||
// we shouldn't try to duplicate them separately
|
||||
}
|
||||
|
||||
// copy constructor, creates a new disconnected copy of this node
|
||||
// doesn't copy everything, so use for macro expansion only (it's private)
|
||||
// copyMe - node to copy
|
||||
template <typename ElemType>
|
||||
NDLNode<ElemType>::NDLNode(const NDLNode<ElemType>& copyMe)
|
||||
{
|
||||
m_name = copyMe.m_name; // value on the left of the equals
|
||||
m_value = copyMe.m_value; // value on the right of the equals (CN node name, or value)
|
||||
m_parent = copyMe.m_parent; // parent script
|
||||
m_type = copyMe.m_type; //type of node
|
||||
m_paramString = copyMe.m_paramString; // parameter of a function/array
|
||||
m_paramMacro = copyMe.m_paramMacro; // parameter of a macro (the variables used in the macro definition)
|
||||
// don't copy over the parameters, they will be reparsed after the copy
|
||||
//m_parameters = copyMe.m_parameters; // copy over the parameters straight
|
||||
|
||||
m_eval = nullptr; // pointer to an arbitrary eval structure
|
||||
// script for macro calls, need to expand the macro for each call
|
||||
// if it's not expanded the evalValue will be overwitten on multiple calls to a macro
|
||||
m_script = (copyMe.m_script) ? new NDLScript<ElemType>(*copyMe.m_script) : nullptr;
|
||||
}
|
||||
template <typename ElemType>
|
||||
NDLScript<ElemType>::NDLScript(const NDLScript&& moveMe) : ConfigParser(move(moveMe))
|
||||
{
|
||||
m_baseName = move(moveMe.m_baseName);
|
||||
m_scriptString = move(moveMe.m_scriptString);
|
||||
m_script = move(moveMe.m_script); // script lines in parsed node order, macros will have definition followed by body
|
||||
m_symbols = move(moveMe.m_symbols); // symbol table
|
||||
m_macroNode = move(moveMe.m_macroNode); // set when interpretting a macro definition
|
||||
m_noDefinitions = move(moveMe.m_noDefinitions); // no definitions can be made in this script, interpret all macro/function names as calls
|
||||
m_definingMacro = move(moveMe.m_definingMacro);
|
||||
m_children = move(moveMe.m_children); // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
|
||||
m_cn = move(moveMe.m_cn); // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
|
||||
}
|
||||
|
||||
// EqualInsensitive - check whether a name equals a reference name (or its alternate), ignoring case
// string1 - [in,out] string to compare; on a match it is overwritten with string2, i.e. the
//           reference spelling, even when the match was made through the alternate name
// string2 - reference string to compare against
// alternate - alternate naming of the string, may be null when there is none
// return - true if string1 matches string2 or the alternate (same length, equal ignoring case)
// The comparison uses std::towlower instead of the Microsoft-specific _wcsnicmp, so it only
// depends on the standard library.
bool EqualInsensitive(std::wstring& string1, const std::wstring& string2, const wchar_t* alternate/*=NULL*/)
{
    // case-insensitive equality over exactly `length` characters of both operands
    const auto sameIgnoringCase = [](const wchar_t* lhs, const wchar_t* rhs, size_t length) -> bool
    {
        for (size_t i = 0; i < length; ++i)
        {
            if (std::towlower(lhs[i]) != std::towlower(rhs[i]))
                return false;
        }
        return true;
    };

    const size_t length = string1.size();

    // lengths are checked first, so the character loop never runs past either operand
    bool equal = length == string2.size() && sameIgnoringCase(string1.c_str(), string2.c_str(), length);

    if (!equal && alternate != nullptr)
        equal = length == std::wcslen(alternate) && sameIgnoringCase(string1.c_str(), alternate, length);

    // hand back the reference spelling
    if (equal)
        string1 = string2;

    return equal;
}
|
||||
|
||||
// ++ operator for this enum, so loops work
|
||||
NDLPass &operator++(NDLPass &ndlPass) {
|
||||
assert(ndlPass != ndlPassMax);
|
||||
ndlPass = static_cast<NDLPass>(ndlPass + 1);
|
||||
return ndlPass;
|
||||
}
|
||||
|
||||
// CheckFunction - check to see if we match a function name
|
||||
// string1 - [in,out] string to compare, if comparision is equal and at least half the full node name will replace with full node name
|
||||
// allowUndeterminedVariable - [out] set to true if undetermined variables (symbols yet to be defined) are allowed here
|
||||
// return - true if function name found
|
||||
template <typename ElemType>
|
||||
bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
||||
{
|
||||
std::wstring nodeType = msra::strfun::utf16(p_nodeType);
|
||||
bool ret = false;
|
||||
if (allowUndeterminedVariable)
|
||||
*allowUndeterminedVariable = true; // be default we allow undetermined variables
|
||||
if (EqualInsensitive(nodeType, InputValue<ElemType>::TypeName(), L"Input"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, InputValue<ElemType>::SparseTypeName(), L"SparseInput"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LearnableParameter<ElemType>::TypeName(), L"Parameter"))
|
||||
ret = true;
|
||||
//else if (EqualInsensitive(nodeType, SparseLearnableParameter<ElemType>::TypeName(), L"SparseParameter"))
|
||||
// ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"Constant", L"Const"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"ImageInput", L"Image"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, L"SparseImageInput", L"SparseImage"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SumElementsNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SumColumnElementsNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ScaleNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TransposeNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TransposeTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, StrideTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ElementTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowElementTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ColumnElementTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, DiagTimesNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CosDistanceNode<ElemType>::TypeName(), L"CosDist"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, KhatriRaoProductNode<ElemType>::TypeName(), L"ColumnwiseCrossProduct"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PlusNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MinusNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, NegateNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RectifiedLinearNode<ElemType>::TypeName(), L"ReLU"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SigmoidNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TanhNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ExpNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LogNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CosineNode<ElemType>::TypeName(), L"Cos"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SoftmaxNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LogSoftmaxNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, SquareErrorNode<ElemType>::TypeName(), L"SE"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CEWithSM"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CrossEntropyNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ClassBasedCrossEntropyWithSoftmaxNode<ElemType>::TypeName(), L"CBCEWithSM"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MatrixL1RegNode<ElemType>::TypeName(), L"L1Reg"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MatrixL2RegNode<ElemType>::TypeName(), L"L2Reg"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PerDimMeanVarNormalizationNode<ElemType>::TypeName(),L"PerDimMVNorm"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PerDimMeanVarDeNormalizationNode<ElemType>::TypeName(),L"PerDimMVDeNorm"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ErrorPredictionNode<ElemType>::TypeName(), L"ClassificationError"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, DropoutNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ReshapeNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowRepeatNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MeanNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, InvStdDevNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ConvolutionNode<ElemType>::TypeName(), L"Convolve"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, MaxPoolingNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, AveragePoolingNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PastValueNode<ElemType>::TypeName(), L"Delay"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, FutureValueNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, RowStackNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CosDistanceWithNegativeSamplesNode<ElemType>::TypeName(), L"CosWithNegSamples"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, TimeReverseNode<ElemType>::TypeName(), L"TimeReverse"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, CRFNode<ElemType>::TypeName(), L"CRF"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, DummyCriterionNode<ElemType>::TypeName(), L"DummyCriterion"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, ParallelNode<ElemType>::TypeName(), L"Parallel"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, LSTMNode<ElemType>::TypeName(), L"LSTM"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, PairNetworkNode<ElemType>::TypeName(), L"PairNetwork"))
|
||||
ret = true;
|
||||
else if (EqualInsensitive(nodeType, StrideTimesNode<ElemType>::TypeName(), L"StrideTimes"))
|
||||
ret = true;
|
||||
|
||||
// return the actual node name in the parameter if we found something
|
||||
if (ret)
|
||||
{
|
||||
p_nodeType = msra::strfun::utf8(nodeType);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// generic definition of the static s_global member, constructed from the string "global"
template <typename ElemType>
|
||||
NDLScript<ElemType> NDLScript<ElemType>::s_global("global");
|
||||
|
||||
// declare the static variables from the classes
// explicit specializations of s_global for float and double, value-initialized with {}
// NOTE(review): these specializations, not the generic "global" definition above, appear to be
// what NDLScript<float>/NDLScript<double> end up using - confirm that this is intended
|
||||
template<> NDLScript<float> NDLScript<float>::s_global{};
|
||||
template<> NDLScript<double> NDLScript<double>::s_global{};
|
||||
|
||||
// explicit specializations of the static s_nameCounter member, starting at 0
template<> int NDLNode<float>::s_nameCounter = 0;
|
||||
template<> int NDLNode<double>::s_nameCounter = 0;
|
||||
|
||||
// explicit instantiations of NDLNode for float and double
template class NDLNode<float>;
|
||||
template class NDLNode<double>;
|
||||
|
||||
// explicit instantiations of NDLScript for float and double
template class NDLScript<float>;
|
||||
template class NDLScript<double>;
|
||||
|
||||
}}}
|
||||
|
||||
// return the actual node name in the parameter if we found something
|
||||
if (ret)
|
||||
{
|
||||
p_nodeType = msra::strfun::utf8(nodeType);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// generic definition of the static s_global member, constructed from the string "global"
template <typename ElemType>
|
||||
NDLScript<ElemType> NDLScript<ElemType>::s_global("global");
|
||||
|
||||
// declare the static variables from the classes
// explicit specializations of s_global for float and double, value-initialized with {}
// NOTE(review): these specializations, not the generic "global" definition above, appear to be
// what NDLScript<float>/NDLScript<double> end up using - confirm that this is intended
|
||||
template<> NDLScript<float> NDLScript<float>::s_global{};
|
||||
template<> NDLScript<double> NDLScript<double>::s_global{};
|
||||
|
||||
// explicit specializations of the static s_nameCounter member, starting at 0
template<> int NDLNode<float>::s_nameCounter = 0;
|
||||
template<> int NDLNode<double>::s_nameCounter = 0;
|
||||
|
||||
// explicit instantiations of NDLNode for float and double
template class NDLNode<float>;
|
||||
template class NDLNode<double>;
|
||||
|
||||
// explicit instantiations of NDLScript for float and double
template class NDLScript<float>;
|
||||
template class NDLScript<double>;
|
||||
|
||||
}}}
|
||||
|
|
|
@ -247,7 +247,7 @@ public:
|
|||
|
||||
if (doGradientCheck && sizeof(ElemType) != sizeof(double))
|
||||
{
|
||||
LogicError("Gradient check needs to use type = double");
|
||||
LogicError("Gradient check needs to use precision = double");
|
||||
}
|
||||
m_doUnitTest = configSGD("unittest", "false");
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче