Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp
This commit is contained in:
Commit
1a0b88be0c
CNTK.sln | 11
@@ -458,13 +458,21 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{BFBC6BE1-C33
Tests\EndToEndTests\SLU\atis.dev.IOB.simple = Tests\EndToEndTests\SLU\atis.dev.IOB.simple
Tests\EndToEndTests\SLU\atis.test.apos.pred.pos.head.IOB.simple = Tests\EndToEndTests\SLU\atis.test.apos.pred.pos.head.IOB.simple
Tests\EndToEndTests\SLU\atis.train.apos.pred.pos.head.IOB.simple = Tests\EndToEndTests\SLU\atis.train.apos.pred.pos.head.IOB.simple
Tests\EndToEndTests\SLU\baseline.linux.cpu.txt = Tests\EndToEndTests\SLU\baseline.linux.cpu.txt
Tests\EndToEndTests\SLU\baseline.linux.gpu.txt = Tests\EndToEndTests\SLU\baseline.linux.gpu.txt
Tests\EndToEndTests\SLU\baseline.windows.cpu.txt = Tests\EndToEndTests\SLU\baseline.windows.cpu.txt
Tests\EndToEndTests\SLU\baseline.windows.gpu.txt = Tests\EndToEndTests\SLU\baseline.windows.gpu.txt
Tests\EndToEndTests\SLU\globals.cntk = Tests\EndToEndTests\SLU\globals.cntk
Tests\EndToEndTests\SLU\input.txt = Tests\EndToEndTests\SLU\input.txt
Tests\EndToEndTests\SLU\inputmap.txt = Tests\EndToEndTests\SLU\inputmap.txt
Tests\EndToEndTests\SLU\lstm.ndl = Tests\EndToEndTests\SLU\lstm.ndl
Tests\EndToEndTests\SLU\lstmNDL.txt = Tests\EndToEndTests\SLU\lstmNDL.txt
Tests\EndToEndTests\SLU\output.txt = Tests\EndToEndTests\SLU\output.txt
Tests\EndToEndTests\SLU\README.txt = Tests\EndToEndTests\SLU\README.txt
Tests\EndToEndTests\SLU\rnnlu.cntk = Tests\EndToEndTests\SLU\rnnlu.cntk
Tests\EndToEndTests\SLU\rnnlu.ndl.cntk = Tests\EndToEndTests\SLU\rnnlu.ndl.cntk
Tests\EndToEndTests\SLU\run-test = Tests\EndToEndTests\SLU\run-test
Tests\EndToEndTests\SLU\testcases.yml = Tests\EndToEndTests\SLU\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MNIST", "MNIST", "{FA33A61E-95C7-4049-8111-22058CE361A3}"
@ -509,7 +517,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CIFAR-10", "CIFAR-10", "{77
|
|||
Examples\Image\Miscellaneous\CIFAR-10\CifarConverter.py = Examples\Image\Miscellaneous\CIFAR-10\CifarConverter.py
|
||||
Examples\Image\Miscellaneous\CIFAR-10\labelsmap.txt = Examples\Image\Miscellaneous\CIFAR-10\labelsmap.txt
|
||||
Examples\Image\Miscellaneous\CIFAR-10\Macros.ndl = Examples\Image\Miscellaneous\CIFAR-10\Macros.ndl
|
||||
Examples\Image\Miscellaneous\CIFAR-10\README.md = Examples\Image\Miscellaneous\CIFAR-10\README.md
|
||||
Examples\Image\Miscellaneous\CIFAR-10\readme.txt = Examples\Image\Miscellaneous\CIFAR-10\readme.txt
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ImageNet", "ImageNet", "{EF710C5A-E616-442A-889D-C997D39AF2E1}"
|
||||
|
@ -666,6 +674,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{850008
|
|||
ProjectSection(SolutionItems) = preProject
|
||||
Examples\Text\PennTreebank\Config\rnn.cntk = Examples\Text\PennTreebank\Config\rnn.cntk
|
||||
Examples\Text\PennTreebank\Config\S2SAutoEncoder.cntk = Examples\Text\PennTreebank\Config\S2SAutoEncoder.cntk
|
||||
Examples\Text\PennTreebank\Config\S2SLib.bs = Examples\Text\PennTreebank\Config\S2SLib.bs
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{E6DC3B7D-303D-4A54-B040-D8DCF8C56E17}"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunDir=$(SolutionDir)Examples/Text/PennTreebank/_run RootDir=$(SolutionDir)Examples/Text/PennTreebank/_run DataDir=$(SolutionDir)Examples/Text/PennTreebank/Data ConfigDir=$(SolutionDir)Examples/Text/PennTreebank/Config stderr=$(SolutionDir)Examples/Text/PennTreebank/_run/S2SAutoEncoder.log DeviceId=-1 makeMode=false
|
||||
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)Examples/Text/PennTreebank DeviceId=-1 makeMode=false
|
||||
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)g2p makeMode=false
|
||||
####################
|
||||
# WORK IN PROGRESS #
|
||||
# WORK IN PROGRESS #
|
||||
|
@ -6,7 +7,28 @@
|
|||
####################
|
||||
|
||||
# Command line to run in debugger:
|
||||
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunDir=$(SolutionDir)Examples/Text/PennTreebank/_run RootDir=$(SolutionDir)Examples/Text/PennTreebank/_run DataDir=$(SolutionDir)Examples/Text/PennTreebank/Data ConfigDir=$(SolutionDir)Examples/Text/PennTreebank/Config stderr=$(SolutionDir)Examples/Text/PennTreebank/_run/S2SAutoEncoder.log train=[SGD=[maxEpochs=1]] confVocabSize=1000 DeviceId=-1 makeMode=false
|
||||
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)Examples/Text/PennTreebank train=[SGD=[maxEpochs=1]] confVocabSize=1000 DeviceId=-1 makeMode=false
|
||||
|
||||
# directory defaults (if not overridden)
|
||||
RunRootDir = "../.." # default if not overridden
|
||||
DataDir = "$RunRootDir$/Data"
|
||||
CacheDir = "$DataDir$/cache" # (not used currently)
|
||||
ExpRootDir = "$RunRootDir$"
|
||||
|
||||
# experiment id
|
||||
#ExpId = _run
|
||||
|
||||
deviceId = 1
|
||||
#ExpId = 68-$deviceId$-s2sae-bigmodel
|
||||
ExpId = 06-$deviceId$-g2p
|
||||
#ExpId = 05-3-g2p # for decoding a different model
|
||||
|
||||
# directories
|
||||
ExpDir = "$ExpRootDir$/$ExpId$"
|
||||
ModelDir = "$ExpDir$/Models"
|
||||
|
||||
stderr = $ExpDir$/S2SAutoEncoder.log7
|
||||
|
||||
# Append this for small set:
|
||||
# train=[epochSize=2048]] trainFile=ptb.small.train.txt validFile=ptb.small.valid.txt testFile=ptb.small.test.txt
|
||||
|
||||
|
@ -14,44 +36,37 @@
|
|||
# It encodes an entire sentence into a flat vector, and tries to regenerate it.
|
||||
# Meant to be useful mainly for understanding how to do sequence-to-sequence in CNTK.
|
||||
|
||||
# Parameters can be overwritten on the command line
|
||||
# for example: cntk configFile=myConfigFile RootDir=../..
|
||||
# For running from Visual Studio add
|
||||
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
|
||||
RootDir = ".."
|
||||
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
CacheDir = "$RootDir$/Data/cache"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
|
||||
# deviceId=-1 for CPU, >=0 for GPU devices, "auto" chooses the best GPU, or CPU if no usable GPU is available
|
||||
deviceId = "auto"
|
||||
|
||||
command = writeWordAndClassInfo:train:test:write
|
||||
#command = write
|
||||
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "$ModelDir$/S2SAutoEncoder.dnn"
|
||||
decodeModelPath = "$modelPath$.13" # epoch to decode. Has best CV WER
|
||||
|
||||
# uncomment the following line to write logs to a file
|
||||
#stderr=$OutputDir$/rnnOutput
|
||||
decodeModelPath = "$modelPath$.13" # epoch to decode can be appended here
|
||||
beamDepth = 1 # 0=predict; 1=greedy; >1=beam
|
||||
decodeOutputPath = "$decodeModelPath$.b$beamDepth$"
|
||||
|
||||
#numCPUThreads = 1
|
||||
#confVocabSize = 10000
|
||||
#confClassSize = 50
|
||||
|
||||
confVocabSize = 10000
|
||||
confClassSize = 50
|
||||
useStabilizer = true
|
||||
#trainFile = "ptb.train.txt"
|
||||
##trainFile = "ptb.small.train.txt"
|
||||
#validFile = "ptb.valid.txt"
|
||||
##validFile = "ptb.small.valid.txt"
|
||||
#testFile = "ptb.test.txt"
|
||||
##testFile = "ptb.test.txt-econ1"
|
||||
##testFile = "ptb.small.train.txt" # test on train, to see whether model makes sense at all
|
||||
#startSymbol = "</s>"
|
||||
|
||||
trainFile = "ptb.train.txt"
|
||||
#trainFile = "ptb.small.train.txt"
|
||||
validFile = "ptb.valid.txt"
|
||||
#validFile = "ptb.small.valid.txt"
|
||||
testFile = "ptb.test.txt"
|
||||
#testFile = "ptb.test.txt-econ1"
|
||||
confVocabSize = 69 #10000
|
||||
confClassSize = 0 #50
|
||||
|
||||
trainFile = "g014b2b.train-dev-20-21.bsf.joint"
|
||||
#trainFile = "g014b2b.train-dev-1-21.bsf.joint" # small one for debugging
|
||||
validFile = "g014b2b.train-dev-1-21.bsf.joint"
|
||||
testFile = "g014b2b.test.bsf.joint"
|
||||
startSymbol = "<s>"
|
||||
|
||||
#######################################
|
||||
# network definition #
|
||||
|
@ -59,12 +74,22 @@ testFile = "ptb.test.txt"
|
|||
|
||||
BrainScriptNetworkBuilder = (new ComputationNetwork [
|
||||
|
||||
# TODO: move this somewhere shared
|
||||
enableTracing = true
|
||||
traceFrequency = 1000
|
||||
tracingLabelMappingFile = "$ModelDir$/vocab.wl"
|
||||
include "S2SLib.bs"
|
||||
beamDepth=3 // for above Trace macros only
|
||||
|
||||
# import general config options from outside config values
|
||||
vocabDim = $confVocabSize$
|
||||
nbrClass = $confClassSize$
|
||||
|
||||
useStabilizer = $useStabilizer$
|
||||
useEncoder = true // if false, this becomes a regular RNN
|
||||
isAutoencoder = false # input is only one sequence, meant to reproduce itself
|
||||
useStabilizer = true
|
||||
useEncoder = true # if false, this becomes a regular RNN
|
||||
useNYUStyle = false # if true use thought vector for all inputs, NYU-style
|
||||
attentionSpan = 20 # we only support fixed-size attention windows for now. 0 means no attention; exactly 20 is needed for the g2p CMUDict task
|
||||
|
||||
# import some namespaces
|
||||
Parameters = BS.Parameters
|
||||
|
@ -74,125 +99,176 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
|
|||
Boolean = BS.Boolean
|
||||
RecurrentLSTMP = BS.RNNs.RecurrentLSTMP
|
||||
|
||||
embeddingDim = 300
|
||||
hiddenDim = 200
|
||||
embeddingDim = $confVocabSize$ # 300
|
||||
hiddenDim = 750 # 512 # 1024 # 200 --TODO: Kaisheng used 500
|
||||
maxLayer = 2 # 1 # 0
|
||||
|
||||
encoderDims[i:0..0] = hiddenDim # this defines the number of hidden layers in each
|
||||
decoderDims[i:0..0] = hiddenDim # both are one LSTM layer only for now
|
||||
encoderDims[i:0..maxLayer] = hiddenDim # this defines the number of hidden layers in each
|
||||
decoderDims[i:0..maxLayer] = hiddenDim # both are one LSTM layer only for now
|
||||
|
||||
# inputs
|
||||
#input = SparseInput(vocabDim, tag='feature'); # BUGBUG: Slice() not working for sparse, need to extend TensorView
|
||||
input = Input(vocabDim, tag='feature');
|
||||
|
||||
# for an auto-encoder, both are the same
|
||||
labels = input
|
||||
streams = [
|
||||
rawInput = input
|
||||
out = if isAutoencoder
|
||||
then [
|
||||
# for an auto-encoder, both are the same
|
||||
input = rawInput
|
||||
labels = rawInput
|
||||
]
|
||||
else [
|
||||
# we encode input and label as a single input; this splits it into two
separatorRow = 2 # row index of separator symbol
isSeparator = RowSlice (separatorRow, 1, rawInput) # cut out the separator as a flag
inInput = Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) # flag sequence: word is input...
inLabels = Boolean.Or (PastValue (1, inLabels, defaultHiddenActivation=0), isSeparator) # ...or labels
input = Sequences.Gather (inInput, rawInput) # use flags to split raw input into input and labels
labels = Sequences.Gather (inLabels, rawInput) # (both have different lengths)
]
].out
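
The block above folds source and target into a single sequence and splits it at a separator row. As a rough illustration (a NumPy sketch, not part of this commit; x and separator_row are stand-ins for rawInput and separatorRow), the split amounts to:

import numpy as np

def split_joint_sequence(x, separator_row):
    # x: one-hot matrix [vocabDim, T], time along the columns
    T = x.shape[1]
    is_sep = x[separator_row, :] > 0
    in_input = np.zeros(T, dtype=bool)    # frame belongs to the input up to and including the separator
    in_labels = np.zeros(T, dtype=bool)   # frame belongs to the labels from the separator onward
    seen = False
    for t in reversed(range(T)):          # FutureValue + Or: propagate the flag backwards
        seen = seen or is_sep[t]
        in_input[t] = seen
    seen = False
    for t in range(T):                    # PastValue + Or: propagate the flag forwards
        seen = seen or is_sep[t]
        in_labels[t] = seen
    return x[:, in_input], x[:, in_labels]   # Sequences.Gather on each flag sequence
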
|
||||
|
||||
# helpers
|
||||
First (x) = Slice (0, 1, x, axis=-1)
|
||||
Last (x) = Slice (-1, 0, x, axis=-1)
|
||||
|
||||
# strip separators
|
||||
CastAs (type, data) = Sequences.Scatter (Constants.OnesLike (type), data)
|
||||
# TODO: find out which one is the correct one
|
||||
#inputSequence = Slice (0, -1, streams.input, axis=-1) # e.g. <s> A B C # TODO: process </s> as well, to trigger the thought vector
|
||||
inputSequence = streams.input # e.g. <s> A B C </s>
|
||||
labelSequence = Slice (1, 0, streams.labels, axis=-1) # e.g. A B C </s>
|
||||
|
||||
inputSequence = Slice (0, -1, input, axis=-1) # e.g. <s> A B C
|
||||
labelSequence = CastAs (inputSequence, Slice (1, 0, labels, axis=-1)) # e.g. A B C </s>
|
||||
|
||||
# embeddings
|
||||
# embeddings --as long as we cannot read multiple sequences, we got one embedding
|
||||
# Note: Embeddings are linear, so better stabilize. We really should use BatchNorm.
|
||||
|
||||
Einput = Parameters.Stabilize (Parameters.WeightParam (vocabDim, embeddingDim), enabled=useStabilizer) # note: this is assumed to be applied transposed, hence the swapped dimensions
|
||||
Elabel = Einput
|
||||
E = Parameters.Stabilize (Parameters.WeightParam (vocabDim, embeddingDim), enabled=useStabilizer) # note: this is assumed to be applied transposed, hence the swapped dimensions
|
||||
EmbedInput (x) = if vocabDim != embeddingDim then TransposeTimes (E, x) else x
|
||||
EmbedLabels (x) = if vocabDim != embeddingDim then TransposeTimes (E, x) else x
|
||||
|
||||
Embed (E, x) = TransposeTimes (E, x)
|
||||
inputEmbedded = EmbedInput (inputSequence)
|
||||
labelsEmbedded = EmbedLabels (labelSequence)
|
||||
labelSentenceStart = First (streams.labels)
|
||||
labelSentenceStartEmbedded = EmbedLabels (labelSentenceStart)
|
||||
|
||||
inputEmbedded = Embed (Einput, inputSequence)
|
||||
labelsEmbedded = Embed (Elabel, labelSequence)
|
||||
RecurrentLSTMPWithAttentionWindow2 (inputDim/*x.dim*/, outputDim/*h.dim*/, cellDim/*c.dim*/, x, projectedAttentionWindowBroadcast, attentionDim, attentionSpan, enableSelfStabilization=false) =
|
||||
[
|
||||
prevState =
|
||||
[
|
||||
h = Loop.Previous (lstmState.h) # hidden state(t-1)
|
||||
c = Loop.Previous (lstmState.c) # cell(t-1)
|
||||
]
|
||||
|
||||
# compute additional hidden state from attention
|
||||
W(x) = Parameters.WeightParam (attentionDim, outputDim) * Parameters.Stabilize (x, enabled=useStabilizer)
|
||||
projectedH = W (prevState.h) # [cellDim]
|
||||
tanHOut = Tanh (projectedAttentionWindowBroadcast.value + projectedH) # [attentionDim x attentionSpan]
|
||||
v(x) = Parameters.WeightParam (1, attentionDim) * Parameters.Stabilize (x, enabled=useStabilizer) # [1 x attentionDim]
|
||||
u = v (tanHOut) # [1 x attentionSpan]
|
||||
uValid = u + Log (projectedAttentionWindowBroadcast.valid) # [1 x attentionSpan]
|
||||
attentionWeights = Softmax (uValid) # [1 x attentionSpan]
|
||||
weightedAttentionWindow = projectedAttentionWindowBroadcast.value .* attentionWeights # [attentionDim x attentionSpan]
|
||||
weightedAttentionAverage = weightedAttentionWindow * BS.Constants.OnesTensor (attentionSpan) # [attentionDim]
|
||||
|
||||
# feed both to LSTM as a single augmented input, so that we can reuse the existing LSTM component
|
||||
augmentedX = RowStack (weightedAttentionAverage : x)
|
||||
|
||||
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
|
||||
lstmState = BS.RNNs.LSTMP (attentionDim + inputDim, outputDim, cellDim, augmentedX, prevState, enableSelfStabilization=enableSelfStabilization1)
|
||||
].lstmState // that's the value we return
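
For orientation, a small NumPy sketch (not CNTK API; names and shapes are illustrative) of the attention step this function computes: project the previous hidden state, add it to the projected window, squash with tanh, score each window column, softmax with a log-mask over invalid columns, then average the window with those weights.

import numpy as np

def attention_context(prev_h, window_vals, window_valid, W, v):
    # prev_h:       [outputDim]            decoder hidden state h(t-1)
    # window_vals:  [attentionDim, span]   projectedAttentionWindowBroadcast.value
    # window_valid: [span]                 1 for real frames, 0 for padding
    # W: [attentionDim, outputDim], v: [attentionDim]
    projected_h = W @ prev_h                                  # projectedH
    tanh_out = np.tanh(window_vals + projected_h[:, None])    # [attentionDim, span]
    u = v @ tanh_out                                          # one score per window column
    u = u + np.log(np.maximum(window_valid, 1e-30))           # uValid: mask out padded columns
    w = np.exp(u - u.max()); w /= w.sum()                     # attentionWeights (softmax)
    return (window_vals * w).sum(axis=1)                      # weightedAttentionAverage
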
|
||||
|
||||
RecurrentLSTMP2WithInitialState (inputDim, outputDim, cellDim, x, initialState, enableSelfStabilization=false) =
|
||||
[
|
||||
prevState =
|
||||
[
|
||||
isFirst = Loop.IsFirst (initialState.h)
|
||||
h = Boolean.If (isFirst, initialState.h, Loop.Previous (lstmState.h)) // hidden state(t-1)
|
||||
c = Boolean.If (isFirst, initialState.c, Loop.Previous (lstmState.c)) // cell(t-1)
|
||||
]
|
||||
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
|
||||
lstmState = BS.RNNs.LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
|
||||
].lstmState // that's the value we return
|
||||
|
||||
# encoder (processes inputEmbedded)
|
||||
encoder = BS.RNNs.RecurrentLSTMP2Stack (inputEmbedded, embeddingDim, encoderDims, encoderDims, enableSelfStabilization=useStabilizer)
|
||||
|
||||
# encoder (processes user input)
|
||||
encoderOutputLayer = Length (encoderDims)-1
|
||||
encoder[i:0..encoderOutputLayer] =
|
||||
RecurrentLSTMP(if i == 0 then embeddingDim else encoderDims[i-1],
|
||||
encoderDims[i], encoderDims[i],
|
||||
if i == 0 then inputEmbedded else encoder[i-1],
|
||||
enableSelfStabilization=useStabilizer)
|
||||
encoderOutput = encoder[encoderOutputLayer]
|
||||
|
||||
# that last frame should be fed as an additional input to every decoder step
|
||||
# (This is the NYU model, not the Google model where the thought vector is only the initial state.)
|
||||
# Three ways of passing encoder state:
|
||||
# 1. as initial state for decoder (Google style)
|
||||
# 2. as side information for every decoder step (NYU style)
|
||||
# 3. attention
|
||||
|
||||
thoughtVector =
|
||||
[
|
||||
x = encoderOutput
|
||||
result = Boolean.If (Loop.IsLast (x), // if last entry
|
||||
/*then*/ x, // then copy that
|
||||
/*else*/ FutureValue (0, result)) // else just propagate to the front --TODO: Use Scatter() once input and labels are no longer the same.
|
||||
].result
|
||||
thoughtVector = [
h = Last (encoderOutput.h)
c = Last (encoderOutput.c)
]
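
A minimal sketch (made-up dimensions, not part of the config) of the three ways of passing encoder state listed above; only the Last() extraction mirrors the thoughtVector block directly, the rest is illustrative:

import numpy as np

enc_h = np.random.randn(750, 12)            # stand-in encoder outputs [hiddenDim, srcLen]
labels_embedded = np.random.randn(69, 9)    # stand-in decoder-side inputs [embeddingDim, tgtLen]

thought = enc_h[:, -1]                      # Last(): the thought vector
decoder_init_h = thought                    # 1. Google style: used only as the initial decoder state
nyu_input = np.concatenate(                 # 2. NYU style: same vector appended to every decoder step
    [np.repeat(thought[:, None], labels_embedded.shape[1], axis=1), labels_embedded], axis=0)
# 3. attention keeps the whole window enc_h instead of one summary vector (see the attention sketch above)
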
|
||||
thoughtVectorDim = encoderDims[encoderOutputLayer]
|
||||
|
||||
thoughtVectorPadded = [ # padded with zeroes until end of target sequence
|
||||
h = Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
|
||||
c = Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
|
||||
]
|
||||
|
||||
# attention (fixed rolling window)
|
||||
attentionWindow = Sequences.PastValueWindow (attentionSpan, encoderOutput.h)
|
||||
attentionDim = thoughtVectorDim
|
||||
projectedAttentionWindowBroadcast = [
|
||||
W(x) = Parameters.WeightParam (attentionDim, thoughtVectorDim) * Parameters.Stabilize (x, enabled=useStabilizer)
|
||||
#B = Parameters.BiasParam (vocabDim) # no bias in attention
|
||||
value = Sequences.BroadcastSequenceAs (labelsEmbedded, W (attentionWindow.value)) # apply the projection columnwise to the attentionWindow tensor
|
||||
valid = Sequences.BroadcastSequenceAs (labelsEmbedded, attentionWindow.valid)
|
||||
]
|
||||
|
||||
# NYU style: expand h to all, drop c
|
||||
# TODO: just use thoughtVectorPadded.h (do this when we next test this branch again)
|
||||
thoughtVectorEverywhere = Boolean.If (Loop.IsFirst (thoughtVectorPadded.h), # if first entry
|
||||
/*then*/ thoughtVectorPadded.h, # then copy that
|
||||
/*else*/ Loop.Previous (thoughtVectorEverywhere)) # else just propagate to the front
|
||||
# TODO: create an indexSequence that contains all zeroes, basically broadcast a single-frame sequence across another sequence length
|
||||
|
||||
# decoder
|
||||
# NYU style:
|
||||
# The decoder starts with hidden state 0
|
||||
# and takes as input [thoughtVector; previous word].
|
||||
# and takes as input [thoughtVectorEverywhere; previous word].
|
||||
|
||||
isTraining = EnvironmentInput ('isTraining', tag='evaluation')
|
||||
#decoderFeedback = Boolean.If (isTraining, labelsEmbedded, decoderOutputEmbedded) # not working
|
||||
decoderFeedback = labelsEmbedded
|
||||
sentenceStartEmbedded = inputEmbedded # first token is sentence start
|
||||
# ^^ inputEmbedded is used to get </s>. Must make this a constant once we separate input and output.
|
||||
delayedDecoderFeedback = Loop.PreviousOrDefault (defaultValue=labelSentenceStartEmbedded, labelsEmbedded)
|
||||
|
||||
delayedDecoderFeedback = Boolean.If (Loop.IsFirst (decoderFeedback), sentenceStartEmbedded, Loop.Previous (decoderFeedback))
|
||||
|
||||
decoderInputDim = if useEncoder then thoughtVectorDim + embeddingDim else embeddingDim
|
||||
decoderInput = if useEncoder then RowStack (thoughtVector : delayedDecoderFeedback) else delayedDecoderFeedback
|
||||
decoderInputDim = labelsEmbedded.dim #embeddingDim
|
||||
decoderInput = Pass (delayedDecoderFeedback)
|
||||
decoderOutputLayer = Length (decoderDims)-1
|
||||
decoder[i:0..decoderOutputLayer] =
|
||||
if i == 0
|
||||
then RecurrentLSTMP (decoderInputDim, decoderDims[i], decoderDims[i],
|
||||
decoderInput,
|
||||
enableSelfStabilization=useStabilizer)
|
||||
else RecurrentLSTMP (decoderDims[i-1], decoderDims[i], decoderDims[i],
|
||||
decoder[i-1],
|
||||
enableSelfStabilization=useStabilizer)
|
||||
decoderDim = decoderDims[decoderOutputLayer]
|
||||
decoderOutput = decoder[decoderOutputLayer]
|
||||
then if useEncoder && useNYUStyle then BS.RNNs.RecurrentLSTMP2 (thoughtVectorDim + decoderInputDim, decoderDims[i], decoderDims[i],
|
||||
RowStack (thoughtVectorEverywhere : decoderInput),
|
||||
enableSelfStabilization=useStabilizer)
|
||||
else if useEncoder && attentionSpan > 0 then RecurrentLSTMPWithAttentionWindow2 (thoughtVectorDim + decoderInputDim, decoderDims[i], decoderDims[i],
|
||||
RowStack (thoughtVectorEverywhere : decoderInput),
|
||||
projectedAttentionWindowBroadcast, attentionDim, attentionSpan,
|
||||
enableSelfStabilization=useStabilizer)
|
||||
else RecurrentLSTMP2WithInitialState (decoderInputDim, decoderDims[i], decoderDims[i],
|
||||
decoderInput,
|
||||
thoughtVectorPadded, # BUGBUG: Should be thoughtVector, but Scatter() can't expand from inside a loop
|
||||
enableSelfStabilization=useStabilizer)
|
||||
else BS.RNNs.RecurrentLSTMP2 (decoderDims[i-1], decoderDims[i], decoderDims[i],
|
||||
decoder[i-1].h,
|
||||
enableSelfStabilization=useStabilizer)
|
||||
#decoderDim = decoderDims[decoderOutputLayer]
|
||||
decoderOutput = decoder[decoderOutputLayer].h
|
||||
decoderDim = decoderOutput.dim
|
||||
|
||||
# and add a softmax layer on top
|
||||
|
||||
W(x) = Parameters.WeightParam (vocabDim, decoderDim) * Parameters.Stabilize (x, enabled=useStabilizer)
|
||||
B = Parameters.BiasParam (vocabDim)
|
||||
|
||||
z = W(decoderOutput) + B; // top-level input to Softmax
|
||||
|
||||
decoderOutputEmbedded = Embed (Elabel, Hardmax (z))
|
||||
z = W (decoderOutput) + B; // top-level input to Softmax
|
||||
|
||||
# training criteria
|
||||
ce = CrossEntropyWithSoftmax(labelSequence, z, tag='criterion') // this is the training objective
|
||||
wer = ErrorPrediction (labelSequence, z, tag='evaluation') // this also gets tracked
|
||||
|
||||
#indexTestVals = Plus (decoderOutput, BS.Constants.Zero, tag='evaluation')
|
||||
#indexTest = Slice (0, 1, indexTestVals)
|
||||
#index = Where (RectifiedLinear (indexTest), tag='evaluation'); // for testing: this thresholds all negative numbers to 0=false, keeping positive as !=0=true
|
||||
#packedIndex = PackedIndex (indexTest, index, tag='evaluation')
|
||||
#filtered = GatherPacked (packedIndex, indexTestVals, tag='evaluation')
|
||||
#unfiltered = ScatterPacked (indexTest, packedIndex, filtered, tag='evaluation')
|
||||
|
||||
//# define an LSTM with a per-sequence initialization value
|
||||
//# TODO: Not currently used. Move to BS library once tested.
|
||||
//RecurrentLSTMPWithInitValue (inputDim, outputDim, cellDim, x, initValue, enableSelfStabilization=false) =
|
||||
//[
|
||||
// prevState = // Loop.Previous (lstmState). BS can't apply Previous() to dictionaries, so expand it manually
|
||||
// [
|
||||
// h = Loop.Previous (lstmState.h); // hidden state(t-1)
|
||||
// c = Loop.Previous (lstmState.c); // cell(t-1)
|
||||
// ]
|
||||
// # resettable LSTM function
|
||||
// lstmState =
|
||||
// [
|
||||
// // apply the LSTM function to the input state; for first frame, we will ignore the output
|
||||
// enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
|
||||
// lstmState1 = LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
|
||||
//
|
||||
// // the actual LSTM state (incl. its output) gets overwritten in the first frame by the initValue
|
||||
// isFirst = Loop.IsFirst (x)
|
||||
// h = Boolean.If (isFirst, initValue, lstmState1.h); // hidden state(t-1)
|
||||
// c = Boolean.If (isFirst, initValue, lstmState1.c); // cell(t-1)
|
||||
// ]
|
||||
//].lstmState.h // that's the value we return
|
||||
ce = CrossEntropyWithSoftmax (labelSequence, z, tag='criterion') // this is the training objective
|
||||
wer = ErrorPrediction (labelSequence, z, tag='evaluation') // this also gets tracked
|
||||
])
|
||||
|
||||
#######################################
|
||||
|
@ -241,7 +317,7 @@ reader = [
|
|||
labelType = "category"
|
||||
labelDim = "$confVocabSize$"
|
||||
labelMappingFile = "$ModelDir$/vocab.wl"
|
||||
beginSequence = "</s>"
|
||||
beginSequence = "$startSymbol$" # "</s>"
|
||||
endSequence = "</s>"
|
||||
|
||||
#### Write definition ####
|
||||
|
@ -341,7 +417,7 @@ cvReader = [
|
|||
writeWordAndClassInfo = [
|
||||
action = "writeWordAndClass"
|
||||
inputFile = "$DataDir$/$trainFile$"
|
||||
beginSequence = "</s>"
|
||||
beginSequence = "$startSymbol$" # "</s>"
|
||||
endSequence = "</s>"
|
||||
outputMappingFile = "$ModelDir$/vocab.wl"
|
||||
outputVocabFile = "$ModelDir$/vocab.txt"
|
||||
|
@ -362,23 +438,24 @@ train = [
|
|||
traceLevel = 1
|
||||
epochSize = 0 # (for quick tests, this can be overridden with something small)
|
||||
|
||||
#BrainScriptNetworkBuilder is defined in outer scope
|
||||
# BrainScriptNetworkBuilder is defined in outer scope
|
||||
|
||||
SGD = [
|
||||
minibatchSize = 128*2:256:512
|
||||
learningRatesPerSample = 0.01
|
||||
minibatchSize = 128:128:256:512
|
||||
learningRatesPerSample = 0.007*2:0.0035 #0.01 #0.005 # 0.01
|
||||
momentumAsTimeConstant = 2500
|
||||
gradientClippingWithTruncation = true # TODO: clip and truncate? What is the difference?
|
||||
clippingThresholdPerSample = 15.0
|
||||
maxEpochs = 16
|
||||
maxEpochs = 50
|
||||
numMBsToShowResult = 100
|
||||
firstMBsToShowResult = 10
|
||||
gradUpdateType = "none" # FSAdaGrad?
|
||||
loadBestModel = true
|
||||
|
||||
# tracing (enable these for debugging)
|
||||
#traceNodeNamesReal = labelsEmbedded:decoderInput:"decoder[0].lstmState._privateInnards.ht":z.Plus_left.Times_right.result:z:ce
|
||||
#traceNodeNamesReal = labelsEmbedded:decoderInput:z:ce
|
||||
#traceNodeNamesReal = thoughtVector.result:zMask:z:ce:wer:indexTestVals:index:packedIndex:filtered:unfiltered:isTraining
|
||||
#traceNodeNamesReal = thoughtVectorEverywhere.result:zMask:z:ce:wer:indexTestVals:index:packedIndex:filtered:unfiltered:isTraining
|
||||
#traceNodeNamesCategory = inputSequence.out:labelSequence
|
||||
|
||||
dropoutRate = 0.0
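
The colon-separated values above (minibatchSize, learningRatesPerSample) are per-epoch schedules: value*n repeats a value for n epochs and the last entry carries over to the remaining epochs. A tiny Python sketch of that expansion, as a reading aid only (my reading of the syntax, not CNTK code):

def expand_schedule(spec, n_epochs):
    # "0.007*2:0.0035" -> [0.007, 0.007, 0.0035, 0.0035, ...] up to n_epochs entries
    out = []
    for part in spec.split(":"):
        val, _, rep = part.partition("*")
        out += [float(val)] * int(rep or 1)
    return (out + [out[-1]] * n_epochs)[:n_epochs]
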
|
||||
|
@ -454,7 +531,7 @@ test = [
|
|||
labelType = "category"
|
||||
labelDim = "$confVocabSize$"
|
||||
labelMappingFile = "$ModelDir$/vocab.wl"
|
||||
beginSequence = "</s>"
|
||||
beginSequence = "$startSymbol$" # "</s>"
|
||||
endSequence = "</s>"
|
||||
|
||||
#### Write definition ####
|
||||
|
@ -504,51 +581,21 @@ write = [
|
|||
# We need to make a change:
|
||||
BrainScriptNetworkBuilder = ([
|
||||
|
||||
beamDepth = 3 // 0=predict; 1=greedy; >1=beam
|
||||
enableTracing = true
|
||||
traceFrequency = 1000
|
||||
tracingLabelMappingFile = "$ModelDir$/vocab.wl"
|
||||
include "S2SLib.bs"
|
||||
|
||||
beamDepth = $beamDepth$ // 0=predict; 1=greedy; >1=beam
|
||||
|
||||
# import some names
|
||||
Constants = BS.Constants
|
||||
Boolean = BS.Boolean
|
||||
Loop = BS.Loop
|
||||
Previous = Loop.Previous
|
||||
IsFirst = Loop.IsFirst
|
||||
|
||||
Trace (node, say='', logFrequency=traceFrequency, logFirst=10, logGradientToo=false, onlyUpToRow=100000000, onlyUpToT=100000000, format=[], tag='') = new ComputationNode [
|
||||
operation = 'Trace' ; inputs = node
|
||||
]
|
||||
|
||||
formatDense = [
|
||||
type = "real"
|
||||
transpose = false
|
||||
precisionFormat = ".4"
|
||||
]
|
||||
formatOneHot = [
|
||||
type = "category"
|
||||
transpose = false
|
||||
labelMappingFile = "$ModelDir$/vocab.wl"
|
||||
]
|
||||
formatSparse = [
|
||||
type = "sparse"
|
||||
transpose = false
|
||||
labelMappingFile = "$ModelDir$/vocab.wl"
|
||||
]
|
||||
enableTracing = true
|
||||
traceFrequency = 1
|
||||
TraceState (h, what) =
|
||||
if enableTracing
|
||||
then Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3, format=formatDense))
|
||||
else h
|
||||
TraceDense (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3, format=formatDense)
|
||||
else h
|
||||
TraceOneHot (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, /*onlyUpToRow=beamDepth*beamDepth, onlyUpToT=15,*/ format=formatOneHot)
|
||||
else h
|
||||
TraceSparse (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, /*onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3,*/ format=formatSparse)
|
||||
else h
|
||||
If = Boolean.If
|
||||
OnesTensor = Constants.OnesTensor
|
||||
|
||||
# macro that extracts top D hypotheses from a 2D tensor
# input: scores[w,d]  w = word index, d = hyp index in beam (d=0 is the best one)
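
A hedged NumPy sketch of what this macro's comment describes (the top D entries of a score matrix, returned as a stacked one-hot tensor); the actual macro is BrainScript and is not reproduced here:

import numpy as np

def get_top_n_tensor(n, scores):
    # scores: [V, Dprev]; result: one-hot [V, Dprev, n], slice [:, :, k] marks the k-th best entry
    out = np.zeros(scores.shape + (n,))
    flat = np.argsort(scores, axis=None)[::-1][:n]
    w, d = np.unravel_index(flat, scores.shape)
    out[w, d, np.arange(n)] = 1.0
    return out
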
|
||||
|
@ -575,31 +622,49 @@ write = [
|
|||
|
||||
modelAsTrained = BS.Network.Load ("$decodeModelPath$")
|
||||
|
||||
useNYUStyle = false # TODO: we should be able to infer this from some dimensions
|
||||
hasEmbeddings = false # TODO: infer this
|
||||
|
||||
top1DecodingModel(model) = new ComputationNetwork [
|
||||
# compute top-N from output
|
||||
logP = LogSoftmax (model.z)
|
||||
|
||||
offset = Constant (10000)
|
||||
top1a = Hardmax (logP) .* (logP + offset)/*for tracing*/
|
||||
top1b = top1a
|
||||
top1b = Hardmax (logP) .* (logP + offset)/*for tracing*/
|
||||
top1 = TraceSparse (top1b, 'logP') # TODO: get the accumulated logP out, it's a little more involved
|
||||
|
||||
topN = 10
|
||||
tokenSet = GetTopNTensor (topN, logP) # [V x 1] -> [V x 1 x topN]
|
||||
tokenSetScores = tokenSet .* logP # [V x 1 x topN]
|
||||
topPaths = GetTopNTensor (topN, logP) # [V x 1] -> [V x 1 x topN]
|
||||
topPathScores = topPaths .* logP # [V x 1 x topN]
|
||||
# reduce back to a single column
|
||||
topHyps = TraceSparse (tokenSetScores * ConstantTensor (1, (1 : topN)), 'topHyps')
|
||||
topHyps = TraceSparse (topPathScores * OnesTensor (1 : topN), 'topHyps')
|
||||
|
||||
inputsOut = Pass (model.inputSequence)
|
||||
labelsOut = Pass (TraceOneHot (model.labelSequence, 'labels'))
|
||||
decodeOut = Pass (TraceOneHot (top1, 'out'))
|
||||
topNOut = Pass (topHyps)
|
||||
]
|
||||
|
||||
# replace old decoderFeedback node by newDecoderFeedback
|
||||
EmbedLabels (x) = if hasEmbeddings then TransposeTimes (modelAsTrained.labelsEmbedded.TransposeTimesArgs[0], x) else x
|
||||
decoderFeedback = EmbedLabels (Hardmax (modelAsTrained.z)) # in training, this is decoderFeedback = labelsEmbedded
|
||||
|
||||
decoderFeedback = modelAsTrained.decoderOutputEmbedded # in training, this is decoderFeedback = labelsEmbedded
|
||||
sentenceStartEmbedded = Boolean.If (Loop.IsFirst (decoderFeedback), modelAsTrained.inputEmbedded, Previous (sentenceStartEmbedded)) # enforces no leaking of labels
|
||||
delayedDecoderFeedback = Boolean.If (Loop.IsFirst (decoderFeedback), sentenceStartEmbedded, Loop.Previous (decoderFeedback)) # same expression as in training
|
||||
# TODO: fold this in
|
||||
PreviousOrDefault1 (x, defaultValue=Constant (0)) = # a delay node with initial value --TODO: merge the two, then do in C++
|
||||
[
|
||||
flags = IsFirst (defaultValue/*x*/)
|
||||
out = If (flags,
|
||||
/*then*/ defaultValue,
|
||||
/*else*/ Previous (x))
|
||||
].out
|
||||
|
||||
labelSentenceStart = modelAsTrained.labelSentenceStart_out # _ is a hack
|
||||
|
||||
labelsToUse = if hasEmbeddings then modelAsTrained.labelsEmbedded else modelAsTrained.labelSequence
|
||||
labelSentenceStartToUse = if hasEmbeddings then modelAsTrained.labelSentenceStartEmbedded else labelSentenceStart
|
||||
labelSentenceStartEmbeddedScattered = TraceDense (BS.Sequences.Scatter (IsFirst (labelsToUse), labelSentenceStartToUse), 'sest')
|
||||
|
||||
delayedDecoderFeedback = TraceDense (/*Loop.*/PreviousOrDefault1 (defaultValue=labelSentenceStartEmbeddedScattered, TraceDense (decoderFeedback, 'lemb')) , 'prev lemb')
|
||||
|
||||
greedyDecodingModel = BS.Network.Edit (modelAsTrained,
|
||||
BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.delayedDecoderFeedback, delayedDecoderFeedback),
|
||||
|
@ -611,6 +676,8 @@ write = [
|
|||
# decoder[0].prevState.h = PastValue (decoder[0].lstmState._privateInnards.ht) : [200 x 1 {1,200} x *] -> [200 x 1 {1,200} x *]
|
||||
# decoder[0].prevState.c = PastValue (decoder[0].lstmState._privateInnards.ct) : [200 x 1 {1,200} x *] -> [200 x 1 {1,200} x *]
|
||||
# decoderInput.inputs[1] = PastValue (labelsEmbedded) : [300 x 1 {1,300} x *] -> [300 x 1 {1,300} x *]
|
||||
# decoder[0].prevState.h.elseVal = PastValue (decoder[0].lstmState._privateInnards.ht) : [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h] -> [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h]
|
||||
# decoder[0].prevState.c.elseVal = PastValue (decoder[0].lstmState._privateInnards.ct) : [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h] -> [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h]
|
||||
|
||||
hiddenDim = modelAsTrained.delayedDecoderFeedback.dim
|
||||
embeddingDim = modelAsTrained.decoderOutputEmbedded.dim
|
||||
|
@ -635,21 +702,66 @@ write = [
|
|||
# - traceback is a right-to-left recurrence
|
||||
# - output best hypo conditioned on the path (it is already known)
|
||||
|
||||
propagationEdits[i:0..2] = // TODO: implement and use { } syntax
|
||||
if i == 0 then (node => if node.name == 'decoder[0].prevState.h' then TraceState (Previous (PropagateTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
|
||||
else if i == 1 then (node => if node.name == 'decoder[0].prevState.c' then TraceState (Previous (PropagateTopN (node.PastValueArgs[0])), 'propagated') else node)
|
||||
propagationEdits[i:0..8] = // TODO: implement and use { } syntax TODO: VV elseVal only for non-NYU?
|
||||
# non-NYU:
|
||||
if i == 0 then (node => if node.name == 'decoder[0].prevState.h.elseVal' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
|
||||
else if i == 1 then (node => if node.name == 'decoder[0].prevState.c.elseVal' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
|
||||
# NYU:
|
||||
else if i == 2 then (node => if node.name == 'decoder[0].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
|
||||
else if i == 3 then (node => if node.name == 'decoder[0].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
|
||||
# all:
|
||||
else if i == 4 then (node => if node.name == 'decoder[1].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
|
||||
else if i == 5 then (node => if node.name == 'decoder[1].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
|
||||
else if i == 6 then (node => if node.name == 'decoder[2].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
|
||||
else if i == 7 then (node => if node.name == 'decoder[2].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
|
||||
else BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.delayedDecoderFeedback, delayedDecoderFeedback)
|
||||
|
||||
# decoderFeedback must be updated to take actual decoder output
|
||||
|
||||
Elabel = modelAsTrained.decoderOutputEmbedded.TransposeTimesArgs[0]
|
||||
decoderFeedback = TraceState (TransposeTimes (Elabel, TraceSparse (topWords, 'topWords')), 'feedback')
|
||||
|
||||
delayedDecoderFeedback = Boolean.If (Loop.IsFirst (decoderFeedback), sentenceStartEmbedded, Loop.Previous (decoderFeedback))
|
||||
|
||||
m2 = BS.Network.Edit (modelAsTrained,
|
||||
propagationEdits,
|
||||
(labelsOut : decodeOut)) # additional roots
|
||||
(inputsOut : labelsOut : decodeOut)) # additional roots
|
||||
|
||||
ReduceAxis (axisDim, x, axis=1) = # unfortunately, we must feed in the dimension of the axis, it can't be inferred
if axis == 1 then Times (OnesTensor (axisDim), x, outputRank = 0)
else if axis == 2 then ReduceAxis (axisDim, TransposeDimensions (x, 1, 2), axis=1)
else Fail("ReduceAxis: Only supports axes 1 and 2.")
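
The Times-with-a-ones-tensor trick used by ReduceAxis is just a sum over an axis; in NumPy terms (illustration only):

import numpy as np
x = np.arange(6.0).reshape(3, 2)
summed = np.ones(3) @ x    # equals x.sum(axis=0), which is what Times (OnesTensor (axisDim), x, outputRank = 0) computes
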
|
||||
|
||||
# === BEGIN DECODER ===
|
||||
|
||||
# constants for initial score and final traceback
|
||||
initialPathScores = FirstAndOther (0, LOGZERO, beamDepth, axis = 2) # [1 x D]: [ 0, -INF, -INF, -INF, ... ]
|
||||
finalHyp = FirstAndOther (1, 0, beamDepth, axis = 1) # [D] the final token is the top-scoring hypothesis, that is, hyp[0]
|
||||
|
||||
# path expansion of the D hypotheses that were best in previous time step (ordered as in previous time step)
|
||||
logLLs = Columnwise (LogSoftmax, beamDepth, modelAsTrained.z) # [V x Dprev] log P(w|hist)
|
||||
expandedPathScores = logLLs + If (IsFirst (logLLs), initialPathScores, Previous (tokens.score)) # [V x Dprev] log (P(w|hist) * P(hist)) for all top D hypotheses
|
||||
|
||||
# determine top D of expanded paths
|
||||
topPaths = TraceSparse (GetTopNTensor (beamDepth, expandedPathScores), 'topPaths') # [V x Dprev] -> [V x Dprev x Dnew]
|
||||
topPathScores = TraceSparse (topPaths .* expandedPathScores, 'topPathScores') # [V x Dprev x Dnew]
|
||||
|
||||
# form new decoding token, by reducing topPaths(Scores) along relevant dimensions
|
||||
tokens = [ # [. x Dnew]
|
||||
from = ReduceAxis (axis=1, vocabSize, topPaths) # [Dprev x Dnew], reduced over V
|
||||
word = ReduceAxis (axis=2, beamDepth, topPaths) # [V x Dnew], reduced over Dprev
|
||||
score = TraceDense (OnesTensor (1/*output dim*/ : /*reduction dims: */vocabSize : beamDepth/*Dprev*/) * topPathScores, 'tokens.score') # [1 x Dnew], reduced over [V x Dprev] and inserted a '1'
|
||||
]
|
||||
|
||||
# network feedback for next time step
|
||||
decoderFeedback = TraceState (EmbedLabels (TraceSparse (tokens.word, 'tokens.word')), 'feedback') # [embeddingDim x Dnew]
|
||||
delayedDecoderFeedback = If (IsFirst (labelSentenceStartEmbeddedScattered), labelSentenceStartEmbeddedScattered, Loop.Previous (decoderFeedback))
|
||||
|
||||
# network state for next step. We must reorder the network state for use in next time step: Apply this lambda to all decoder LSTMs' h and c.
|
||||
ReorderTopN (past_h_or_c) = Times (TraceState (past_h_or_c, 'past'), TraceDense (tokens.from, 'backp'))
|
||||
|
||||
# final traceback
|
||||
traceback = TraceDense (If (Loop.IsLast (labelSentenceStartEmbeddedScattered/*tokens.from*/), finalHyp, Loop.Next (tokens.from * traceback)), 'traceback') # [D] one-hot, multiplying tokens.from from the left will select another one-hot row of tokens.from
|
||||
decodeHyp = Times (topPaths, traceback, outputRank = 2) # [V x Dprev] 2D one-hot, selected the best hyp according to traceback
|
||||
decode = TraceOneHot (decodeHyp * OnesTensor (beamDepth), 'out') # [V] reduces over Dprev -> 1D one-hot
|
||||
# TODO: Can this be done in one ^^ go?
|
||||
|
||||
# === END DECODER ===
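
What the decoder section above wires up as a recurrence, written out as a plain Python loop for one beam expansion step plus the final traceback. Names and data layout here are mine (illustration only), not nodes of the edited network:

import numpy as np

def beam_step(log_probs, prev_scores):
    # log_probs:   [V, D] log P(word | hypothesis d)                (logLLs)
    # prev_scores: [D]    accumulated scores of the D live hypotheses (tokens.score at t-1)
    expanded = log_probs + prev_scores[None, :]          # expandedPathScores
    D = len(prev_scores)
    flat = np.argsort(expanded, axis=None)[::-1][:D]     # top D of the expanded paths
    words, froms = np.unravel_index(flat, expanded.shape)
    return words, froms, expanded[words, froms]          # tokens.word, tokens.from, tokens.score

def trace_back(words, froms):
    # words[t][d], froms[t][d]: word and predecessor rank of hypothesis d at time t
    best, out = 0, []                                    # finalHyp: rank 0 at the last step
    for t in reversed(range(len(words))):
        out.append(int(words[t][best]))
        best = int(froms[t][best])                       # follow tokens.from to the previous rank
    return out[::-1]
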
|
||||
|
||||
# propagate LSTM state to the right top-N rank given where that rank came from in the previous time step
|
||||
|
||||
|
@ -658,21 +770,19 @@ write = [
|
|||
0 0 0
|
||||
0 0 0")
|
||||
|
||||
PropagateTopN (past_h_or_c) = Times (TraceState (past_h_or_c, 'past'), TraceDense (backPointers, 'backp'))
|
||||
# backPointers: [Dprev, Dnew]
|
||||
# PropagateTopN:
|
||||
# tokens.from: [Dprev, Dnew]
|
||||
# v--------- best came from input hyp[1]
|
||||
# v------- second best came from input hyp[0]
|
||||
# v----- third best came from input hyp[2]
|
||||
# 0 1 0
|
||||
# 1 0 0
|
||||
# 0 0 1
|
||||
# backPointers[:,n] one-hot encodes the best predecessor at top-N rank n
|
||||
# tokens.from[:,n] one-hot encodes the best predecessor at top-N rank n
|
||||
# each column is a one-hot vector
|
||||
# multiplying with such a column from the right will select the column represented by the one-hot value
|
||||
|
||||
# get decoder log likelihoods
|
||||
# EvalActions: EnableNodeTracing {L"decoder[0].lstmState._privateInnards.it", L"z"}, //
|
||||
logLLs = Columnwise (LogSoftmax, beamDepth, modelAsTrained.z) # [V x D] un-normalized log P(w|hist) + const
|
||||
# logLLs: get decoder log likelihoods
|
||||
|
||||
Columnwise (f, beamDepth, z) = # TODO: Takes LogSoftmax over axis=1. it is more tricky to do this over arbitrary axes
|
||||
[
|
||||
|
@ -680,14 +790,12 @@ write = [
|
|||
out = Splice (cols, axis=2)
|
||||
].out
|
||||
|
||||
# decoder start token: 0 for first hyp, -INF for the others
|
||||
# initialPathScores: decoder start token: 0 for first hyp, -INF for the others
|
||||
LOGZERO = -1e30
|
||||
initialPathScores = FirstAndOther (0, LOGZERO, beamDepth, axis = 2) # row vector: [ 0, -INF, -INF, -INF, ... ]
|
||||
|
||||
expandedPathScores = logLLs + PreviousOrDefault (PropagateTopN (pathScores), initialPathScores) # [V x Dprev] un-normalized log (P(w|hist) * P(hist)) for all top D hypotheses
|
||||
# ^^ path expansion, [V x 1] + [1 x D] -> [V x D]
|
||||
# expandedPathScores: path expansion, [V x 1] + [1 x D] -> [V x D]
|
||||
|
||||
tokenSet = TraceSparse (GetTopNTensor (beamDepth, expandedPathScores), 'tokenSet') # [V x Dprev] -> [V x Dprev x Dnew]
|
||||
# topPaths:
|
||||
# +-----+
|
||||
# |0 0 0|
|
||||
# |0 0 0|-+
|
||||
|
@ -699,11 +807,8 @@ write = [
|
|||
# |0 0 0|
|
||||
# +-----+
|
||||
|
||||
#topWords = ReduceSum (axis=2, tokenSet) # TODO: add an axis parameter to SumColumnElements()
|
||||
topWords = [
|
||||
v1 = TransposeDimensions (tokenSet, 1, 2) # reduction axis is now the first
|
||||
out = Times (ConstantTensor (1, (beamDepth)), v1, outputRank = 0) # reduce over the first axis and drop it
|
||||
].out
|
||||
# tokens.word:
|
||||
#tokens.word = ReduceSum (axis=2, topPaths) # TODO: add an axis parameter to SumColumnElements()
|
||||
# +-+
|
||||
# |0|
|
||||
# |0|-+
|
||||
|
@ -715,7 +820,7 @@ write = [
|
|||
# |0|
|
||||
# +-+
|
||||
|
||||
backPointers = Times (ConstantTensor (1, (vocabSize)), tokenSet, outputRank = 0) # this is a tensor Times operation that reduces over the first dimension
|
||||
# tokens.from:
|
||||
# before dropping the first dimension: [V x Dprev x Dnew]
|
||||
# +-----+
|
||||
# |0 1 0| means input hyp[1] gave rise to the best
|
||||
|
@ -724,16 +829,16 @@ write = [
|
|||
# +-----+-+
|
||||
# |0 0 1| means input hyp[2] gave rise to third best
|
||||
# +-----+
|
||||
# after: [Dprev,Dnew] e.g. "0 1 0" goes into first column, vertically
|
||||
# after: [Dprev x Dnew] e.g. "0 1 0" goes into first column, vertically
|
||||
# v--------- best came from input hyp[1]
|
||||
# v------- second best came from input hyp[0]
|
||||
# v----- third best came from input hyp[2]
|
||||
# 0 1 0
|
||||
# 1 0 0
|
||||
# 0 0 1
|
||||
# backPointers[:,n] one-hot encodes the best predecessor at top-N rank n
|
||||
# tokens.from[:,n] one-hot encodes the best predecessor at top-N rank n
|
||||
|
||||
tokenSetScores = TraceSparse (tokenSet .* expandedPathScores, 'tokenSetScores') # [V x Dprev x Dnew]
|
||||
# topPathScores:
|
||||
# +-----+
|
||||
# |0 0 0|
|
||||
# |0 0 0|-+
|
||||
|
@ -744,29 +849,24 @@ write = [
|
|||
# +-----+z| z denotes the accumulated path score max_w P(w|hyp[2])
|
||||
# |0 0 0|
|
||||
# +-----+
|
||||
pathScores = TraceDense (ConstantTensor (1, (1/*output dim*/ : /*reduction dims: */vocabSize : beamDepth/*Dprev*/)) * tokenSetScores, 'pathScores') # [1 x Dnew]
|
||||
|
||||
# traceback
# last state: take Hardmax over pathScores
# previous states: multiply with respective backPointers matrix
# traceback:
# last state: take Hardmax over tokens.score
# previous states: multiply with respective tokens.from matrix
# -> hyp index for every time step
# then finally use that to select the actual output --TODO: that's a sample-wise matrix product between two sequences!
|
||||
traceback = TraceDense (NextOrDefault (backPointers * traceback, finalHyp), 'traceback') # [D] one-hot, multiplying backPointers from the left will select another one-hot row of backPointers
|
||||
# TODO: condition must be 1-dim, not 2-dim tensor, so we use labelSentenceStartEmbeddedScattered instead of tokens.from
|
||||
# +-+
|
||||
# |0|
|
||||
# |1| means at this time step, hyp[1] was the best globally
|
||||
# |0|
|
||||
# +-+
|
||||
finalHyp = FirstAndOther (1, 0, beamDepth, axis = 1) # the final token is the top-scoring hypothesis, that is, hyp[0]
|
||||
|
||||
# and the actual decoding output
|
||||
# decode: and the actual decoding output
|
||||
# This is the one to output (top sentence-level hypothesis after traceback).
|
||||
decode = [
|
||||
hyp = Times (tokenSet, traceback, outputRank = 2) # [V x Dprev] 2D one-hot
|
||||
out = TraceOneHot (hyp * ConstantTensor (1, beamDepth), 'out') # reduces over Dprev -> 1D one-hot
|
||||
].out
|
||||
|
||||
# traceback : [Dnew]
|
||||
# tokenSet : [V x Dprev x Dnew]
|
||||
# topPaths : [V x Dprev x Dnew]
|
||||
# +-----+
|
||||
# |0 0 0|
|
||||
# |0 0 0|-+
|
||||
|
@ -787,25 +887,10 @@ write = [
|
|||
else Splice (Constant (firstVal) : ConstantTensor (otherVals, (1 : N -1)), axis = axis1 /*, axis*/) # row vector: [ 0, -INF, -INF, -INF, ... ]
|
||||
].out
|
||||
|
||||
inputsOut = Pass (modelAsTrained.inputSequence)
|
||||
labelsOut = Pass (modelAsTrained.labelSequence)
|
||||
decodeOut = Pass (decode)
|
||||
#topNOut = Pass (topHyps)
|
||||
|
||||
PreviousOrDefault (x, initialValue) = # a delay node with initial value
|
||||
BS.Boolean.If (BS.Loop.IsFirst (x),
|
||||
/*then*/ initialValue,
|
||||
/*else*/ BS.Loop.Previous (x))
|
||||
#if BS.Loop.IsFirst (x)
|
||||
#then initialValue
|
||||
#else BS.Loop.Previous (x)
|
||||
|
||||
NextOrDefault (x, initialValue) = # a delay node with initial value
|
||||
BS.Boolean.If (BS.Loop.IsLast (x),
|
||||
/*then*/ initialValue,
|
||||
/*else*/ BS.Loop.Next (x))
|
||||
#if BS.Loop.IsLast (x)
|
||||
#then initialValue
|
||||
#else BS.Loop.Next (x)
|
||||
].m2
|
||||
|
||||
model = if beamDepth == 0 then top1DecodingModel (modelAsTrained)
|
||||
|
@ -814,8 +899,8 @@ write = [
|
|||
|
||||
].model)
|
||||
|
||||
#outputPath = "$OutputDir$/Write"
|
||||
outputPath = "-" # "-" will write to stdout; useful for debugging
|
||||
outputPath = $decodeOutputPath$
|
||||
#outputPath = "-" # "-" will write to stdout; useful for debugging
|
||||
#outputNodeNames = z1.out:labels1 # when processing one sentence per minibatch, this is the sentence posterior
|
||||
#outputNodeNames = network.beamDecodingModel.z1.out:labels1 # when processing one sentence per minibatch, this is the sentence posterior
|
||||
|
||||
|
@ -825,13 +910,13 @@ write = [
|
|||
#outputNodeNames = network.beamDecodingModel.labelsOut:network.beamDecodingModel.decodeOut #:topNOut
|
||||
|
||||
# joint:
|
||||
outputNodeNames = labelsOut:decodeOut:network.beamDecodingModel.labelsOut:network.beamDecodingModel.decodeOut
|
||||
outputNodeNames = inputsOut:labelsOut:decodeOut:network.beamDecodingModel.inputsOut:network.beamDecodingModel.labelsOut:network.beamDecodingModel.decodeOut
|
||||
|
||||
#outputNodeNames = labels1:network.beamDecodingModel.decode.out
|
||||
#outputNodeNames = labels1:network.beamDecodingModel.expandedPathScores
|
||||
#outputNodeNames = network.beamDecodingModel.pathScores:network.beamDecodingModel.traceback
|
||||
# network.beamDecodingModel.tokenSetScores
|
||||
# network.beamDecodingModel.pathScores
|
||||
#outputNodeNames = network.beamDecodingModel.tokens.score:network.beamDecodingModel.traceback
|
||||
# network.beamDecodingModel.topPathScores
|
||||
# network.beamDecodingModel.tokens.score
|
||||
# network.beamDecodingModel.traceback
|
||||
# network.beamDecodingModel.expandedPathScores
|
||||
|
||||
|
@ -840,12 +925,12 @@ write = [
|
|||
transpose = false
|
||||
labelMappingFile = "$ModelDir$/vocab.wl"
|
||||
#precisionFormat = "10"
|
||||
sequenceEpilogue = "\t// %s\n"
|
||||
#sequenceEpilogue = "\t// %s\n"
|
||||
]
|
||||
|
||||
#traceNodeNamesReal = network.beamDecodingModel.pathScores:network.beamDecodingModel.tokenSetScores:network.beamDecodingModel.expandedPathScores:network.beamDecodingModel.backPointers
|
||||
#traceNodeNamesCategory = network.beamDecodingModel.tokenSetScores
|
||||
#traceNodeNamesSparse = network.beamDecodingModel.tokenSetScores:network.beamDecodingModel.backPointers:decoderOutputEmbedded.x
|
||||
#traceNodeNamesReal = network.beamDecodingModel.tokens.score:network.beamDecodingModel.topPathScores:network.beamDecodingModel.expandedPathScores:network.beamDecodingModel.tokens.from
|
||||
#traceNodeNamesCategory = network.beamDecodingModel.topPathScores
|
||||
#traceNodeNamesSparse = network.beamDecodingModel.topPathScores:network.beamDecodingModel.tokens.from:decoderOutputEmbedded.x
|
||||
|
||||
minibatchSize = 8192 # choose this to be big enough for the longest sentence
|
||||
# need to be small since models are updated for each minibatch
|
||||
|
@ -895,7 +980,7 @@ write = [
|
|||
labelType = "category"
|
||||
labelDim = "$confVocabSize$"
|
||||
labelMappingFile = "$ModelDir$/vocab.wl"
|
||||
beginSequence = "</s>"
|
||||
beginSequence = "$startSymbol$" # "</s>"
|
||||
endSequence = "</s>"
|
||||
|
||||
#### Write definition ####
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
# TODO: must sort this out. For now, this is just shared stuff between training and decoding.
|
||||
|
||||
# these depend on beamDepth parameter for now, fix this
|
||||
TraceState (h, what) =
|
||||
if enableTracing
|
||||
then Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3, format=formatDense))
|
||||
else h
|
||||
TraceDense (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=21/*beamDepth*beamDepth*/, onlyUpToT=25, format=formatDense)
|
||||
else h
|
||||
TraceDenseTransposed (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=beamDepth*beamDepth, onlyUpToT=25, format=formatDenseTransposed)
|
||||
else h
|
||||
TraceOneHot (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, /*onlyUpToRow=beamDepth*beamDepth, onlyUpToT=15,*/ format=formatOneHot)
|
||||
else h
|
||||
TraceSparse (h, what) =
|
||||
if enableTracing
|
||||
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, /*onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3,*/ format=formatSparse)
|
||||
else h
|
||||
|
||||
Trace (node, say='', logFrequency=traceFrequency, logFirst=10, logGradientToo=false, onlyUpToRow=100000000, onlyUpToT=100000000, format=[], tag='') = new ComputationNode [
|
||||
operation = 'Trace' ; inputs = node
|
||||
]
|
||||
|
||||
formatDense = [
|
||||
type = "real"
|
||||
transpose = false
|
||||
precisionFormat = ".4"
|
||||
]
|
||||
formatDenseTransposed = [
|
||||
type = "real"
|
||||
transpose = true
|
||||
precisionFormat = ".4"
|
||||
]
|
||||
formatOneHot = [
|
||||
type = "category"
|
||||
transpose = false
|
||||
labelMappingFile = tracingLabelMappingFile
|
||||
]
|
||||
formatSparse = [
|
||||
type = "sparse"
|
||||
transpose = false
|
||||
labelMappingFile = tracingLabelMappingFile
|
||||
]
|
|
@ -1 +1 @@
|
|||
Subproject commit f785679a6bd5cc089b138b3c6bcb68e4b1f345ae
|
||||
Subproject commit f57be8b8caeddf385a44a14acc587f4e5168152d
|
|
@ -17,6 +17,7 @@
|
|||
#include "Config.h"
|
||||
#include "SimpleEvaluator.h"
|
||||
#include "SimpleOutputWriter.h"
|
||||
#include "Criterion.h"
|
||||
#include "BestGpu.h"
|
||||
#include "ScriptableObjects.h"
|
||||
#include "BrainScriptEvaluator.h"
|
||||
|
@ -121,8 +122,8 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
|
||||
int traceLevel = config(L"traceLevel", "0");
|
||||
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
|
||||
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
|
||||
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
|
||||
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
|
||||
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
|
||||
|
||||
ConfigArray evalNodeNames = config(L"evalNodeNames", "");
|
||||
vector<wstring> evalNodeNamesVector;
|
||||
|
@ -131,7 +132,7 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
evalNodeNamesVector.push_back(evalNodeNames[i]);
|
||||
}
|
||||
|
||||
std::vector<std::vector<double>> cvErrorResults;
|
||||
std::vector<std::vector<EpochCriterion>> cvErrorResults;
|
||||
std::vector<std::wstring> cvModels;
|
||||
|
||||
DataReader cvDataReader(readerConfig);
|
||||
|
@ -143,7 +144,7 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
|
||||
if (!fexists(cvModelPath))
|
||||
{
|
||||
fprintf(stderr, "model %ls does not exist.\n", cvModelPath.c_str());
|
||||
fprintf(stderr, "Model %ls does not exist.\n", cvModelPath.c_str());
|
||||
if (finalModelEvaluated || !fexists(modelPath))
|
||||
continue; // file missing
|
||||
else
|
||||
|
@ -158,7 +159,7 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
|
||||
SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
|
||||
|
||||
fprintf(stderr, "model %ls --> \n", cvModelPath.c_str());
|
||||
fprintf(stderr, "Model %ls --> \n", cvModelPath.c_str());
|
||||
auto evalErrors = eval.Evaluate(&cvDataReader, evalNodeNamesVector, mbSize[0], epochSize);
|
||||
cvErrorResults.push_back(evalErrors);
|
||||
|
||||
|
@ -167,16 +168,14 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
|
||||
// find best model
|
||||
if (cvErrorResults.size() == 0)
|
||||
{
|
||||
LogicError("No model is evaluated.");
|
||||
}
|
||||
|
||||
std::vector<double> minErrors;
|
||||
std::vector<int> minErrIds;
|
||||
std::vector<double> evalErrors = cvErrorResults[0];
|
||||
vector<double> minErrors;
|
||||
vector<int> minErrIds;
|
||||
vector<EpochCriterion> evalErrors = cvErrorResults[0];
|
||||
for (int i = 0; i < evalErrors.size(); ++i)
|
||||
{
|
||||
minErrors.push_back(evalErrors[i]);
|
||||
minErrors.push_back(evalErrors[i].Average());
|
||||
minErrIds.push_back(0);
|
||||
}
|
||||
|
||||
|
@ -185,9 +184,9 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
evalErrors = cvErrorResults[i];
|
||||
for (int j = 0; j < evalErrors.size(); j++)
|
||||
{
|
||||
if (evalErrors[j] < minErrors[j])
|
||||
if (evalErrors[j].Average() < minErrors[j])
|
||||
{
|
||||
minErrors[j] = evalErrors[j];
|
||||
minErrors[j] = evalErrors[j].Average();
|
||||
minErrIds[j] = i;
|
||||
}
|
||||
}
|
||||
|
@ -196,9 +195,7 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
fprintf(stderr, "Best models:\n");
|
||||
fprintf(stderr, "------------\n");
|
||||
for (int i = 0; i < minErrors.size(); ++i)
|
||||
{
|
||||
fprintf(stderr, "Based on Err[%d]: Best model = %ls with min err %.8g\n", i, cvModels[minErrIds[i]].c_str(), minErrors[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template void DoCrossValidate<float>(const ConfigParameters& config);
|
||||
|
|
|
@ -74,6 +74,7 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
|
|||
auto tensorShape = ProcessTensorShapeParameters(node, params, i, /*isImage=*/false, cnNodeType);
|
||||
|
||||
wstring dynamicAxis = node->GetOptionalParameter("dynamicAxis", "");
|
||||
// TODO: Map dynamicAxis from name to node at this point, where that node is memoized inside NDL.
|
||||
// first look for this node already existing in the network
|
||||
// BUGBUG: How does this set the dimensions then?
|
||||
if (m_net->NodeNameExists(name))
|
||||
|
|
|
@@ -263,8 +263,8 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)

InvalidArgument("Please specify parameters 'beginSequence' and 'endSequence'.");

if (!outputMappingFile.empty())
cerr << "Mapping file --> " << outputVocabFile << endl;
cerr << "Vocabulary file --> " << outputVocabFile << endl;
cerr << "Mapping file --> " << outputMappingFile << endl;
cerr << "Vocabulary file --> " << outputVocabFile << endl;
if (nbrCls > 0)
{
cerr << "Word-to-class map --> " << outputWord2Cls << endl;

@@ -321,7 +321,10 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)

str = str + endSequencePattern;

vstr = msra::strfun::split(str, "\t ");
for (int i = 1; i < vstr.size(); i++)
// This loop used to start with 1, assuming begin and end symbol are the same.
// If they are not, I am now counting them both. No idea whether that is correct w.r.t. the class algorithm.
bool startWith1 = !beginSequence.empty() && beginSequence == endSequence;
for (size_t i = startWith1 ? 1 : 0; i < vstr.size(); i++)
v_count[vstr[i]]++;
}
fp.close();
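The next hunk reorders the frequency-based class formation (the comment in it attributes the scheme to Mikolov). As a rough standalone sketch of that allocation step, with plain STL containers and hypothetical names: sort words by frequency, accumulate sqrt(freq/total), and advance the class id whenever the accumulated mass crosses the next 1/nbrCls boundary.

#include <algorithm>
#include <cmath>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Assign 'nbrCls' classes to words by descending frequency (Mikolov-style sqrt partitioning).
// Returns word -> class id. Names are illustrative, not the ones used in the code above.
std::map<std::string, size_t> AssignWordClasses(const std::map<std::string, double>& counts, size_t nbrCls)
{
    std::vector<std::pair<std::string, double>> byFreq(counts.begin(), counts.end());
    std::sort(byFreq.begin(), byFreq.end(),
              [](const std::pair<std::string, double>& a, const std::pair<std::string, double>& b)
              { return a.second > b.second; });

    double total = 0, dd = 0;
    for (const auto& wc : byFreq) total += wc.second;
    for (const auto& wc : byFreq) dd += std::sqrt(wc.second / total); // normalizer

    std::map<std::string, size_t> wordToClass;
    double df = 0;
    size_t classId = 0;
    for (const auto& wc : byFreq)
    {
        df += std::sqrt(wc.second / total) / dd;                    // cumulative sqrt-frequency mass in [0,1]
        if (df > 1) df = 1;
        if (df > 1.0 * (classId + 1) / nbrCls && classId < nbrCls)  // crossed the next boundary?
            classId++;
        wordToClass[wc.first] = classId;
    }
    return wordToClass;
}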
@ -355,93 +358,108 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
|
|||
vocabSize = wordCountLessCutoff;
|
||||
}
|
||||
|
||||
// form classes
|
||||
// Implements an algorithm by Mikolov --TODO: get the reference
|
||||
wrd2cls.Resize(vocabSize, 1);
|
||||
|
||||
typedef pair<string, double> stringdouble;
|
||||
unordered_map<string, double> removed; // note: std::map is supposedly faster
|
||||
double unkCount = 0; // TODO: why double?
|
||||
size_t size = 0;
|
||||
size_t actual_vocab_size = vocabSize - 1;
|
||||
priority_queue<stringdouble, vector<stringdouble>, compare_second<stringdouble>>
|
||||
q(compare_second<stringdouble>(), vector<stringdouble>(v_count.begin(), v_count.end()));
|
||||
while (size < actual_vocab_size && !q.empty()) // ==for (q=...; cond; q.pop())
|
||||
{
|
||||
size++;
|
||||
string word = q.top().first;
|
||||
double freq = q.top().second; // TODO: why double?
|
||||
if (word == unkWord)
|
||||
{
|
||||
unkCount += freq;
|
||||
actual_vocab_size++;
|
||||
}
|
||||
removed[q.top().first] = q.top().second;
|
||||
q.pop();
|
||||
}
|
||||
while (!q.empty())
|
||||
{
|
||||
unkCount += q.top().second;
|
||||
q.pop();
|
||||
}
|
||||
removed[unkWord] = unkCount;
|
||||
m_count.resize(removed.size());
|
||||
double total = 0;
|
||||
double dd = 0;
|
||||
if (nbrCls > 0)
|
||||
{
|
||||
for (const auto& iter : removed)
|
||||
total += iter.second;
|
||||
// form classes
|
||||
// Implements an algorithm by Mikolov --TODO: get the reference
|
||||
wrd2cls.Resize(vocabSize, 1);
|
||||
|
||||
for (const auto& iter : removed)
|
||||
dd += sqrt(iter.second / total);
|
||||
}
|
||||
|
||||
double df = 0;
|
||||
size_t class_id = 0;
|
||||
m_class.resize(removed.size());
|
||||
|
||||
priority_queue<stringdouble, vector<stringdouble>, compare_second<stringdouble>>
|
||||
p(compare_second<stringdouble>(), vector<stringdouble>(removed.begin(), removed.end()));
|
||||
while (!p.empty())
|
||||
{
|
||||
string word = p.top().first;
|
||||
double freq = p.top().second;
|
||||
typedef pair<string, double> stringdouble;
|
||||
unordered_map<string, double> removed; // note: std::map is supposedly faster
|
||||
double unkCount = 0; // TODO: why double?
|
||||
size_t size = 0;
|
||||
size_t actual_vocab_size = vocabSize - 1;
|
||||
priority_queue<stringdouble, vector<stringdouble>, compare_second<stringdouble>>
|
||||
q(compare_second<stringdouble>(), vector<stringdouble>(v_count.begin(), v_count.end()));
|
||||
while (size < actual_vocab_size && !q.empty()) // ==for (q=...; cond; q.pop())
|
||||
{
|
||||
size++;
|
||||
string word = q.top().first;
|
||||
double freq = q.top().second; // TODO: why double?
|
||||
if (word == unkWord)
|
||||
{
|
||||
unkCount += freq;
|
||||
actual_vocab_size++;
|
||||
}
|
||||
removed[q.top().first] = q.top().second;
|
||||
q.pop();
|
||||
}
|
||||
while (!q.empty())
|
||||
{
|
||||
unkCount += q.top().second;
|
||||
q.pop();
|
||||
}
|
||||
removed[unkWord] = unkCount;
|
||||
m_count.resize(removed.size());
|
||||
double total = 0;
|
||||
double dd = 0;
|
||||
if (nbrCls > 0)
|
||||
{
|
||||
df += sqrt(freq / total) / dd;
|
||||
if (df > 1)
|
||||
df = 1;
|
||||
for (const auto& iter : removed)
|
||||
total += iter.second;
|
||||
|
||||
if (df > 1.0 * (class_id + 1) / nbrCls && class_id < nbrCls)
|
||||
class_id++;
|
||||
for (const auto& iter : removed)
|
||||
dd += sqrt(iter.second / total);
|
||||
}
|
||||
|
||||
size_t wid = m_words.size();
|
||||
bool inserted = m_index.insert(make_pair(word, wid)).second;
|
||||
if (inserted)
|
||||
m_words.push_back(word);
|
||||
double df = 0;
|
||||
size_t class_id = 0;
|
||||
m_class.resize(removed.size());
|
||||
|
||||
m_count[wid] = freq;
|
||||
if (nbrCls > 0)
|
||||
m_class[wid] = class_id;
|
||||
p.pop();
|
||||
priority_queue<stringdouble, vector<stringdouble>, compare_second<stringdouble>>
|
||||
p(compare_second<stringdouble>(), vector<stringdouble>(removed.begin(), removed.end()));
|
||||
while (!p.empty())
|
||||
{
|
||||
string word = p.top().first;
|
||||
double freq = p.top().second;
|
||||
if (nbrCls > 0)
|
||||
{
|
||||
df += sqrt(freq / total) / dd;
|
||||
if (df > 1)
|
||||
df = 1;
|
||||
|
||||
if (df > 1.0 * (class_id + 1) / nbrCls && class_id < nbrCls)
|
||||
class_id++;
|
||||
}
|
||||
|
||||
size_t wid = m_words.size();
|
||||
bool inserted = m_index.insert(make_pair(word, wid)).second;
|
||||
if (inserted)
|
||||
m_words.push_back(word);
|
||||
|
||||
m_count[wid] = freq;
|
||||
if (nbrCls > 0)
|
||||
m_class[wid] = class_id;
|
||||
p.pop();
|
||||
}
|
||||
assert(m_words.size() == m_index.size() && m_words.size() == m_class.size());
|
||||
}
|
||||
else // no classes
|
||||
{
|
||||
for (let& iter : v_count)
|
||||
m_words.push_back(iter.first);
|
||||
sort(m_words.begin(), m_words.end());
|
||||
m_count.resize(m_words.size());
|
||||
for (size_t i = 0; i < m_words.size(); i++)
|
||||
m_count[i] = v_count.find(m_words[i])->second;
|
||||
}
|
||||
|
||||
assert(m_words.size() == m_count.size());
|
||||
|
||||
// write the files
|
||||
if (!outputMappingFile.empty())
|
||||
{
|
||||
msra::files::make_intermediate_dirs(s2ws(outputMappingFile));
|
||||
ofstream ofmapping(outputMappingFile.c_str());
|
||||
for (size_t i = 0; i < m_index.size(); i++)
|
||||
ofmapping << m_words[i] << endl;
|
||||
for (let& word : m_words)
|
||||
ofmapping << word << endl;
|
||||
ofmapping.close();
|
||||
cerr << "Created label-mapping file with " << v_count.size() << " entries.\n";
|
||||
}
|
||||
|
||||
msra::files::make_intermediate_dirs(s2ws(outputVocabFile));
|
||||
ofstream ofvocab(outputVocabFile.c_str());
|
||||
for (size_t i = 0; i < m_index.size(); i++)
|
||||
for (size_t i = 0; i < m_words.size(); i++)
|
||||
{
|
||||
if (nbrCls > 0)
|
||||
wrd2cls(i, 0) = (ElemType) m_class[i];
|
||||
|
|
|
@ -36,6 +36,7 @@ ParameterTensor(dims, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValu
|
|||
ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, init = 'fromLiteral', initFromLiteral = literal, learningRateMultiplier = 0.0)
|
||||
DynamicAxis(tag='') = new ComputationNode [ operation = 'DynamicAxis' ; /*plus the function args*/ ]
|
||||
Input(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
|
||||
# TODO: change from dynamicAxis by name to dynamicAxis being an actual object
|
||||
SparseInput(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
|
||||
ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]
|
||||
SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]
|
||||
|
@ -81,6 +82,7 @@ Times(A, B, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ;
|
|||
Logistic(label, probability, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability) /*plus the function args*/ ]
|
||||
WeightedLogistic(label, probability, instanceWeight, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability : instanceWeight) /*plus the function args*/ ]
|
||||
ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileDynamicAxis' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]
|
||||
ReconcileMBLayout = ReconcileDynamicAxis # back compat
|
||||
CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
|
||||
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
|
||||
Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
|
||||
|
@ -173,11 +175,14 @@ BS = [
|
|||
Constants = [
|
||||
Zero = ConstantTensor (0, (1))
|
||||
One = ConstantTensor (1, (1))
|
||||
OnesTensor (dims) = ConstantTensor (1, dims)
|
||||
# BUGBUG: ZeroesLike() would recreate the full dimension of x. Well, no need if it considers broadcasting. But still wrong if we want to broadcast a vector of different tensor dim.
|
||||
#ZeroesLike (x) = CastAs (x, Zero) // read: Cast<x>(Zero)
|
||||
#OnesLike (x) = CastAs (x, One)
|
||||
# CastAs() does not implement broadcasting
|
||||
ZeroesLike (x) = RowSlice (0, 1, x) .* Zero // hack: get one row of input and multiply with zero
|
||||
ZeroesLike (x) = SumColumnElements (RowSlice (0, 1, x) .* Zero) // hack: get one row of input and multiply with zero; double-hack: reduce extra tensor dims by SumCol
|
||||
ZeroSequenceLike = ZeroesLike # TODO: this should yield a scalar sequence, while ZeroesLike should be a tensor
|
||||
ZeroesLike1 (x) = x .* Zero # get a tensor of zeroes of same dim as x TODO: Do this as a C++ node (will be simple)
|
||||
OnesLike (x) = ZeroesLike (x) + One
|
||||
# is this like Sequences.Repeat?
|
||||
True = 1
|
||||
|
@ -216,6 +221,32 @@ Boolean = [
|
|||
##############################################################################
|
||||
|
||||
Sequences = [
|
||||
# broadcast a single-step sequence to a multi-step sequence
|
||||
BroadcastSequenceAs (type, data1) = [ # type=example sequence with desired length (outside of a loop), data1=1 time step
|
||||
ZeroSequenceLike (x) = RowSlice (0, 1, x) .* Constants.Zero # BUGBUG: SumColumnElements() has a CPU/GPU problem
|
||||
index = /*Constants.*/ZeroSequenceLike (type) # create an index sequence [ 0 0 0 ... ] of target length
|
||||
packedIndex = PackedIndex (data1, index) # convert into internal packed index w.r.t. 'data1'
|
||||
out = GatherPacked (packedIndex, data1) # copy data1[0] to all elements, total length like 'type'
|
||||
].out
|
||||
|
||||
# rolling window over past N samples
|
||||
# returns a record [ value=..., valid=... ]
|
||||
# This implementation is suboptimal in that it creates copies for the intermediate steps.
|
||||
PastValueWindow (N, in) = [
|
||||
delayLine[t:0..N-1] = [ # shift register for encoder, last N inputs
|
||||
value = if t == 0
|
||||
then in # delay 0: current value
|
||||
else Loop.PastValue (0, in, timeStep=t)
|
||||
valid = if t == 0
|
||||
then Constants.One
|
||||
else Constants.One - PastValue (1, Constants.ZeroesLike (in), timeStep=t, defaultHiddenActivation=1)
|
||||
]
|
||||
# delayLine[t].value = value of t steps in the past
|
||||
# delayLine[t].valid = true if we had a value t steps in the past
|
||||
value = Slice (-1, 0, axis=-1, SplitDimension (RowStack (array[0..N-1](t=>delayLine[t].value)), 1, N)) # [i, delay]
|
||||
valid = Slice (-1, 0, axis=-1, SplitDimension (RowStack (array[0..N-1](t=>delayLine[t].valid)), 1, N)) # [i, delay]
|
||||
]
|
||||
|
||||
# fold left/right: Reduce entire sequence by applying binaryOp, e.g. FoldL (Plus, 0, input)
|
||||
# LINQ calls this Aggregate; and may or may not specify the seed value; and allows a predicate
|
||||
FoldL (binaryOp, x0, x) = _Fold (PastValue, binaryOp, x0, x)
|
||||
|
@ -312,8 +343,24 @@ Loop = [
|
|||
_IsWithin (DelayFn/*PastValue or FutureValue*/, N, x) = DelayFn (0, Constants.ZeroesLike (x)/*false*/, timeStep=N, defaultHiddenActivation=Constants.True)
|
||||
|
||||
# opposite of Id's "next"
|
||||
Previous (x) = PastValue (0, x, timeStep=1)
|
||||
Next (x) = FutureValue (0, x, timeStep=1)
|
||||
Previous (x) = PastValue (0, x, timeStep=1)
|
||||
Next (x) = FutureValue (0, x, timeStep=1)
|
||||
|
||||
PreviousOrDefault (x, defaultValue=Constant (0)) = # a delay node with initial value --TODO: merge the two, then do in C++
|
||||
[
|
||||
flags = BS.Loop.IsFirst (x)
|
||||
out = BS.Boolean.If (flags,
|
||||
/*then*/ BS.Sequences.Scatter (flags, defaultValue),
|
||||
/*else*/ Previous (x))
|
||||
].out
|
||||
|
||||
NextOrDefault (x, defaultValue=Constant (0)) = # a delay node with initial value
|
||||
[
|
||||
flags = BS.Loop.IsLast (x)
|
||||
out = BS.Boolean.If (flags,
|
||||
/*then*/ BS.Sequences.Scatter (flags, defaultValue),
|
||||
/*else*/ Next (x))
|
||||
].out
|
||||
]
|
||||
|
||||
##############################################################################
|
||||
|
@ -323,8 +370,9 @@ Loop = [
|
|||
Parameters =
|
||||
[
|
||||
WeightParam (outputDim, inputDim) = Parameter (outputDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1)
|
||||
BiasParam (dim) = ParameterTensor (dim, init='fixedValue', value=0.0)
|
||||
ScalarParam() = Parameter (1, 1, init='fixedValue', value=0.0)
|
||||
DiagWeightParam (outputDim) = ParameterTensor ((outputDim), init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1) # meant to be applied elementwise
|
||||
BiasParam (dim) = ParameterTensor ((dim), init='fixedValue', value=0.0)
|
||||
ScalarParam() = BiasParam (1)
|
||||
|
||||
# route input through an extra scalar weight, for stabilization
|
||||
Stabilize (x, enabled=true) =
|
||||
|
@ -350,16 +398,17 @@ RNNs =
|
|||
// If we change this, we'd need to fix the LSTM end-to-end test.
|
||||
LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=false) =
|
||||
[
|
||||
#inputDim = x.dim # get dimension from 'x' (if this works, we can remove the inputDim1 parameter)
|
||||
_privateInnards = [ // encapsulate the privateInnards workings
|
||||
dh = prevState.h // previous values
|
||||
dc = prevState.c
|
||||
|
||||
// parameter macros--these carry their own weight matrices
|
||||
B() = Parameters.BiasParam(cellDim)
|
||||
B() = Parameters.BiasParam (cellDim)
|
||||
|
||||
W(v) = Parameters.WeightParam (cellDim, inputDim) * Parameters.Stabilize (v, enabled=enableSelfStabilization) // input-to-hidden
|
||||
H(h) = Parameters.WeightParam (cellDim, outputDim) * Parameters.Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
|
||||
C(c) = Parameters.WeightParam (cellDim, 1) .* Parameters.Stabilize (c, enabled=enableSelfStabilization) // cell-to-hiddden (note: applied elementwise)
|
||||
C(c) = Parameters.DiagWeightParam (cellDim) .* Parameters.Stabilize (c, enabled=enableSelfStabilization) // cell-to-hiddden (note: applied elementwise)
|
||||
|
||||
// note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
|
||||
it = Sigmoid (W(x) + B() + H(dh) + C(dc)) // input gate(t)
|
||||
|
@ -401,6 +450,28 @@ RNNs =
|
|||
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
|
||||
lstmState = LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
|
||||
].lstmState.h // that's the value we return
|
||||
|
||||
# same as RecurrentLSTMP but returns both h and c
|
||||
RecurrentLSTMP2 (inputDim, outputDim, cellDim, x, enableSelfStabilization=false) =
|
||||
[
|
||||
prevState =
|
||||
[
|
||||
h = Loop.Previous (lstmState.h) # hidden state(t-1)
|
||||
c = Loop.Previous (lstmState.c) # cell(t-1)
|
||||
]
|
||||
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
|
||||
lstmState = BS.RNNs.LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
|
||||
].lstmState // that's the value we return
|
||||
|
||||
# a stack of recurrent LSTMs (unidirectional)
|
||||
RecurrentLSTMP2Stack (input, inputDim, hiddenDims, cellDims, enableSelfStabilization=false) = [
|
||||
useStabilizer = enableSelfStabilization
|
||||
layer[i:0..Length (hiddenDims)-1] =
|
||||
RecurrentLSTMP2 (if i == 0 then inputDim else hiddenDims[i-1],
|
||||
hiddenDims[i], cellDims[i],
|
||||
if i == 0 then input else layer[i-1].h,
|
||||
enableSelfStabilization=useStabilizer)
|
||||
].layer
|
||||
]
|
||||
|
||||
##############################################################################
|
||||
|
|
|
@@ -184,7 +184,7 @@ void TestSequenceReader(const ConfigParameters& configBase)

MBLayoutPtr pMBLayout = make_shared<MBLayout>();
StreamMinibatchInputs matrices;
matrices.AddInput(featureNames[0], featuresMatrix, pMBLayout, TensorShape());
matrices.AddInput(labelNames[0], labelsMatrix , pMBLayout, TensorShape());
matrices.AddInput(labelNames[1] , labelsMatrix , pMBLayout, TensorShape());

auto start = std::chrono::system_clock::now();
int epochs = config("maxEpochs");
@@ -40,6 +40,7 @@ void DataReaderBase::SetMinibatchLayout(StreamMinibatchInputs& minibatch)

for (const auto& iter : minibatch)
{
assert(iter.second.pMBLayout == pMBLayout);
// TODO: This should be a runtime check, not an assert() that only runs in Debug.
UNUSED(iter);
}
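The TODO above asks for a check that also fires in Release builds; a minimal self-contained sketch of that replacement (stand-in Layout type, hypothetical function name):

#include <memory>
#include <stdexcept>
#include <vector>

struct Layout { }; // stand-in for MBLayout

// Debug-only assert() turned into a check that runs in all build flavors.
void VerifySharedLayout(const std::vector<std::shared_ptr<Layout>>& streams, const std::shared_ptr<Layout>& expected)
{
    for (const auto& p : streams)
        if (p != expected) // was: assert(p == expected);
            throw std::runtime_error("SetMinibatchLayout: inconsistent MBLayout across input streams.");
}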
@@ -148,47 +148,33 @@ void File::Init(const wchar_t* filename, int fileOptions)

// (wstring only for now; feel free to make this a template if needed)
/*static*/ wstring File::DirectoryPathOf(wstring path)
{
#ifdef WIN32
if (IsWindows8OrGreater())
#ifdef _WIN32
HRESULT hr;
path = msra::strfun::ReplaceAll<wstring>(path, L"/", L"\\"); // Win32 accepts forward slashes, but it seems that PathRemoveFileSpec() does not
if (IsWindows8OrGreater()) // PathCchRemoveFileSpec() only available on Windows 8+
{
typedef HRESULT(*PathCchRemoveFileSpecProc)(_Inout_updates_(_Inexpressible_(cchPath)) PWSTR, _In_ size_t);
HINSTANCE hinstLib = LoadLibrary(TEXT("api-ms-win-core-path-l1-1-0.dll"));
if (hinstLib == nullptr)
RuntimeError("DirectoryPathOf: LoadLibrary() unexpectedly failed.");
PathCchRemoveFileSpecProc PathCchRemoveFileSpec = reinterpret_cast<PathCchRemoveFileSpecProc>(GetProcAddress(hinstLib, "PathCchRemoveFileSpec"));
if (!PathCchRemoveFileSpec)
RuntimeError("DirectoryPathOf: GetProcAddress() unexpectedly failed.");

HINSTANCE hinstLib;
PathCchRemoveFileSpecProc ProcAdd;
BOOL fFreeResult = FALSE;
// this is the actual function call we care about
hr = PathCchRemoveFileSpec(&path[0], path.size());

FreeLibrary(hinstLib);
}
else // on Windows 7-, use older PathRemoveFileSpec() instead
hr = PathRemoveFileSpec(&path[0]);

hinstLib = LoadLibrary(TEXT("api-ms-win-core-path-l1-1-0.dll"));
if (hinstLib != nullptr)
{
ProcAdd = reinterpret_cast<PathCchRemoveFileSpecProc>(GetProcAddress(hinstLib, "PathCchRemoveFileSpec"));
if (NULL != ProcAdd)
{
auto hr = (ProcAdd)(&path[0], path.size());
if (hr == S_OK) // done
path.resize(wcslen(&path[0]));
else if (hr == S_FALSE) // nothing to remove: use .
path = L".";
}
else
{
LogicError("DirectoryPathOf: GetProcAddress() unexpectedly failed.");
}

fFreeResult = FreeLibrary(hinstLib);
}
else
{
LogicError("DirectoryPathOf: LoadLibrary() unexpectedly failed.");
}
}
else
{
auto hr = PathRemoveFileSpec(&path[0]);
if (hr != 0) // done
path.resize(wcslen(&path[0]));
else
path = L".";
}
RuntimeError("DirectoryPathOf: Path(Cch)RemoveFileSpec() unexpectedly failed with 0x%08x.", (unsigned int)hr);
#else
auto pos = path.find_last_of(L"/");
if (pos != path.npos)
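The restructured Windows branch above loads PathCchRemoveFileSpec dynamically on Windows 8+ and falls back to PathRemoveFileSpec on older systems. As an illustration of the intended result only, a portable sketch of the behavior of the non-Windows branch (hypothetical free function, not the class member):

#include <string>

// Return the directory part of 'path', or "." if there is no directory component.
// Mirrors the #else (POSIX) branch above; a path directly under the root ("/file")
// would need special-casing that is omitted here.
std::wstring DirectoryPathOfSketch(std::wstring path)
{
    auto pos = path.find_last_of(L"/");
    if (pos != std::wstring::npos)
        path.erase(pos); // "dir/file" -> "dir"
    else
        path = L".";     // no separator: current directory
    return path;
}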
@@ -264,7 +250,7 @@ File::~File(void)

{
if (m_pcloseNeeded)
{
// TODO: Check for error code and throw if !std::uncaught_exception()
// TODO: Check for error code and throw if !std::uncaught_exception()
_pclose(m_file);
}
else if (m_file != stdin && m_file != stdout && m_file != stderr)
@@ -384,8 +384,8 @@ public:

{
// look for closing brace and also for another opening brace
// Inside strings we only accept the closing quote, and ignore any braces inside.
current = str.find_first_of(braceStack.back() == '"' ? "\"" : charsToLookFor, current + 1); //
if (current == string::npos) // none found: done or error
current = str.find_first_of(braceStack.back() == '"' ? "\"" : charsToLookFor, current + 1);
if (current == string::npos) // none found: error
break;
char brace = str[current];
// found the expected closing brace?

@@ -406,7 +406,7 @@ public:

}
}
// hit end before everything was closed: error
RuntimeError("no closing bracket found in parameters");
RuntimeError("no closing %c found in parameters", braceStack.back());
//RuntimeError("no closing bracket found in parameters (opening bracket at offset %d)\n%s", (int)tokenStart, str.substr(tokenStart).c_str());
}
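The parser change above reports which delimiter was left unclosed (braceStack.back()) instead of a generic message. A compact sketch of the same scanning idea, with hypothetical names and only braces, brackets, and quotes handled:

#include <stdexcept>
#include <string>
#include <vector>

// Scan 'str' and throw naming the first opening delimiter that is never closed.
// Inside a quoted string only the closing quote is honored, as in the parser above.
void CheckDelimitersClosed(const std::string& str)
{
    std::vector<char> braceStack;
    for (size_t i = 0; i < str.size(); i++)
    {
        char c = str[i];
        if (!braceStack.empty() && braceStack.back() == '"')
        {
            if (c == '"') braceStack.pop_back(); // end of string literal
        }
        else if (c == '{' || c == '[' || c == '"')
            braceStack.push_back(c);
        else if (c == '}' || c == ']')
        {
            if (braceStack.empty()) throw std::runtime_error(std::string("unexpected ") + c);
            braceStack.pop_back(); // pairing of { vs } not checked in this sketch
        }
    }
    if (!braceStack.empty())
        throw std::runtime_error(std::string("no closing ") + braceStack.back() + " found in parameters");
}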
@@ -67,21 +67,21 @@ public:

Input() {} // some STL classes need this for general happiness

// helper for typecasting the matrix pointer
template<class ElemType>
template<class ElemType>
Matrix<ElemType>& GetMatrix(const wchar_t* name/*for debugging only*/ = L"(unknown)") const
{
{
assert(matrix);
auto* matrixp = dynamic_cast<Matrix<ElemType>*>(matrix.get());
if (!matrixp)
{
// print a rather rich error to track down a regression failure
auto isFloat = !!dynamic_cast<Matrix<float>*> (matrix.get());
if (!matrixp)
{
// print a rather rich error to track down a regression failure
auto isFloat = !!dynamic_cast<Matrix<float>*> (matrix.get());
auto isDouble = !!dynamic_cast<Matrix<double>*>(matrix.get());
LogicError("GetMatrix<%s>: Attempted to access input stream '%ls' with wrong precision, got %s {%d,%d} instead of %s.",
typeid(ElemType).name(), name, typeid(matrix.get()).name(), (int)isFloat, (int)isDouble, typeid(Matrix<ElemType>*).name());
}
return *matrixp;
}
return *matrixp;
}
};

private:
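The accessor above downcasts a type-erased matrix pointer to the requested element type and fails loudly on a precision mismatch. A self-contained sketch of that pattern, with stand-in types (MatrixBase/MatrixOf are assumptions, not the real Matrix classes):

#include <memory>
#include <stdexcept>
#include <string>
#include <typeinfo>

struct MatrixBase { virtual ~MatrixBase() = default; };     // type-erased base, assumed here
template <class ElemType> struct MatrixOf : MatrixBase { }; // hypothetical stand-in for Matrix<ElemType>

// Same shape as Input::GetMatrix<ElemType>() above: downcast or fail with a descriptive error.
template <class ElemType>
MatrixOf<ElemType>& GetTyped(const std::shared_ptr<MatrixBase>& matrix)
{
    auto* matrixp = dynamic_cast<MatrixOf<ElemType>*>(matrix.get());
    if (!matrixp)
        throw std::logic_error(std::string("GetMatrix<") + typeid(ElemType).name() +
                               ">: input stream accessed with wrong precision");
    return *matrixp;
}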
@@ -166,6 +166,7 @@ struct MBLayout

m_columnsValidityMask = std::move(other->m_columnsValidityMask);
m_writable = other->m_writable;

m_axisName = std::move(other->m_axisName);
}

@@ -254,9 +255,11 @@ public:

size_t GetNumTimeSteps() const { return m_numTimeSteps; }
size_t GetNumParallelSequences() const { return m_numParallelSequences; }
const std::wstring GetAxisName() const { return m_axisName; }
void SetAxisName(const std::wstring& axisName) { m_axisName = axisName; }

// axis names are for now only a debugging aid
// In the future, there will be a mechanism to denote that axes are meant to be the same.
const wchar_t* GetAxisName() const { return m_axisName.c_str(); }
void SetAxisName(const std::wstring& name) { m_axisName = name; }
void SetUniqueAxisName(std::wstring name) // helper for constructing
{
static std::map<std::wstring, size_t> nameIndices;

@@ -554,7 +557,9 @@ private:

mutable bool m_writable;

// The axis this MBLayout represents.
// For now only a string meant for debugging.
std::wstring m_axisName;

public:

// special accessor for sequence training --TODO: must be replaced by a different mechanism
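SetUniqueAxisName() is cut off above; only its static name-to-index map is visible. A hypothetical reconstruction of how such a helper could disambiguate repeated axis names (assumption: the first use keeps the name, later uses get a numeric suffix):

#include <map>
#include <string>

// Give each axis a unique name by appending a running index when the same
// base name is requested again ("x", "x1", "x2", ...). Sketch only.
std::wstring MakeUniqueAxisName(std::wstring name)
{
    static std::map<std::wstring, size_t> nameIndices; // same trick as the static map shown above
    size_t index = nameIndices[name]++;                // 0 on first use
    if (index > 0)
        name += std::to_wstring(index);
    return name;
}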
@@ -776,8 +776,8 @@ void ComputationNetwork::DescribeNetworkUsingDot(list<ComputationArc>& arcs,

for (const auto& x : allnodes)
{
line.clear();
line = msra::strfun::wstrprintf(L" \"%ls\" [ label = \"%ls [%s%s]\\n%ls\" ] ;\n",
x->GetName().c_str(), x->GetName().c_str(), string(x->GetSampleLayout()).c_str(), x->HasMBLayout() ? " x *" : "",
line = msra::strfun::wstrprintf(L" \"%ls\" [ label = \"%ls [%s%ls]\\n%ls\" ] ;\n",
x->GetName().c_str(), x->GetName().c_str(), string(x->GetSampleLayout()).c_str(), x->GetMBLayoutAxisString().c_str(),
x->OperationName().c_str());
fstream << line;
}
@@ -52,9 +52,10 @@ public:

m_randomSeedOffset(0),
m_isCompiled(false),
m_areMatricesAllocated(false),
m_pMBLayoutOfNetwork(make_shared<MBLayout>(1,0, L"*")),
m_pMBLayoutOfNetwork(make_shared<MBLayout>(1, 0, L"*")),
m_environment(make_shared<ComputationEnvironment>())
{
//m_pMBLayoutOfNetwork->SetAxisName(L"T");
}

ComputationNetwork(DEVICEID_TYPE deviceId)

@@ -706,10 +707,9 @@ public:

// evaluation
// -----------------------------------------------------------------------

// zeroes out all gradients except the root itself
// TODO: why not the root?
// zeroes out all gradients except the root itself (since its gradient is set from outside rather than propagated down)
// (Note that inside the nodes this only really sets a flag to do it later when needed, but that's not our concern.)
void ZeroGradients(const ComputationNodeBasePtr& rootNode)
void ZeroInputGradients(const ComputationNodeBasePtr& rootNode)
{
for (auto& node : GetAllNodesForRoot(rootNode))
node->ZeroGradientsOfInputs();
@@ -111,6 +111,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri

// TODO: DiagTimes is also an alias of ElementTimes; current separate implementation is unnecessary.
else if (nodeType == L"PerDimMeanVarNormalizationNode") return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"PerDimMeanVarDeNormalizationNode") return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"ReconcileMBLayout") return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"RowElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"RowSlice") return New<SliceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"Scale") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);

@@ -194,6 +195,7 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat

return net.AddNodeToNetWithElemType(New<LearnableParameter<ElemType>>(net.GetDeviceId(), paramName, tensorShape));
}

// TODO: change these to take an actual object instead of a name for dynamicAxis
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName)
{
@@ -43,18 +43,17 @@ void ComputationNetwork::ForwardProp(const ComputationNodeBasePtr rootNode)

GetNestedNetwork(rootNode)->ForwardProp(FrameRange(nullptr));
}

// set the gradient matrix of a node to an 1x1 matrix containing 1.0
// Returns false if the node is not a ComputationNode<ElemType>.
// set the gradient matrix of a (root) node 1.0
// Returns false if the node is not a ComputationNode<ElemType>; see Backprop() below for intended use.
template <class ElemType>
static bool SetGradientToScalarOne(ComputationNodeBasePtr nodep)
static bool SetRootGradientToScalarOne(ComputationNodeBasePtr nodep)
{
auto node = dynamic_pointer_cast<ComputationNode<ElemType>>(nodep);
bool hasMatchingType = (node != nullptr);
if (hasMatchingType)
{
Matrix<ElemType>& grad = node->Gradient();
grad.Resize(node->Value());
grad.SetValue((ElemType) 1.0);
// reset the root gradient to 1
node->ResetGradient(1);
}
return hasMatchingType;
}

@@ -69,13 +68,13 @@ void ComputationNetwork::Backprop(const ComputationNodeBasePtr rootNode) // trai

if (!Environment().IsTraining())
LogicError("Backprop: Requires network is to be in training mode.");

// reset all gradients to zero (actually, internally, this is lazy, but we don't care here)
ZeroGradients(rootNode);

// initialize root gradient with a scalar value of 1.0
if (!SetGradientToScalarOne<float>(rootNode) && !SetGradientToScalarOne<double>(rootNode))
if (!SetRootGradientToScalarOne<float>(rootNode) && !SetRootGradientToScalarOne<double>(rootNode))
LogicError("Backprop: Training criterion is neither ComputationNode<float> nor ComputationNode<double>.");

// reset all gradients below rootNode to zero (actually, internally, this is lazy, but we don't care here)
ZeroInputGradients(rootNode);

// backpropagate through the network
GetNestedNetwork(rootNode)->Backprop(FrameRange(nullptr), true, true);
}

@@ -134,6 +133,10 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con

{
for (auto& node : m_nestedNodes)
{
#if 0
if (dynamic_pointer_cast<LearnableParameter<float>>(node))
dynamic_pointer_cast<ComputationNode<float>>(node)->DebugLogMinibatch();
#endif
if (node->IsOutOfDateWrtInputs())
{
node->BeginForwardProp();

@@ -189,8 +192,9 @@ ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(con

for (auto& node : m_nestedNodes)
{
if (node->GetMBLayout() != GetMBLayout())
LogicError("Evaluate: all nodes inside a recurrent loop must have a layout that is identical; mismatch found for nodes '%ls' vs. '%ls'",
node->NodeName().c_str(), m_nestedNodes[0]->NodeName().c_str());
LogicError("Evaluate: All nodes inside a recurrent loop must have a layout that is identical; mismatch found for nodes '%ls' (%ls) vs. '%ls' (%ls)",
node ->NodeName().c_str(), node ->GetMBLayoutAxisString().c_str(),
m_nestedNodes[0]->NodeName().c_str(), m_nestedNodes[0]->GetMBLayoutAxisString().c_str());
}

// tell all that loop is about to commence
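Taken together, the Backprop() changes above make the order explicit: resize and set the root gradient to 1 (ResetGradient(1)), clear the lazy-init flags of the input gradients below the root, then run the backward pass. A schematic sketch of that driver, with stand-in types rather than the real ComputationNode/ComputationNetwork classes:

#include <memory>
#include <vector>

// Minimal stand-ins for illustration only.
struct Node
{
    std::vector<std::shared_ptr<Node>> inputs;
    bool gradientInitialized = false;
    double gradient = 0;

    void ResetGradient(double val) { gradient = val; gradientInitialized = true; }
    void ZeroGradientsOfInputs() // only clears the lazy-init flags, as in the diff
    {
        for (auto& in : inputs)
            in->gradientInitialized = false;
    }
};

// Order used by Backprop() after this change (sketch only):
void BackpropSketch(const std::shared_ptr<Node>& root, const std::vector<std::shared_ptr<Node>>& allNodes)
{
    root->ResetGradient(1);  // 1. root gradient <- 1.0
    for (auto& n : allNodes) // 2. clear input-gradient flags below the root
        n->ZeroGradientsOfInputs();
    // 3. ... run the actual backward pass over the nested network (omitted here)
}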
@@ -525,7 +529,7 @@ void ComputationNetwork::ResetMBLayouts()

for (const auto& node : GetAllNodesForRoot(nullptr))
node->LinkToMBLayout(nullptr);

// DynamicAxis nodes are (apart from the network-wide MBLayout) the main holders of MBLayouts. Initialize them.
// DynamicAxis nodes are (apart from the soon-to-be-deprecated network-wide MBLayout) the main holders of MBLayouts. Initialize them.
// The only other instances are nodes that change the MBLayout, like WhereNode.
for (auto node : GetNodesWithType(L"DynamicAxis"))
node->LinkToMBLayout(make_shared<MBLayout>(1, 0, node->GetName()));

@@ -533,6 +537,7 @@ void ComputationNetwork::ResetMBLayouts()

// This is now initialized inside of the Input nodes, with the proper connections.
for (auto node : InputNodes(nullptr))
{
// TODO: use if (!Is<ITakesDynamicAxis>(node))...
auto n = dynamic_pointer_cast<ITakesDynamicAxis>(node);
if (!n)
LogicError("Expected %ls to implement ITakesDynamicAxis, but it doesn't.", node->NodeDescription().c_str());

@@ -704,7 +709,7 @@ size_t ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, boo

{
unchanged = !ValidateNode(node, isFinalValidationPass);
string updatedPrototype = node->FormatOperationPrototype("");
#if 1 // print prototype in final validation pass
#if 0 // print prototype in final validation pass. Problematic for tracking down validation errors in loops.
unchanged;
if (isFinalValidationPass)
#else // print prototype upon every change (useful for debugging)
@@ -156,9 +156,16 @@ void ComputationNetwork::ConstructFromRoots(DEVICEID_TYPE deviceId, deque<Comput

// not in the cache yet: create it (or not if no such member)
void /*CustomConfigRecord::*/ ComputationNetwork::LazyCreateConfigMember(const wstring& id) const /*override*/
{
let iter = m_nameToNodeMap.find(id);
auto iter = m_nameToNodeMap.find(id);
if (iter == m_nameToNodeMap.end())
return; // no such node
{
// workaround to allow to access members with '.' inside: change to _
for (iter = m_nameToNodeMap.begin(); iter != m_nameToNodeMap.end(); iter++)
if (msra::strfun::ReplaceAll<wstring>(iter->first, L".", L"_") == id)
break;
if (iter == m_nameToNodeMap.end())
return; // no such node
}
const ComputationNodeBasePtr& node = iter->second;
// TODO: What is the expressionPath?
let& nodeName = node->NodeName(); // failFn lambda below holds a copy of the name for the error message. Let's not hold an unneccessary shared_ptr to the node, risking cycles & stuff.

@@ -168,16 +175,20 @@ void /*CustomConfigRecord::*/ ComputationNetwork::LazyCreateConfigMember(const w

vector<wstring> /*IConfigRecord::*/ ComputationNetwork::GetMemberIds() const
{
vector<wstring> nodeNames;
set<wstring> nodeNames;
for (let& iter : m_nameToNodeMap)
{
const ComputationNodeBasePtr& node = iter.second;
const wstring& nodeName = node->NodeName();
if (nodeName.find_first_of(L".[$")) // only expose the top-level names
wstring nodeName = node->NodeName();
if (nodeName.find_first_of(L"$") != nodeName.npos) // skip non-top-level names
continue;
nodeNames.push_back(nodeName);
// temp solution for composites: use _ instead of .
nodeName = msra::strfun::ReplaceAll<wstring>(nodeName, L".", L"_");
if (nodeName.find_first_of(L".[") != nodeName.npos) // skip composite names
continue;
nodeNames.insert(nodeName);
}
return nodeNames;
return vector<wstring>(nodeNames.begin(), nodeNames.end());
}

// ===================================================================
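The GetMemberIds() change above exposes dotted node names through the config interface by rewriting '.' to '_' and de-duplicating via a set, so a node named decoder.W becomes reachable as decoder_W. A tiny sketch of just that mangling rule (std::wstring only, hypothetical function name):

#include <set>
#include <string>
#include <vector>

// Mirror of the exposure rule above: skip generated names containing '$', rewrite '.' to '_',
// skip anything that still looks composite, and return a de-duplicated list.
std::vector<std::wstring> ExposedMemberIds(const std::vector<std::wstring>& nodeNames)
{
    std::set<std::wstring> ids;
    for (std::wstring name : nodeNames)
    {
        if (name.find_first_of(L"$") != std::wstring::npos)
            continue;                  // skip non-top-level names
        for (auto& ch : name)
            if (ch == L'.') ch = L'_'; // temp solution for composites
        if (name.find_first_of(L".[") != std::wstring::npos)
            continue;                  // still composite: skip
        ids.insert(name);
    }
    return std::vector<std::wstring>(ids.begin(), ids.end());
}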
@ -31,8 +31,15 @@ void ComputationNode<ElemType>::Backprop(const FrameRange& fr, bool childrenInTh
|
|||
// after nodes that propagate outside of the loop, and thus, in the last
|
||||
// time step of the sequence, have not yet received a gradient from a parent
|
||||
// and thus may not have had their gradient matrices allocated.
|
||||
//if (m_needsGradient)
|
||||
// LazyZeroGradient(); // set gradient to 0 if this is the first time
|
||||
#if 1 // keep enabled once this works
|
||||
#if 1 // log the cases where this is needed
|
||||
if (m_needsGradient && !m_gradientInitialized)
|
||||
//LogicError("%ls %ls operation: Backprop called with uninitialized gradient.", NodeName().c_str(), OperationName().c_str());
|
||||
fprintf(stderr, "%ls %ls operation: Initializing gradient out of line.\n", NodeName().c_str(), OperationName().c_str());
|
||||
#endif
|
||||
if (m_needsGradient)
|
||||
LazyZeroGradient(); // set gradient to 0 if this is the first time
|
||||
#endif
|
||||
|
||||
if (fr.IsAllFrames() && IsPartOfLoop() && childrenInThisLoop)
|
||||
LogicError("%ls %ls operation: Backprop called with whole-batch FrameRange on node that participates in a loop", NodeName().c_str(), OperationName().c_str());
|
||||
|
@ -139,11 +146,11 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool all
|
|||
{
|
||||
size_t dim1 = shape1[k];
|
||||
// BUGBUG: We must consider the allowBroadcast flag here.
|
||||
if (dims[k] == 1) // is [0] broadcasting?
|
||||
if (dims[k] <= 1 && dim1 != 0) // is [0] broadcasting (1) or unspecified (0)?
|
||||
dims[k] = dim1; // then use dimension we broadcast to
|
||||
else if (dim1 == 1) // if [1] is broadcasting
|
||||
; // dims is already correct
|
||||
else if (isFinalValidationPass && dim1 != dims[k]) // no broadcasting: they must match
|
||||
else if (dim1 <= 1 && dims[k] != 0) // if [1] is broadcasting or unspecified
|
||||
; // then dims is already correct
|
||||
else if (isFinalValidationPass && dim1 != dims[k]) // no broadcasting or unspecified: they must match
|
||||
InvalidArgument("%ls: Input dimensions [%s] and [%s] are not compatible.",
|
||||
NodeDescription().c_str(), string(shape0).c_str(), string(shape1).c_str());
|
||||
}
|
||||
|
@ -348,7 +355,7 @@ const std::string ComputationNodeBase::ShapeDescription() const
|
|||
return msra::strfun::strprintf("[%s%s%ls]",
|
||||
string(m_sampleLayout).c_str(),
|
||||
HasMBLayout() ? " x " : "",
|
||||
HasMBLayout() ? GetMBLayout()->GetAxisName().c_str() : L"");
|
||||
HasMBLayout() ? GetMBLayout()->GetAxisName() : L"");
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -507,6 +514,7 @@ void ComputationNode<ElemType>::WriteMinibatchWithFormatting(FILE* f, const Fram
|
|||
{
|
||||
if (formatChar == 'f') // print as real number
|
||||
{
|
||||
if (dval == 0) dval = fabs(dval); // clear the sign of a negative 0, which are produced inconsistently between CPU and GPU
|
||||
fprintfOrDie(f, valueFormatString.c_str(), dval);
|
||||
}
|
||||
else if (formatChar == 'u') // print category as integer index
|
||||
|
@ -707,7 +715,11 @@ using namespace Microsoft::MSR::CNTK;
|
|||
template <>
|
||||
shared_ptr<Object> MakeRuntimeObject<ComputationNodeBase>(const IConfigRecordPtr configp)
|
||||
{
|
||||
return NewComputationNodeFromConfig(configp);
|
||||
let node = NewComputationNodeFromConfig(configp);
|
||||
// temporarily disabling this, as it caused a test to fail:
|
||||
//if (!node->Is<IRecurrentNode>())
|
||||
// node->Validate(/*isFinalValidationPass*/false); // do an initial validation, so that we have access to dimensions
|
||||
return node;
|
||||
}
|
||||
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<ComputationNodeBase> registerComputationNode(L"ComputationNode");
|
||||
|
|
|
@ -482,6 +482,18 @@ public:
|
|||
const MBLayoutPtr& GetMBLayout() const { return m_pMBLayout; }
|
||||
bool HasMBLayout() const { return !!m_pMBLayout; }
|
||||
|
||||
// for logging: get the string fragment for displaying the dimension
|
||||
std::wstring GetMBLayoutAxisString() const
|
||||
{
|
||||
if (!HasMBLayout())
|
||||
return L"";
|
||||
const wstring& axisName = GetMBLayout()->GetAxisName();
|
||||
if (axisName.empty())
|
||||
return L" x *";
|
||||
else
|
||||
return L" x " + axisName;
|
||||
}
|
||||
|
||||
protected: public: // ...the following should be protected, but nodes inquire about their children, requiring public access
|
||||
|
||||
size_t GetNumParallelSequences() const
|
||||
|
@ -685,6 +697,14 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
// reset gradients of a node's inputs
|
||||
// This really only clears the lazy-init flags (LazyZeroGradient() actually clears the values lazily).
|
||||
void /*ComputationNodeBase::*/ ZeroGradientsOfInputs()
|
||||
{
|
||||
for (size_t i = 0; i < m_inputs.size(); i++)
|
||||
Input(i)->m_gradientInitialized = false;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// masking
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -695,8 +715,6 @@ public:
|
|||
virtual void InvalidateMissingValueColumns(const FrameRange&) = 0;
|
||||
virtual void InvalidateMissingGradientColumns(const FrameRange&) = 0;
|
||||
|
||||
virtual void ZeroGradientsOfInputs() = 0;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// memory sharing
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -1218,7 +1236,7 @@ public:
|
|||
return GradientFor(fr);
|
||||
}
|
||||
// tensor version of the above functions
|
||||
TensorView<ElemType> DataTensorFor(Matrix<ElemType>& data, size_t rank, const FrameRange& fr)
|
||||
TensorView<ElemType> DataTensorFor(const MatrixBasePtr& data, size_t rank, const FrameRange& fr)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -1231,11 +1249,11 @@ public:
|
|||
}
|
||||
TensorView<ElemType> ValueTensorFor(size_t rank, const FrameRange& fr)
|
||||
{
|
||||
return DataTensorFor(Value(), rank, fr);
|
||||
return DataTensorFor(ValuePtr(), rank, fr);
|
||||
}
|
||||
TensorView<ElemType> GradientTensorFor(size_t rank, const FrameRange& fr)
|
||||
{
|
||||
return DataTensorFor(Gradient(), rank, fr);
|
||||
return DataTensorFor(GradientPtr(), rank, fr);
|
||||
}
|
||||
|
||||
// TODO: Are all these meant to read out a scalar? Then rename and verify dimensions.
|
||||
|
@ -1300,6 +1318,7 @@ public:
|
|||
void UpdateFunctionValuesSize()
|
||||
{
|
||||
UpdateDataSize(Value());
|
||||
Value().CollapseDataLocationAfterWriting(); // actually before writing, should change the name
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -1375,14 +1394,8 @@ public:
|
|||
// TODO: move to -Base (or -Network?)
|
||||
void Backprop(const FrameRange& fr, bool childrenInThisLoop, bool childrenInOuterLoop) override;
|
||||
|
||||
// TODO: why of the inputs, and not the node itself?
|
||||
void /*ComputationNodeBase::*/ ZeroGradientsOfInputs() override // clears the lazy-init flags (LazyZeroGradient() actually clears the values lazily)
|
||||
{
|
||||
for (size_t i = 0; i < m_inputs.size(); i++)
|
||||
Input(i)->m_gradientInitialized = false;
|
||||
}
|
||||
|
||||
// lazy resetting of gradient
|
||||
// This performs the actual zeroing out.
|
||||
void LazyZeroGradient()
|
||||
{
|
||||
if (!m_needsGradient)
|
||||
|
@ -1391,8 +1404,14 @@ public:
|
|||
if (m_gradientInitialized)
|
||||
return;
|
||||
|
||||
ResetGradient(0);
|
||||
}
|
||||
|
||||
// resize and reset this node's gradient to a given value (normally 0, 1 for root)
|
||||
void ResetGradient(ElemType val)
|
||||
{
|
||||
UpdateDataSize(Gradient());
|
||||
Gradient().SetValue(0);
|
||||
Gradient().SetValue(val);
|
||||
|
||||
m_gradientInitialized = true;
|
||||
}
|
||||
|
@ -1503,8 +1522,45 @@ public:
|
|||
const std::string& sampleSeparator, std::string valueFormatString,
|
||||
bool outputGradient = false) const;
|
||||
|
||||
// simple helper to log the content of a minibatch
|
||||
void DebugLogMinibatch(bool outputGradient = false) const
|
||||
{
|
||||
fprintf(stderr, "<<<<<<\n"); // some prologue and epilogue so that we can use diff -c1 to see the node name
|
||||
fprintf(stderr, "<<<<<<\n");
|
||||
fprintf(stderr, "DebugLogMinibatch: <<<<< %ls%s >>>>>\n", NodeName().c_str(), outputGradient ? " (gradient)" : "");
|
||||
WriteMinibatchWithFormatting(stderr, FrameRange(), 8, 10, false/*transpose*/, /*isCategoryLabel=*/false, /*isSparse=*/false, std::vector<std::string>(),
|
||||
""/*sequenceSeparator*/, " "/*sequencePrologue*/, "\n"/*sequenceEpilogue*/, " "/*elementSeparator*/, "\n "/*sampleSeparator*/,
|
||||
"%.8f"/*valueFormatString*/, outputGradient);
|
||||
fprintf(stderr, ">>>>>>\n");
|
||||
fprintf(stderr, ">>>>>>\n");
|
||||
}
|
||||
|
||||
void Trace()
|
||||
{
|
||||
#if 0
|
||||
static const std::set<std::wstring> toLog{
|
||||
L"labelSentenceStartEmbedded",
|
||||
L"delayedDecoderFeedback.h.x",
|
||||
L"delayedDecoderFeedback.h.flags",
|
||||
L"delayedDecoderFeedback.h.out.thenVal.h.indexSequence.h.indexSequence.h",
|
||||
L"delayedDecoderFeedback.h.out.thenVal.h.indexSequence.h",
|
||||
L"delayedDecoderFeedback.h.out.thenVal.h",
|
||||
L"delayedDecoderFeedback.h.out.PlusArgs[0]",
|
||||
L"delayedDecoderFeedback.h.out.PlusArgs[1].ElementTimesArgs[0]",
|
||||
L"delayedDecoderFeedback.h.out.elseVal",
|
||||
L"delayedDecoderFeedback.h.out.PlusArgs[1]",
|
||||
L"delayedDecoderFeedback.h.out",
|
||||
L"delayedDecoderFeedback"
|
||||
};
|
||||
if (toLog.find(NodeName()) != toLog.end())
|
||||
DebugLogMinibatch();
|
||||
if (NodeName() == L"delayedDecoderFeedback.h.out")
|
||||
{
|
||||
static int i = 0;
|
||||
if (++i == 2)
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
if (m_traceNodeValueReal || m_traceNodeValueAsCategoryLabel || m_traceNodeValueSparse)
|
||||
{
|
||||
fprintf(stderr, "Trace --> %s\n", FormatOperationPrototype("").c_str());
|
||||
|
@ -1556,8 +1612,8 @@ public:
|
|||
/*HasToString::*/ wstring ToString() const override
|
||||
{
|
||||
// we format it like "name : type rows x cols ( args )"
|
||||
wstring result = /*TidyName*/ (NodeName()) + L" : " + OperationName();
|
||||
result.append(msra::strfun::wstrprintf(L" [%s%s]", string(GetSampleLayout()).c_str(), HasMBLayout() ? " x *" : ""));
|
||||
wstring result = NodeName() + L" : " + OperationName();
|
||||
result.append(msra::strfun::wstrprintf(L" [%s%ls]", string(GetSampleLayout()).c_str(), GetMBLayoutAxisString().c_str()));
|
||||
if (m_inputs.empty())
|
||||
result.append(L" ()");
|
||||
else
|
||||
|
@ -1580,7 +1636,7 @@ public:
|
|||
// for debugging purposes
|
||||
void /*ComputationNodeBase::*/ PrintSelf(bool printMatrices = false) const
|
||||
{
|
||||
fprintf(stderr, "\n%ls[%s%s] = %ls", NodeName().c_str(), string(GetSampleLayout()).c_str(), HasMBLayout() ? " x *" : "", OperationName().c_str());
|
||||
fprintf(stderr, "\n%ls[%s%ls] = %ls", NodeName().c_str(), string(GetSampleLayout()).c_str(), GetMBLayoutAxisString().c_str(), OperationName().c_str());
|
||||
|
||||
if (!IsLeaf())
|
||||
{
|
||||
|
@ -1589,7 +1645,7 @@ public:
|
|||
{
|
||||
if (i > 0)
|
||||
fprintf(stderr, ", ");
|
||||
fprintf(stderr, "%ls[%s%s] = %ls", m_inputs[i] ? m_inputs[i]->NodeName().c_str() : L"NULL", string(m_inputs[i]->GetSampleLayout()).c_str(), m_inputs[i]->HasMBLayout() ? " x *" : "", OperationName().c_str());
|
||||
fprintf(stderr, "%ls[%s%ls] = %ls", m_inputs[i] ? m_inputs[i]->NodeName().c_str() : L"NULL", string(m_inputs[i]->GetSampleLayout()).c_str(), m_inputs[i]->GetMBLayoutAxisString().c_str(), OperationName().c_str());
|
||||
}
|
||||
fprintf(stderr, ")");
|
||||
}
|
||||
|
@ -1749,7 +1805,6 @@ public:
|
|||
virtual void PrintSelf(bool) const override { NOT_IMPLEMENTED; }
|
||||
virtual void ValidateInferInputDimsFrom(const TensorShape&) override { NOT_IMPLEMENTED; }
|
||||
virtual void SetInput(const size_t, const Microsoft::MSR::CNTK::ComputationNodeBase::ComputationNodeBasePtr&) override { NOT_IMPLEMENTED; }
|
||||
virtual void ZeroGradientsOfInputs(void) override { NOT_IMPLEMENTED; }
|
||||
virtual void MaskMissingValueColumnsToZero(const Microsoft::MSR::CNTK::FrameRange&) override { NOT_IMPLEMENTED; }
|
||||
virtual void MaskMissingGradientColumnsToZero(const Microsoft::MSR::CNTK::FrameRange&) override { NOT_IMPLEMENTED; }
|
||||
virtual void InvalidateMissingValueColumns(const Microsoft::MSR::CNTK::FrameRange&) override { NOT_IMPLEMENTED; }
|
||||
|
@ -1854,6 +1909,7 @@ protected:
|
|||
using Base::GetInputSampleLayout; \
|
||||
using Base::GetInputsFromConfig; \
|
||||
using Base::GetMBLayout; \
|
||||
using Base::GetMBLayoutAxisString; \
|
||||
using Base::GetNumInputs; \
|
||||
using Base::GetNumParallelSequences; \
|
||||
using Base::GetNumTimeSteps; \
|
||||
|
@ -1865,6 +1921,7 @@ protected:
|
|||
using Base::Gradient; \
|
||||
using Base::GradientAsMatrix; \
|
||||
using Base::GradientFor; \
|
||||
using Base::GradientPtr; \
|
||||
using Base::GradientTensorFor; \
|
||||
using Base::HasMBLayout; \
|
||||
using Base::InferMBLayoutFromInputsForStandardCase; \
|
||||
|
@ -1909,6 +1966,7 @@ protected:
|
|||
using Base::ValidateUnaryMap; \
|
||||
using Base::ValidateUnaryReduce; \
|
||||
using Base::ValueFor; \
|
||||
using Base::ValuePtr; \
|
||||
using Base::ValueTensorFor; \
|
||||
using Base::VerifyDataSize; \
|
||||
using Base::VerifyDims; \
|
||||
|
|
|
@ -340,8 +340,8 @@ public:
|
|||
size_t mapCount = m_mapCount.GetNumElements();
|
||||
size_t weightCols = kW * kH * inDims.m_numChannels;
|
||||
|
||||
// check/infer input [0] (weights)
|
||||
// BUGBUG: For now, we treat the weights as a 2D matrix. They should be a tensor proper.
|
||||
// check/infer input [0] (weights)
|
||||
// BUGBUG: For now, we treat the weights as a 2D matrix. They should be a tensor proper.
|
||||
Input(0)->ValidateInferInputDimsFrom(TensorShape(mapCount, weightCols));
|
||||
|
||||
if (isFinalValidationPass && (Input(0)->GetAsMatrixNumCols() != weightCols || Input(0)->GetAsMatrixNumRows() != mapCount))
|
||||
|
@ -358,31 +358,31 @@ public:
|
|||
else
|
||||
{
|
||||
if (m_imageLayout != ImageLayoutKind::CHW)
|
||||
{
|
||||
{
|
||||
InvalidArgument(
|
||||
"%ls %ls supports only cuDNN (CHW) data layout. "
|
||||
"Please specify imageLayout=\"cudnn\" in %ls node in your script "
|
||||
"and make sure input data layout is CHW", NodeName().c_str(), OperationName().c_str(), NodeName().c_str());
|
||||
}
|
||||
}
|
||||
inputShape = GetInputSampleLayout(inputIdx);
|
||||
auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
|
||||
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
|
||||
SetDims(outDims, HasMBLayout());
|
||||
}
|
||||
}
|
||||
|
||||
if (isFinalValidationPass)
|
||||
{
|
||||
if (m_convEng == nullptr)
|
||||
{
|
||||
{
|
||||
auto geometry = std::make_shared<ConvolveGeometry>(inputShape, m_kernelShape, m_mapCount, m_stride,
|
||||
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
|
||||
m_convEng = ConvolutionEngine<ElemType>::Create(geometry, m_deviceId, m_imageLayout,
|
||||
m_maxTempMemSizeInSamples, m_poolKind);
|
||||
}
|
||||
}
|
||||
|
||||
if (Input(0)->GetAsMatrixNumCols() != m_kernelShape.GetNumElements() ||
|
||||
Input(0)->GetAsMatrixNumRows() != m_convEng->Geometry()->KernelCount())
|
||||
{
|
||||
{
|
||||
LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [kernelCount, kernelWidth * kernelHeight * inputChannels]",
|
||||
Input(0)->NodeName().c_str(), (int)m_convEng->Geometry()->KernelCount(), (int)m_kernelShape.GetNumElements());
|
||||
}
|
||||
|
@ -587,7 +587,7 @@ public:
|
|||
|
||||
m_inputSizePerSample = inDims.m_width * inDims.m_height * inDims.m_numChannels;
|
||||
|
||||
SetDims(outDims.AsTensorShape(m_imageLayoutKind), true);
|
||||
SetDims(outDims.AsTensorShape(m_imageLayoutKind), HasMBLayout());
|
||||
|
||||
if (isFinalValidationPass)
|
||||
{
|
||||
|
|
|
@ -260,7 +260,7 @@ private:
|
|||
TensorView<ElemType> OneSampleTensorFor(int inputIndex/*-1 for output*/, bool gradient/*instead of value*/, const FrameRange& fr)
|
||||
{
|
||||
auto input = inputIndex < 0 ? this : Input(inputIndex).get();
|
||||
auto& data = gradient ? input->Gradient() : input->Value();
|
||||
auto data = gradient ? input->GradientPtr() : input->ValuePtr();
|
||||
size_t rank = input->GetSampleLayout().GetRank();
|
||||
if (!Input(0)->HasMBLayout()) // left input is no MB data: run normally
|
||||
return input->DataTensorFor(data, rank, fr);
|
||||
|
@ -287,9 +287,9 @@ public:
|
|||
// TensorView::DoMatrixProductOf() will reduce each tensor object into a 2D tensor (or fail if it cannot)
|
||||
// and recreate actual Matrix objects (in case of sparse, they must be identical to the original tensor storage object).
|
||||
// Transposition is applied after flattening into 2D, but only allowed if the input sample is 2D anyway.
|
||||
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto output = OneSampleTensorFor(-1, /*gradient=*/false, fr);
|
||||
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto output = OneSampleTensorFor(-1, /*gradient=*/false, fr);
|
||||
output.AssignMatrixProductOf(false/*transC*/, input0, m_transpose/*transA*/, input1, false/*transB*/);
|
||||
}
|
||||
|
||||
|
@ -318,16 +318,16 @@ public:
|
|||
// If input data is sparse, then gradient is block sparse.
|
||||
if (Input(1)->Value().GetMatrixType() == SPARSE && Input(0)->Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
|
||||
Input(0)->Gradient().SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol, false);
|
||||
auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
|
||||
auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
|
||||
auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
|
||||
auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
|
||||
input0Gradient.AddMatrixProductOf(m_transpose/*transC*/, outputGradient, false/*transA*/, input1, true/*transB*/);
|
||||
}
|
||||
else if (inputIndex == 1) // right derivative
|
||||
{
|
||||
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto input1Gradient = OneSampleTensorFor(1, /*gradient=*/true, fr.AllowBroadcast());
|
||||
auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
|
||||
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
|
||||
auto input1Gradient = OneSampleTensorFor(1, /*gradient=*/true, fr.AllowBroadcast());
|
||||
auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
|
||||
input1Gradient.AddMatrixProductOf(false/*transC*/, input0, !m_transpose/*transA*/, outputGradient, false/*transB*/);
|
||||
}
|
||||
}
|
||||
|
@ -819,16 +819,16 @@ public:
|
|||
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
|
||||
{
|
||||
size_t rank = DetermineElementwiseTensorRank();
|
||||
auto output = ValueTensorFor(rank, fr);
|
||||
auto input = TensorView<ElemType>(Input(0)->Value(), GetTransposedTensorSliceFor(rank, fr));
|
||||
auto output = ValueTensorFor( rank, fr);
|
||||
auto input = TensorView<ElemType>(Input(0)->ValuePtr(), GetTransposedTensorSliceFor(rank, fr));
|
||||
output.AssignCopyOf(input);
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
|
||||
{
|
||||
size_t rank = DetermineElementwiseTensorRank();
|
||||
auto outputGradient = GradientTensorFor(rank, fr);
|
||||
auto inputGradient = TensorView<ElemType>(Input(0)->Gradient(), GetTransposedTensorSliceFor(rank, fr));
|
||||
auto outputGradient = GradientTensorFor( rank, fr);
|
||||
auto inputGradient = TensorView<ElemType>(Input(0)->GradientPtr(), GetTransposedTensorSliceFor(rank, fr));
|
||||
inputGradient.AddCopyOf(outputGradient);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ public:
        size_t rank = DetermineElementwiseTensorRank();
        auto result = ValueTensorFor(rank, fr);
        auto input = Input(0)->ValueTensorFor(rank, fr);
        result.DoUnaryOpOf(0, input, 1, opForward);
        result.DoUnaryOpOf(0, input, 1, opForward, opSum);
    }

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
@ -61,8 +61,8 @@ public:

        // get the args
        size_t rank = DetermineElementwiseTensorRank();
        auto sliceOutputGrad = Gradient­TensorFor(rank, fr);           // propagate from this one...
        auto sliceInputGrad = Input(0)->GradientTensorFor(rank, fr);  // ...to this one

        // we expect a constant conditional expression here -- suppress the warning that leads to an error
        // TODO: alternative: assign to a non-const variable and test that.
@ -70,7 +70,7 @@ public:
#pragma warning( disable : 4127 )
        if (opType == UnaryGradient)
        {
            sliceInputGrad.DoUnaryOpOf(1, sliceOutputGrad, 1, opBackward);
            sliceInputGrad.DoUnaryOpOf(1, sliceOutputGrad, 1, opBackward, opSum);
        }
        else
        {
@ -78,7 +78,7 @@ public:
            // Not possible for Cos().
            auto sliceValue = (opType == BinaryWithOutputGradient) ? ValueTensorFor(rank, fr) : // using input or output value
                Input(0)->ValueTensorFor(rank, fr);
            sliceInputGrad.DoBinaryOpOf(1, sliceOutputGrad, sliceValue, 1, opBackward);
            sliceInputGrad.DoBinaryOpOf(1, sliceOutputGrad, sliceValue, 1, opBackward, opSum);
        }
#pragma warning( pop )
    }
@ -194,6 +194,10 @@ public:

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        // move the target matrix to the target device, since below it is accessed as slices which cannot move
        // TODO: once this gets reimplemented using TensorView, then this is no longer needed.
        Input(0)->Value().TransferToDeviceIfNotThere(Value().GetDeviceId(), /*isBeingMoved=*/ false);

        auto values = ValueFor(fr);
        ForwardPropV(values, Input(0)->ValueFor(fr));
    }
@ -281,9 +281,9 @@ public:
    DeclareConstructorFromConfigWithNumInputs(InvStdDevNode);
    InvStdDevNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name),
          m_mean(deviceId),
          m_var(deviceId),
          m_temp(deviceId)
          m_mean(make_shared<Matrix<ElemType>>(deviceId)),
          m_var (make_shared<Matrix<ElemType>>(deviceId)),
          m_temp(make_shared<Matrix<ElemType>>(deviceId))
    {
    }

@ -295,21 +295,21 @@ public:
    {
        // reset accumulators
        UpdateFunctionValuesSize();
        m_mean.Resize(Value()); // mean accumulator normalized by #samples in it
        m_var .Resize(Value()); // likewise the variance
        m_temp.Resize(Value()); // and a temp
        m_mean.SetValue(0); // reset the mean and var accumulators
        m_var .SetValue(0);
        m_mean->Resize(Value()); // mean accumulator normalized by #samples in it
        m_var ->Resize(Value()); // likewise the variance
        m_temp->Resize(Value()); // and a temp
        m_mean->SetValue(0); // reset the mean and var accumulators
        m_var ->SetValue(0);
        Value().SetValue(0); // and clear m_value as well: We must do this here already to avoid a NaN check to flag while this is being estimated.
    }
    else // finalize
    {
        // m_value <- 1/stddev
        ElemType sqrtFloor = 1e-10f;
        m_var.InplaceTruncateBottom(sqrtFloor); // prevent too small variance (and negative square roots due to numeric inaccuracy)
        m_var.InplaceSqrt();
        m_var.ElementInverse();
        Value().SetValue(m_var);
        m_var->InplaceTruncateBottom(sqrtFloor); // prevent too small variance (and negative square roots due to numeric inaccuracy)
        m_var->InplaceSqrt();
        m_var->ElementInverse();
        Value().SetValue(*m_var);
    }
}

@ -361,16 +361,16 @@ public:
    if (flags & CopyNodeFlags::copyNodeValue)
    {
        auto node = dynamic_pointer_cast<InvStdDevNode<ElemType>>(nodeP);
        node->m_mean.SetValue(m_mean);
        node->m_var.SetValue(m_var);
        node->m_temp.SetValue(m_temp);
        node->m_mean->SetValue(*m_mean);
        node->m_var ->SetValue(*m_var);
        node->m_temp->SetValue(*m_temp);
    }
}

private:
    Matrix<ElemType> m_mean;
    Matrix<ElemType> m_var;
    Matrix<ElemType> m_temp;
    shared_ptr<Matrix<ElemType>> m_mean;
    shared_ptr<Matrix<ElemType>> m_var;
    shared_ptr<Matrix<ElemType>> m_temp;
};

template class InvStdDevNode<float>;
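The finalize branch above turns an accumulated per-element variance into an inverse standard deviation, flooring the variance first so that numerical noise cannot produce tiny or negative values under the square root. A minimal standalone sketch of that finalization step (illustrative only, not the node's API):

#include <cmath>
#include <vector>

// Finalize 1/stddev from an accumulated per-element variance, with the same floor as above.
std::vector<float> FinalizeInvStdDev(std::vector<float> var, float sqrtFloor = 1e-10f)
{
    for (auto& v : var)
    {
        if (v < sqrtFloor)
            v = sqrtFloor;        // InplaceTruncateBottom
        v = 1.0f / std::sqrt(v);  // InplaceSqrt followed by ElementInverse
    }
    return var;
}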
@ -183,6 +183,10 @@ public:

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
    {
        // move the target matrix to the target device, since below it is accessed as slices which cannot move
        // TODO: change below accesses to TensorView, then this is no longer needed.
        Input(0)->Gradient().TransferToDeviceIfNotThere(m_deviceId, /*isBeingMoved=*/ true);

        assert(inputIndex == 0);
        inputIndex;
@ -74,30 +74,27 @@ template <class ElemType>
            indexSequence.push_back(t);
        // Note: The above accesses m_value directly on the CPU, putting it into BOTH state, possibly for other consumers as well.
    }
    input.CollapseDataLocationAfterWriting(); // BUGBUG: Move back, since BOTH state is broken at present.
    // create a new MBLayout
    let& outMBLayout = GetMBLayout();
    outMBLayout->InitAsPackedSequences(SequenceLengthVector(sequences, indexSequences), /*temp*/m_placementBuffer, /*temp*/m_rowAllocationsBuffer);
    // copy to output
    vector<ElemType> buf(outMBLayout->GetNumCols(), numeric_limits<ElemType>::quiet_NaN()); // STL cannot easily avoid initializing, so we might as well init with NaN for gaps
    for (size_t i = 0, j = 0; i < sequences.size();)
    let size = min(sequences.size(), outMBLayout->GetAllSequences().size()); // no non-gap sequence has an index beyond this
    for (size_t i = 0; i < size; i++)
    {
        if (sequences[i].seqId == GAP_SEQUENCE_ID) // gaps will keep the NaN
        {
            ++i;
        let& seq = outMBLayout->GetAllSequences()[i];
        if (seq.seqId == GAP_SEQUENCE_ID) // gaps will keep the NaN
            continue;
        }
        let& seq = outMBLayout->GetAllSequences()[j];
        if (seq.seqId == GAP_SEQUENCE_ID) // When would we see this?
        {
            ++j;
            continue;
        }
        let& indexSequence = indexSequences[i];
        for (size_t t = 0; t < seq.GetNumTimeSteps(); t++)
            buf[outMBLayout->GetColumnIndex(seq, t)] = (ElemType)indexSequence[t];
        ++i;
        ++j;
    }
    // there may be dangling gaps at the end. Take the opportunity to verify this.
    for (size_t i = size; i < sequences.size(); i++)
        assert(sequences[i].seqId == GAP_SEQUENCE_ID);
    for (size_t i = size; i < outMBLayout->GetAllSequences().size(); i++)
        assert(outMBLayout->GetAllSequences()[i].seqId == GAP_SEQUENCE_ID);
    // the result will be kept in CPUDEVICE, since most likely we will access it again in PackedIndexNode
    Value().TransferToDeviceIfNotThere(CPUDEVICE, /*isBeingMoved=*/ true, /*emptyTransfer=*/ true, /*updatePreferredDevice=*/ true);
    Value().SetValue(1, outMBLayout->GetNumCols(), CPUDEVICE, buf.data(), MatrixFormat::matrixFormatColMajor);
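In essence, the packing loop above writes each kept sequence's index values into the column positions assigned by the new layout and leaves NaN in gap columns. A simplified standalone sketch of that packing step, assuming the layout already provides an output column for each (sequence, time step) pair (names are hypothetical):

#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

// Pack per-sequence index values into one flat column buffer; unwritten (gap) columns keep NaN.
std::vector<float> PackIndexSequences(const std::vector<std::vector<float>>& indexSequences,
                                      const std::vector<std::vector<size_t>>& columnOf) // columnOf[i][t] = output column
{
    size_t numCols = 0;
    for (const auto& cols : columnOf)
        for (size_t c : cols)
            numCols = std::max(numCols, c + 1);
    std::vector<float> buf(numCols, std::numeric_limits<float>::quiet_NaN());
    for (size_t i = 0; i < indexSequences.size(); i++)
        for (size_t t = 0; t < indexSequences[i].size(); t++)
            buf[columnOf[i][t]] = indexSequences[i][t];
    return buf;
}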
@ -107,7 +104,6 @@ template <class ElemType>
/*virtual*/ void WhereNode<ElemType>::BackpropToNonLooping(size_t /*inputIndex*/) /*override*/
{
    // we cannot backprop through a condition
    // Can we?
    return;
}

@ -161,6 +157,8 @@ template <class ElemType>
            result(0, jIndex) = (ElemType)jSource;
        }
    }
    // Note: maybe this is no longer needed, now that we do the same inside UpdateFunctionValueSize() for all nodes.
    result.CollapseDataLocationAfterWriting(); // BUGBUG: Move back, since BOTH state is broken at present.
}

template <class ElemType>
@ -303,16 +303,16 @@ public:
    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        size_t rank = DetermineElementwiseTensorRank();
        auto output = ValueTensorFor(rank, fr);
        let input = TensorView<ElemType>(Input(0)->Value(), GetInputSlice(rank, fr.AllowBroadcast()));
        auto output = ValueTensorFor( rank, fr);
        let input = TensorView<ElemType>(Input(0)->ValuePtr(), GetInputSlice(rank, fr.AllowBroadcast()));
        output.AssignCopyOf(input);
    }

    virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange& fr) override
    {
        size_t rank = DetermineElementwiseTensorRank();
        let outputGrad = GradientTensorFor(rank, fr);
        auto inputGrad = TensorView<ElemType>(Input(0)->Gradient(), GetInputSlice(rank, fr));
        let outputGrad = GradientTensorFor( rank, fr);
        auto inputGrad = TensorView<ElemType>(Input(0)->GradientPtr(), GetInputSlice(rank, fr.AllowBroadcast()));
        inputGrad.AddCopyOf(outputGrad);
    }

@ -413,7 +413,7 @@ public:
    {
        let input = Input(inputIndex)->ValueTensorFor(rank, fr.AllowBroadcast());
        let outputSubSlice = NarrowToStripe(outputSlice, inputIndex);
        auto output = TensorView<ElemType>(Value(), outputSubSlice);
        auto output = TensorView<ElemType>(ValuePtr(), outputSubSlice);
        output.AssignCopyOf(input);
    }
}
@ -425,7 +425,7 @@ public:

    auto inputGrad = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
    let outputSubSlice = NarrowToStripe(outputSlice, inputIndex);
    let outputGrad = TensorView<ElemType>(Gradient(), outputSubSlice);
    let outputGrad = TensorView<ElemType>(GradientPtr(), outputSubSlice);
    inputGrad.AddCopyOf(outputGrad);
}

@ -1074,7 +1074,10 @@ public:
    else if (Input(0)->HasMBLayout())
    {
        if (!m_pMBLayout)
        {
            m_pMBLayout = make_shared<MBLayout>(); // mini-batch data: this generates a new layout
            m_pMBLayout->SetUniqueAxisName(NodeName());
        }
    }
    else
        assert(!m_pMBLayout); // reshaping non-mini-batch data
@ -692,7 +692,7 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, cons
    foreach_column(jIn, a)
    {
        auto jOutF = idx(0, jIn); // this is the column we copy/add into
        if (jOutF < 0)            // negative index means gap
            continue;
        size_t jOut = (size_t)jOutF;
        if (jOut >= GetNumCols())
@ -4856,15 +4856,17 @@ void CPUMatrix<ElemType>::AssignScaledDifference(const ElemType alpha, const CPU
    }
}

//c[ci,cj] += a[ai,aj]
// c[ci,cj] += a[ai,aj]
template <class ElemType>
void CPUMatrix<ElemType>::AddElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
void CPUMatrix<ElemType>::AddElementToElement(ElemType beta, const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
{
    if (ai >= a.GetNumRows() || aj >= a.GetNumCols() ||
        ci >= c.GetNumRows() || cj >= c.GetNumCols())
        InvalidArgument("AddElementToElement: index out of range.");

    c(ci, cj) += a(ai, aj);
    ElemType us = beta ? beta * c(ci, cj) : 0; // do not multiply if beta is 0, could be a NaN
    us += a(ai, aj);
    c(ci, cj) = us;
}

////c[ci,cj] += a[ai,aj]
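The beta handling above follows the usual BLAS-style convention: beta == 0 means "overwrite", not "multiply by zero", so stale NaN or uninitialized values in c cannot leak into the result. A minimal scalar sketch of that contract:

// c = beta * c + a, where beta == 0 means the old value of c is ignored entirely.
template <class ElemType>
void AddElementToElementScalar(ElemType beta, ElemType a, ElemType& c)
{
    ElemType us = beta ? beta * c : 0; // skip the multiply when beta == 0; old c may be NaN
    us += a;
    c = us;
}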
@ -4879,7 +4881,8 @@ void CPUMatrix<ElemType>::AddElementToElement(const CPUMatrix<ElemType>& a, cons
//    c(ci, cj) += ((v < EPS_IN_LOG) ? LOG_OF_EPS_IN_LOG : log(v));
//}

//c[ci,cj] = a[ai,aj]
#if 0 // now done as AddElementToElement (beta=0)
// c[ci,cj] = a[ai,aj]
template <class ElemType>
void CPUMatrix<ElemType>::AssignElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
{
@ -4889,6 +4892,7 @@ void CPUMatrix<ElemType>::AssignElementToElement(const CPUMatrix<ElemType>& a, c

    c(ci, cj) = a(ai, aj);
}
#endif

/// <summary>c += alpha * (a-b)</summary>
/// if a, b, c must have same dim
@ -6079,11 +6083,14 @@ static void TensorOpWithFn(ElemType beta, array<ElemType*, N> pointers, ElemType
// perform unary operation 'op' on a giving 'this', reinterpreting the matrices as tensors as specified by the dims and strides
// This maps 'op' to a lambda.
template <class ElemType>
void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
                                   const array<size_t, 2>& offsets,
                                   const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
                                   const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
{
    if (reductionOp != ElementWiseOperator::opSum) // TODO: enable the reduction ops
        InvalidArgument("TensorOp: Unary reduction operations other than opSum not yet implemented.");

// TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
#define CaseUnaryTensorOp(oper) \
    case ElementWiseOperator::op##oper: \
@ -6098,18 +6105,21 @@ void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a,
    {
        ForAllUnaryOps(CaseUnaryTensorOp);
    default:
        LogicError("TensorUnaryOp: Unknown op code %d.", (int) op);
        LogicError("TensorOp: Unknown unary op code %d.", (int) op);
    }
}

// perform binary operation 'op' on a and b giving 'this', reinterpreting the matrices as tensors as specified by the dims and strides
// This maps 'op' to a lambda.
template <class ElemType>
void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
                                   const array<size_t, 3>& offsets,
                                   const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
                                   const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
{
    if (reductionOp != ElementWiseOperator::opSum)
        InvalidArgument("TensorOp (binary): The only permitted binary reduction operation is opSum.");

#define CaseBinaryTensorOp(oper) \
    case ElementWiseOperator::op##oper: \
        return TensorOpWithFn(beta, pointers, alpha, [](const array<ElemType*, 3>& pp) \
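The new reductionOp argument names how reduced (broadcast) dimensions are folded into the result; the guards above only accept opSum for now, i.e. the contract is C = beta*C + alpha * sum over the reduced dimensions of op(A). A tiny standalone reference of that contract for a single reduced dimension (illustrative only, not the TensorOp implementation):

#include <cstddef>
#include <vector>

// C[i] = beta*C[i] + alpha * sum_j op(A[i][j])  -- 'opSum' over the reduced dimension j
template <class ElemType, class UnaryOp>
void ReduceSumUnary(ElemType beta, const std::vector<std::vector<ElemType>>& A,
                    ElemType alpha, UnaryOp op, std::vector<ElemType>& C)
{
    for (size_t i = 0; i < C.size(); i++)
    {
        ElemType acc = 0;
        for (size_t j = 0; j < A[i].size(); j++)
            acc += op(A[i][j]);            // elementwise forward op, then sum-reduce
        C[i] = (beta ? beta * C[i] : 0) + alpha * acc;
    }
}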
@ -6123,18 +6133,21 @@ void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a,
    {
        ForAllBinaryOps(CaseBinaryTensorOp);
    default:
        LogicError("TensorBinaryOp: Unknown op code %d.", (int) op);
        LogicError("TensorOp: Unknown binary op code %d.", (int) op);
    }
}

// perform ternary operation 'op' on a, b, and c giving 'this', reinterpreting the matrices as tensors as specified by the dims and strides
// This maps 'op' to a lambda.
template <class ElemType>
void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
                                   const array<size_t, 4>& offsets,
                                   const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
                                   const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
{
    if (reductionOp != ElementWiseOperator::opSum)
        InvalidArgument("TensorOp: The only permitted ternary reduction operation is opSum.");

#define CaseTernaryTensorOp(oper) \
    case ElementWiseOperator::op##oper: \
        return TensorOpWithFn(beta, pointers, alpha, [](const array<ElemType*, 4>& pp) \
@ -6148,7 +6161,7 @@ void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a,
    {
        ForAllTernaryOps(CaseTernaryTensorOp);
    default:
        LogicError("TensorTernaryOp: Unknown op code %d.", (int) op);
        LogicError("TensorOp: Unknown ternary op code %d.", (int) op);
    }
}
@ -380,9 +380,7 @@ public:
|
|||
static void AddScaledDifference(const CPUMatrix<ElemType>& alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c); // alpha must be 1X1
|
||||
static void AssignScaledDifference(const CPUMatrix<ElemType>& alpha, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c); // alpha must be 1X1
|
||||
|
||||
static void AddElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||
// static void AddLogElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||
static void AssignElementToElement(const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||
static void AddElementToElement(ElemType beta, const CPUMatrix<ElemType>& a, const size_t ai, const size_t aj, CPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||
|
||||
static void MinusOneAt(CPUMatrix<ElemType>& c, const size_t position);
|
||||
|
||||
|
@ -397,15 +395,15 @@ public:
|
|||
|
||||
static void TensorShuffleScaleAndAdd(ElemType keepWeight, const CPUMatrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
|
||||
|
||||
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 2>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& reducingStrides);
|
||||
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 3>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& reducingStrides);
|
||||
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 4>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& reducingStrides);
|
||||
|
|
|
@ -413,10 +413,20 @@ public:
    {
        if (!m_sob.unique())
            LogicError("%s: Cannot resize the matrix because it is a view.", function);
        if (m_sob->HasExternalBuffer())
        else if (m_sob->HasExternalBuffer())
            LogicError("%s: Cannot resize the matrix because it is externally owned.", function);
    }
    // This is needed for Sparse Matrices to ensure they can write to the matrix. Note: writing to slices is not currently supported

    // same as VerifyResizable() except for the error message. Could be folded into one.
    void VerifyMigratable(const char* function) const
    {
        if (!m_sob.unique())
            LogicError("%s: Cannot migrate the matrix between devices because it is a view.", function);
        else if (m_sob->HasExternalBuffer())
            LogicError("%s: Cannot migrate the matrix between devices because it is externally owned.", function);
    }

    // This is needed for Sparse Matrices to ensure they can write to the matrix. Note: writing to slices is not currently supported
    void VerifyWritable(const char* function) const
    {
        if (!(m_sob->GetNumStorageRows() == m_numRows && m_sob->GetNumStorageCols() == m_numCols))
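VerifyResizable and the new VerifyMigratable both rely on shared_ptr::unique() (use_count() == 1) on the shared storage object to detect aliasing views before a destructive operation. A standalone sketch of the same guard, with hypothetical names:

#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

struct Storage { std::vector<float> data; bool external = false; };

struct MatrixView
{
    std::shared_ptr<Storage> sob; // shared storage object, possibly aliased by other views
    void VerifyResizable(const char* function) const
    {
        if (!sob.unique())       // another view still points at this storage
            throw std::logic_error(std::string(function) + ": cannot resize a view");
        else if (sob->external)  // buffer is owned elsewhere
            throw std::logic_error(std::string(function) + ": buffer is externally owned");
    }
};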
@ -880,6 +880,7 @@ __global__ void _doGatherColumnsOf(ElemType* us, size_t usStride, const ElemType
        return;

    // id = i + jOut * usStride;
    // Each thread processes one element of the output matrix.
    CUDA_LONG i    = id % usStride; // row index into 'us' and 'a'
    CUDA_LONG jOut = id / usStride; // col index into 'us' and 'idx'

@ -892,7 +893,7 @@ __global__ void _doGatherColumnsOf(ElemType* us, size_t usStride, const ElemType

    const ElemType& ra  = a[ i + jIn * aStride ];
    ElemType&       rus = us[id/*i + jOut * usStride*/];

    ElemType res = ra * alpha;
    if (beta != 0)
        res += rus * beta;
@ -909,7 +910,7 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::DoGatherColumnsOf(ElemType beta, const
    if (beta == 0)
        RequireSize(a.GetNumRows(), idx.GetNumCols()); // output has same column format as a, but number of columns comes from idx
    else
        this->VerifySize(a.GetNumRows(), idx.GetNumCols());
        VerifySize(a.GetNumRows(), idx.GetNumCols());

    if (idx.GetComputeDeviceId() != a.GetComputeDeviceId() || GetComputeDeviceId() != a.GetComputeDeviceId())
        InvalidArgument("All matrices must be on the same GPU");
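The kernels above flatten (row, column) into a single thread id and invert it with % and /. A CPU reference of the same gather-columns semantics, with a negative index marking a gap column (a sketch under those assumptions, not the GPU code):

#include <cstddef>
#include <vector>

// us[:, jOut] = beta * us[:, jOut] + alpha * a[:, idx[jOut]]; idx[jOut] < 0 marks a gap (skipped).
void GatherColumns(std::vector<float>& us, size_t usRows,
                   const std::vector<float>& a, size_t aRows,
                   const std::vector<float>& idx, float alpha, float beta)
{
    size_t numOutCols = idx.size();
    for (size_t id = 0; id < usRows * numOutCols; id++)
    {
        size_t i    = id % usRows; // row index, as in the kernel
        size_t jOut = id / usRows; // output column index
        if (idx[jOut] < 0)
            continue;              // gap column keeps its old value
        size_t jIn = (size_t)idx[jOut];
        float res = a[i + jIn * aRows] * alpha;
        if (beta != 0)
            res += us[id] * beta;
        us[id] = res;
    }
}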
@ -935,6 +936,7 @@ __global__ void _doScatterColumnsOf(ElemType* us, size_t usStride, size_t usCols
|
|||
return;
|
||||
|
||||
// id = i + jIn * aStride
|
||||
// Each thread processes one element of a
|
||||
CUDA_LONG i = id % aStride; // row index into 'a' and 'us'
|
||||
CUDA_LONG jIn = id / aStride; // col index into 'a' and 'idx'
|
||||
|
||||
|
@ -943,7 +945,7 @@ __global__ void _doScatterColumnsOf(ElemType* us, size_t usStride, size_t usCols
|
|||
return;
|
||||
size_t jOut = (size_t)jOutF;
|
||||
if (jOut >= usCols)
|
||||
return; // actually a failure
|
||||
return; // actually a failure --TODO: This should not be necessary. Why is it?
|
||||
|
||||
const ElemType& ra = a[id/*i + jIn * aStride*/];
|
||||
ElemType& rus = us[ i + jOut * usStride ];
|
||||
|
@ -3345,7 +3347,7 @@ template <class ElemType>
|
|||
return;
|
||||
a.PrepareDevice();
|
||||
if (a.IsEmpty() || b.IsEmpty())
|
||||
LogicError("ScaleAndAdd: one of the input matrices is empty.");
|
||||
LogicError("ScaleAndAdd: One of the input matrices is empty.");
|
||||
c.RequireSize(b.GetNumRows(), b.GetNumCols());
|
||||
// if (a.GetNumRows() != 1 && a.GetNumCols() != 1) // a is not a col or row vector
|
||||
if (a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()) // dimensions match
|
||||
|
@ -3396,7 +3398,7 @@ template <class ElemType>
|
|||
_matrixVectorRowWiseAddWithThreadPerElem<ElemType><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, t_stream>>>(a.Data(), b.Data(), c.Data(), alpha, m, n);
|
||||
}
|
||||
else
|
||||
InvalidArgument("dimension of matrix c does not match dimension of matrix a.");
|
||||
InvalidArgument("Dimension of matrix c does not match dimension of matrix a.");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3423,11 +3425,11 @@ void GPUMatrix<ElemType>::AddScaledDifference(const ElemType alpha, const GPUMat
|
|||
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumRows() == c.GetNumRows() &&
|
||||
a.GetNumCols() == b.GetNumCols() && a.GetNumCols() == c.GetNumCols()))
|
||||
{
|
||||
InvalidArgument("AddScaledDifference: a, b, and c must have same dimension.");
|
||||
InvalidArgument("AddScaledDifference: a, b, and c must have same dimension.");
|
||||
}
|
||||
|
||||
if (a.IsEmpty())
|
||||
LogicError("AddScaledDifference: Input matrix a is empty.");
|
||||
LogicError("AddScaledDifference: Input matrix a is empty.");
|
||||
|
||||
CUDA_LONG n = (CUDA_LONG) a.GetNumElements();
|
||||
int blocksPerGrid = (int) ceil(1.0 * n / GridDim::maxThreadsPerBlock);
|
||||
|
@ -3456,12 +3458,10 @@ void GPUMatrix<ElemType>::AssignScaledDifference(const ElemType alpha, const GPU
|
|||
assert(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols());
|
||||
|
||||
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
|
||||
{
|
||||
InvalidArgument("AssignScaledDifference: a, b must have same dimension.");
|
||||
}
|
||||
InvalidArgument("AssignScaledDifference: a, b must have same dimension.");
|
||||
|
||||
if (a.IsEmpty())
|
||||
LogicError("AssignScaledDifference: Input matrix a is empty.");
|
||||
LogicError("AssignScaledDifference: Input matrix a is empty.");
|
||||
|
||||
if (&c != &a && &c != &b)
|
||||
c.RequireSize(a.GetNumRows(), a.GetNumCols());
|
||||
|
@ -3484,7 +3484,7 @@ void GPUMatrix<ElemType>::AddScaledDifference(const GPUMatrix<ElemType>& alpha,
|
|||
{
|
||||
assert(alpha.GetNumElements() == 1);
|
||||
if (!(alpha.GetNumElements() == 1))
|
||||
InvalidArgument("AddScaledDifference: alpha must be a 1X1 matrix.");
|
||||
InvalidArgument("AddScaledDifference: alpha must be a 1X1 matrix.");
|
||||
|
||||
if (a.GetComputeDeviceId() != c.GetComputeDeviceId())
|
||||
{
|
||||
|
@ -3500,11 +3500,11 @@ void GPUMatrix<ElemType>::AddScaledDifference(const GPUMatrix<ElemType>& alpha,
|
|||
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumRows() == c.GetNumRows() &&
|
||||
a.GetNumCols() == b.GetNumCols() && a.GetNumCols() == c.GetNumCols()))
|
||||
{
|
||||
InvalidArgument("AddScaledDifference: a, b, and c must have same dimension.");
|
||||
InvalidArgument("AddScaledDifference: a, b, and c must have same dimension.");
|
||||
}
|
||||
|
||||
if (a.IsEmpty())
|
||||
LogicError("AddScaledDifference: Input matrix a is empty.");
|
||||
LogicError("AddScaledDifference: Input matrix a is empty.");
|
||||
|
||||
CUDA_LONG n = (CUDA_LONG) a.GetNumElements();
|
||||
int blocksPerGrid = (int) ceil(1.0 * n / GridDim::maxThreadsPerBlock);
|
||||
|
@ -3524,7 +3524,7 @@ void GPUMatrix<ElemType>::AssignScaledDifference(const GPUMatrix<ElemType>& alph
|
|||
{
|
||||
assert(alpha.GetNumElements() == 1);
|
||||
if (!(alpha.GetNumElements() == 1))
|
||||
InvalidArgument("AddScaledDifference: alpha must be a 1X1 matrix.");
|
||||
InvalidArgument("AddScaledDifference: alpha must be a 1X1 matrix.");
|
||||
|
||||
if (a.GetComputeDeviceId() != c.GetComputeDeviceId())
|
||||
{
|
||||
|
@ -3538,11 +3538,11 @@ void GPUMatrix<ElemType>::AssignScaledDifference(const GPUMatrix<ElemType>& alph
|
|||
|
||||
if (!(a.GetNumRows() == b.GetNumRows() && a.GetNumCols() == b.GetNumCols()))
|
||||
{
|
||||
InvalidArgument("AssignScaledDifference: a, b must have same dimension.");
|
||||
InvalidArgument("AssignScaledDifference: a, b must have same dimension.");
|
||||
}
|
||||
|
||||
if (a.IsEmpty())
|
||||
LogicError("AssignScaledDifference: Input matrix a is empty.");
|
||||
LogicError("AssignScaledDifference: Input matrix a is empty.");
|
||||
|
||||
c.RequireSize(a.GetNumRows(), a.GetNumCols());
|
||||
|
||||
|
@ -3555,16 +3555,15 @@ void GPUMatrix<ElemType>::AssignScaledDifference(const GPUMatrix<ElemType>& alph

//c[ci,cj] += a[ai,aj]
template <class ElemType>
void GPUMatrix<ElemType>::AddElementToElement(const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
void GPUMatrix<ElemType>::AddElementToElement(ElemType beta, const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
{
    if (ai >= a.GetNumRows() || aj >= a.GetNumCols() ||
        ci >= c.GetNumRows() || cj >= c.GetNumCols())
        InvalidArgument("AddElementToElement: index out of range.");
        InvalidArgument("AddElementToElement: Index out of range.");

    a.PrepareDevice();
    int blocksPerGrid = 1; // only one element --BUGBUG: then why not launch only 1 thread per block?
    SyncGuard syncGuard;
    _addElementToElement<ElemType><<<blocksPerGrid, GridDim::maxThreadsPerBlock /*BUGBUG: should be 1?*/, 0, t_stream>>>(a.Data(), (CUDA_LONG) a.LocateElement(ai, aj), c.Data(), (CUDA_LONG) c.LocateElement(ci, cj));
    _addElementToElement<ElemType><<<1, 1, 0, t_stream>>>(beta, a.Data(), (CUDA_LONG) a.LocateElement(ai, aj), c.Data(), (CUDA_LONG) c.LocateElement(ci, cj));
}

template <class ElemType>
@ -4238,11 +4237,14 @@ static shared_ptr<GPUMatrix<ElemType>> GetOnesVector(size_t N, DEVICEID_TYPE dev
|
|||
// perform unary operation 'op' on a giving 'this', reinterpreting the matrices as tensors as specified by the dims and strides
|
||||
// This binds the N-ariness to a template parameter N, and gets the data pointers out from the matrix objects.
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 2>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
|
||||
{
|
||||
if (reductionOp != ElementWiseOperator::opSum) // TODO: enable the reduction ops
|
||||
InvalidArgument("TensorOp: Unary reduction operations other than opSum not yet implemented.");
|
||||
|
||||
a.PrepareDevice();
|
||||
if (a.GetComputeDeviceId() != GetComputeDeviceId())
|
||||
InvalidArgument("All matrices must be on the same GPU");
|
||||
|
@ -4293,11 +4295,14 @@ void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a,
|
|||
|
||||
// perform binary operation 'op' on a and b giving 'this', reinterpreting the matrices as tensors as specified by the dims and strides
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 3>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
|
||||
{
|
||||
if (reductionOp != ElementWiseOperator::opSum)
|
||||
InvalidArgument("TensorOp: The only permitted binary reduction operation is opSum.");
|
||||
|
||||
a.PrepareDevice();
|
||||
if (a.GetComputeDeviceId() != GetComputeDeviceId() || b.GetComputeDeviceId() != GetComputeDeviceId())
|
||||
InvalidArgument("All matrices must be on the same GPU");
|
||||
|
@ -4307,11 +4312,14 @@ void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a,
|
|||
|
||||
// perform ternary operation 'op' on a, and c giving 'this', reinterpreting the matrices as tensors as specified by the dims and strides
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const GPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const GPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 4>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
|
||||
{
|
||||
if (reductionOp != ElementWiseOperator::opSum)
|
||||
InvalidArgument("TensorOp: The only permitted ternary reduction operation is opSum.");
|
||||
|
||||
a.PrepareDevice();
|
||||
if (a.GetComputeDeviceId() != GetComputeDeviceId() || b.GetComputeDeviceId() != GetComputeDeviceId() || c.GetComputeDeviceId() != GetComputeDeviceId())
|
||||
InvalidArgument("All matrices must be on the same GPU");
|
||||
|
|
|
@ -125,6 +125,7 @@ public:
    using Base::SetFormat;
    using Base::IsEmpty;
    using Base::VerifyResizable;
    using Base::VerifySize;

public:
    using Base::VerifyWritable;
@ -461,7 +462,7 @@ public:
|
|||
static void AddScaledDifference(const GPUMatrix<ElemType>& alpha, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
|
||||
static void AssignScaledDifference(const GPUMatrix<ElemType>& alpha, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
|
||||
|
||||
static void AddElementToElement(const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||
static void AddElementToElement(ElemType beta, const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||
|
||||
// minus one at a specific position
|
||||
static void MinusOneAt(GPUMatrix<ElemType>& c, const size_t position);
|
||||
|
@ -477,15 +478,15 @@ public:
|
|||
|
||||
static void TensorShuffleScaleAndAdd(ElemType keepWeight, const GPUMatrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
|
||||
|
||||
void TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 2>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& reducingStrides);
|
||||
void TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 3>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& reducingStrides);
|
||||
void TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const GPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const GPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 4>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& reducingStrides);
|
||||
|
|
|
@ -2567,13 +2567,16 @@ __global__ void _assignScaledDifference(

template <class ElemType>
__global__ void _addElementToElement(
    ElemType beta,
    const ElemType* a, CUDA_LONG indexA,
    ElemType* c, CUDA_LONG indexC)
{
    CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
    if (id > 0)
        return;
    c[indexC] += a[indexA];
    //CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x; // only one thread launched
    //if (id > 0)
    //    return;
    ElemType us = beta ? beta * c[indexC] : 0; // do not multiply if beta is 0, could be a NaN
    us += a[indexA];
    c[indexC] = us;
}

template <class ElemType>
@ -110,61 +110,37 @@
    } \
}

// version of helper macro that executes both CPU and GPU macros if 'MatrixPointerToCheck' location is BOTH
#define DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(MatrixPointerToCheck, MatrixPointerToSetFlag, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        CurrentDataLocation curLocation = (MatrixPointerToCheck)->GetCurrentMatrixLocation(); \
        if (curLocation == CurrentDataLocation::BOTH) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::BOTH, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::BOTH, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::GPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                GPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); \
            } \
            else \
            { \
                GPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::GPU, MatrixType::SPARSE); \
            } \
        } \
        else if (curLocation == CurrentDataLocation::CPU) \
        { \
            if ((MatrixPointerToCheck)->GetMatrixType() != MatrixType::SPARSE) \
            { \
                CPUDense; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); \
            } \
            else \
            { \
                CPUSparse; \
                if (MatrixPointerToSetFlag != nullptr) \
                    ((Matrix*) MatrixPointerToSetFlag)->SetDataLocation(CurrentDataLocation::CPU, MatrixType::SPARSE); \
            } \
        } \
        else \
        { \
            RuntimeError("Matrices do not exist in either CPU or GPU."); \
        } \
// version of helper macro that executes both CPU and GPU macros if 'matrixPointer' location is BOTH
#define DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(matrixPointer, CPUDense, GPUDense, CPUSparse, GPUSparse) \
    { \
        auto curLocation = (matrixPointer)->GetCurrentMatrixLocation(); \
        auto curMatrixType = (matrixPointer)->GetMatrixType(); \
        if (curLocation == CurrentDataLocation::NONE) \
            LogicError("Matrices do not exist in either CPU or GPU."); \
        if (curMatrixType == MatrixType::UNDETERMINED) \
            LogicError("Matrices must be SPARSE or DENSE."); \
        if (curLocation != CurrentDataLocation::CPU) /*GPU or BOTH*/ \
        { \
            if (curMatrixType == MatrixType::DENSE) \
            { \
                GPUDense; \
            } \
            else \
            { \
                GPUSparse; \
            } \
        } \
        if (curLocation != CurrentDataLocation::GPU) /*CPU or BOTH*/ \
        { \
            if (curMatrixType == MatrixType::DENSE) \
            { \
                CPUDense; \
            } \
            else \
            { \
                CPUSparse; \
            } \
        } \
    }

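The rewritten macro's control flow can be read as two independent checks rather than a location switch: run the GPU branch unless the data lives only on the CPU, and run the CPU branch unless it lives only on the GPU, so BOTH executes both sides. A plain-function sketch of that logic (enum and function names are illustrative, not the macro itself):

#include <functional>
#include <stdexcept>

enum class Location { NONE, CPU, GPU, BOTH };
enum class MatType  { UNDETERMINED, DENSE, SPARSE };

void DispatchBoth(Location loc, MatType type,
                  const std::function<void()>& cpuDense, const std::function<void()>& gpuDense,
                  const std::function<void()>& cpuSparse, const std::function<void()>& gpuSparse)
{
    if (loc == Location::NONE || type == MatType::UNDETERMINED)
        throw std::logic_error("matrix has no valid location/type");
    if (loc != Location::CPU)  // GPU or BOTH
        (type == MatType::DENSE ? gpuDense : gpuSparse)();
    if (loc != Location::GPU)  // CPU or BOTH
        (type == MatType::DENSE ? cpuDense : cpuSparse)();
}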
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
@ -224,46 +200,85 @@ void Matrix<ElemType>::ShallowCopyFrom(const Matrix<ElemType>& other)
}

// Call this function after an update operation has created/set/updated the respective pointers.
//  - location: BOTH|CPU|GPU
//     - pass BOTH only if object will be read from; it is not allowed to write to both and then call this function.
//     - if CPU/GPU and current is BOTH, then object was written to
// What gets updated:
//  - m_currentDataLocation: from function argument
//  - m_matrixType: from function argument unless UNDETERMINED in which case m_matrixType remains unmodified
//  - m_baseMatrix: to one of current values of m_[GC]PU{Sparse,}Matrix
// This function is heavily overloaded in its responsibility.
//  - first-time initialization, e.g. of a ColumnSlice (NONE->!NONE)
//  - after creating a temp copy for reading
//  - collapse temp copies after writing to one of them
//  - setting matrixType if not set yet
template <class ElemType>
void Matrix<ElemType>::SetDataLocation(CurrentDataLocation location, MatrixType type) const
{
    assert(location == CurrentDataLocation::CPU || location == CurrentDataLocation::GPU || location == CurrentDataLocation::BOTH);

    // if the object used to live on BOTH, this will collapse it to 'location' (unless we actually wrote into BOTH)
    // In that case, we do a sanity check here that the object is an owning Matrix,
    // since otherwise the collapsing would go unnoticed by the original owner.
    // In that case, we do a sanity check here that the object is a singleton view,
    // since otherwise the collapsing would go unnoticed by the other views.
    // The cases to cover:
    //  - original owner is BOTH, and this is called on the original owner
    //     -> The result was written to 'location' so we should collapse it to there.
    //  - original owning matrix is in BOTH state
    //    and a view inherits this
    //     -> FORBIDDEN to write into CPU or GPU since we cannot ensure we wrote into the one that will be read next
    //  - original owning matrix is CPU or GPU
    //    and a view onto it is put into BOTH state
    //     -> inefficient to read, since this is likely happening over again; so put the owner into BOTH state
    //     -> FORBIDDEN to write into CPU or GPU since we don't know the owner's true location and hence cannot ensure we wrote to the correct place
    if (m_currentDataLocation == CurrentDataLocation::BOTH && location != CurrentDataLocation::BOTH)
    //  - everything is allowed on a singleton view
    //  - if the current state is BOTH:
    //     -> The result was written to 'location' so we should collapse it to there.
    //  - multiple views: much is forbidden since we cannot notify the other views on which one was written to
    //     - CPU <-> GPU: FORBIDDEN
    //     - BOTH -> CPU or GPU: current state is BOTH: location says which side was written to
    //       -> FORBIDDEN to write into
    //     - CPU or GPU -> BOTH: current state is CPU or GPU
    //       and a view onto it is put into BOTH state
    //       -> OK but inefficient to read, since this is likely happening over again; but we cannot put all views into BOTH state
    //     - BOTH -> BOTH:
    //       - read case: OK
    //       - write case: forbidden to call this function in this way
    //     - NONE -> !NONE: FORBIDDEN
    if (m_currentDataLocation != location &&                  // it is attempted to change location
        m_currentDataLocation != CurrentDataLocation::NONE && // from a valid object (NONE means we are a fresh object from ColumnSlice())
        location != CurrentDataLocation::BOTH)                // and we are changing it not into a temporary copy for reading
    {
        // we get here if we wrote into this object that was BOTH but is no longer
        if (!OwnBuffer()) // this means we should not have written into it in the first place, so fail now (better late than never)
        // we get here if we wrote into this object that was BOTH but is no longer, or if we move between CPU and GPU
        // Both is forbidden on shared views since we cannot inform other views of this change.
        // Any *valid* pointer will now be checked for uniqueness. There may be mismatching left-over pointers kept around in case they should be revived.
        if (m_matrixType == MatrixType::DENSE) // note: this checks the current type, not the new one passed in. Assumption: this tells us which pointers are valid.
        {
            assert(m_currentDataLocation == CurrentDataLocation::GPU || m_CPUMatrix);
            assert(m_currentDataLocation == CurrentDataLocation::CPU || m_GPUMatrix);
            if (m_currentDataLocation != CurrentDataLocation::GPU) ((BaseMatrix<ElemType>*)m_CPUMatrix.get())->VerifyMigratable("SetDataLocation [CPUMatrix]");
            if (m_currentDataLocation != CurrentDataLocation::CPU) ((BaseMatrix<ElemType>*)m_GPUMatrix.get())->VerifyMigratable("SetDataLocation [GPUMatrix]");
        }
        else if (m_matrixType == MatrixType::SPARSE)
        {
            assert(m_currentDataLocation == CurrentDataLocation::GPU || m_CPUSparseMatrix);
            assert(m_currentDataLocation == CurrentDataLocation::CPU || m_GPUSparseMatrix);
            if (m_currentDataLocation != CurrentDataLocation::GPU) ((BaseMatrix<ElemType>*)m_CPUSparseMatrix.get())->VerifyMigratable("SetDataLocation [CPUSparseMatrix]");
            if (m_currentDataLocation != CurrentDataLocation::CPU) ((BaseMatrix<ElemType>*)m_GPUSparseMatrix.get())->VerifyMigratable("SetDataLocation [GPUSparseMatrix]");
        }
        // TODO: Why do we need these typecasts? (without it will fail with "cannot access private member declared in class 'Microsoft::MSR::CNTK::CPUMatrix<float>'")

        if (m_baseMatrix && !OwnBuffer()) // same arguments for externally owned matrices: Can read a temp but not write.
            LogicError("SetDataLocation: A non-owning object cannot be written to in BOTH state.");
    }
    // passed validation: we can now update the state

    m_currentDataLocation = location;

    // set the matrix type if passed in
    // update the matrix type if passed in
    if (type != MatrixType::UNDETERMINED)
        m_matrixType = type;

    // set m_baseMatrix (if location is unchanged, this will not change the pointer)
    // Note: m_currentDataLocation may also be CurrentDataLocation::BOTH, in which case the base matrix will be GPU.
    if (m_matrixType == MatrixType::DENSE)
        m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? dynamic_pointer_cast<BaseMatrix<ElemType>>(m_CPUMatrix) : dynamic_pointer_cast<BaseMatrix<ElemType>>(m_GPUMatrix));
    else if (m_matrixType == MatrixType::SPARSE)
        m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? dynamic_pointer_cast<BaseMatrix<ElemType>>(m_CPUSparseMatrix) : dynamic_pointer_cast<BaseMatrix<ElemType>>(m_GPUSparseMatrix));
    // Note: Typecasts are necessary since C++ cannot figure out the common base type (probably due to shared_ptr).
    // sanity check
    if (!m_baseMatrix && m_matrixType != MatrixType::UNDETERMINED)
        LogicError("SetDataLocation: new m_baseMatrix must not be NULL.");
        LogicError("SetDataLocation: New m_baseMatrix must not be NULL.");
}

//this is a private constructor only used internally to initialize a blank matrix
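The validity rules spelled out in the comments above reduce to one check: changing the location of a shared (non-unique) view is only permitted when the new location is BOTH, i.e. a read-only temp copy. A compact standalone expression of just that rule (illustrative, not the Matrix implementation):

#include <stdexcept>

enum class Loc { NONE, CPU, GPU, BOTH };

// Throws when a location change is forbidden under the rules described above.
void CheckLocationChange(Loc current, Loc target, bool isSharedView)
{
    bool changing = (current != target) && (current != Loc::NONE); // NONE -> X is first-time initialization
    bool makingTempCopy = (target == Loc::BOTH);                   // creating a temp copy for reading is always OK
    if (changing && !makingTempCopy && isSharedView)
        throw std::logic_error("cannot collapse/migrate a matrix that other views share");
}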
@ -908,9 +923,8 @@ void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat
|
|||
if (keepValues)
|
||||
CopyElementsFromDenseToSparse(*m_CPUMatrix, *m_CPUSparseMatrix);
|
||||
|
||||
m_CPUMatrix = nullptr;
|
||||
|
||||
SetDataLocation(CPU, SPARSE);
|
||||
m_CPUMatrix = nullptr;
|
||||
}
|
||||
else if (newMatrixType == MatrixType::DENSE)
|
||||
{
|
||||
|
@ -922,9 +936,8 @@ void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat
|
|||
if (keepValues)
|
||||
m_CPUMatrix->SetValue(m_CPUSparseMatrix->CopyColumnSliceToDense(0, GetNumCols()));
|
||||
|
||||
m_CPUSparseMatrix = nullptr;
|
||||
|
||||
SetDataLocation(CPU, DENSE);
|
||||
m_CPUSparseMatrix = nullptr;
|
||||
}
|
||||
else
|
||||
LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
|
||||
|
@ -941,9 +954,8 @@ void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat
|
|||
if (keepValues)
|
||||
m_GPUSparseMatrix->SetValue(*m_GPUMatrix);
|
||||
|
||||
m_GPUMatrix = nullptr;
|
||||
|
||||
SetDataLocation(GPU, SPARSE);
|
||||
m_GPUMatrix = nullptr;
|
||||
}
|
||||
else if (newMatrixType == MatrixType::DENSE)
|
||||
{
|
||||
|
@ -955,9 +967,8 @@ void Matrix<ElemType>::SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat
|
|||
if (keepValues)
|
||||
m_GPUSparseMatrix->CopyToDenseMatrix(*m_GPUMatrix);
|
||||
|
||||
m_GPUSparseMatrix = nullptr;
|
||||
|
||||
SetDataLocation(GPU, DENSE);
|
||||
m_GPUSparseMatrix = nullptr;
|
||||
}
|
||||
else
|
||||
LogicError("SwitchToMatrixType: Unexpected/invalid new matrix type");
|
||||
|
@ -977,25 +988,25 @@ void Matrix<ElemType>::CopyElementsFromDenseToSparse(CPUMatrix<ElemType>& from,
|
|||
template <class ElemType>
|
||||
ElemType Matrix<ElemType>::Get00Element() const
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
nullptr,
|
||||
return m_CPUMatrix->Get00Element(),
|
||||
return m_GPUMatrix->Get00Element(),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
|
||||
{ return m_CPUMatrix->Get00Element(); },
|
||||
{ return m_GPUMatrix->Get00Element(); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
}
|
||||
|
||||
// const operator(,)
|
||||
template <class ElemType>
|
||||
const ElemType Matrix<ElemType>::operator()(const size_t row, const size_t col) const
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
|
||||
nullptr,
|
||||
return m_CPUMatrix->operator()(row, col),
|
||||
_transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false); return m_CPUMatrix->operator()(row, col),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this, nullptr,
|
||||
{ return m_CPUMatrix->operator()(row, col); },
|
||||
{ _transferFromDeviceToDevice(GetDeviceId(), CPUDEVICE, false); return m_CPUMatrix->operator()(row, col); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
}
|
||||
|
||||
// non-const operator(,)
|
||||
//WARNING: This function is very slow for GPUs since it requires copying values between CPUs and GPUs.
|
||||
//In addition, if ColumnSlice is used after this function but before the values are copied back to GPU
|
||||
//the operation will fail since the memory is not managed by the slice.
|
||||
|
@ -1427,22 +1438,18 @@ void Matrix<ElemType>::NormalGrad(Matrix<ElemType>& gradients,
|
|||
}
|
||||
}
|
||||
|
||||
//both this and gradients will be changed
|
||||
// both 'this' and gradients will be changed
|
||||
template <class ElemType>
|
||||
ElemType Matrix<ElemType>::Adagrad(Matrix<ElemType>& gradients, const bool needAveMultiplier)
|
||||
{
|
||||
DecideAndMoveToRightDevice(*this, gradients);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
&gradients,
|
||||
return m_CPUMatrix->Adagrad(*gradients.m_CPUMatrix, needAveMultiplier);
|
||||
SetDataLocation(CPU),
|
||||
return m_GPUMatrix->Adagrad(*gradients.m_GPUMatrix, needAveMultiplier);
|
||||
SetDataLocation(GPU),
|
||||
return gradients.m_CPUSparseMatrix->Adagrad(*m_CPUMatrix, needAveMultiplier);
|
||||
SetDataLocation(CPU),
|
||||
return gradients.m_GPUSparseMatrix->Adagrad(*m_GPUMatrix, needAveMultiplier);
|
||||
SetDataLocation(GPU));
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
|
||||
{ return m_CPUMatrix->Adagrad(*gradients.m_CPUMatrix, needAveMultiplier); SetDataLocation(CPU); },
|
||||
{ return m_GPUMatrix->Adagrad(*gradients.m_GPUMatrix, needAveMultiplier); SetDataLocation(GPU); },
|
||||
{ return gradients.m_CPUSparseMatrix->Adagrad(*m_CPUMatrix, needAveMultiplier); SetDataLocation(CPU); },
|
||||
{ return gradients.m_GPUSparseMatrix->Adagrad(*m_GPUMatrix, needAveMultiplier); SetDataLocation(GPU); });
|
||||
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
|
||||
}
|
||||
|
||||
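For reference, the per-element update that Adagrad performs on the dense path: 'this' accumulates squared gradients, and the gradient is rescaled by the accumulated magnitude. A standalone sketch of the usual rule, not CNTK's exact kernel (eps is an illustrative stabilizer):

#include <cmath>
#include <cstddef>
#include <vector>

// accum[i] += g[i]^2;  g[i] /= sqrt(accum[i]) + eps
void AdagradStep(std::vector<float>& accum, std::vector<float>& grad, float eps = 1e-8f)
{
    for (size_t i = 0; i < grad.size(); i++)
    {
        accum[i] += grad[i] * grad[i];
        grad[i]  /= std::sqrt(accum[i]) + eps;
    }
}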
template <class ElemType>
|
||||
|
@ -1458,14 +1465,12 @@ void Matrix<ElemType>::FSAdagrad(size_t mbSize, Matrix<ElemType>& gradients, Mat
|
|||
aggadagradsqrframes = adagradkeepweight * aggadagradsqrframes + (1.0f - adagradkeepweight) * mbSize;
|
||||
const ElemType targetadagradavdenom_x_sqrtadagradsqrframes = static_cast<ElemType>(targetadagradavdenom * sqrt(aggadagradsqrframes));
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
&gradients,
|
||||
m_CPUMatrix->FSAdagrad(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes);
|
||||
SetDataLocation(CPU),
|
||||
m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes);
|
||||
SetDataLocation(GPU),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients, &gradients,
|
||||
{ m_CPUMatrix->FSAdagrad(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes); SetDataLocation(CPU); },
|
||||
{ m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes); SetDataLocation(GPU); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -1479,14 +1484,12 @@ ElemType Matrix<ElemType>::RmsProp(Matrix<ElemType>& gradients,
|
|||
{
|
||||
DecideAndMoveToRightDevice(*this, gradients);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
&gradients,
|
||||
return m_CPUMatrix->RmsProp(*gradients.m_CPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier);
|
||||
SetDataLocation(CPU),
|
||||
return m_GPUMatrix->RmsProp(*gradients.m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier);
|
||||
SetDataLocation(GPU),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(this, &gradients,
|
||||
{ return m_CPUMatrix->RmsProp(*gradients.m_CPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier); SetDataLocation(CPU); },
|
||||
{ return m_GPUMatrix->RmsProp(*gradients.m_GPUMatrix, RMS_GAMMA, RMS_WGT_INC, RMS_WGT_MAX, RMS_WGT_DEC, RMS_WGT_MIN, needAveMultiplier); SetDataLocation(GPU); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
// Note: Since both 'this' and gradients are changed, we must call SetDataLocation() on 'this' as well.
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -1494,12 +1497,11 @@ void Matrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
|
|||
{
|
||||
if (numRows != GetNumRows() || numCols != GetNumCols())
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
this,
|
||||
m_CPUMatrix->Reshape(numRows, numCols),
|
||||
m_GPUMatrix->Reshape(numRows, numCols),
|
||||
NOT_IMPLEMENTED,
|
||||
m_GPUSparseMatrix->Reshape(numRows, numCols));
|
||||
DISPATCH_MATRIX_ON_FLAG(this, this,
|
||||
{ m_CPUMatrix->Reshape(numRows, numCols); },
|
||||
{ m_GPUMatrix->Reshape(numRows, numCols); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ m_GPUSparseMatrix->Reshape(numRows, numCols); });
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1510,11 +1512,10 @@ void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const
|
|||
{
|
||||
// TODO: should this function test whether the size is changing, and skip if it isn't? We have at least one explicit test for this code calling this (recurrent node)
|
||||
DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
|
||||
this,
|
||||
m_CPUMatrix->Resize(numRows, numCols, growOnly),
|
||||
m_GPUMatrix->Resize(numRows, numCols, growOnly),
|
||||
m_CPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false),
|
||||
m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false));
|
||||
{ m_CPUMatrix->Resize(numRows, numCols, growOnly); },
|
||||
{ m_GPUMatrix->Resize(numRows, numCols, growOnly); },
|
||||
{ m_CPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); },
|
||||
{ m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); });
|
||||
#ifdef _DEBUG
|
||||
if (GetMatrixType() != MatrixType::SPARSE)
|
||||
Invalidate(); // Fill the matrix with NaNs to detect using the content which is undefined. Unfortunately this won't work for sparse matrices.
|
||||
|
@ -1551,11 +1552,10 @@ template <class ElemType>
|
|||
void Matrix<ElemType>::Reset()
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
|
||||
this,
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED,
|
||||
m_CPUSparseMatrix->Reset(),
|
||||
m_GPUSparseMatrix->Reset());
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ m_CPUSparseMatrix->Reset(); },
|
||||
{ m_GPUSparseMatrix->Reset(); });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -3027,12 +3027,11 @@ ElemType Matrix<ElemType>::SumOfAbsElements() const
|
|||
if (IsEmpty())
|
||||
LogicError("SumOfAbsElements: Matrix is empty.");
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
nullptr,
|
||||
return m_CPUMatrix->SumOfAbsElements(),
|
||||
return m_GPUMatrix->SumOfAbsElements(),
|
||||
NOT_IMPLEMENTED,
|
||||
return m_GPUSparseMatrix->SumOfAbsElements());
|
||||
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
|
||||
{ return m_CPUMatrix->SumOfAbsElements(); },
|
||||
{ return m_GPUMatrix->SumOfAbsElements(); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ return m_GPUSparseMatrix->SumOfAbsElements(); });
|
||||
}
|
||||
|
||||
//sum of all elements
|
||||
|
@ -3042,11 +3041,10 @@ ElemType Matrix<ElemType>::LogSumOfElements() const
|
|||
if (IsEmpty())
|
||||
LogicError("LogSumOfElements: Matrix is empty.");
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
nullptr,
|
||||
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
|
||||
{ return m_CPUMatrix->LogSumOfElements(); },
|
||||
{ return m_GPUMatrix->LogSumOfElements(); },
|
||||
{NOT_IMPLEMENTED},
|
||||
{ NOT_IMPLEMENTED},
|
||||
{ NOT_IMPLEMENTED });
|
||||
}
|
||||
|
||||
|
@ -3354,65 +3352,57 @@ Matrix<ElemType>& Matrix<ElemType>::AddSignOf(const Matrix<ElemType>& a)
|
|||
return *this;
|
||||
}
|
||||
|
||||
//I decided to use Matrix<ElemType>& maxIndexes instead of integer vector because the result may be used to do additional calculation
|
||||
// I decided to use Matrix<ElemType>& maxIndices instead of integer vector because the result may be used to do additional calculation
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise) const
|
||||
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndices, Matrix<ElemType>& maxValues, const bool isColWise) const
|
||||
{
|
||||
if (IsEmpty())
|
||||
LogicError("VectorMax: Matrix is empty.");
|
||||
|
||||
DecideAndMoveToRightDevice(*this, maxIndexes, maxValues);
|
||||
maxIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
DecideAndMoveToRightDevice(*this, maxIndices, maxValues);
|
||||
maxIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
&maxValues,
|
||||
m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise);
|
||||
maxIndexes.SetDataLocation(CPU, DENSE),
|
||||
m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise);
|
||||
maxIndexes.SetDataLocation(GPU, DENSE),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(this, &maxValues,
|
||||
{ m_CPUMatrix->VectorMax(*maxIndices.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise); maxIndices.SetDataLocation(CPU, DENSE); },
|
||||
{ m_GPUMatrix->VectorMax(*maxIndices.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise); maxIndices.SetDataLocation(GPU, DENSE); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
// Note: must SetDataLocation() also on maxIndices, since both maxValues and maxIndices are written.
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
|
||||
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndices, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
|
||||
{
|
||||
if (IsEmpty())
|
||||
LogicError("VectorMax: Matrix is empty.");
|
||||
|
||||
DecideAndMoveToRightDevice(*this, maxIndexes, maxValues);
|
||||
maxIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
DecideAndMoveToRightDevice(*this, maxIndices, maxValues);
|
||||
maxIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
&maxValues,
|
||||
m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK);
|
||||
maxIndexes.SetDataLocation(CPU, DENSE),
|
||||
m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK);
|
||||
maxIndexes.SetDataLocation(GPU, DENSE),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(this, &maxValues,
|
||||
{ m_CPUMatrix->VectorMax(*maxIndices.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK); maxIndices.SetDataLocation(CPU, DENSE); },
|
||||
{ m_GPUMatrix->VectorMax(*maxIndices.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK); maxIndices.SetDataLocation(GPU, DENSE); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::VectorMin(Matrix<ElemType>& minIndexes, Matrix<ElemType>& minValues, const bool isColWise) const
|
||||
void Matrix<ElemType>::VectorMin(Matrix<ElemType>& minIndices, Matrix<ElemType>& minValues, const bool isColWise) const
|
||||
{
|
||||
if (IsEmpty())
|
||||
LogicError("VectorMin: Matrix is empty.");
|
||||
|
||||
DecideAndMoveToRightDevice(*this, minIndexes, minValues);
|
||||
minIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
DecideAndMoveToRightDevice(*this, minIndices, minValues);
|
||||
minIndices.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
minValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
&minValues,
|
||||
m_CPUMatrix->VectorMin(*minIndexes.m_CPUMatrix, *minValues.m_CPUMatrix, isColWise);
|
||||
minIndexes.SetDataLocation(CPU, DENSE),
|
||||
m_GPUMatrix->VectorMin(*minIndexes.m_GPUMatrix, *minValues.m_GPUMatrix, isColWise);
|
||||
minIndexes.SetDataLocation(GPU, DENSE),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(this, &minValues,
|
||||
{ m_CPUMatrix->VectorMin(*minIndices.m_CPUMatrix, *minValues.m_CPUMatrix, isColWise); minIndices.SetDataLocation(CPU, DENSE); },
|
||||
{ m_GPUMatrix->VectorMin(*minIndices.m_GPUMatrix, *minValues.m_GPUMatrix, isColWise); minIndices.SetDataLocation(GPU, DENSE); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
}
|
||||
|
||||
#pragma endregion Member BLAS Functions
|
||||
|
@ -3425,12 +3415,11 @@ int Matrix<ElemType>::GetDeviceId() const
|
|||
if (m_currentDataLocation == CurrentDataLocation::NONE)
|
||||
return m_preferredDeviceId;
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
nullptr,
|
||||
return CPUDEVICE,
|
||||
return m_GPUMatrix->GetComputeDeviceId(),
|
||||
return CPUDEVICE,
|
||||
return m_GPUSparseMatrix->GetComputeDeviceId());
|
||||
DISPATCH_MATRIX_ON_FLAG(this, nullptr,
|
||||
{ return CPUDEVICE; },
|
||||
{ return m_GPUMatrix->GetComputeDeviceId(); },
|
||||
{ return CPUDEVICE; },
|
||||
{ return m_GPUSparseMatrix->GetComputeDeviceId(); });
|
||||
}
|
||||
|
||||
// TODO: Comment why we need a second ElemType.
|
||||
|
@ -3544,25 +3533,21 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
|
|||
return;
|
||||
}
|
||||
|
||||
// warn about device change
|
||||
#define NUM_DEVICE_CHANGED_WARN 20
|
||||
if (m_numTimesDeviceChanged <= NUM_DEVICE_CHANGED_WARN &&
|
||||
(!emptyTransfer || (from_id >= 0 && to_id >= 0)))
|
||||
{
|
||||
m_numTimesDeviceChanged++;
|
||||
if (m_devicesTransferedTo[0] < CPUDEVICE)
|
||||
{
|
||||
m_devicesTransferedTo[0] = to_id;
|
||||
}
|
||||
else if (m_devicesTransferedTo[0] != to_id)
|
||||
{
|
||||
m_devicesTransferedTo[1] = to_id;
|
||||
}
|
||||
}
|
||||
if (m_numTimesDeviceChanged == NUM_DEVICE_CHANGED_WARN && m_devicesTransferedTo[1] >= CPUDEVICE)
|
||||
{
|
||||
fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long) GetNumRows(), (unsigned long) GetNumCols(), NUM_DEVICE_CHANGED_WARN);
|
||||
}
|
||||
|
||||
// do the transfer
|
||||
if (m_matrixType == MatrixType::SPARSE)
|
||||
{
|
||||
if (from_id == CPUDEVICE) // from CPU to GPU
|
||||
|
@ -3582,8 +3567,8 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
|
|||
|
||||
if (isBeingMoved)
|
||||
{
|
||||
m_CPUSparseMatrix = nullptr;
|
||||
SetDataLocation(GPU, SPARSE);
|
||||
m_CPUSparseMatrix = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3607,8 +3592,8 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
|
|||
|
||||
if (isBeingMoved)
|
||||
{
|
||||
m_GPUSparseMatrix = nullptr;
|
||||
SetDataLocation(CPU, SPARSE);
|
||||
m_GPUSparseMatrix = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3638,8 +3623,8 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
|
|||
}
|
||||
if (isBeingMoved)
|
||||
{
|
||||
m_CPUMatrix = nullptr;
|
||||
SetDataLocation(GPU, DENSE);
|
||||
m_CPUMatrix = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3666,8 +3651,8 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
|
|||
|
||||
if (isBeingMoved)
|
||||
{
|
||||
m_GPUMatrix = nullptr;
|
||||
SetDataLocation(CPU, DENSE);
|
||||
m_GPUMatrix = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4180,17 +4165,19 @@ void Matrix<ElemType>::SVD(const Matrix<ElemType>& A, Matrix<ElemType>& SIGMA, M
|
|||
VT.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
|
||||
W.SwitchToMatrixType(A.GetMatrixType(), A.GetFormat(), false);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&A,
|
||||
nullptr,
|
||||
Matrix<ElemType> tA = A.DeepClone();
|
||||
CPUMatrix<ElemType>::SVD(*tA.m_CPUMatrix, *SIGMA.m_CPUMatrix, *U.m_CPUMatrix, *VT.m_CPUMatrix, *W.m_CPUMatrix);
|
||||
SIGMA.SetDataLocation(CPU);
|
||||
U.SetDataLocation(CPU);
|
||||
VT.SetDataLocation(CPU);
|
||||
W.SetDataLocation(CPU),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(&A, nullptr,
|
||||
{
|
||||
Matrix<ElemType> tA = A.DeepClone();
|
||||
CPUMatrix<ElemType>::SVD(*tA.m_CPUMatrix, *SIGMA.m_CPUMatrix, *U.m_CPUMatrix, *VT.m_CPUMatrix, *W.m_CPUMatrix);
|
||||
SIGMA.SetDataLocation(CPU);
|
||||
U.SetDataLocation(CPU);
|
||||
VT.SetDataLocation(CPU);
|
||||
W.SetDataLocation(CPU);
|
||||
// need to SetDataLocation() on all matrices we write to
|
||||
},
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
}
|
||||
|
||||
/// <summary>Matrix-matrix multiply with col-major matrices (a and b may be transposed): c = alpha * op(a) * op(b) + beta*c</summary>
|
||||
|
@ -4400,34 +4387,33 @@ template <class ElemType>
|
|||
|
||||
if (a.GetMatrixType() == c.GetMatrixType())
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(&c,
|
||||
&c,
|
||||
CPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix),
|
||||
GPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix),
|
||||
NOT_IMPLEMENTED,
|
||||
GPUSparseMatrix<ElemType> b = move(*c.m_GPUSparseMatrix);
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, b, *c.m_GPUSparseMatrix));
|
||||
DISPATCH_MATRIX_ON_FLAG(&c, &c,
|
||||
{ CPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUMatrix, *c.m_CPUMatrix); },
|
||||
{ GPUMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, *c.m_GPUMatrix); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ GPUSparseMatrix<ElemType> b = move(*c.m_GPUSparseMatrix); GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, b, *c.m_GPUSparseMatrix); });
|
||||
}
|
||||
else
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(&c,
|
||||
nullptr,
|
||||
CPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUSparseMatrix, *c.m_CPUMatrix);
|
||||
c.SetDataLocation(CPU),
|
||||
if (a.m_GPUSparseMatrix->GetFormat() == MatrixFormat::matrixFormatSparseCSC)
|
||||
{
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, *c.m_GPUMatrix, *c.m_GPUMatrix);
|
||||
} else // new GPU sparse matrix code
|
||||
{
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, *c.m_GPUMatrix);
|
||||
} c.SetDataLocation(GPU),
|
||||
NOT_IMPLEMENTED,
|
||||
{
|
||||
c.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(c.m_GPUSparseMatrix->CopyToDenseMatrix());
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, 1, *c.m_GPUSparseMatrix, *c.m_GPUMatrix);
|
||||
c.m_GPUSparseMatrix = nullptr;
|
||||
c.SetDataLocation(GPU, DENSE);
|
||||
});
|
||||
DISPATCH_MATRIX_ON_FLAG(&c, nullptr,
|
||||
{
|
||||
CPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_CPUSparseMatrix, *c.m_CPUMatrix);
|
||||
c.SetDataLocation(CPU);
|
||||
},
|
||||
{
|
||||
if (a.m_GPUSparseMatrix->GetFormat() == MatrixFormat::matrixFormatSparseCSC)
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, 1, *c.m_GPUMatrix, *c.m_GPUMatrix);
|
||||
else // new GPU sparse matrix code
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUSparseMatrix, *c.m_GPUMatrix);
|
||||
c.SetDataLocation(GPU);
|
||||
},
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{
|
||||
c.m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(c.m_GPUSparseMatrix->CopyToDenseMatrix());
|
||||
GPUSparseMatrix<ElemType>::ScaleAndAdd(alpha, *a.m_GPUMatrix, 1, *c.m_GPUSparseMatrix, *c.m_GPUMatrix);
|
||||
c.SetDataLocation(GPU, DENSE);
|
||||
c.m_GPUSparseMatrix = nullptr;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4444,9 +4430,7 @@ template <class ElemType>
|
|||
if (beta == 1)
|
||||
ScaleAndAdd(alpha, a, c);
|
||||
else if (beta == 0)
|
||||
{
|
||||
Scale(alpha, a, c);
|
||||
}
|
||||
else
|
||||
{
|
||||
ScaleAndAdd(alpha / beta, a, c); // c1=alpha/beta * a + c
|
||||
|
@ -4598,8 +4582,8 @@ void Matrix<ElemType>::AddElementToElement(const Matrix<ElemType>& a, const size

DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AddElementToElement(*a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(*a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
CPUMatrix<ElemType>::AddElementToElement(1, *a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(1, *a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}

@ -4615,8 +4599,8 @@ void Matrix<ElemType>::AssignElementToElement(const Matrix<ElemType>& a, const s

DISPATCH_MATRIX_ON_FLAG(&c,
&c,
CPUMatrix<ElemType>::AssignElementToElement(*a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
NOT_IMPLEMENTED,
CPUMatrix<ElemType>::AddElementToElement(0, *a.m_CPUMatrix, ai, aj, *c.m_CPUMatrix, ci, cj),
GPUMatrix<ElemType>::AddElementToElement(0, *a.m_GPUMatrix, ai, aj, *c.m_GPUMatrix, ci, cj),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
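Both rewritten call sites now funnel into a single AddElementToElement kernel with a leading beta argument. Read together with the c[ci,cj] += a[ai,aj] comment that appears further down in this diff, and assuming the usual BLAS-style convention that beta scales the destination, the intended per-element update is

    c(ci, cj) = beta * c(ci, cj) + a(ai, aj)

so beta = 1 reproduces the old AddElementToElement behavior and beta = 0 realizes AssignElementToElement without a separate code path.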
@ -5205,7 +5189,7 @@ static bool VerifyIsDense(const Matrix<ElemType>& a)
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
|
||||
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 2>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
|
||||
|
@ -5216,14 +5200,14 @@ void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemTy
|
|||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
this,
|
||||
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
|
||||
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 3>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
|
||||
|
@ -5234,14 +5218,14 @@ void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const
|
|||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
this,
|
||||
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
|
||||
void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 4>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
|
||||
|
@ -5252,8 +5236,8 @@ void Matrix<ElemType>::TensorOp(ElemType beta, const Matrix<ElemType>& a, const
|
|||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
this,
|
||||
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_CPUMatrix->TensorOp(beta, *a.m_CPUMatrix, *b.m_CPUMatrix, *c.m_CPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
m_GPUMatrix->TensorOp(beta, *a.m_GPUMatrix, *b.m_GPUMatrix, *c.m_GPUMatrix, alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
}
|
||||
|
|
|
@ -115,11 +115,17 @@ public:
static Matrix<ElemType> RandomUniform(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
static Matrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId, const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);

static void SetDevice(DEVICEID_TYPE deviceId);
static void SetDevice(DEVICEID_TYPE deviceId); // TODO: unify with PrepareDevice()

void ReleaseMemory();
~Matrix();

// workaround to bugs in BOTH implementation: force to collapse to home location
void CollapseDataLocationAfterWriting() const
{
SetDataLocation(GetDeviceId() < 0 ? CurrentDataLocation::CPU : CurrentDataLocation::GPU, GetMatrixType());
}
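A minimal usage sketch of the new helper, assuming (per the comment above) that a write can leave the matrix flagged as resident in both locations; the wrapper function below is illustrative and not taken from an actual call site:

    // Illustrative only: after a write that may leave the matrix in the BOTH state,
    // collapse it back to its home location so later reads see a single copy.
    template <class ElemType>
    void WriteThenCollapse(Matrix<ElemType>& m)
    {
        m.SetValue(0);                         // example write (SetValue is used elsewhere in this commit)
        m.CollapseDataLocationAfterWriting();  // workaround for the BOTH-state bugs noted above
    }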

private:
Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID); // only used internally to initialize a blank matrix
Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID); // only used internally to initialize a blank matrix
@ -530,15 +536,15 @@ public:
|
|||
|
||||
static void TensorShuffleScaleAndAdd(ElemType keepWeight, const Matrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const Matrix<ElemType>& b, Matrix<ElemType>& c);
|
||||
|
||||
void TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const Matrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 2>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 2>& reducingStrides);
|
||||
void TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 3>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 3>& reducingStrides);
|
||||
void TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
|
||||
void TensorOp(ElemType beta, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const std::array<size_t, 4>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const std::array<SmallVector<ptrdiff_t>, 4>& reducingStrides);
|
||||
|
|
|
@ -1894,7 +1894,7 @@ void GPUMatrix<ElemType>::AssignScaledDifference(const GPUMatrix<ElemType>& /*al
|
|||
|
||||
//c[ci,cj] += a[ai,aj]
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::AddElementToElement(const GPUMatrix<ElemType>& /*a*/, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
|
||||
void GPUMatrix<ElemType>::AddElementToElement(ElemType beta, const GPUMatrix<ElemType>& /*a*/, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1953,21 +1953,21 @@ void GPUMatrix<ElemType>::TensorShuffleScaleAndAdd(ElemType keepWeight, const GP
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op,
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 2>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 2>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 2>& reducingStrides)
|
||||
{
|
||||
}
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op,
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 3>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 3>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 3>& reducingStrides)
|
||||
{
|
||||
}
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const GPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op,
|
||||
void GPUMatrix<ElemType>::TensorOp(ElemType beta, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const GPUMatrix<ElemType>& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp,
|
||||
const array<size_t, 4>& offsets,
|
||||
const SmallVector<size_t>& regularOpDims, const array<SmallVector<ptrdiff_t>, 4>& regularStrides,
|
||||
const SmallVector<size_t>& reducingOpDims, const array<SmallVector<ptrdiff_t>, 4>& reducingStrides)
|
||||
|
|
|
@ -38,14 +38,16 @@ using namespace std;
|
|||
|
||||
// main constructor (all constructors except the default one route through this)
|
||||
template <class ElemType>
|
||||
TensorView<ElemType>::TensorView(const Matrix<ElemType>& sob, const TensorShape& shape)
|
||||
: m_sob(sob.AsReference()), m_shape(shape)
|
||||
TensorView<ElemType>::TensorView(const MatrixBasePtr& sob, const TensorShape& shape)
|
||||
: m_sob(dynamic_pointer_cast<Matrix<ElemType>>(sob)), m_shape(shape)
|
||||
{
|
||||
if (!m_sob)
|
||||
LogicError("TensorView: Attempted to create a TensorView<ElemType> on a storage object of a different ElemType.");
|
||||
#ifdef _DEBUG
|
||||
// check bounds of TensorShape against underlying storage object
|
||||
// This is useful to detect errors like passing a matrix from the wrong input.
|
||||
const auto r = shape.GetLocationRange();
|
||||
const auto n = m_sob.GetNumElements();
|
||||
const auto n = m_sob->GetNumElements();
|
||||
if (r.first < 0 || (size_t)r.second > n)
|
||||
LogicError("TensorView: Shape bounds [%d,%d) exceed bounds of underlying storage object [0,%d).", (int) r.first, (int) r.second, (int) n);
|
||||
#endif
|
||||
|
@ -228,7 +230,7 @@ static bool CheckDifferentObject(const TensorView<ElemType>& a, const TensorView
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
void TensorView<ElemType>::DoUnaryOpOf(ElemType beta, const TensorView& a, ElemType alpha, ElementWiseOperator op)
|
||||
void TensorView<ElemType>::DoUnaryOpOf(ElemType beta, const TensorView& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp)
|
||||
{
|
||||
// static int cc = 0; if (cc++ == 0)
|
||||
// fprintf(stderr, "Tensor Op: Op %d: %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
|
@ -244,11 +246,11 @@ void TensorView<ElemType>::DoUnaryOpOf(ElemType beta, const TensorView& a, ElemT
|
|||
CheckDifferentObject(a, *this);
|
||||
|
||||
// now perform the operation
|
||||
GetSOB().TensorOp(beta, a.GetSOB(), alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
GetSOB().TensorOp(beta, a.GetSOB(), alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void TensorView<ElemType>::DoBinaryOpOf(ElemType beta, const TensorView& a, const TensorView& b, ElemType alpha, ElementWiseOperator op)
|
||||
void TensorView<ElemType>::DoBinaryOpOf(ElemType beta, const TensorView& a, const TensorView& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp)
|
||||
{
|
||||
// static int cc = 0; if (cc++ == 0)
|
||||
// fprintf(stderr, "Tensor Op: Op %d: %s op %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(b.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
|
@ -262,11 +264,11 @@ void TensorView<ElemType>::DoBinaryOpOf(ElemType beta, const TensorView& a, cons
|
|||
if (reducingOpDims.size() > 0)
|
||||
CheckDifferentObject(a, *this) && CheckDifferentObject(b, *this);
|
||||
|
||||
GetSOB().TensorOp(beta, a.GetSOB(), b.GetSOB(), alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
GetSOB().TensorOp(beta, a.GetSOB(), b.GetSOB(), alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void TensorView<ElemType>::DoTernaryOpOf(ElemType beta, const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha, ElementWiseOperator op)
|
||||
void TensorView<ElemType>::DoTernaryOpOf(ElemType beta, const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp)
|
||||
{
|
||||
// static int cc = 0; if (cc++ == 0)
|
||||
// fprintf(stderr, "Tensor Op: Op %d: %s, %s, %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(b.GetShape()).c_str(), string(c.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
|
@ -280,79 +282,7 @@ void TensorView<ElemType>::DoTernaryOpOf(ElemType beta, const TensorView& a, con
|
|||
if (reducingOpDims.size() > 0)
|
||||
CheckDifferentObject(a, *this) && CheckDifferentObject(b, *this) && CheckDifferentObject(c, *this);
|
||||
|
||||
GetSOB().TensorOp(beta, a.GetSOB(), b.GetSOB(), c.GetSOB(), alpha, op, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
}
|
||||
|
||||
// simple test function for testing stuff
|
||||
// Call as: Microsoft::MSR::CNTK::TensorView<float>::Test();
|
||||
template <class ElemType>
|
||||
/*static*/ void TensorView<ElemType>::Test()
|
||||
{
|
||||
const DEVICEID_TYPE deviceId = 0; // -1
|
||||
Matrix<ElemType> m1(deviceId);
|
||||
Matrix<ElemType> m2(deviceId);
|
||||
Matrix<ElemType> m3(deviceId);
|
||||
{
|
||||
m1.SetValue(5, 3, {1, 2, 3,
|
||||
14, 15, 6,
|
||||
4, 5, 16,
|
||||
41, 5, 1,
|
||||
1.8, 4.5, 7});
|
||||
m2.SetValue(5, 1, {42,
|
||||
13,
|
||||
1968,
|
||||
3.1415f,
|
||||
7});
|
||||
|
||||
m3.Resize(m1);
|
||||
|
||||
// regular zip (just add m1 to itself)
|
||||
TensorView(m3).DoSumOf(0, TensorView(m1), TensorView(m1), 1);
|
||||
m3.Print();
|
||||
|
||||
// unary op
|
||||
TensorView(m3).DoSqrtOf(0, TensorView(m1), 1);
|
||||
m3.Print();
|
||||
|
||||
// broadcasting of an input
|
||||
TensorView(m3).DoSumOf(0, TensorView(m1), TensorView(m2), 1);
|
||||
m3.Print();
|
||||
|
||||
TensorView(m3).DoMaxOf(0, TensorView(m1), TensorView(m2), 1);
|
||||
m3.Print();
|
||||
|
||||
TensorView(m3).DoGTOf(0, TensorView(m1), TensorView(m2), 1);
|
||||
m3.Print();
|
||||
|
||||
// reduction over columns
|
||||
m3.Resize(5, 1);
|
||||
TensorView(m3).DoSumOf(0, TensorView(m1), TensorView(m2), 1);
|
||||
m3.Print();
|
||||
|
||||
// reduction over rows
|
||||
m3.Resize(1, 3);
|
||||
TensorView(m3).DoSumOf(0, TensorView(m1), TensorView(m2), 1);
|
||||
m3.Print();
|
||||
|
||||
TensorView(m3).DoLogSumOf(0, TensorView(m1), TensorView(m2), 1);
|
||||
m3.Print();
|
||||
}
|
||||
{
|
||||
m1.Resize(1, 42);
|
||||
m2.Resize(13, 1);
|
||||
m3.Resize(13, 21);
|
||||
TensorShape s1(1, 2, 21);
|
||||
TensorShape s2(13, 1);
|
||||
TensorShape s3(13, 1, 21);
|
||||
let t1 = TensorView<ElemType>(m1, s1);
|
||||
t1;
|
||||
let t2 = TensorView<ElemType>(m2, s2);
|
||||
t2;
|
||||
auto t3 = TensorView<ElemType>(m3, s3);
|
||||
t3;
|
||||
t3.DoSumOf(0, t1, t2, 1);
|
||||
m3.Print();
|
||||
}
|
||||
GetSOB().TensorOp(beta, a.GetSOB(), b.GetSOB(), c.GetSOB(), alpha, op, reductionOp, offsets, regularOpDims, regularStrides, reducingOpDims, reducingStrides);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
@ -409,19 +339,20 @@ static void FlattenToMatrix(TensorShape& shape, bool trans, size_t splitPoint)
|
|||
|
||||
// convert tensor into a Matrix object
|
||||
template <class ElemType>
|
||||
Matrix/*ref*/<ElemType> TensorView<ElemType>::AsMatrix() const
|
||||
shared_ptr<Matrix<ElemType>> TensorView<ElemType>::AsMatrix() const
|
||||
{
|
||||
assert(m_shape.GetRank() == 2);
|
||||
if (m_shape.GetStrides()[0] != 1 && m_shape[0] != 1)
|
||||
InvalidArgument("AsMatrix: Flattened [%s] matrix is not dense (it has a stride).", string(m_shape).c_str());
|
||||
|
||||
// create a Matrix view into the TensorView (which in turn is a view over a Matrix...)
|
||||
// The way to do this is to use a ColumnSlice.
|
||||
// express the TensorView's storage in m_sob's coordinates
|
||||
let firstColumn = m_shape.GetOffset() / m_sob.GetNumRows();
|
||||
let numColumns = m_shape.GetNumElements() / m_sob.GetNumRows();
|
||||
if (firstColumn * m_sob.GetNumRows() != m_shape.GetOffset() || numColumns * m_sob.GetNumRows() != m_shape.GetNumElements())
|
||||
let firstColumn = m_shape.GetOffset() / m_sob->GetNumRows();
|
||||
let numColumns = m_shape.GetNumElements() / m_sob->GetNumRows();
|
||||
if (firstColumn * m_sob->GetNumRows() != m_shape.GetOffset() || numColumns * m_sob->GetNumRows() != m_shape.GetNumElements())
|
||||
InvalidArgument("AsMatrix: Flattened [%s] matrix has an offset or width that is not a multiple of the storage object's row dimension.", string(m_shape).c_str());
|
||||
auto sob = m_sob.ColumnSlice(firstColumn, numColumns);
|
||||
|
||||
// now reinterpret this slice according to the new tensor shape
|
||||
// Example:
|
||||
// - each sob column contains a set of vectors stored as a 2D tensor [I x J], and [S x T] samples
|
||||
|
@ -431,12 +362,20 @@ Matrix/*ref*/<ElemType> TensorView<ElemType>::AsMatrix() const
|
|||
// - which in turn yields a [K x (J * S x*T)] matrix
|
||||
// which gets reinterpreted back as a [K x J x S x T] tensor
|
||||
// In the special case of sparse matrices, this split cannot be done. E.g. in the above example, we could only multiply with a [K x I x J] tensor.
|
||||
if (sob.GetMatrixType() == MatrixType::DENSE)
|
||||
return sob.Reshaped(m_shape[0], m_shape[1]);
|
||||
else if (m_shape[0] == sob.GetNumRows()) // SPARSE matrices cannot be reshaped, so we only support 1D and 2D tensors
|
||||
return sob;
|
||||
else
|
||||
let needsSlicing = firstColumn != 0 || numColumns != m_sob->GetNumCols();
|
||||
let needsReshaping = m_shape[0] != m_sob->GetNumRows() || m_shape[1] != m_sob->GetNumCols();
|
||||
|
||||
// Note: If an output matrix is a view and needs to move to a different device, we will fail later, since the current structure cannot support that.
|
||||
// As a consequence, some configurations will simply not work currently.
|
||||
// We minimize the chance of this by using the original storage object whenever possible.
|
||||
if (!needsSlicing && !needsReshaping) // no need to mess with the storage object: pass it on as it is. Full support for moving devices.
|
||||
return m_sob;
|
||||
else if (needsSlicing && !needsReshaping) // slicing is supported for sparse as well
|
||||
return make_shared<Matrix<ElemType>>(m_sob->ColumnSlice(firstColumn, numColumns));
|
||||
else if (m_sob->GetMatrixType() != MatrixType::DENSE) // needsReshaping: not allowed for sparse matrices
|
||||
RuntimeError("AsMatrix: Sparse tensors are not supported unless they are 1D or 2D matrices.");
|
||||
else // dense can slice and reshape neutrally, but will also fail if output matrix needs to move devices
|
||||
return make_shared<Matrix<ElemType>>(m_sob->ColumnSlice(firstColumn, numColumns).Reshaped(m_shape[0], m_shape[1]));
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -471,9 +410,9 @@ void TensorView<ElemType>::DoMatrixProductOf(ElemType beta, bool transC, const T
|
|||
auto C = Reshaped(shapeC).AsMatrix();
|
||||
// and go
|
||||
if (!transC)
|
||||
Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, A, transA, B, transB, beta, C);
|
||||
Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, *A, transA, *B, transB, beta, *C);
|
||||
else // C' = A * B <==> C = (A * B)' = B' * A'
|
||||
Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, B, !transB, A, !transA, beta, C);
|
||||
Matrix<ElemType>::MultiplyAndWeightedAdd(alpha, *B, !transB, *A, !transA, beta, *C);
|
||||
}
|
||||
|
||||
template class TensorView<float>;
|
||||
|
|
|
@ -26,20 +26,22 @@ public:
// -------------------------------------------------------------------

// reinterpret a matrix storage object (SOB) as a TensorView with a given TensorShape --this is the main constructor
TensorView(const Matrix<ElemType>& sob, const TensorShape& shape);
TensorView(const MatrixBasePtr& sob, const TensorShape& shape);
#if 0
// cast a Matrix as a 2D TensorView (without shape change)
TensorView(const Matrix<ElemType>& sob)
: m_sob(sob.AsReference()), m_shape(TensorShape(array<size_t, 2>{sob.GetNumRows(), sob.GetNumCols()}))
TensorView(const MatrixBasePtr& sob)
: m_sob(sob), m_shape(TensorShape(array<size_t, 2>{sob->GetNumRows(), sob->GetNumCols()}))
{
}
#endif
// reshape a TensorView
TensorView(const TensorView<ElemType>& other, const TensorShape& shape)
: m_sob(other.m_sob.AsReference()), m_shape(shape)
: m_sob(other.m_sob), m_shape(shape)
{
}
// copy constructor
TensorView(const TensorView<ElemType>& other)
: m_sob(other.m_sob.AsReference()), m_shape(other.m_shape)
: m_sob(other.m_sob), m_shape(other.m_shape)
{
}
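Because the main constructor now takes a MatrixBasePtr (a shared_ptr to the storage object) rather than a Matrix reference, callers have to hold the SOB in a shared_ptr. A minimal construction sketch using only types that appear in this diff; the dimensions are made up for illustration:

    auto sob = std::make_shared<Matrix<float>>(/*deviceId=*/-1); // CPU-resident storage object
    sob->Resize(4, 6);                                           // 4 x 6 dense matrix
    TensorShape shape(2, 2, 6);                                  // view the same 24 elements as [2 x 2 x 6]
    TensorView<float> view(sob, shape);                          // main constructor: shared_ptr + shape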
@ -66,36 +68,36 @@ public:
|
|||
// -------------------------------------------------------------------
|
||||
|
||||
#pragma push_macro("DeclareUnaryTensorOp")
|
||||
#define DeclareUnaryTensorOp(oper) \
|
||||
void Do##oper##Of(ElemType beta, const TensorView& a, ElemType alpha) \
|
||||
{ \
|
||||
DoUnaryOpOf(beta, a, alpha, ElementWiseOperator::op##oper); \
|
||||
} \
|
||||
void Assign##oper##Of(const TensorView& a, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoUnaryOpOf(0, a, alpha, ElementWiseOperator::op##oper); \
|
||||
} \
|
||||
void Add##oper##Of(const TensorView& a, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoUnaryOpOf(1.0f, a, alpha, ElementWiseOperator::op##oper); \
|
||||
#define DeclareUnaryTensorOp(oper) \
|
||||
void Do##oper##Of(ElemType beta, const TensorView& a, ElemType alpha) \
|
||||
{ \
|
||||
DoUnaryOpOf(beta, a, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
} \
|
||||
void Assign##oper##Of(const TensorView& a, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoUnaryOpOf(0, a, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
} \
|
||||
void Add##oper##Of(const TensorView& a, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoUnaryOpOf(1.0f, a, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
}
|
||||
|
||||
ForAllUnaryOps(DeclareUnaryTensorOp);
|
||||
#pragma pop_macro("DeclareUnaryTensorOp")
|
||||
|
||||
#pragma push_macro("DeclareBinaryTensorOp")
|
||||
#define DeclareBinaryTensorOp(oper) \
|
||||
void Do##oper##Of(ElemType beta, const TensorView& a, const TensorView& b, ElemType alpha) \
|
||||
{ \
|
||||
DoBinaryOpOf(beta, a, b, alpha, ElementWiseOperator::op##oper); \
|
||||
} \
|
||||
void Assign##oper##Of(const TensorView& a, const TensorView& b, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoBinaryOpOf(0, a, b, alpha, ElementWiseOperator::op##oper); \
|
||||
} \
|
||||
void Add##oper##Of(const TensorView& a, const TensorView& b, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoBinaryOpOf(1.0f, a, b, alpha, ElementWiseOperator::op##oper); \
|
||||
#define DeclareBinaryTensorOp(oper) \
|
||||
void Do##oper##Of(ElemType beta, const TensorView& a, const TensorView& b, ElemType alpha) \
|
||||
{ \
|
||||
DoBinaryOpOf(beta, a, b, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
} \
|
||||
void Assign##oper##Of(const TensorView& a, const TensorView& b, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoBinaryOpOf(0, a, b, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
} \
|
||||
void Add##oper##Of(const TensorView& a, const TensorView& b, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoBinaryOpOf(1.0f, a, b, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
}
|
||||
|
||||
ForAllBinaryOps(DeclareBinaryTensorOp);
|
||||
|
@ -105,25 +107,23 @@ public:
|
|||
#define DeclareTernaryTensorOp(oper) \
|
||||
void Do##oper##Of(ElemType beta, const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha) \
|
||||
{ \
|
||||
DoTernaryOpOf(beta, a, b, c, alpha, ElementWiseOperator::op##oper); \
|
||||
DoTernaryOpOf(beta, a, b, c, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
} \
|
||||
void Assign##oper##Of(const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoTernaryOpOf(0, a, b, c, alpha, ElementWiseOperator::op##oper); \
|
||||
DoTernaryOpOf(0, a, b, c, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
} \
|
||||
void Add##oper##Of(const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha = 1.0f) \
|
||||
{ \
|
||||
DoTernaryOpOf(1.0f, a, b, c, alpha, ElementWiseOperator::op##oper); \
|
||||
DoTernaryOpOf(1.0f, a, b, c, alpha, ElementWiseOperator::op##oper, ElementWiseOperator::opSum); \
|
||||
}
|
||||
|
||||
ForAllTernaryOps(DeclareTernaryTensorOp);
|
||||
#pragma pop_macro("DeclareTernaryTensorOp")
|
||||
|
||||
static void Test();

void DoUnaryOpOf (ElemType beta, const TensorView& a, ElemType alpha, ElementWiseOperator op);
void DoBinaryOpOf (ElemType beta, const TensorView& a, const TensorView& b, ElemType alpha, ElementWiseOperator op);
void DoTernaryOpOf(ElemType beta, const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha, ElementWiseOperator op);
void DoUnaryOpOf (ElemType beta, const TensorView& a, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp);
void DoBinaryOpOf (ElemType beta, const TensorView& a, const TensorView& b, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp);
void DoTernaryOpOf(ElemType beta, const TensorView& a, const TensorView& b, const TensorView& c, ElemType alpha, ElementWiseOperator op, ElementWiseOperator reductionOp);
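The generated Do/Assign/Add wrappers above always pass opSum as the reduction; these three entry points expose the reduction operator so a caller can name it explicitly. A hedged example of a direct call (opCopy is assumed to be one of the unary operators generated by ForAllUnaryOps; opSum is the only reduction exercised by this change, and other reductions can be passed once the Matrix-level TensorOp implementations support them):

    // Equivalent to result.AssignCopyOf(a), but with the reduction operator spelled out.
    result.DoUnaryOpOf(/*beta=*/0, a, /*alpha=*/1.0f, ElementWiseOperator::opCopy, ElementWiseOperator::opSum);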
// -------------------------------------------------------------------
|
||||
// matrix product -- GEMM for flattened tensors
|
||||
|
@ -139,23 +139,23 @@ public:
void AssignMatrixProductOf( bool transC, const TensorView& a, bool transA, const TensorView& b, bool transB, ElemType alpha = 1.0f) { DoMatrixProductOf(0, transC, a, transA, b, transB, alpha); }
void AddMatrixProductOf ( bool transC, const TensorView& a, bool transA, const TensorView& b, bool transB, ElemType alpha = 1.0f) { DoMatrixProductOf(1.0f, transC, a, transA, b, transB, alpha); }

Matrix/*ref*/<ElemType> AsMatrix() const;
shared_ptr<Matrix<ElemType>> AsMatrix() const;

private:
// -------------------------------------------------------------------
// accessors
// -------------------------------------------------------------------

const Matrix<ElemType>& GetSOB() const { return m_sob; }
Matrix<ElemType>& GetSOB() { return m_sob; }
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
Matrix<ElemType>& GetSOB() { return *m_sob; }
const TensorShape& GetShape() const { return m_shape; }

// -------------------------------------------------------------------
// sob members
// -------------------------------------------------------------------

Matrix<ElemType> m_sob; // Storage OBject that holds the data that is being viewed with this TensorView. This is really a reference (not owning the buffer).
TensorShape m_shape; // the meta-data that describes the data's shape and/or access pattern
shared_ptr<Matrix<ElemType>> m_sob; // Storage OBject that holds the data that is being viewed with this TensorView. This is really a reference (not owning the buffer).
TensorShape m_shape; // the meta-data that describes the data's shape and/or access pattern
};
|
||||
|
||||
}}}
|
||||
|
|
|
@ -583,6 +583,7 @@ public:
|
|||
BinaryReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"BinaryReader");
|
||||
}
|
||||
virtual ~BinaryReader();
|
||||
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
|
||||
|
|
|
@ -152,6 +152,7 @@ public:
|
|||
DSSMReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"DSSMReader");
|
||||
m_qfeaturesBuffer = NULL;
|
||||
m_dfeaturesBuffer = NULL;
|
||||
m_labelsBuffer = NULL;
|
||||
|
|
|
@ -152,6 +152,7 @@ public:
|
|||
HTKMLFReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"HTKMLFReader");
|
||||
}
|
||||
template <class ConfigRecordType>
|
||||
void InitFromConfig(const ConfigRecordType&);
|
||||
|
|
|
@ -2055,4 +2055,5 @@ void HTKMLFReader<ElemType>::GetDataNamesFromConfig(const ConfigRecordType& read
|
|||
|
||||
template class HTKMLFReader<float>;
|
||||
template class HTKMLFReader<double>;
|
||||
} } }
|
||||
|
||||
}}}
|
||||
|
|
|
@ -38,9 +38,10 @@ private:
|
|||
MBLayoutPtr pMBLayout;
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> minibatchUttInfo;
|
||||
size_t currentMBSize;
|
||||
MinibatchBufferUnit()
|
||||
: pMBLayout(make_shared<MBLayout>()), currentMBSize(0)
|
||||
MinibatchBufferUnit() :
|
||||
pMBLayout(make_shared<MBLayout>()), currentMBSize(0)
|
||||
{
|
||||
pMBLayout->SetUniqueAxisName(L"HTKMLFReader");
|
||||
}
|
||||
};
|
||||
bool m_doMinibatchBuffering;
|
||||
|
@ -163,9 +164,10 @@ public:
|
|||
// set to true so that a current minibatch can use state activities from the previous minibatch.
// default will have truncated BPTT, which only does BPTT inside a minibatch
|
||||
bool mIgnoreSentenceBeginTag;
|
||||
HTKMLFReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
HTKMLFReader() :
|
||||
m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"HTKMLFReader");
|
||||
}
|
||||
|
||||
template <class ConfigRecordType>
|
||||
|
|
|
@ -660,7 +660,7 @@ void SequenceReader<ElemType>::ReadClassInfo(const wstring& vocfile, int& classS
|
|||
|
||||
// check if unk is the same used in vocabulary file
|
||||
if (word4idx.find(mUnk.c_str()) == word4idx.end())
|
||||
RuntimeError("ReadClassInfo unknown symbol '%s' is not in vocabulary file.", mUnk.c_str());
|
||||
fprintf(stderr, "ReadClassInfo: 'unknown' symbol unk='%s' is not in vocabulary file. Unknown words will error out if encountered.\n", mUnk.c_str());
|
||||
}
|
||||
|
||||
// InitCache - Initialize the caching reader if cache files exist, otherwise the writer
|
||||
|
|
|
@ -381,6 +381,7 @@ public:
|
|||
BatchSequenceReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"LMSequenceReader");
|
||||
mLastProcessedSentenceId = 0;
|
||||
mRequestedNumParallelSequences = 1;
|
||||
mLastPosInSentence = 0;
|
||||
|
|
|
@ -270,6 +270,7 @@ public:
|
|||
BatchLUSequenceReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"LUSequenceReader");
|
||||
mLastProcessedSentenceId = 0;
|
||||
mRequestedNumParallelSequences = 1;
|
||||
mLastPosInSentence = 0;
|
||||
|
|
|
@ -247,6 +247,7 @@ public:
|
|||
: DSSMLabels(nullptr), DSSMCols(0)
|
||||
{
|
||||
m_pMBLayout = make_shared<MBLayout>();
|
||||
m_pMBLayout->SetUniqueAxisName(L"LibSVMReader");
|
||||
};
|
||||
|
||||
virtual ~LibSVMBinaryReader();
|
||||
|
|
|
@ -130,7 +130,9 @@ BpttPacker::BpttPacker(
|
|||
auto& buffer = m_streamBuffers[i];
|
||||
buffer.Resize(m_numParallelSequences * m_truncationSize * GetSampleSize(stream));
|
||||
m_sequenceBufferPerStream.push_back(make_shared<SequenceBuffer>(m_numParallelSequences));
|
||||
m_currentLayouts.push_back(make_shared<MBLayout>());
|
||||
auto pMBLayout = make_shared<MBLayout>();
|
||||
pMBLayout->SetUniqueAxisName(L"BpttPacker");
|
||||
m_currentLayouts.push_back(pMBLayout);
|
||||
}
|
||||
|
||||
// Filling in the initial set of sequences
|
||||
|
|
|
@ -116,9 +116,7 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
|
|||
// If not we should inject the IMemoryProvider per stream.
|
||||
int deviceId = matrices.begin()->second.matrix->GetDeviceId();
|
||||
for (auto mx : matrices)
|
||||
{
|
||||
assert(mx.second.matrix->GetDeviceId() == deviceId), UNUSED(deviceId);
|
||||
}
|
||||
|
||||
assert(m_prefetchTask.valid());
|
||||
|
||||
|
@ -133,6 +131,7 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
|
|||
}
|
||||
|
||||
// Reset stale mb layouts.
|
||||
// BUGBUG: This seems incorrect. (1) layouts should all be updated below, and (2) some of these layouts are the same, we are resetting them twice.
|
||||
for (const auto& iter : matrices)
|
||||
{
|
||||
iter.second.pMBLayout->Init(1, 0);
|
||||
|
@ -149,12 +148,12 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
|
|||
if (m_nameToStreamId.find(mx.first) == m_nameToStreamId.end())
|
||||
{
|
||||
string inputNames = EnumerateInputs(m_nameToStreamId);
|
||||
RuntimeError("Could not map input '%ls' to the reader. Reader outputs only [%s].",
|
||||
RuntimeError("Could not map input '%ls' to the reader. Reader outputs only [%s].",
|
||||
mx.first.c_str(), inputNames.c_str());
|
||||
}
|
||||
|
||||
size_t streamId = m_nameToStreamId[mx.first];
|
||||
|
||||
|
||||
const auto& stream = minibatch.m_data[streamId];
|
||||
|
||||
m_numParallelSequences = stream->m_layout->GetNumParallelSequences();
|
||||
|
@ -176,7 +175,7 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
|
|||
RuntimeError("Dynamic axis layout '%ls' is shared between inputs '%ls' and '%ls', but layouts generated "
|
||||
"from the input data are incompatible on this axis. Are you using different sequence lengths? "
|
||||
"Did you consider adding a DynamicAxis() to the Input nodes?",
|
||||
layout->GetAxisName().c_str(), layoutToInputMap[layout->GetAxisName()].c_str(), mx.first.c_str());
|
||||
layout->GetAxisName(), layoutToInputMap[layout->GetAxisName()].c_str(), mx.first.c_str());
|
||||
}
|
||||
|
||||
size_t sampleSize = m_streams[streamId]->m_sampleLayout->GetNumElements();
|
||||
|
@ -217,7 +216,7 @@ void ReaderShim<ElemType>::FillMatrixFromStream(StorageType type, Matrix<ElemTyp
|
|||
IndexType* columns = reinterpret_cast<IndexType*>(rows + nnzCount);
|
||||
matrix->SetMatrixFromCSCFormat(columns, rows, values, nnzCount, numRows, numCols);
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
RuntimeError("Storage type %d is not supported.", (int)type);
|
||||
}
|
||||
|
|
|
@ -114,7 +114,7 @@ MBLayoutPtr SequencePacker::PackDenseStream(const StreamBatch& batch, size_t str
|
|||
assert(sampleOffset == sampleIndex * sampleSize);
|
||||
PackDenseSample(destination, sequence, sampleOffset, sampleSize);
|
||||
sampleOffset += sampleSize;
|
||||
}
|
||||
}
|
||||
else if (stream->m_storageType == StorageType::sparse_csc)
|
||||
{
|
||||
// TODO: make type casts members of the SparseSequenceData
|
||||
|
|
|
@ -59,7 +59,10 @@ class SparsePCReader : public DataReaderBase
|
|||
|
||||
public:
|
||||
SparsePCReader()
|
||||
: m_pMBLayout(make_shared<MBLayout>()){};
|
||||
: m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
m_pMBLayout->SetUniqueAxisName(L"SparsePCReader");
|
||||
};
|
||||
virtual ~SparsePCReader();
|
||||
virtual void Destroy();
|
||||
template <class ConfigRecordType>
|
||||
|
|
|
@ -135,6 +135,7 @@ public:
|
|||
UCIFastReader()
|
||||
{
|
||||
m_pMBLayout = make_shared<MBLayout>();
|
||||
m_pMBLayout->SetUniqueAxisName(L"UCIFastReader");
|
||||
}
|
||||
virtual ~UCIFastReader();
|
||||
|
||||
|
|
|
@ -0,0 +1,109 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Criterion.h -- helper classes for accumulating criteria

#pragma once

#include "Basics.h"
#include "Matrix.h"
#include <memory> // for pair
#include <limits> // for isnan() and numeric_limits --TODO: is that the right header?

namespace Microsoft { namespace MSR { namespace CNTK {

// helper class for passing accumulated epoch-level criteria around while retaining their sample counts
// Criteria are represented as a tuple (aggregate criterion, sample count). The average criterion value is their ratio.
struct EpochCriterion : public std::pair<double, size_t>
{
// construction
explicit EpochCriterion(double aggregateCriterionValue = 0.0, size_t aggregateSampleCount = 0) : std::pair<double, size_t>(aggregateCriterionValue, aggregateSampleCount) { }
EpochCriterion(const std::pair<double, size_t>& other) : std::pair<double, size_t>(other) { }

// main way of reading this out: compute the actual average criterion value from the aggregate and sample count
double Average() const { return second > 0 ? first / second : 0.0; } // compute the epoch-average

// a few more handy operations that occurred multiple times
bool IsNan() const { return std::isnan(first); }
EpochCriterion operator-(const EpochCriterion& other) const { return EpochCriterion(first - other.first, second - other.second); }
void operator+=(const EpochCriterion& other) { first += other.first; second += other.second; }

static EpochCriterion Infinity() { return EpochCriterion(std::numeric_limits<double>::infinity()); }
bool IsInfinity() const { return first == std::numeric_limits<double>::infinity(); }
};
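A short usage sketch of EpochCriterion based only on the members above; the numbers are illustrative:

    EpochCriterion ce(123.0, 100);  // aggregate criterion 123.0 accumulated over 100 samples
    EpochCriterion mb( 61.5,  50);  // contribution from another chunk of data
    ce += mb;                       // now (184.5, 150)
    double avg = ce.Average();      // 184.5 / 150 = 1.23 per-sample average
    bool diverged = ce.IsNan();     // false here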
// We accumulate criteria in this struct.
|
||||
// Criteria are accumulated together with their counts (counts depend on sequence lengths, and different criteria may have different sequence lengths).
|
||||
template <class ElemType>
|
||||
struct CriterionAccumulator
|
||||
{
|
||||
// constructor
|
||||
CriterionAccumulator(size_t numCriteria, DEVICEID_TYPE deviceId) :
|
||||
m_aggregateCriterionValues(1, numCriteria, deviceId)
|
||||
{
|
||||
m_aggregateCriterionValues.SetValue(0);
|
||||
m_aggregateSampleCounts.assign(numCriteria, 0);
|
||||
}
|
||||
// 'i' is the index of the element we add into (multiple eval criteria share the same matrix object)
|
||||
// Use 'reset=true' to not accumulate but overwrite.
|
||||
const CriterionAccumulator& Add(const std::vector<ComputationNodeBasePtr>& nodes, size_t i, size_t legacyNumSamples)
|
||||
{
|
||||
return Accumulate</*reset=*/false>(nodes, i, legacyNumSamples);
|
||||
}
|
||||
const CriterionAccumulator& Assign(const std::vector<ComputationNodeBasePtr>& nodes, size_t i, size_t legacyNumSamples)
|
||||
{
|
||||
return Accumulate</*reset=*/true>(nodes, i, legacyNumSamples);
|
||||
}
|
||||
// retrieve an accumulated result as a pair (numerator, denominator)
|
||||
EpochCriterion GetCriterion(size_t i) const
|
||||
{
|
||||
// BUGBUG: For unknown reasons, this (or the other below) check makes a difference for MPI configs.
|
||||
// If it is left out, then training and test configs end up being scaled by the same factor close to 1.
|
||||
if (m_aggregateSampleCounts[i] == 0)
|
||||
return EpochCriterion(0, 0); // avoid unnecessary GPU access
|
||||
else
|
||||
return EpochCriterion(m_aggregateCriterionValues(0, i), m_aggregateSampleCounts[i]);
|
||||
}
|
||||
|
||||
private:
|
||||
// shared part of Add() and Assign()
|
||||
// This code assumes that if number of samples is 0, the criterion value is also 0 and does not need to be fetched from the GPU.
|
||||
template<bool reset>
|
||||
const CriterionAccumulator& Accumulate(const std::vector<ComputationNodeBasePtr>& nodes, size_t i, size_t legacyNumSamples)
|
||||
{
|
||||
const auto& node = nodes[i]; // multiple nodes are managed by this struct
|
||||
float beta = reset ? 0 : 1;
|
||||
// Note: A future change will be that criterion nodes emit criteria per frame.
|
||||
// In that case, we will do masking and an implicit reduction right here using TensorView.
|
||||
size_t numSamples = GetNumSamples(nodes[i], legacyNumSamples);
|
||||
// temp solution until we add TensorView reduction
|
||||
if (beta == 0)
|
||||
{
|
||||
Matrix<ElemType>::AssignElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(),
|
||||
0, 0, m_aggregateCriterionValues, 0, i);
|
||||
m_aggregateSampleCounts[i] = numSamples;
|
||||
}
|
||||
else if (numSamples > 0) // avoid unnecessary GPU access
|
||||
{
|
||||
Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(),
|
||||
0, 0, m_aggregateCriterionValues, 0, i);
|
||||
m_aggregateSampleCounts[i] += numSamples;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
// get the number of samples
|
||||
static size_t GetNumSamples(const ComputationNodeBasePtr& node, size_t legacyNumSamples)
|
||||
{
|
||||
if (node->HasMBLayout())
|
||||
return node->GetMBLayout()->GetActualNumSamples();
|
||||
else
|
||||
return legacyNumSamples;
|
||||
}
|
||||
|
||||
private:
|
||||
Matrix<ElemType> m_aggregateCriterionValues; // [1 x N]
|
||||
vector<size_t> m_aggregateSampleCounts; // [N]
|
||||
};
|
||||
|
||||
}}}
|
|
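A minimal usage sketch (editorial illustration, not part of this commit) of how the two helpers above fit together: the accumulator keeps one (aggregate value, sample count) pair per criterion, and EpochCriterion::Average() turns that pair into the per-sample value that gets logged. The node vector and sample count below are placeholders.

    // sketch only: assumes the Criterion.h declarations above are in scope
    template <class ElemType>
    static double AverageCriterionSketch(const std::vector<ComputationNodeBasePtr>& evalNodes,
                                         size_t numSamplesInMinibatch, DEVICEID_TYPE deviceId)
    {
        CriterionAccumulator<ElemType> acc(evalNodes.size(), deviceId);
        acc.Assign(evalNodes, /*i=*/0, numSamplesInMinibatch); // first minibatch: overwrite
        acc.Add(evalNodes, /*i=*/0, numSamplesInMinibatch);    // later minibatches: accumulate
        EpochCriterion total = acc.GetCriterion(0);            // (aggregate value, sample count)
        return total.Average();                                // aggregate / count, 0 if count == 0
    }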
@ -70,7 +70,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Verify that there's indeed a single layout
for (const auto& iter : inputMatrices)
{
    assert(iter.second.pMBLayout == pMBLayout);
    assert(iter.second.pMBLayout == pMBLayout);
    // TODO: This must be a runtime check, not an assert().
    UNUSED(iter);
}
@ -105,8 +106,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
static pair<size_t, size_t> DecimateMinibatch(const StreamMinibatchInputs& MB, // input matrices
                                              StreamMinibatchInputs& decimatedMB, // output decimated matrices.
                                              MBLayoutPtr pMBLayout, // input MBLayout
                                              MBLayoutPtr& pDecimateMBLayout, // output decimated MBLayout (note: cannot work in-place)
                                              MBLayoutPtr pMBLayout, // input MBLayout
                                              MBLayoutPtr& pDecimateMBLayout, // output decimated MBLayout (note: cannot work in-place)
                                              size_t numProcs, size_t rank)
{
    size_t numParallelSequences = pMBLayout->GetNumParallelSequences();
@ -148,6 +149,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }
    // decimate MBLayout as well
    pDecimateMBLayout = make_shared<MBLayout>(numNewParallelSequence, nT, L"");
    pDecimateMBLayout->SetAxisName(pMBLayout->GetAxisName());
#if 1
    // now copy over all sequence info records that are inside the range, with adjusted 's'
    const auto& sequences = pMBLayout->GetAllSequences();
@ -181,17 +183,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // no need to do inplace decimation if numproc == 1

    // allocate space for non-inplace decimation
    MBLayoutPtr pDecimatedMB = make_shared<MBLayout>();
    MBLayoutPtr pDecimatedMBLayout = make_shared<MBLayout>();
    pDecimatedMBLayout->SetAxisName(pMBLayout->GetAxisName());
    StreamMinibatchInputs decimatedMB;
    // call in-place decimation
    pair<size_t, size_t> selected = DecimateMinibatch<ElemType>(mb, decimatedMB, pMBLayout, pDecimatedMB, numprocs, rank);
    pair<size_t, size_t> selected = DecimateMinibatch<ElemType>(mb, decimatedMB, pMBLayout, pDecimatedMBLayout, numprocs, rank);
    // move the data
    for (auto k : mb)
    {
        const auto& name = k.first;
        mb.GetInputMatrix<ElemType>(name).SetValue(decimatedMB.GetInputMatrix<ElemType>(name)); // deep-copy our local one to the output location
    }
    pMBLayout->MoveFrom(pDecimatedMB);
    pMBLayout->MoveFrom(pDecimatedMBLayout);
    return selected;
}
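The decimation above hands each MPI rank a slice of the minibatch's parallel sequences. A small sketch of one plausible per-rank range computation (illustrative only; the commit's own formula lives in the elided body of DecimateMinibatch):

    // sketch: split S parallel sequences over numProcs ranks; leading ranks absorb the remainder
    #include <algorithm>
    #include <utility>
    static std::pair<size_t, size_t> SequenceRangeForRank(size_t numParallelSequences, size_t numProcs, size_t rank)
    {
        size_t perRank   = numParallelSequences / numProcs;
        size_t remainder = numParallelSequences % numProcs;
        size_t begin = rank * perRank + std::min(rank, remainder);
        size_t end   = begin + perRank + (rank < remainder ? 1 : 0);
        return std::make_pair(begin, end); // [begin, end) slice owned by this rank
    }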
@ -353,7 +356,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }

    // for sequence training
    if (criterionNodes[0]->OperationName() == L"SequenceWithSoftmax")
    if (!criterionNodes.empty() && criterionNodes[0]->OperationName() == L"SequenceWithSoftmax")
    {
        auto node = dynamic_pointer_cast<SequenceWithSoftmaxNode<ElemType>>(criterionNodes[0]);
        assert(node);
@ -379,7 +382,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                              size_t requestedSubminibatches)
{
    // first, remember interface to the net
    // BUGBUG: This will no longer be correct once we have multiple input layouts.
    // BUGBUG (Issue #95): This will no longer be correct once we have multiple input layouts.
    m_netMBLayoutPtr = net.GetMBLayoutPtrOfNetwork();
    m_netInputMatrixPtr = inputMatrices;
@ -539,18 +542,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }
        shared_ptr<ComputationNode<ElemType>> pNode = m_LearnableNodePtr[nodename];
        m_cachedGradient.GetInputMatrix<ElemType>(nodename) += pNode->Gradient();
        pNode->Gradient().SetValue((ElemType) 0);
        pNode->Gradient().SetValue(0);
    }
    // accumulate criterion value
    Matrix<ElemType>::AddElementToElement(m_netCriterionNodes[0]->Value(), 0, 0,
                                          *m_netCriterionAccumulator, 0, 0);
    m_netCriterionNodes[0]->Value().SetValue((ElemType) 0);
    if (!m_netCriterionNodes.empty())
    {
        Matrix<ElemType>::AddElementToElement(m_netCriterionNodes[0]->Value(), 0, 0,
                                              *m_netCriterionAccumulator, 0, 0);
        m_netCriterionNodes[0]->Value().SetValue(0);
    }
    // accumulate evaluation value
    for (size_t i = 0; i < m_netEvaluationNodes.size(); i++)
    {
        Matrix<ElemType>::AddElementToElement(m_netEvaluationNodes[i]->Value(), 0, 0,
                                              *m_netEvaluationAccumulator, 0, i);
        m_netEvaluationNodes[i]->Value().SetValue((ElemType) 0);
        m_netEvaluationNodes[i]->Value().SetValue(0);
    }

    // Export node state
@ -576,10 +582,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // also revert net.m_MBLayoutPtr
    m_netMBLayoutPtr->CopyFrom(m_MBLayoutCache);

    // m_netCriterionNodes[0]->Value().SetValue((ElemType)0);
    Matrix<ElemType>::AddElementToElement(*m_netCriterionAccumulator, 0, 0,
                                          m_netCriterionNodes[0]->Value(), 0, 0);
    m_netCriterionAccumulator->SetValue((ElemType) 0);
    if (!m_netCriterionNodes.empty())
    {
        // m_netCriterionNodes[0]->Value().SetValue((ElemType)0);
        Matrix<ElemType>::AddElementToElement(*m_netCriterionAccumulator, 0, 0,
                                              m_netCriterionNodes[0]->Value(), 0, 0);
    }
    m_netCriterionAccumulator->SetValue(0);

    for (size_t i = 0; i < m_netEvaluationNodes.size(); i++)
    {
@ -587,7 +596,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        Matrix<ElemType>::AddElementToElement(*m_netEvaluationAccumulator, 0, i,
                                              m_netEvaluationNodes[i]->Value(), 0, 0);
    }
    m_netEvaluationAccumulator->SetValue((ElemType) 0);
    m_netEvaluationAccumulator->SetValue(0);
}
};
};
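The two hunks above follow a simple accumulate-then-flush pattern across subminibatches: after each subminibatch the scalar criterion is added into an accumulator matrix and the node's value is cleared, and when the whole minibatch is done the accumulated value is copied back into the node and the accumulator is reset. A compressed sketch of that pattern, using the same names as the diff (the empty-criterion-list guard is what this commit adds):

    // sketch: per-subminibatch accumulation, then flush back on completion
    if (!m_netCriterionNodes.empty())
    {
        Matrix<ElemType>::AddElementToElement(m_netCriterionNodes[0]->Value(), 0, 0,
                                              *m_netCriterionAccumulator, 0, 0); // accumulate
        m_netCriterionNodes[0]->Value().SetValue(0);                             // clear for the next subminibatch
    }
    // ... after the last subminibatch:
    if (!m_netCriterionNodes.empty())
        Matrix<ElemType>::AddElementToElement(*m_netCriterionAccumulator, 0, 0,
                                              m_netCriterionNodes[0]->Value(), 0, 0); // flush back into the node
    m_netCriterionAccumulator->SetValue(0);                                            // reset the accumulator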
@ -6,12 +6,12 @@ struct DistGradHeader
{
public:
    size_t numSamples;
    size_t numSamplesWithLabel;
    size_t numSamplesWithLabel; // this is the denominator for 'criterion'
    double criterion;

    // variable-size array
    int numEvalNode;
    double evalErrors[1];
    pair<double,size_t> evalErrors[1];

    static DistGradHeader* Create(int numEvalNode)
    {
@ -41,7 +41,8 @@ public:
    criterion += other->criterion;
    for (int i = 0; i < numEvalNode; i++)
    {
        evalErrors[i] += other->evalErrors[i];
        evalErrors[i].first += other->evalErrors[i].first; // numer
        evalErrors[i].second += other->evalErrors[i].second; // denom
    }
}
}
@ -58,7 +59,8 @@ public:
    criterion = 0;
    for (int i = 0; i < numEvalNode; i++)
    {
        evalErrors[i] = 0;
        evalErrors[i].first = 0;
        evalErrors[i].second = 0;
    }
}
@ -77,17 +79,19 @@ public:
    }

private:
    static size_t DistGradHeaderSize(size_t nEvalNode)
    static size_t DistGradHeaderSize(size_t nEvalNodes)
    {
        return sizeof(DistGradHeader) + (sizeof(double) * (nEvalNode - 1));
        // BUGBUG: Should be sizeof(evalErrors[0]), but the compiler won't let me. This is only correct because evalErrors has 1 element.
        return sizeof(DistGradHeader) + (sizeof(decltype(evalErrors)) * (nEvalNodes - 1));
    }

    // Disallow construction and destruction since this type contains a variable sized array member
    // and hence must be constructed through the create and destroy functions
    DistGradHeader() = delete;
    DistGradHeader() = delete;
    ~DistGradHeader() = delete;

    // Disallow copy and move construction/assignment
    DISABLE_COPY_AND_MOVE(DistGradHeader);
};
} } }

}}}
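DistGradHeader ends in a one-element array that is really a variable-length tail, so it must be allocated with extra room for the remaining (numerator, denominator) pairs; the BUGBUG above is about exactly that size computation. A generic sketch of the allocation pattern, with hypothetical names rather than the commit's code:

    // sketch: allocating a struct with a trailing variable-length array of N elements
    #include <cstdlib>
    #include <utility>
    struct HeaderSketch
    {
        int numEvalNode;
        std::pair<double, size_t> evalErrors[1]; // tail actually holds numEvalNode entries
    };
    static HeaderSketch* CreateHeaderSketch(int numEvalNode)
    {
        // one element is already inside sizeof(HeaderSketch); add room for the rest
        size_t bytes = sizeof(HeaderSketch) + (numEvalNode - 1) * sizeof(std::pair<double, size_t>);
        HeaderSketch* p = static_cast<HeaderSketch*>(std::malloc(bytes));
        p->numEvalNode = numEvalNode;
        return p; // release with std::free()
    }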
The diff for one file is not shown here because it is too large.
@ -9,6 +9,7 @@
#include "SimpleEvaluator.h"
#include "DataReader.h"
#include "ScriptableObjects.h"
#include "Criterion.h"
#include <vector>
#include <string>
#include <stdexcept>
@ -230,7 +231,8 @@ protected:
    GradientUpdateInfo m_gradType;
    RMSPropInfo m_rpi;

    int m_numMBsToShowResult;
    size_t m_numMBsToShowResult = 0;
    size_t m_firstMBsToShowResult = 0;
    int m_numMBsToCUDAProfile;

    bool m_doGradientCheck;
@ -398,9 +400,8 @@ protected:
                            StreamMinibatchInputs* inputMatrices,
                            const std::list<ComputationNodeBasePtr>& learnableNodes,
                            std::list<Matrix<ElemType>>& smoothedGradients,
                            /*out*/ double& epochCriterion,
                            /*out*/ std::vector<double>& epochEvalErrors,
                            /*out*/ size_t& totalSamplesSeen,
                            /*out*/ EpochCriterion& epochCriterion,
                            /*out*/ std::vector<EpochCriterion>& epochEvalErrors,
                            std::string prefixMsg = "");

    size_t AdaptiveMinibatchSizing(ComputationNetworkPtr net,
@ -463,10 +464,9 @@ protected:
                          StreamMinibatchInputs* inputMatrices,
                          const std::list<ComputationNodeBasePtr>& learnableNodes,
                          std::list<Matrix<ElemType>>& smoothedGradients,
                          /*out*/ double& epochCriterion,
                          /*out*/ std::vector<double>& epochEvalErrors,
                          /*out*/ size_t& totalSamplesSeen,
                          std::string prefixMsg = "");
                          /*out*/ EpochCriterion& epochCriterion,
                          /*out*/ std::vector<EpochCriterion>& epochEvalErrors,
                          const std::string& prefixMsg = "");

    void InitDistGradAgg(int numEvalNodes, int traceLevel);
    void InitModelAggregationHandler(int traceLevel);
@ -496,13 +496,19 @@ protected:

    void ClipGradient(Matrix<ElemType>& gradient, const size_t actualMBSize) const;

    void SaveCheckPointInfo(const size_t epoch, const size_t totalSamplesSeen,
    void SaveCheckPointInfo(const size_t epoch, const size_t totalSamplesSeen, // TODO: combine totalSamplesSeen and prevCriterion into a EpochCriterion type
                            const double learnRatePerSample,
                            const std::list<Matrix<ElemType>>& smoothedGradients,
                            const double prevCriterion,
                            const size_t minibatchSize);

    bool LoadCheckPointInfo(const size_t epochNumber,
    bool TryLoadCheckPointInfo(const size_t epochNumber,
                            /*out*/ size_t& totalSamplesSeen,
                            /*out*/ double& learnRatePerSample,
                            std::list<Matrix<ElemType>>& smoothedGradients,
                            /*out*/ double& prevCriterion,
                            /*out*/ size_t& minibatchSize);
    void LoadCheckPointInfo(const size_t epochNumber,
                            /*out*/ size_t& totalSamplesSeen,
                            /*out*/ double& learnRatePerSample,
                            std::list<Matrix<ElemType>>& smoothedGradients,
@ -533,17 +539,17 @@ public:
                 int npos);

protected:
    wstring m_modelPath;
    std::wstring m_modelPath;
    bool m_keepCheckPointFiles;
    // bool m_validateAfterModelReloading; // TODO: remove this. Why would one not validate a model?

    wstring m_trainCriterionNodeName;
    wstring m_evalCriterionNodeName;
    std::wstring m_trainCriterionNodeName;
    std::wstring m_evalCriterionNodeName;

    // enable tracing. Nodes listed here get their m_traceNodeValueXXX flags set
    vector<wstring> m_traceNodeNamesReal;
    vector<wstring> m_traceNodeNamesCategory;
    vector<wstring> m_traceNodeNamesSparse;
    std::vector<std::wstring> m_traceNodeNamesReal;
    std::vector<std::wstring> m_traceNodeNamesCategory;
    std::vector<std::wstring> m_traceNodeNamesSparse;

    size_t m_prevChosenMinibatchSize;
    double m_lastFinishedEpochTrainLoss;
@ -164,6 +164,7 @@
    <ClInclude Include="..\ComputationNetworkLib\ComputationNetwork.h" />
    <ClInclude Include="..\ComputationNetworkLib\ComputationNode.h" />
    <ClInclude Include="..\ComputationNetworkLib\ConvolutionalNodes.h" />
    <ClInclude Include="Criterion.h" />
    <ClInclude Include="DataReaderHelpers.h" />
    <ClInclude Include="DistGradHeader.h" />
    <ClInclude Include="IDistGradAggregator.h" />
@ -147,6 +147,9 @@
    <ClInclude Include="MASGD.h">
      <Filter>Parallelization</Filter>
    </ClInclude>
    <ClInclude Include="Criterion.h">
      <Filter>SGD</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <Filter Include="Common">
@ -224,7 +224,7 @@ private:
    assert(headerCPU->criterion == 0);
    for (int i = 0; i < headerCPU->numEvalNode; ++i)
    {
        assert(headerCPU->evalErrors[i] == 0);
        assert(headerCPU->evalErrors[i].first == 0);
    }

    // If the current node did not process any samples, the gradients should be zero'd
@ -14,6 +14,7 @@
#include "DistGradHeader.h"
#include "IDistGradAggregator.h"
#include "SimpleDistGradAggregator.h"
#include "Criterion.h"

#include <vector>
#include <string>
@ -31,10 +32,11 @@ template <class ElemType>
class SimpleEvaluator
{
public:
    SimpleEvaluator(ComputationNetworkPtr net, const MPIWrapperPtr& mpi, const size_t numMBsToShowResult = 100, const int traceLevel = 0, const size_t maxSamplesInRAM = SIZE_MAX,
    SimpleEvaluator(ComputationNetworkPtr net, const MPIWrapperPtr& mpi, const size_t numMBsToShowResult = 100, const size_t firstMBsToShowResult = 0, const int traceLevel = 0, const size_t maxSamplesInRAM = SIZE_MAX,
                    const size_t numSubminiBatches = 1)
        : m_net(net),
          m_numMBsToShowResult(numMBsToShowResult),
          m_firstMBsToShowResult(firstMBsToShowResult),
          m_traceLevel(traceLevel),
          m_maxSamplesInRAM(maxSamplesInRAM),
          m_numSubminiBatches(numSubminiBatches),
@ -45,7 +47,7 @@ public:
    }

    // returns evaluation node values per sample determined by evalNodeNames (which can include both training and eval criterion nodes)
    vector<double> Evaluate(IDataReader* dataReader, const vector<wstring>& evalNodeNames, const size_t mbSize, const size_t testSize = requestDataSize)
    vector<EpochCriterion> Evaluate(IDataReader* dataReader, const vector<wstring>& evalNodeNames, const size_t mbSize, const size_t testSize = requestDataSize)
    {
        ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring);
@ -81,9 +83,7 @@ public:
        }

        // initialize eval results
        std::vector<double> evalResults;
        for (int i = 0; i < evalNodes.size(); i++)
            evalResults.push_back((double) 0);
        std::vector<EpochCriterion> evalResults(evalNodes.size(), EpochCriterion(0));

        // allocate memory for forward computation
        m_net->AllocateAllMatrices(evalNodes, {}, nullptr);
@ -102,12 +102,10 @@ public:
        size_t totalEpochSamples = 0;
        size_t numMBsRun = 0;
        size_t actualMBSize = 0;
        size_t numSamplesLastMBs = 0;
        size_t lastMBsRun = 0; // MBs run before this display
        size_t numSamplesLastLogged = 0;
        size_t numMBsRunLastLogged = 0; // MBs run before this display

        std::vector<double> evalResultsLastMBs;
        for (int i = 0; i < evalResults.size(); i++)
            evalResultsLastMBs.push_back((ElemType) 0);
        std::vector<EpochCriterion> evalResultsLastLogged(evalResults.size(), EpochCriterion(0));

        //TODO: we should add support for distributed reading
        dataReader->StartMinibatchLoop(mbSize, 0, testSize);
@ -123,6 +121,8 @@ public:
        if (numSubminibatchesNeeded > 1)
            smbDispatcher.Init(m_net, learnableNodes, criterionNodes, evalNodes);

        CriterionAccumulator<ElemType> localEpochEvalErrors(evalNodes.size(), m_net->GetDeviceId());

        const size_t numIterationsBeforePrintingProgress = 100;
        size_t numItersSinceLastPrintOfProgress = 0;
        while (DataReaderHelpers::GetMinibatchIntoNetwork<ElemType>(*dataReader, m_net, nullptr, dataReader->SupportsDistributedMBRead(), m_mpi != nullptr, inputMatrices, actualMBSize, m_mpi))
@ -162,9 +162,9 @@ public:
                m_gradHeader->numEvalNode = evalNodes.size();
                m_gradHeader->numSamples = actualMBSize;
                m_gradHeader->numSamplesWithLabel = numSamplesWithLabel;
                m_gradHeader->criterion = 0.0;
                m_gradHeader->criterion = 0.0; // (not used here)
                for (size_t i = 0; i < evalNodes.size(); i++)
                    m_gradHeader->evalErrors[i] = evalNodes[i]->Get00Element();
                    m_gradHeader->evalErrors[i] = localEpochEvalErrors.Assign(evalNodes, i, numSamplesWithLabel).GetCriterion(i);

                // TODO: We are reusing the aggregation logic inside SimpleDistGradAggregator, which has a heavy dependency
                // on the gradient matrix. At some point we should refactor the aggregator class to be able to only calculating
@ -185,9 +185,7 @@ public:
            else
            {
                for (int i = 0; i < evalNodes.size(); i++)
                {
                    evalResults[i] += (double)evalNodes[i]->Get00Element(); // criterionNode should be a scalar
                }
                    evalResults[i] += localEpochEvalErrors.Assign(evalNodes, i, numSamplesWithLabel).GetCriterion(i);
            }

            totalEpochSamples += aggregateNumSamplesWithLabel;
@ -195,22 +193,19 @@ public:

            if (m_traceLevel > 0)
            {
                numSamplesLastMBs += aggregateNumSamplesWithLabel;
                numSamplesLastLogged += aggregateNumSamplesWithLabel;

                if (numMBsRun % m_numMBsToShowResult == 0)
                if (numMBsRun <= m_firstMBsToShowResult || (m_numMBsToShowResult && (numMBsRun % m_numMBsToShowResult == 0)))
                {
                    DisplayEvalStatistics(lastMBsRun + 1, numMBsRun, numSamplesLastMBs, evalNodes, evalResults, evalResultsLastMBs);
                    DisplayEvalStatistics(numMBsRunLastLogged + 1, numMBsRun, numSamplesLastLogged, evalNodes, evalResults, evalResultsLastLogged);

                    for (int i = 0; i < evalResults.size(); i++)
                    {
                        evalResultsLastMBs[i] = evalResults[i];
                    }
                    numSamplesLastMBs = 0;
                    lastMBsRun = numMBsRun;
                        evalResultsLastLogged[i] = evalResults[i];
                    numSamplesLastLogged = 0;
                    numMBsRunLastLogged = numMBsRun;
                }
            }

            numItersSinceLastPrintOfProgress = ProgressTracing::TraceFakeProgress(numIterationsBeforePrintingProgress, numItersSinceLastPrintOfProgress);

            // call DataEnd to check if end of sentence is reached
@ -219,47 +214,37 @@ public:
        }

        // show last batch of results
        if (m_traceLevel > 0 && numSamplesLastMBs > 0)
        if (m_traceLevel > 0 && numSamplesLastLogged > 0)
        {
            DisplayEvalStatistics(lastMBsRun + 1, numMBsRun, numSamplesLastMBs, evalNodes, evalResults, evalResultsLastMBs);
            DisplayEvalStatistics(numMBsRunLastLogged + 1, numMBsRun, numSamplesLastLogged, evalNodes, evalResults, evalResultsLastLogged);
        }

        // final statistics
        for (int i = 0; i < evalResultsLastMBs.size(); i++)
            evalResultsLastMBs[i] = 0; // clear this since statistics display will subtract the previous value
        for (int i = 0; i < evalResultsLastLogged.size(); i++)
            evalResultsLastLogged[i] = EpochCriterion(0); // clear this since statistics display will subtract the previous value

        fprintf(stderr, "Final Results: ");
        DisplayEvalStatistics(1, numMBsRun, totalEpochSamples, evalNodes, evalResults, evalResultsLastMBs, true);

        for (int i = 0; i < evalResults.size(); i++)
        {
            evalResults[i] /= totalEpochSamples;
        }
        DisplayEvalStatistics(1, numMBsRun, totalEpochSamples, evalNodes, evalResults, evalResultsLastLogged, true);

        return evalResults;
    }

protected:
    void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastMBs,
    void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastLogged,
                               const vector<ComputationNodeBasePtr>& evalNodes,
                               const double evalResults, const double evalResultsLastMBs, bool displayConvertedValue = false)
                               const EpochCriterion evalResults, const EpochCriterion evalResultsLastLogged, bool displayConvertedValue = false)
    {
        vector<double> evaR;
        evaR.push_back(evalResults);
        vector<double> evaLast;
        evaLast.push_back(evalResultsLastMBs);

        DisplayEvalStatistics(startMBNum, endMBNum, numSamplesLastMBs, evalNodes, evaR, evaLast, displayConvertedValue);
        DisplayEvalStatistics(startMBNum, endMBNum, numSamplesLastLogged, evalNodes, { evalResults }, { evalResultsLastLogged }, displayConvertedValue);
    }

    void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastMBs, const vector<ComputationNodeBasePtr>& evalNodes,
                               const vector<double>& evalResults, const vector<double>& evalResultsLastMBs, bool displayConvertedValue = false)
    void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastLogged, const vector<ComputationNodeBasePtr>& evalNodes,
                               const vector<EpochCriterion>& evalResults, const vector<EpochCriterion>& evalResultsLastLogged, bool displayConvertedValue = false)
    {
        fprintf(stderr, "Minibatch[%lu-%lu]: SamplesSeen = %lu ", startMBNum, endMBNum, numSamplesLastMBs);
        fprintf(stderr, "Minibatch[%lu-%lu]: SamplesSeen = %lu ", startMBNum, endMBNum, numSamplesLastLogged);

        for (size_t i = 0; i < evalResults.size(); i++)
        {
            double eresult = (evalResults[i] - evalResultsLastMBs[i]) / numSamplesLastMBs;
            double eresult = (evalResults[i] - evalResultsLastLogged[i]).Average(); // / numSamplesLastLogged;
            fprintf(stderr, "%ls: %ls/Sample = %.8g ", evalNodes[i]->NodeName().c_str(), evalNodes[i]->OperationName().c_str(), eresult);

            if (displayConvertedValue)
@ -279,6 +264,7 @@ protected:
protected:
    ComputationNetworkPtr m_net;
    size_t m_numMBsToShowResult;
    size_t m_firstMBsToShowResult;
    size_t m_maxSamplesInRAM;
    size_t m_numSubminiBatches;
    MPIWrapperPtr m_mpi;
@ -288,4 +274,5 @@ protected:
    int m_traceLevel;
    void operator=(const SimpleEvaluator&); // (not assignable)
};
} } }

}}}
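The reworked logging condition in the Evaluate() hunks above prints every one of the first m_firstMBsToShowResult minibatches and then every m_numMBsToShowResult-th one. A tiny sketch of that cadence check in isolation (illustrative helper, not a member of the class):

    // sketch: should minibatch number 'numMBsRun' (1-based) be logged?
    static bool ShouldLogMinibatch(size_t numMBsRun, size_t firstMBsToShowResult, size_t numMBsToShowResult)
    {
        return numMBsRun <= firstMBsToShowResult ||
               (numMBsToShowResult != 0 && numMBsRun % numMBsToShowResult == 0);
    }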
0
Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/03_ResNet/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/04_ResNet_56/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Speech/TIMIT/CrossValidateSimpleNetwork/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Speech/TIMIT/EvalSimpleNetwork/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Speech/TIMIT/TrainSimpleNetwork/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Speech/TIMIT/TrainWithPreTrain/run-test
Executable file → Normal file
0
Tests/EndToEndTests/Examples/Speech/TIMIT/WriteScaledLogLike/run-test
Executable file → Normal file