Merge branch 'master' of https://git01.codeplex.com/cntk
Commit 676b894daa
@@ -1777,8 +1777,8 @@ bool BatchSequenceReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<E
            // vector of feature data goes into matrix column
            size_t idx = (size_t)m_featureData[j];

            //if (matrices.find(m_featuresName) != matrices.end())
            features.SetValue(idx, j, (ElemType)1);
            SetSentenceBegin(idx, j, actualmbsize);
        }

        features.TransferFromDeviceToDevice(CPUDEVICE, featureDeviceId, false, false, false);
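An editorial note on the hunk above (not part of the commit): the feature matrix is one-hot, so each minibatch column j encodes the single word index idx, and setting one element to 1 is the entire encoding:

$$(\mathbf{x}_j)_i = \begin{cases} 1, & i = \text{idx} \\ 0, & \text{otherwise} \end{cases} \qquad \mathbf{x}_j \in \{0,1\}^{|V|}$$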
@@ -1947,7 +1947,6 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
            labels->SetValue(0, j, (ElemType)wrd);

            SetSentenceEnd(wrd, j, actualmbsize);
            SetSentenceBegin(wrd, j, actualmbsize);

            if (class_size > 0)
            {
@@ -0,0 +1,2 @@
ExpDir=C:\CNTKExp\LSTMLM\log\
DataDir=C:\CNTKExp\RNN\data\PennTreeBank
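These two lines form a parameter file for the configurations below: CNTK config substitutes $Name$ references at parse time, and each config defines DataFolder=$DataDir$. For example, a reader line such as

    wordclass=$DataFolder$\vocab.txt

resolves, with the value above, to

    wordclass=C:\CNTKExp\RNN\data\PennTreeBank\vocab.txt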
@@ -0,0 +1,414 @@
# configuration file for class-based RNN training

ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$

stderr=$ExpFolder$

# command=dumpNodeInfo
# command=train
# command=test
command=train:test
# command=writeWordAndClassInfo

type=double

writeWordAndClassInfo=[
    action=writeWordAndClass
    inputFile=$DataFolder$\vocab.txt
    outputWord2Cls=$ExpFolder$\word2cls.txt
    outputCls2Index=$ExpFolder$\cls2idx.txt
    vocabSize=10000
    nbrClass=50
    printValues=true
]

dumpNodeInfo=[
    action=dumpnode
    modelPath=$ExpFolder$\modelRnnCNTK
    # nodeName=W0
    printValues=true
]

devtest=[action=devtest]

train=[
    action=trainRNN
    minibatchSize=10
    traceLevel=1
    deviceId=Auto
    epochSize=4430000
    # which is 886 * 5000
    recurrentLayer=1
    defaultHiddenActivity=0.1
    useValidation=true
    rnnType=CLASSLSTM

    # uncomment below and comment out the SimpleNetworkBuilder section to use NDL to train the RNN LM
    # NDLNetworkBuilder=[
    #     networkDescription=$ConfigFolder$\rnnlm.ndl
    # ]

    SimpleNetworkBuilder=[
        trainingCriterion=classcrossentropywithsoftmax
        evalCriterion=classcrossentropywithsoftmax
        nodeType=Sigmoid
        initValueScale=6.0
        layerSizes=10000:200:10000
        addPrior=false
        addDropoutNodes=false
        applyMeanVarNorm=false
        uniformInit=true

        # class information for class-based language modeling
        vocabSize=10000
        nbrClass=50
    ]

    # base parameters
    SGD=[
        learningRatesPerSample=0.1
        momentumPerMB=0
        gradientClippingWithTruncation=true
        clippingThresholdPerSample=15.0
        maxEpochs=40
        unroll=false
        numMBsToShowResult=2000
        # gradUpdateType=AdaGrad
        gradUpdateType=None

        modelPath=$ExpFolder$\modelRnnCNTK
        loadBestModel=true

        # settings for automatic learning-rate adjustment
        AutoAdjust=[
            autoAdjustLR=adjustafterepoch
            reduceLearnRateIfImproveLessThan=0.001
            continueReduce=true
            increaseLearnRateIfImproveMoreThan=1000000000
            learnRateDecreaseFactor=0.5
            learnRateIncreaseFactor=1.382
            numMiniBatch4LRSearch=100
            numPrevLearnRates=5
            numBestSearchEpoch=1
        ]

        dropoutRate=0.0
    ]

    reader=[
        readerType=SequenceReader
        randomize=None
        nbruttsineachrecurrentiter=1

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.train.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        labelIn=[
            dim=1
            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            # vocabulary size
            labelDim=10000

            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]

    cvReader=[
        # reader to use
        readerType=SequenceReader
        randomize=None

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.valid.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.valid.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        # it should be the same as that in the training set
        labelIn=[
            dim=1

            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]
]

test=[
    action=eval

    # corresponds to the number of words/characters to process in a minibatch
    minibatchSize=1
    # needs to be small since the model is updated for each minibatch
    traceLevel=1
    deviceId=Auto
    epochSize=4430000
    # which is 886 * 5000
    recurrentLayer=1
    defaultHiddenActivity=0.1
    useValidation=true
    rnnType=CLASSLM

    modelPath=$ExpFolder$\modelRnnCNTK

    reader=[
        # reader to use
        readerType=SequenceReader
        randomize=None

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.test.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        labelIn=[
            dim=1

            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            # vocabulary size
            labelDim=10000

            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]
]
@@ -0,0 +1,413 @@
# configuration file for class-based RNN training

ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$

stderr=$ExpFolder$
# command=dumpNodeInfo
command=train
# command=test
# command=train:test
# command=writeWordAndClassInfo

type=double

writeWordAndClassInfo=[
    action=writeWordAndClass
    inputFile=$DataFolder$\vocab.txt
    outputWord2Cls=$ExpFolder$\word2cls.txt
    outputCls2Index=$ExpFolder$\cls2idx.txt
    vocabSize=10000
    nbrClass=50
    printValues=true
]

dumpNodeInfo=[
    action=dumpnode
    modelPath=$ExpFolder$\modelRnnCNTK
    # nodeName=W0
    printValues=true
]

devtest=[action=devtest]

train=[
    action=trainRNN
    minibatchSize=10
    traceLevel=1
    deviceId=-1
    epochSize=4430000
    # which is 886 * 5000
    defaultHiddenActivity=0.1
    useValidation=true
    rnnType=CLASSLM
    # rnnType=LSTM

    # uncomment below and comment out the SimpleNetworkBuilder section to use NDL to train the RNN LM
    # NDLNetworkBuilder=[
    #     networkDescription=$ConfigFolder$\rnnlm.ndl
    # ]

    SimpleNetworkBuilder=[
        trainingCriterion=classcrossentropywithsoftmax
        evalCriterion=classcrossentropywithsoftmax
        nodeType=Sigmoid
        initValueScale=6.0
        layerSizes=10000:200:10000
        addPrior=false
        addDropoutNodes=false
        applyMeanVarNorm=false
        uniformInit=true

        # class information for class-based language modeling
        vocabSize=10000
        nbrClass=50
    ]

    # base parameters
    SGD=[
        learningRatesPerSample=0.1
        momentumPerMB=0
        gradientClippingWithTruncation=true
        clippingThresholdPerSample=15.0
        maxEpochs=40
        unroll=false
        numMBsToShowResult=2000
        # gradUpdateType=AdaGrad
        gradUpdateType=None

        modelPath=$ExpFolder$\modelRnnCNTK
        loadBestModel=true

        # settings for automatic learning-rate adjustment
        AutoAdjust=[
            autoAdjustLR=adjustafterepoch
            reduceLearnRateIfImproveLessThan=0.001
            continueReduce=true
            increaseLearnRateIfImproveMoreThan=1000000000
            learnRateDecreaseFactor=0.5
            learnRateIncreaseFactor=1.382
            numMiniBatch4LRSearch=100
            numPrevLearnRates=5
            numBestSearchEpoch=1
        ]

        dropoutRate=0.0
    ]

    reader=[
        readerType=SequenceReader
        randomize=None
        nbruttsineachrecurrentiter=1

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.train.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        labelIn=[
            dim=1
            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            # vocabulary size
            labelDim=10000

            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]

    cvReader=[
        # reader to use
        readerType=SequenceReader
        randomize=None

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.valid.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.valid.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        # it should be the same as that in the training set
        labelIn=[
            dim=1

            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]
]

test=[
    action=eval

    # corresponds to the number of words/characters to process in a minibatch
    minibatchSize=1
    # needs to be small since the model is updated for each minibatch
    traceLevel=1
    deviceId=-1
    epochSize=4430000
    # which is 886 * 5000
    recurrentLayer=1
    defaultHiddenActivity=0.1
    useValidation=true
    rnnType=CLASSLM

    modelPath=$ExpFolder$\modelRnnCNTK

    reader=[
        # reader to use
        readerType=SequenceReader
        randomize=None

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.test.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        labelIn=[
            dim=1

            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            # vocabulary size
            labelDim=10000

            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]
]
@@ -0,0 +1,403 @@
# configuration file for class-based RNN training

ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$

stderr=$ExpFolder$
# command=dumpNodeInfo
# command=train
# command=test
command=train:test

type=double

dumpNodeInfo=[
    action=dumpnode
    modelPath=$ExpFolder$\modelRnnCNTK
    # nodeName=W0
    printValues=true
]

devtest=[action=devtest]

train=[
    action=trainRNN
    minibatchSize=10
    traceLevel=1
    deviceId=0
    epochSize=4430000
    # which is 886 * 5000
    # recurrentLayer=1
    defaultHiddenActivity=0.0
    useValidation=true
    rnnType=CLASSLM
    # rnnType=LSTM

    # uncomment below and comment out the SimpleNetworkBuilder section to use NDL to train the RNN LM
    # NDLNetworkBuilder=[
    #     networkDescription=$ConfigFolder$\rnnlm.ndl
    # ]

    SimpleNetworkBuilder=[
        trainingCriterion=classcrossentropywithsoftmax
        evalCriterion=classcrossentropywithsoftmax
        nodeType=Sigmoid
        initValueScale=6.0
        layerSizes=10000:200:10000
        addPrior=false
        addDropoutNodes=false
        applyMeanVarNorm=false
        uniformInit=true

        # class information for class-based language modeling
        vocabSize=10000
        nbrClass=50
    ]

    # base parameters
    SGD=[
        learningRatesPerSample=0.1
        momentumPerMB=0
        gradientClippingWithTruncation=true
        clippingThresholdPerSample=15.0
        maxEpochs=40
        unroll=false
        numMBsToShowResult=2000
        # gradUpdateType=AdaGrad
        gradUpdateType=None

        modelPath=$ExpFolder$\modelRnnCNTK
        loadBestModel=true

        # settings for automatic learning-rate adjustment
        AutoAdjust=[
            autoAdjustLR=adjustafterepoch
            reduceLearnRateIfImproveLessThan=0.001
            continueReduce=true
            increaseLearnRateIfImproveMoreThan=1000000000
            learnRateDecreaseFactor=0.5
            learnRateIncreaseFactor=1.382
            numMiniBatch4LRSearch=100
            numPrevLearnRates=5
            numBestSearchEpoch=1
        ]

        dropoutRate=0.0
    ]

    reader=[
        readerType=SequenceReader
        randomize=None
        nbruttsineachrecurrentiter=1

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.train.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        labelIn=[
            dim=1
            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            # vocabulary size
            labelDim=10000

            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]

    cvReader=[
        # reader to use
        readerType=SequenceReader
        randomize=None

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.valid.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.valid.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        # it should be the same as that in the training set
        labelIn=[
            dim=1

            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]
]

test=[
    action=eval

    # corresponds to the number of words/characters to process in a minibatch
    minibatchSize=1
    # needs to be small since the model is updated for each minibatch
    traceLevel=1
    deviceId=-1
    epochSize=4430000
    # which is 886 * 5000
    recurrentLayer=1
    defaultHiddenActivity=0.1
    useValidation=true
    rnnType=CLASSLM

    modelPath=$ExpFolder$\modelRnnCNTK

    reader=[
        # reader to use
        readerType=SequenceReader
        randomize=None

        # word class info
        wordclass=$DataFolder$\vocab.txt

        # if writerType is set, we will cache to a binary file
        # if the binary file exists, we will use it instead of parsing this file
        # writerType=BinaryReader

        #### write definition
        wfile=$ExpFolder$\sequenceSentence.bin
        # wsize - initial size of the file in MB
        # if the calculated size would be bigger, that is used instead
        wsize=256

        # wrecords - number of records we should allocate space for in the file
        # files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
        wrecords=1000
        # windowSize - number of records we should include in the BinaryWriter window
        windowSize=10000

        file=$DataFolder$\ptb.test.cntk.txt

        # additional features sections
        # for now store as expanded category data (including label in)
        features=[
            # a sentence has no features, so the dimension needs to be set to zero
            dim=0
            ### write definition
            sectionType=data
        ]
        # sequence break table, listing indexes into sequence records so we know when a sequence starts/stops
        sequence=[
            dim=1
            wrecords=2
            ### write definition
            sectionType=data
        ]
        # labels sections
        labelIn=[
            dim=1

            # vocabulary size
            labelDim=10000
            labelMappingFile=$ExpFolder$\sentenceLabels.txt
            labelType=Category
            beginSequence="</s>"
            endSequence="</s>"

            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=11
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=11
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
        # labels sections
        labels=[
            dim=1
            labelType=NextWord
            beginSequence="O"
            endSequence="O"

            # vocabulary size
            labelDim=10000

            labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
            #### Write definition ####
            # sizeof(unsigned), which is the label index type
            elementSize=4
            sectionType=labels
            mapping=[
                # redefine the number of records for this section, since we don't need to save it for each data record
                wrecords=3
                # variable size, so use an average string size
                elementSize=10
                sectionType=labelMapping
            ]
            category=[
                dim=3
                # elementSize=sizeof(ElemType) is the default
                sectionType=categoryLabels
            ]
        ]
    ]
]
@@ -28,7 +28,7 @@ train=[
    deviceId=0
    epochSize=4430000
    # which is 886 * 5000
    # recurrentLayer=1
    recurrentLayer=1
    defaultHiddenActivity=0.0
    useValidation=true
    rnnType=CLASSLM
@@ -184,8 +184,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        rpi.max = (double)configSGD("rms_wgt_max", "10.0");
        rpi.gamma = (double)configSGD("rms_gamma", "0.99");

        bool needAveMultiplier = (bool)configSGD("needAveMultiplier", "true");
        bool needAveMultiplier = (bool)configSGD("normWithAveMultiplier", "true");
        ElemType L2RegWeight = (ElemType)configSGD("L2RegWeight", "0");
        ElemType L1RegWeight = (ElemType)configSGD("L1RegWeight", "0");

        /// for backward support. future setup should use gradUpdateType=AdaGrad, instead of
        /// useAdagrad=true
@@ -213,7 +214,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            loadBestModel, numMiniBatch4LRSearch, numPrevLearnRates, numBestSearchEpoch, traceLevel, numMBsToShowResult,
            maxTempMemSizeInSamplesForCNN, gUpdateInfo, keepCheckPointFiles, adaptationRegType, adaptationRegWeight,
            trainCriterionNodeName, evalCriterionNodeName, doGradientCheck, gradientCheckSigDigit, validateAfterModelReloading,
            rpi, learnRateAdjustInterval, UsingAllDataForPreComputedNode, needAveMultiplier, L2RegWeight);
            rpi, learnRateAdjustInterval, UsingAllDataForPreComputedNode, needAveMultiplier, L2RegWeight, L1RegWeight);
    }

    void setMomentum(float momentum)
@@ -235,7 +236,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        const GradientUpdateInfo gradUpdateType = GradientUpdateInfo(), const bool keepCheckPointFiles=false, const AdaptationRegType adaptationRegType = AdaptationRegType::None,
        const ElemType adaptationRegWeight = 0.0f, const wstring trainCriterionNodeName= L"", const wstring evalCriterionNodeName=L"",
        const bool doGradientCheck = false, const ElemType gradientCheckSigDigit = 6, const bool validateAfterModelReloading = true,
        RMSPropInfo rpi = RMSPropInfo(), size_t learnRateAdjustInterval = 1, const bool UsingAllDataForPreComputed = true, const bool needAveMultiplier = true, const ElemType L2RegWeight = 0)
        RMSPropInfo rpi = RMSPropInfo(), size_t learnRateAdjustInterval = 1, const bool UsingAllDataForPreComputed = true, const bool needAveMultiplier = true, const ElemType L2RegWeight = 0, const ElemType L1RegWeight = 0)
    {
        m_numPrevLearnRates = numPrevLearnRates;
        m_mbSize = mbSize;
@@ -276,6 +277,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        m_needAveMultiplier = needAveMultiplier;
        m_L2RegWeight = L2RegWeight;
        m_L1RegWeight = L1RegWeight;

        for (size_t i=0; i<m_mbSize.size(); i++)
            if (m_epochSize != requestDataSize && m_epochSize < m_mbSize[i])
@@ -1037,7 +1039,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                ComputationNodePtr node = (*nodeIter);
                Matrix<ElemType>& smoothedGradient = (*smoothedGradientIter);

                UpdateWeights(node, smoothedGradient, learnRatePerSample, actualMBSize, m_mbSize[epochNumber], m_L2RegWeight, m_needAveMultiplier);
                UpdateWeights(node, smoothedGradient, learnRatePerSample, actualMBSize, m_mbSize[epochNumber], m_L2RegWeight, m_L1RegWeight, m_needAveMultiplier);
            }
        }
@@ -1103,7 +1105,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }
public:
    // UpdateWeightsS - static version of UpdateWeights()
    static void UpdateWeightsS(const SGD* sgd, Matrix<ElemType>& functionValues, Matrix<ElemType>& gradientValues, Matrix<ElemType>& smoothedGradient, const ElemType learnRatePerSample, size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const bool needAveMultiplier)
    static void UpdateWeightsS(const SGD* sgd, Matrix<ElemType>& functionValues, Matrix<ElemType>& gradientValues, Matrix<ElemType>& smoothedGradient, const ElemType learnRatePerSample, size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const ElemType L1RegWeight, const bool needAveMultiplier)
    {
#if DUMPOUTPUT
        fprintf(stderr, "learnRatePerSample=%0.8f, actualMBSize=%ld, expectedMBSize=%ld\n", learnRatePerSample, actualMBSize, expectedMBSize);
@@ -1128,8 +1130,8 @@ public:
        }

        // L2 regularizer
        if (L2RegWeight > 0)
            Matrix<ElemType>::ScaleAndAdd(L2RegWeight, functionValues, gradientValues);
        if (L2RegWeight > 0)  // *actualMBSize so that it's invariant to the minibatch size, since the learning rate is per sample
            Matrix<ElemType>::ScaleAndAdd(L2RegWeight*actualMBSize, functionValues, gradientValues);

        if (adpType == GradientsUpdateType::None)
        {
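Why the added *actualMBSize factor: with per-sample learning rate $\eta$ (learnRatePerSample), minibatch size $m$ (actualMBSize), and gradient $g$ summed over the minibatch, the update sketched by this hunk is

$$w \leftarrow w - \eta\,\bigl(g + \lambda_2\, m\, w\bigr),$$

so the per-sample weight decay $\eta\lambda_2 w$ no longer depends on the minibatch size.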
@@ -1155,18 +1157,23 @@ public:
        {
            Matrix<ElemType>::ScaleAndAdd(1.0, sgdUpdateNoise, functionValues);
        }

        // L1 regularizer with proximal gradient descent method
        if (L1RegWeight > 0)  // *actualMBSize so that it's invariant to the minibatch size, since the learning rate is per sample
            functionValues.InplaceSoftThreshold(learnRatePerSample*L1RegWeight*actualMBSize);

#if DUMPOUTPUT
        functionValues.Print("Parameter Update");
#endif
    }
protected:
    // UpdateWeights - update the weights in
    void UpdateWeights(const ComputationNodePtr node, Matrix<ElemType>& smoothedGradient, const ElemType learnRatePerSample, const size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const bool needAveMultiplier) const
    void UpdateWeights(const ComputationNodePtr node, Matrix<ElemType>& smoothedGradient, const ElemType learnRatePerSample, const size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const ElemType L1RegWeight, const bool needAveMultiplier) const
    {
#if DUMPOUTPUT
        fprintf(stderr, "Update_%ls\n", node->NodeName().c_str());
#endif
        UpdateWeightsS(this, node->FunctionValues(), node->GradientValues(), smoothedGradient, learnRatePerSample, actualMBSize, expectedMBSize, L2RegWeight, needAveMultiplier);
        UpdateWeightsS(this, node->FunctionValues(), node->GradientValues(), smoothedGradient, learnRatePerSample, actualMBSize, expectedMBSize, L2RegWeight, L1RegWeight, needAveMultiplier);
        node->UpdateEvalTimeStamp();
    }
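The L1 term is applied as a proximal step rather than through the gradient: InplaceSoftThreshold implements the proximal operator of the L1 norm, applied elementwise with threshold $t = \eta\,\lambda_1\, m$ (the learnRatePerSample*L1RegWeight*actualMBSize argument above):

$$\operatorname{prox}_{t\,\lVert\cdot\rVert_1}(w) = \operatorname{sign}(w)\,\max\bigl(\lvert w\rvert - t,\; 0\bigr)$$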
@@ -1357,9 +1364,6 @@ protected:
            irow = max(0, irow);
            icol = max(0, icol);

            if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE)
                continue;

            fprintf(stderr, "\n###### d%ls######\n", node->NodeName().c_str());
            // node->FunctionValues().Print();
            ElemType eOrg = node->FunctionValues()(irow, icol);
@@ -1368,6 +1372,10 @@ protected:

            node->UpdateEvalTimeStamp();
            net.ComputeGradient(criterionNodes[npos]);  //use only the first criterion. Is
            // if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE && node->GradientValues().GetDeviceId() != CPUDEVICE)
            if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE)
                break;

            //ElemType mbEvalCri =
            criterionNodes[npos]->FunctionValues().Get00Element();  //criterionNode should be a scalar
            ElemType eGradErr = node->GradientValues()(irow, icol);
@@ -1473,6 +1481,7 @@ protected:

    bool m_needAveMultiplier;
    ElemType m_L2RegWeight;
    ElemType m_L1RegWeight;
};
template class SGD<float>;
template class SGD<double>;
@@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (numHiddenLayers > 0)
            {
                //TODO: to figure out sparse matrix size
                u = m_net->CreateSparseLearnableParameter(L"U0", m_layerSizes[1], m_layerSizes[0], 0);
                u = m_net->CreateLearnableParameter(L"U0", m_layerSizes[1], m_layerSizes[0]);
                m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);

                if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == 1)
@@ -76,7 +76,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            for (int i=1; i<numHiddenLayers; i++)
            {
                //TODO: to figure out sparse matrix size
                u = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i], 0);
                u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i]);
                m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);

                if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
@@ -227,7 +227,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

                clsweight = m_net->CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
                m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
                clslogpostprob = m_net->LogSoftmax(m_net->Times(clsweight, input), L"ClassPostProb");
                clslogpostprob = m_net->Times(clsweight, input, L"ClassPostProb");

                output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
                    clslogpostprob);
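Background on the class-based criterion these nodes feed (the standard class-factored softmax; the hunk above only changes the wiring): the word probability factors into a class term and a within-class term,

$$P(w_t \mid h_t) = P\bigl(c(w_t) \mid h_t\bigr)\; P\bigl(w_t \mid c(w_t), h_t\bigr),$$

so normalization runs over the 50 classes plus the words of a single class instead of the full 10000-word vocabulary.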
@@ -770,7 +770,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {


    template<class ElemType>
    ComputationNode<ElemType>* SimpleNetworkBuilder<ElemType>::BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr inputObs, bool inputWeightSparse)
    ComputationNode<ElemType>* SimpleNetworkBuilder<ElemType>::BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr inputObs)
    {

        size_t numHiddenLayers = m_layerSizes.size()-2;
@@ -784,20 +784,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        ComputationNodePtr bit=nullptr, bft=nullptr, bct=nullptr;

        input = inputObs;
        if (inputWeightSparse)
        {
            Wxo = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXO%d", iLayer), outputDim, inputDim);
            Wxi = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXI%d", iLayer), outputDim, inputDim);
            Wxf = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXF%d", iLayer), outputDim, inputDim);
            Wxc = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXC%d", iLayer), outputDim, inputDim);
        }
        else
        {
            Wxo = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXO%d", iLayer), outputDim, inputDim);
            Wxi = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXI%d", iLayer), outputDim, inputDim);
            Wxf = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXF%d", iLayer), outputDim, inputDim);
            Wxc = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXC%d", iLayer), outputDim, inputDim);
        }

        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
        m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale);
        m_net->InitLearnableParameters(Wxf, m_uniformInit, randomSeed++, m_initValueScale);
@@ -1082,10 +1073,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, e=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr;
        ComputationNodePtr Wxo = nullptr, Who=nullptr, Wco=nullptr, bo = nullptr, Wxi=nullptr, Whi=nullptr, Wci=nullptr, bi=nullptr;
        ComputationNodePtr Wxf=nullptr, Whf=nullptr, Wcf=nullptr, bf=nullptr, Wxc=nullptr, Whc=nullptr, bc=nullptr;
        ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, ct=nullptr, ht=nullptr;
        ComputationNodePtr delayHI = nullptr, delayCI = nullptr, delayHO = nullptr, delayHF = nullptr, delayHC=nullptr, delayCF=nullptr, delayCC=nullptr;
        ComputationNodePtr directWIO = nullptr, directInput=nullptr, directOutput=nullptr;
        ComputationNodePtr clslogpostprob = nullptr;
        ComputationNodePtr clsweight = nullptr;
        ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = {nullptr};

        input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
@@ -1120,7 +1109,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        int offset = m_lookupTableOrder > 0 ? 1 : 0;
        if (numHiddenLayers > 0)
        {
            output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset]*(offset?m_lookupTableOrder:1), m_layerSizes[offset+1], input, true);
            output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset]*(offset?m_lookupTableOrder:1), m_layerSizes[offset+1], input);
            input = output;
            outputFromEachLayer[offset+1] = input;
@@ -1157,20 +1146,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            input = output;
        }

        // TODO: verify the change is okay
        // w = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers]*(MAX_WORDS_PER_CLASS+MAX_CLASSES)*mbSize*NUM_UTTS_IN_RECURRENT_ITER);
        w = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]);
        /// needs to be an [input_dim x output_dim] matrix,
        /// e.g., [200 x 10000], where 10000 is the vocabulary size;
        /// this is for speed: the per-word weight vector can then be obtained with a simple column slice
        w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
        // TODO: verify the change is okay
        //label = m_net->CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize, 2*mbSize*NUM_UTTS_IN_RECURRENT_ITER);
        label = m_net->CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize);

        AddTrainAndEvalCriterionNodes(input, label, w);
        /// the label is a dense matrix; each element is a word index
        label = m_net->CreateInputNode(L"labels", 4, mbSize);

        output = m_net->Times(w, input, L"outputs");
        clsweight = m_net->CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
        m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
        clslogpostprob = m_net->Times(clsweight, input, L"ClassPostProb");

        output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
            clslogpostprob);

        m_net->OutputNodes().push_back(output);


        if (m_needPrior)
        {
            prior = m_net->Mean(label);
@@ -330,7 +330,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        ComputationNetwork<ElemType>& BuildClassEntropyNetwork(size_t mbSize = 1);

        ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, bool inputWeightSparse = false);
        ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);

        ComputationNode<ElemType>* BuildDirectConnect(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
@@ -1041,6 +1041,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        clsLogSoftmax.Resize(input_cls_log_post_prob.GetNumRows(), nT);
        clsSoftmax.Resize(input_cls_log_post_prob.GetNumRows(), nT);

        clsLogSoftmax = input_cls_log_post_prob;
        clsLogSoftmax.InplaceLogSoftmax(true);  /// 50 x nT
        clsSoftmax.AssignExpOf(clsLogSoftmax);

        /// loop over time
        functionValues.SetValue(0);
        sz = 0;
@@ -1075,12 +1079,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0);

            /// add the class log posterior probability
            Matrix<ElemType> clsLogSoftmax_t = clsLogSoftmax.ColumnSlice(t, 1);
            clsLogSoftmax_t.SetValue(input_cls_log_post_prob.ColumnSlice(t, 1));
            clsLogSoftmax_t.InplaceLogSoftmax(true);  /// 50 x 1
            Matrix<ElemType> clsSoftmax_t = clsSoftmax.ColumnSlice(t, 1);
            clsSoftmax_t.AssignExpOf(clsLogSoftmax_t);
            Matrix<ElemType>::AddElementToElement(clsLogSoftmax_t, c_t, 0, functionValues, 0, 0);
            Matrix<ElemType>::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0);

            sz += nbr_wrd;
        }
@@ -2296,7 +2296,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    CPUMatrix<ElemType>& CPUMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
    {
        if (IsEmpty())
            throw std::logic_error("InplaceTruncateBottom: Matrix is empty.");
            throw std::logic_error("InplaceTruncate: Matrix is empty.");

        auto& us = *this;
        ElemType locThresholdPos = abs(threshold);
@ -2342,6 +2342,60 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return *this;
|
||||
}
|
||||
|
||||
//x= x-threshold if x>threshold, x+threshold if x<-threshold, 0 otherwise
|
||||
template<class ElemType>
|
||||
CPUMatrix<ElemType>& CPUMatrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
|
||||
{
|
||||
if (IsEmpty())
|
||||
throw std::logic_error("InplaceTruncate: Matrix is empty.");
|
||||
|
||||
long m = (long)GetNumElements();
|
||||
|
||||
#pragma omp parallel for
|
||||
for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling
|
||||
{
|
||||
if (m_pArray[i] > threshold)
|
||||
m_pArray[i] -= threshold;
|
||||
else if (m_pArray[i] < -threshold)
|
||||
m_pArray[i] += threshold;
|
||||
else
|
||||
m_pArray[i] = 0;
|
||||
|
||||
if (m_pArray[i+1] > threshold)
|
||||
m_pArray[i+1] -= threshold;
|
||||
else if (m_pArray[i+1] < -threshold)
|
||||
m_pArray[i+1] += threshold;
|
||||
else
|
||||
m_pArray[i+1] = 0;
|
||||
|
||||
if (m_pArray[i+2] > threshold)
|
||||
m_pArray[i+2] -= threshold;
|
||||
else if (m_pArray[i+2] < -threshold)
|
||||
m_pArray[i+2] += threshold;
|
||||
else
|
||||
m_pArray[i+2] = 0;
|
||||
|
||||
if (m_pArray[i+3] > threshold)
|
||||
m_pArray[i+3] -= threshold;
|
||||
else if (m_pArray[i+3] < -threshold)
|
||||
m_pArray[i+3] += threshold;
|
||||
else
|
||||
m_pArray[i+3] = 0;
|
||||
}
|
||||
//handle remaining stuffs
|
||||
for (long i = m & ~3; i<m; i++)
|
||||
{
|
||||
if (m_pArray[i] > threshold)
|
||||
m_pArray[i] -= threshold;
|
||||
else if (m_pArray[i] < -threshold)
|
||||
m_pArray[i] += threshold;
|
||||
else
|
||||
m_pArray[i] = 0;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
//Threshold truncating: this[i] = max( a[i], threshold )
|
||||
template<class ElemType>
|
||||
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignTruncateBottomOf (const CPUMatrix<ElemType>& a, const ElemType threshold)
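
The comment above the new function states the rule: soft thresholding shrinks each element toward zero by the threshold and zeroes anything within it. This is the proximal operator of t*|x|, which is what makes it useful for L1-regularized (sparsity-inducing) updates. A minimal scalar sketch of the same rule:

template <class ElemType>
ElemType softThreshold(ElemType x, ElemType t)
{
    if (x > t)  return x - t;   // shrink from above
    if (x < -t) return x + t;   // shrink from below
    return (ElemType)0;         // |x| <= t collapses to zero
}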

@@ -197,6 +197,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
CPUMatrix<ElemType>& AssignTruncateTopOf (const CPUMatrix<ElemType>& a, const ElemType threshold);
CPUMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
CPUMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

CPUMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);

@@ -25,6 +25,7 @@
#include "basetypes.h"
#include "fileutil.h"

#pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this

#ifndef USE_MKL
// use ACML as default.

@@ -704,35 +705,241 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

template<class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncateTop(const ElemType threshold)
{
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
long m = (long)NzCount();
ElemType *nzValues = NzValues();

#pragma omp parallel for
for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling
{
if (nzValues[i] > threshold)
nzValues[i] = threshold;

if (nzValues[i+1] > threshold)
nzValues[i+1] = threshold;

if (nzValues[i+2] > threshold)
nzValues[i+2] = threshold;

if (nzValues[i+3] > threshold)
nzValues[i+3] = threshold;

}
//handle remaining elements
for (long i = m & ~3; i<m; i++)
{
if (nzValues[i] > threshold)
nzValues[i] = threshold;
}

return *this;
}

template<class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncateBottom(const ElemType threshold)
{
long m = (long)NzCount();
ElemType *nzValues = NzValues();

#pragma omp parallel for
for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling
{
if (nzValues[i] < threshold)
nzValues[i] = threshold;

if (nzValues[i + 1] < threshold)
nzValues[i + 1] = threshold;

if (nzValues[i + 2] < threshold)
nzValues[i + 2] = threshold;

if (nzValues[i + 3] < threshold)
nzValues[i + 3] = threshold;

}
//handle remaining elements
for (long i = m & ~3; i<m; i++)
{
if (nzValues[i] < threshold)
nzValues[i] = threshold;
}

return *this;
}

template<class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
{
ElemType locThresholdPos = abs(threshold);
ElemType locThresholdNeg = -locThresholdPos;

for(size_t j = 0; j < m_blockSize; j++)
long m = (long)NzCount();
ElemType *nzValues = NzValues();

#pragma omp parallel for
for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling
{
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
size_t start = j* len;
for (size_t p = start; p < start+len; p++)
if (nzValues[i] > locThresholdPos)
nzValues[i] = locThresholdPos;
else if (nzValues[i] < locThresholdNeg)
nzValues[i] = locThresholdNeg;

if (nzValues[i+1] > locThresholdPos)
nzValues[i+1] = locThresholdPos;
else if (nzValues[i+1] < locThresholdNeg)
nzValues[i+1] = locThresholdNeg;

if (nzValues[i+2] > locThresholdPos)
nzValues[i+2] = locThresholdPos;
else if (nzValues[i+2] < locThresholdNeg)
nzValues[i+2] = locThresholdNeg;

if (nzValues[i+3] > locThresholdPos)
nzValues[i+3] = locThresholdPos;
else if (nzValues[i+3] < locThresholdNeg)
nzValues[i+3] = locThresholdNeg;
}
//handle remaining elements
for (long i = m & ~3; i<m; i++)
{
if (m_pArray[p] > locThresholdPos)
if (nzValues[i] > locThresholdPos)
nzValues[i] = locThresholdPos;
else if (nzValues[i] < locThresholdNeg)
nzValues[i] = locThresholdNeg;
}

return *this;
}
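
Reading the interleaved removed and added lines: the old CPUSparseMatrix<ElemType>::InplaceTruncate only supported the block-based formats (it walked m_blockSize blocks and indexed m_pArray directly, throwing for anything else), while the rewrite iterates the packed nonzero array from NzValues()/NzCount(), so the same unrolled loop now serves every sparse format.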

template<class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
m_pArray[p] = locThresholdPos;
}
else if (m_pArray[p] < locThresholdNeg)
long m = (long)NzCount();
ElemType *nzValues = NzValues();

#pragma omp parallel for
for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling
{
m_pArray[p] = locThresholdNeg;
if (nzValues[i] > threshold)
nzValues[i] -= threshold;
else if (nzValues[i] < -threshold)
nzValues[i] += threshold;
else
nzValues[i] = 0;

if (nzValues[i + 1] > threshold)
nzValues[i + 1] -= threshold;
else if (nzValues[i + 1] < -threshold)
nzValues[i + 1] += threshold;
else
nzValues[i + 1] = 0;

if (nzValues[i + 2] > threshold)
nzValues[i + 2] -= threshold;
else if (nzValues[i + 2] < -threshold)
nzValues[i + 2] += threshold;
else
nzValues[i + 2] = 0;

if (nzValues[i + 3] > threshold)
nzValues[i + 3] -= threshold;
else if (nzValues[i + 3] < -threshold)
nzValues[i + 3] += threshold;
else
nzValues[i + 3] = 0;
}
//handle remaining elements
for (long i = m & ~3; i<m; i++)
{
if (nzValues[i] > threshold)
nzValues[i] -= threshold;
else if (nzValues[i] < -threshold)
nzValues[i] += threshold;
else
nzValues[i] = 0;
}
return *this;
}

template<class ElemType>
ElemType CPUSparseMatrix<ElemType>::FrobeniusNorm() const
{
if (IsEmpty())
throw std::logic_error("FrobeniusNorm: Matrix is empty.");

ElemType v = 0;

long m = (long)NzCount();
const ElemType *nzValues = NzValues();

//four-way unrolling
#pragma omp parallel for reduction(+:v)
for (long i = 0; i<(m & ~3); i += 4)
{
v += nzValues[i] * nzValues[i] + nzValues[i + 1] * nzValues[i + 1] + nzValues[i + 2] * nzValues[i + 2] + nzValues[i + 3] * nzValues[i + 3];
}
//handle remaining elements
for (long i = m & ~3; i<m; i++)
{
v += nzValues[i] * nzValues[i];
}

return sqrt(v);
}
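
All of these routines share the same unroll-by-4 pattern: (m & ~3) clears the two low bits of m, i.e. rounds it down to a multiple of 4; the OpenMP loop processes quadruples (reduction(+:v) gives each thread a private partial sum); and a scalar tail loop finishes the last 0-3 elements. The idiom in isolation, as a self-contained sketch:

// sum of squares with four-way unrolling and a scalar remainder loop
double sumOfSquares(const double* a, long m)
{
    double v = 0;
#pragma omp parallel for reduction(+:v)
    for (long i = 0; i < (m & ~3); i += 4)
        v += a[i]*a[i] + a[i+1]*a[i+1] + a[i+2]*a[i+2] + a[i+3]*a[i+3];
    for (long i = m & ~3; i < m; i++)   // leftover 0-3 elements
        v += a[i]*a[i];
    return v;
}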

//sum of all abs(elements)
template<class ElemType>
ElemType CPUSparseMatrix<ElemType>::SumOfAbsElements() const
{
if (IsEmpty())
throw std::logic_error("SumOfAbsElements: Matrix is empty.");

if (sizeof(ElemType) == sizeof(double))
{
#ifndef USE_MKL
return (ElemType)dasum((int)NzCount(), reinterpret_cast <double*>(m_pArray), 1);
#else
return (ElemType)cblas_dasum((int)NzCount(), reinterpret_cast <double*>(m_pArray), 1);
#endif
}
else
{
throw std::runtime_error("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix");
#pragma warning (suppress: 4244)
#ifndef USE_MKL
return sasum((int)NzCount(), reinterpret_cast <float*>(m_pArray), 1);
#else
return cblas_sasum((int)NzCount(), reinterpret_cast <float*>(m_pArray), 1);
#endif
}
return *this;
}

//sum of all elements
template<class ElemType>
ElemType CPUSparseMatrix<ElemType>::SumOfElements() const
{
if (IsEmpty())
throw std::logic_error("SumOfElements: Matrix is empty.");

ElemType sum = 0;

long m = (long)NzCount();
const ElemType *nzValues = NzValues();

//four-way unrolling
#pragma omp parallel for reduction(+:sum)
for (long i = 0; i<(m & ~3); i += 4)
{
sum += nzValues[i] + nzValues[i + 1] + nzValues[i + 2] + nzValues[i + 3];
}
//handle remaining elements
for (long i = m & ~3; i<m; i++)
{
sum += nzValues[i];
}

return sum;
}

template <class ElemType>

@@ -109,9 +109,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ElemType Adagrad(CPUMatrix<ElemType>& c, const bool needAveMultiplier);

public:
CPUSparseMatrix<ElemType>& InplaceTruncateTop (const ElemType /*threshold*/) { NOT_IMPLEMENTED; }
CPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType /*threshold*/) { NOT_IMPLEMENTED; }
CPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType /*threshold*/);
CPUSparseMatrix<ElemType>& InplaceTruncateTop(const ElemType threshold);
CPUSparseMatrix<ElemType>& InplaceTruncateBottom(const ElemType threshold);
CPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
CPUSparseMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

ElemType FrobeniusNorm() const; //useful for comparing CPU and GPU results

ElemType SumOfAbsElements() const; //sum of all abs(elements)
ElemType SumOfElements() const; //sum of all elements

public:
//void Print(const char* /*matrixName*/) const { NOT_IMPLEMENTED; }

@@ -11,10 +11,10 @@
#define AUTOPLACEMATRIX 1000 // used in parameters only
#define MANAGEDEXTERN -2 // managed externally (i.e. PTask)
#define CPUDEVICE -1 // device is the CPU
#define EPS_IN_INVERSE 1e-30f // min float is 1.4e-45 and max float is 3.4e-38
#define EPS_IN_LOG 1e-40f
#define LOG_OF_EPS_IN_LOG -92.1f // log(EPS_IN_LOG)
#define LOG10_OF_EPS_IN_LOG -40 // log_10(EPS_IN_LOG)
#define EPS_IN_INVERSE 1e-30f // 1e-37 is the only guaranteed precision
#define EPS_IN_LOG 1e-37f // 1e-37 is the only guaranteed precision
#define LOG_OF_EPS_IN_LOG -85.1f // log(EPS_IN_LOG)
#define LOG10_OF_EPS_IN_LOG -37 // log_10(EPS_IN_LOG)
#define LZERO -10e10
#define MINLOGEXP -9.2103
#define LSMALL -0.5E10
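
The revised constants keep EPS_IN_LOG inside the normalized single-precision range: the smallest normalized float is about 1.17549e-38, so 1e-37 is representable at full precision, whereas the old 1e-40 falls into the denormal range (which bottoms out near 1.4e-45, loses precision, and may be flushed to zero on some hardware). The updated log constants follow directly, since ln(1e-37) is about -85.2 and log10(1e-37) is exactly -37. A quick standalone check:

#include <cmath>
#include <cstdio>
#include <limits>

int main()
{
    std::printf("min normalized float = %g\n", std::numeric_limits<float>::min());        // ~1.17549e-38
    std::printf("min denormal float   = %g\n", std::numeric_limits<float>::denorm_min()); // ~1.4e-45
    std::printf("log(1e-37f)          = %f\n", std::log(1e-37f));                         // ~-85.196
    return 0;
}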

@@ -2003,6 +2003,42 @@ namespace Microsoft { namespace MSR { namespace CNTK {

return *this;
}

template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceTruncate(const ElemType threshold)
{
if (IsEmpty())
throw std::logic_error("InplaceTruncate: Matrix is empty.");

LONG64 N = (LONG64)GetNumElements();
int blocksPerGrid = (int)ceil(N*1.0 / threadsPerBlock);
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_inplaceTruncate<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(m_pArray, threshold, N);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
return *this;
}

template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
if (IsEmpty())
throw std::logic_error("InplaceSoftThreshold: Matrix is empty.");

LONG64 N = (LONG64)GetNumElements();
int blocksPerGrid = (int)ceil(N*1.0 / threadsPerBlock);
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_inplaceSoftThreshold<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(m_pArray, threshold, N);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
return *this;
}
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::SetToZeroIfAbsLessThan (const ElemType threshold)
{

@@ -218,6 +218,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>& AssignTruncateBottomOf (const GPUMatrix<ElemType>& a, const ElemType threshold);
GPUMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
GPUMatrix<ElemType>& AssignTruncateTopOf (const GPUMatrix<ElemType>& a, const ElemType threshold);
GPUMatrix<ElemType>& InplaceTruncate(const ElemType threshold);
GPUMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

GPUMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);

@@ -2784,6 +2784,29 @@ __global__ void _inplaceTruncate(
}
}

template<class ElemType>
__global__ void _inplaceSoftThreshold(
ElemType* a,
const ElemType threshold,
const LONG64 N)
{
LONG64 id = blockDim.x * blockIdx.x + threadIdx.x;
if (id >= N)
return;

if (a[id] > threshold)
{
a[id] -= threshold;
}
else if (a[id] < -threshold)
{
a[id] += threshold;
}
else
a[id] = 0;
}

template<class ElemType>
__global__ void _normalGradForSparseBlock(
const ElemType momentum,
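
The kernel follows the one-thread-per-element pattern used throughout this file: the host rounds N/threadsPerBlock up so the grid covers every element, and each thread bounds-checks its global index before touching memory. A minimal sketch of the same pattern (illustrative names; the ceil-based launch math mirrors the host code above):

template <class ElemType>
__global__ void softThresholdKernel(ElemType* a, const ElemType t, const long N)
{
    long id = (long)blockDim.x * blockIdx.x + threadIdx.x;
    if (id >= N) return;                 // guard the partially filled last block
    ElemType x = a[id];
    a[id] = (x > t) ? (x - t) : (x < -t ? (x + t) : (ElemType)0);
}

// host side (assumed values):
// const int threadsPerBlock = 1024;
// int blocksPerGrid = (int)((N + threadsPerBlock - 1) / threadsPerBlock);  // ceil(N / threadsPerBlock)
// softThresholdKernel<<<blocksPerGrid, threadsPerBlock>>>(devPtr, threshold, N);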

@@ -1107,12 +1107,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {

template<class ElemType>
GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
{
if(m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow ||
m_format == matrixFormatSparseCSR || m_format == matrixFormatSparseCSC)
{
long N=(long)GetNumNZElements();
int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock);

long blocksPerGrid = (long)ceil(N*1.0 / threadsPerBlock);
cudaEvent_t done = nullptr;
if (do_sync) CUDACALL(cudaEventCreate(&done));
ElemType * values = NzValues();

@@ -1120,11 +1118,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (do_sync) CUDACALL(cudaEventRecord(done));
if (do_sync) CUDACALL(cudaEventSynchronize(done));
if (do_sync) CUDACALL(cudaEventDestroy(done));

return *this;
}
else

template<class ElemType>
GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
NOT_IMPLEMENTED;
}
long N = (long)GetNumNZElements();

long blocksPerGrid = (long)ceil(N*1.0 / threadsPerBlock);
cudaEvent_t done = nullptr;
if (do_sync) CUDACALL(cudaEventCreate(&done));
ElemType * values = NzValues();
_inplaceSoftThreshold<ElemType> << <blocksPerGrid, threadsPerBlock >> >(values, threshold, N);
if (do_sync) CUDACALL(cudaEventRecord(done));
if (do_sync) CUDACALL(cudaEventSynchronize(done));
if (do_sync) CUDACALL(cudaEventDestroy(done));

return *this;
}
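
Note that both truncation and soft thresholding map zero to zero, so applying them only to the stored nonzeros (values = NzValues(), N = GetNumNZElements()) produces exactly the result of the dense operation while leaving the implicit zeros untouched; that is what lets the sparse paths reuse the same element-wise kernels over the packed value array.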

@@ -232,6 +232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUSparseMatrix<ElemType>& AssignAbsOf (const GPUSparseMatrix<ElemType>& a);

GPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

GPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateBottomOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);

@@ -2440,7 +2440,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncate(const ElemType threshold)
{
if (IsEmpty())
throw std::logic_error("InplaceTruncateBottom: Matrix is empty.");
throw std::logic_error("InplaceTruncate: Matrix is empty.");

if (sizeof(ElemType)==sizeof(float))
{

@@ -2456,7 +2456,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
DISPATCH_MATRIX_ON_FLAG(this,
this,
this->m_CPUMatrix->InplaceTruncate(threshold),
this->m_GPUMatrix->InplaceTruncateTop(fabs(threshold)); this->m_GPUMatrix->InplaceTruncateBottom(-fabs(threshold)),
this->m_GPUMatrix->InplaceTruncate(threshold),
this->m_CPUSparseMatrix->InplaceTruncate(threshold),
this->m_GPUSparseMatrix->InplaceTruncate(threshold)
);
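
DISPATCH_MATRIX_ON_FLAG appears to route the call to whichever backing store the matrix currently uses, with the four trailing arguments being the CPU-dense, GPU-dense, CPU-sparse, and GPU-sparse branches; this hunk swaps the GPU-dense emulation (a TruncateTop/TruncateBottom pair over +/-|threshold|) for the newly added native InplaceTruncate.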

@@ -2464,6 +2464,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {

return *this;
}

template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
{
assert(threshold >= 0);

if (IsEmpty())
throw std::logic_error("InplaceSoftThreshold: Matrix is empty.");

if (threshold == 0)
return *this;

DISPATCH_MATRIX_ON_FLAG(this,
this,
this->m_CPUMatrix->InplaceSoftThreshold(threshold),
this->m_GPUMatrix->InplaceSoftThreshold(threshold),
this->m_CPUSparseMatrix->InplaceSoftThreshold(threshold),
this->m_GPUSparseMatrix->InplaceSoftThreshold(threshold)
);

return *this;
}
//Threshold truncating: this[i] = max( this[i], threshold )
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateBottom (const ElemType threshold)
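
The early return above is sound because soft thresholding with threshold 0 is the identity. As a hypothetical usage sketch (not part of this commit), an operator like this is the building block of proximal-gradient updates for L1-regularized objectives, where each step shrinks the weights after the gradient move:

// one proximal step: w <- SoftThreshold(w - lr * grad, lr * lambda)
// MatrixT stands in for any matrix type with these operations (assumed, not CNTK-specific)
template <class ElemType, class MatrixT>
void proximalL1Step(MatrixT& w, const MatrixT& grad, ElemType lr, ElemType lambda)
{
    w -= grad * lr;                       // plain gradient step
    w.InplaceSoftThreshold(lr * lambda);  // L1 shrinkage; exact zeros give sparsity
}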

@@ -2486,7 +2507,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
this,
this->m_CPUMatrix->InplaceTruncateBottom(threshold),
this->m_GPUMatrix->InplaceTruncateBottom(threshold),
NOT_IMPLEMENTED,
this->m_CPUSparseMatrix->InplaceTruncateBottom(threshold),
this->m_GPUSparseMatrix->InplaceTruncateBottom(threshold)
);

@@ -2553,7 +2574,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
this,
this->m_CPUMatrix->InplaceTruncateTop(threshold),
this->m_GPUMatrix->InplaceTruncateTop(threshold),
NOT_IMPLEMENTED,
this->m_CPUSparseMatrix->InplaceTruncateTop(threshold),
this->m_GPUSparseMatrix->InplaceTruncateTop(threshold)
);

@@ -2626,7 +2647,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
nullptr,
return this->m_CPUMatrix->SumOfElements(),
return this->m_GPUMatrix->SumOfElements(),
NOT_IMPLEMENTED,
return this->m_CPUSparseMatrix->SumOfElements(),
return this->m_GPUSparseMatrix->SumOfElements()
);

@@ -2869,7 +2890,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
nullptr,
return this->m_CPUMatrix->FrobeniusNorm(),
return this->m_GPUMatrix->FrobeniusNorm(),
NOT_IMPLEMENTED,
return this->m_CPUSparseMatrix->FrobeniusNorm(),
return this->m_GPUSparseMatrix->FrobeniusNorm()
);
}

@@ -236,6 +236,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
Matrix<ElemType>& AssignTruncateTopOf (const Matrix<ElemType>& a, const ElemType threshold);
Matrix<ElemType>& InplaceTruncate (const ElemType threshold);
Matrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

Matrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);