diff --git a/DataReader/SequenceReader/SequenceReader.cpp b/DataReader/SequenceReader/SequenceReader.cpp index 9967deccb..c181bd7b7 100644 --- a/DataReader/SequenceReader/SequenceReader.cpp +++ b/DataReader/SequenceReader/SequenceReader.cpp @@ -1777,8 +1777,8 @@ bool BatchSequenceReader::GetMinibatch(std::map::GetLabelOutput(std::mapSetValue(0, j, (ElemType)wrd); SetSentenceEnd(wrd, j, actualmbsize); - SetSentenceBegin(wrd, j, actualmbsize); if (class_size > 0) { diff --git a/ExampleSetups/LM/LSTMLM/global.config b/ExampleSetups/LM/LSTMLM/global.config new file mode 100644 index 000000000..aa81c1c65 --- /dev/null +++ b/ExampleSetups/LM/LSTMLM/global.config @@ -0,0 +1,2 @@ +ExpDir=C:\CNTKExp\LSTMLM\log\ +DataDir=C:\CNTKExp\RNN\data\PennTreeBank \ No newline at end of file diff --git a/ExampleSetups/LM/LSTMLM/lstmlm.gpu.config b/ExampleSetups/LM/LSTMLM/lstmlm.gpu.config new file mode 100644 index 000000000..c2bdca121 --- /dev/null +++ b/ExampleSetups/LM/LSTMLM/lstmlm.gpu.config @@ -0,0 +1,414 @@ +# configuration file for class based RNN training + +ExpFolder=$ExpDir$ +ConfigFolder=$ConfigDir$ +DataFolder=$DataDir$ + +stderr=$ExpFolder$ + +# command=dumpNodeInfo +#command=train +#command=test +command=train:test +#command=writeWordAndClassInfo + +type=double + +writeWordAndClassInfo=[ + action=writeWordAndClass + inputFile=$DataFolder$\vocab.txt + outputWord2Cls=$ExpFolder$\word2cls.txt + outputCls2Index=$ExpFolder$\cls2idx.txt + vocabSize=10000 + nbrClass=50 + printValues=true +] + +dumpNodeInfo=[ + action=dumpnode + modelPath=$ExpFolder$\modelRnnCNTK + #nodeName=W0 + printValues=true +] + +devtest=[action=devtest] + +train=[ + action=trainRNN + minibatchSize=10 + traceLevel=1 + deviceId=Auto + epochSize=4430000 + # which is 886 * 5000 + recurrentLayer=1 + defaultHiddenActivity=0.1 + useValidation=true + rnnType=CLASSLSTM + + # uncomment below and comment SimpleNetworkBuilder section to use NDL to train RNN LM + # NDLNetworkBuilder=[ + # networkDescription=$ConfigFolder$\rnnlm.ndl + # ] + + SimpleNetworkBuilder=[ + trainingCriterion=classcrossentropywithsoftmax + evalCriterion=classcrossentropywithsoftmax + nodeType=Sigmoid + initValueScale=6.0 + layerSizes=10000:200:10000 + addPrior=false + addDropoutNodes=false + applyMeanVarNorm=false + uniformInit=true; + + # these are for the class information for class-based language modeling + vocabSize=10000 + nbrClass=50 + ] + + # configuration file, base parameters + SGD=[ + learningRatesPerSample=0.1 + momentumPerMB=0 + gradientClippingWithTruncation=true + clippingThresholdPerSample=15.0 + maxEpochs=40 + unroll=false + numMBsToShowResult=2000 + # gradUpdateType=AdaGrad + gradUpdateType=None + + modelPath=$ExpFolder$\modelRnnCNTK + loadBestModel=true + + # settings for Auto Adjust Learning Rate + AutoAdjust=[ + # auto learning rate adjustment + autoAdjustLR=adjustafterepoch + reduceLearnRateIfImproveLessThan=0.001 + continueReduce=true + increaseLearnRateIfImproveMoreThan=1000000000 + learnRateDecreaseFactor=0.5 + learnRateIncreaseFactor=1.382 + numMiniBatch4LRSearch=100 + numPrevLearnRates=5 + numBestSearchEpoch=1 + ] + + dropoutRate=0.0 + ] + + reader=[ + readerType=SequenceReader + randomize=None + nbruttsineachrecurrentiter=1 + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.bin + #wsize - inital size of the file in 
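The writeWordAndClassInfo action above maps the 10,000-word vocabulary onto nbrClass=50 classes for the class-based criterion (trainingCriterion=classcrossentropywithsoftmax). The exact partitioning rule is not visible in this patch; below is a minimal sketch of one common scheme for class-based RNNLMs, frequency binning by cumulative unigram mass. The function name and the equal-mass split are illustrative assumptions, not CNTK's implementation.

// Sketch: frequency-based word -> class assignment (illustrative, not CNTK's code).
// Assumes `vocab` holds (word, count) pairs sorted by descending count.
#include <string>
#include <utility>
#include <vector>

std::vector<int> assignClasses(const std::vector<std::pair<std::string, long>>& vocab,
                               int nbrClass)
{
    double total = 0;
    for (const auto& w : vocab) total += (double)w.second;

    std::vector<int> cls(vocab.size());
    double cum = 0;
    for (size_t i = 0; i < vocab.size(); i++)
    {
        cum += (double)vocab[i].second / total;    // cumulative unigram mass
        int c = (int)(cum * nbrClass);             // equal-mass bucket
        cls[i] = c < nbrClass ? c : nbrClass - 1;  // clamp the last word
    }
    return cls;
}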
MB + # if calculated size would be bigger, that is used instead + wsize=256 + + #wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file + wrecords=1000 + #windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.train.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + labelIn=[ + dim=1 + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + # vocabulary size + labelDim=10000 + + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] + + cvReader=[ + # reader to use + readerType=SequenceReader + randomize=None + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.valid.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + #wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. 
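In the labels section above, labelType=NextWord means the reader derives the target stream from the input stream itself: the label at position t is the word at position t+1, which is exactly the next-word-prediction setup a language model needs. A minimal sketch of that pairing (makeNextWordTargets is a hypothetical helper, not a reader API):

// Given a sentence as word ids, produce (input, target) pairs for LM training:
// the target at position t is the word at t+1 ("NextWord"). Illustrative only.
#include <cstddef>
#include <vector>

void makeNextWordTargets(const std::vector<int>& sentence,
                         std::vector<int>& inputs, std::vector<int>& targets)
{
    inputs.clear(); targets.clear();
    for (size_t t = 0; t + 1 < sentence.size(); t++)
    {
        inputs.push_back(sentence[t]);
        targets.push_back(sentence[t + 1]); // next word is the label
    }
}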
If known modify this element in config before creating file + wrecords=1000 + #windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.valid.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + # it should be the same as that in the training set + labelIn=[ + dim=1 + + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] +] + + +test=[ + action=eval + + # correspond to the number of words/characteres to train in a minibatch + minibatchSize=1 + # need to be small since models are updated for each minibatch + traceLevel=1 + deviceId=Auto + epochSize=4430000 + # which is 886 * 5000 + recurrentLayer=1 + defaultHiddenActivity=0.1 + useValidation=true + rnnType=CLASSLM + + modelPath=$ExpFolder$\modelRnnCNTK + + reader=[ + # reader to use + readerType=SequenceReader + randomize=None + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + # wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. 
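The test block above (action=eval) evaluates the same class-based cross-entropy criterion on the test file. Cross entropy per word and perplexity are related by PPL = exp(CE in nats per word), so a summed criterion value converts as in this small sketch (variable names are illustrative):

// Convert summed cross entropy (in nats) over an evaluation set to perplexity.
#include <cmath>

double perplexity(double totalCrossEntropyNats, double numWords)
{
    return std::exp(totalCrossEntropyNats / numWords); // PPL = exp(avg CE)
}
// e.g. an average CE of 4.6 nats/word corresponds to PPL of about exp(4.6) ~ 99.5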
If known modify this element in config before creating file + wrecords=1000 + # windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.test.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + labelIn=[ + dim=1 + + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + # vocabulary size + labelDim=10000 + + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] +] \ No newline at end of file diff --git a/ExampleSetups/LM/RNNLM/CPU/fnnlm.config b/ExampleSetups/LM/RNNLM/CPU/fnnlm.config new file mode 100644 index 000000000..a2cd1da61 --- /dev/null +++ b/ExampleSetups/LM/RNNLM/CPU/fnnlm.config @@ -0,0 +1,413 @@ +# configuration file for class based RNN training + +ExpFolder=$ExpDir$ +ConfigFolder=$ConfigDir$ +DataFolder=$DataDir$ + +stderr=$ExpFolder$ +# command=dumpNodeInfo +command=train +#command=test +#command=train:test +#command=writeWordAndClassInfo + +type=double + +writeWordAndClassInfo=[ + action=writeWordAndClass + inputFile=$DataFolder$\vocab.txt + outputWord2Cls=$ExpFolder$\word2cls.txt + outputCls2Index=$ExpFolder$\cls2idx.txt + vocabSize=10000 + nbrClass=50 + printValues=true +] + +dumpNodeInfo=[ + action=dumpnode + modelPath=$ExpFolder$\modelRnnCNTK + #nodeName=W0 + printValues=true +] + +devtest=[action=devtest] + +train=[ + action=trainRNN + minibatchSize=10 + traceLevel=1 + deviceId=-1 + epochSize=4430000 + # which is 886 * 5000 + defaultHiddenActivity=0.1 + useValidation=true + rnnType=CLASSLM + # rnnType=LSTM + + # uncomment below and comment SimpleNetworkBuilder section to use NDL to train RNN LM + # NDLNetworkBuilder=[ + # networkDescription=$ConfigFolder$\rnnlm.ndl + # ] + + SimpleNetworkBuilder=[ + trainingCriterion=classcrossentropywithsoftmax + evalCriterion=classcrossentropywithsoftmax + nodeType=Sigmoid + initValueScale=6.0 + layerSizes=10000:200:10000 + addPrior=false + addDropoutNodes=false + applyMeanVarNorm=false + uniformInit=true; + + # these are for the class information for class-based language modeling + vocabSize=10000 + nbrClass=50 + ] + + # configuration file, 
base parameters + SGD=[ + learningRatesPerSample=0.1 + momentumPerMB=0 + gradientClippingWithTruncation=true + clippingThresholdPerSample=15.0 + maxEpochs=40 + unroll=false + numMBsToShowResult=2000 + # gradUpdateType=AdaGrad + gradUpdateType=None + + modelPath=$ExpFolder$\modelRnnCNTK + loadBestModel=true + + # settings for Auto Adjust Learning Rate + AutoAdjust=[ + # auto learning rate adjustment + autoAdjustLR=adjustafterepoch + reduceLearnRateIfImproveLessThan=0.001 + continueReduce=true + increaseLearnRateIfImproveMoreThan=1000000000 + learnRateDecreaseFactor=0.5 + learnRateIncreaseFactor=1.382 + numMiniBatch4LRSearch=100 + numPrevLearnRates=5 + numBestSearchEpoch=1 + ] + + dropoutRate=0.0 + ] + + reader=[ + readerType=SequenceReader + randomize=None + nbruttsineachrecurrentiter=1 + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + #wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file + wrecords=1000 + #windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.train.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + labelIn=[ + dim=1 + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + # vocabulary size + labelDim=10000 + + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] + + cvReader=[ + # reader to use + readerType=SequenceReader + randomize=None + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + 
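The AutoAdjust block above describes an adjust-after-epoch policy: when the validation criterion improves by less than reduceLearnRateIfImproveLessThan, the rate is multiplied by learnRateDecreaseFactor=0.5 (and loadBestModel=true rolls back to the best checkpoint); with continueReduce=true it keeps shrinking on every subsequent epoch, and increaseLearnRateIfImproveMoreThan=1e9 effectively disables increases. A simplified sketch of that decision rule (illustrative; CNTK's actual control flow in SGD differs in detail):

// Simplified adjust-after-epoch policy implied by the AutoAdjust settings.
struct LrState { double lr; double bestLoss; bool reducing; };

void adjustAfterEpoch(LrState& s, double validLoss,
                      double reduceIfImproveLessThan = 0.001,
                      double decreaseFactor = 0.5,
                      bool continueReduce = true)
{
    double improvement = (s.bestLoss - validLoss) / s.bestLoss;
    if (s.reducing && continueReduce)
        s.lr *= decreaseFactor;          // once reduction starts, keep shrinking
    else if (improvement < reduceIfImproveLessThan)
    {
        s.lr *= decreaseFactor;          // too little progress: halve the rate
        s.reducing = continueReduce;
    }
    if (validLoss < s.bestLoss)
        s.bestLoss = validLoss;          // loadBestModel would also reload here
}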
wfile=$ExpFolder$\sequenceSentence.valid.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + #wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file + wrecords=1000 + #windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.valid.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + # it should be the same as that in the training set + labelIn=[ + dim=1 + + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] +] + + +test=[ + action=eval + + # correspond to the number of words/characteres to train in a minibatch + minibatchSize=1 + # need to be small since models are updated for each minibatch + traceLevel=1 + deviceId=-1 + epochSize=4430000 + # which is 886 * 5000 + recurrentLayer=1 + defaultHiddenActivity=0.1 + useValidation=true + rnnType=CLASSLM + + modelPath=$ExpFolder$\modelRnnCNTK + + reader=[ + # reader to use + readerType=SequenceReader + randomize=None + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + # wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. 
If known modify this element in config before creating file + wrecords=1000 + # windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.test.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + labelIn=[ + dim=1 + + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + # vocabulary size + labelDim=10000 + + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] +] \ No newline at end of file diff --git a/ExampleSetups/LM/RNNLM/GPU/fnnlm.config b/ExampleSetups/LM/RNNLM/GPU/fnnlm.config new file mode 100644 index 000000000..d1da353e6 --- /dev/null +++ b/ExampleSetups/LM/RNNLM/GPU/fnnlm.config @@ -0,0 +1,403 @@ +# configuration file for class based RNN training + +ExpFolder=$ExpDir$ +ConfigFolder=$ConfigDir$ +DataFolder=$DataDir$ + +stderr=$ExpFolder$ +# command=dumpNodeInfo +# command=train +# command=test +command=train:test + +type=double + +dumpNodeInfo=[ + action=dumpnode + modelPath=$ExpFolder$\modelRnnCNTK + #nodeName=W0 + printValues=true +] + +devtest=[action=devtest] + +train=[ + action=trainRNN + minibatchSize=10 + traceLevel=1 + deviceId=0 + epochSize=4430000 + # which is 886 * 5000 +# recurrentLayer=1 + defaultHiddenActivity=0.0 + useValidation=true + rnnType=CLASSLM + # rnnType=LSTM + + # uncomment below and comment SimpleNetworkBuilder section to use NDL to train RNN LM + # NDLNetworkBuilder=[ + # networkDescription=$ConfigFolder$\rnnlm.ndl + # ] + + SimpleNetworkBuilder=[ + trainingCriterion=classcrossentropywithsoftmax + evalCriterion=classcrossentropywithsoftmax + nodeType=Sigmoid + initValueScale=6.0 + layerSizes=10000:200:10000 + addPrior=false + addDropoutNodes=false + applyMeanVarNorm=false + uniformInit=true; + + # these are for the class information for class-based language modeling + vocabSize=10000 + nbrClass=50 + ] + + # configuration file, base parameters + SGD=[ + learningRatesPerSample=0.1 + momentumPerMB=0 + gradientClippingWithTruncation=true + clippingThresholdPerSample=15.0 + maxEpochs=40 + unroll=false + numMBsToShowResult=2000 + # gradUpdateType=AdaGrad + 
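gradientClippingWithTruncation=true with clippingThresholdPerSample=15.0 above selects elementwise truncation rather than norm rescaling. Since the threshold is expressed per sample, it is scaled by the actual minibatch size before being applied; that scaling mirrors the per-sample learning-rate convention used elsewhere in SGD and is an assumption here. A sketch of the effect (equivalent to the InplaceTruncate primitive this diff extends):

// Elementwise truncation clipping: clamp every gradient entry to
// [-threshold, +threshold], with the per-sample threshold scaled by
// minibatch size (assumption: matches the per-sample convention).
#include <algorithm>
#include <cstddef>
#include <vector>

void clipGradientWithTruncation(std::vector<double>& gradient,
                                double thresholdPerSample, size_t actualMBSize)
{
    const double t = thresholdPerSample * (double)actualMBSize;
    for (double& g : gradient)
        g = std::max(-t, std::min(t, g)); // same effect as InplaceTruncate(t)
}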
gradUpdateType=None + + modelPath=$ExpFolder$\modelRnnCNTK + loadBestModel=true + + # settings for Auto Adjust Learning Rate + AutoAdjust=[ + # auto learning rate adjustment + autoAdjustLR=adjustafterepoch + reduceLearnRateIfImproveLessThan=0.001 + continueReduce=true + increaseLearnRateIfImproveMoreThan=1000000000 + learnRateDecreaseFactor=0.5 + learnRateIncreaseFactor=1.382 + numMiniBatch4LRSearch=100 + numPrevLearnRates=5 + numBestSearchEpoch=1 + ] + + dropoutRate=0.0 + ] + + reader=[ + readerType=SequenceReader + randomize=None + nbruttsineachrecurrentiter=1 + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + #wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file + wrecords=1000 + #windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.train.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + labelIn=[ + dim=1 + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + # vocabulary size + labelDim=10000 + + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] + + cvReader=[ + # reader to use + readerType=SequenceReader + randomize=None + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.valid.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + #wrecords - number of records we should allocate space for in the file + # files 
cannot be expanded, so this should be large enough. If known modify this element in config before creating file + wrecords=1000 + #windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.valid.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + # it should be the same as that in the training set + labelIn=[ + dim=1 + + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] +] + + +test=[ + action=eval + + # correspond to the number of words/characteres to train in a minibatch + minibatchSize=1 + # need to be small since models are updated for each minibatch + traceLevel=1 + deviceId=-1 + epochSize=4430000 + # which is 886 * 5000 + recurrentLayer=1 + defaultHiddenActivity=0.1 + useValidation=true + rnnType=CLASSLM + + modelPath=$ExpFolder$\modelRnnCNTK + + reader=[ + # reader to use + readerType=SequenceReader + randomize=None + + # word class info + wordclass=$DataFolder$\vocab.txt + + # if writerType is set, we will cache to a binary file + # if the binary file exists, we will use it instead of parsing this file + # writerType=BinaryReader + + #### write definition + wfile=$ExpFolder$\sequenceSentence.bin + #wsize - inital size of the file in MB + # if calculated size would be bigger, that is used instead + wsize=256 + + # wrecords - number of records we should allocate space for in the file + # files cannot be expanded, so this should be large enough. 
If known modify this element in config before creating file + wrecords=1000 + # windowSize - number of records we should include in BinaryWriter window + windowSize=10000 + + file=$DataFolder$\ptb.test.cntk.txt + + #additional features sections + #for now store as expanded category data (including label in) + features=[ + # sentence has no features, so need to set dimension to zero + dim=0 + ### write definition + sectionType=data + ] + # sequence break table, list indexes into sequence records, so we know when a sequence starts/stops + sequence=[ + dim=1 + wrecords=2 + ### write definition + sectionType=data + ] + #labels sections + labelIn=[ + dim=1 + + # vocabulary size + labelDim=10000 + labelMappingFile=$ExpFolder$\sentenceLabels.txt + labelType=Category + beginSequence="" + endSequence="" + + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=11 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=11 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + #labels sections + labels=[ + dim=1 + labelType=NextWord + beginSequence="O" + endSequence="O" + + # vocabulary size + labelDim=10000 + + labelMappingFile=$ExpFolder$\sentenceLabels.out.txt + #### Write definition #### + # sizeof(unsigned) which is the label index type + elementSize=4 + sectionType=labels + mapping=[ + #redefine number of records for this section, since we don't need to save it for each data record + wrecords=3 + #variable size so use an average string size + elementSize=10 + sectionType=labelMapping + ] + category=[ + dim=3 + #elementSize=sizeof(ElemType) is default + sectionType=categoryLabels + ] + ] + ] +] \ No newline at end of file diff --git a/ExampleSetups/LM/RNNLM/GPU/rnnlm.config b/ExampleSetups/LM/RNNLM/GPU/rnnlm.config index d1da353e6..56e54db9e 100644 --- a/ExampleSetups/LM/RNNLM/GPU/rnnlm.config +++ b/ExampleSetups/LM/RNNLM/GPU/rnnlm.config @@ -28,7 +28,7 @@ train=[ deviceId=0 epochSize=4430000 # which is 886 * 5000 -# recurrentLayer=1 + recurrentLayer=1 defaultHiddenActivity=0.0 useValidation=true rnnType=CLASSLM diff --git a/MachineLearning/cn/SGD.h b/MachineLearning/cn/SGD.h index 17904594c..629a500bf 100644 --- a/MachineLearning/cn/SGD.h +++ b/MachineLearning/cn/SGD.h @@ -184,8 +184,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { rpi.max = (double)configSGD("rms_wgt_max", "10.0"); rpi.gamma = (double)configSGD("rms_gamma", "0.99"); - bool needAveMultiplier = (bool)configSGD("needAveMultiplier", "true"); + bool needAveMultiplier = (bool)configSGD("normWithAveMultiplier", "true"); ElemType L2RegWeight = (ElemType)configSGD("L2RegWeight", "0"); + ElemType L1RegWeight = (ElemType)configSGD("L1RegWeight", "0"); /// for backward support. 
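The new L1RegWeight read above feeds the weight update in UpdateWeightsS further down: L2 is folded into the gradient before the step (ScaleAndAdd), while L1 is applied after it as a proximal step (InplaceSoftThreshold). Both weights are multiplied by actualMBSize so the effective regularization stays invariant to minibatch size, matching the per-sample learning rate. A minimal scalar sketch of that update order:

// Scalar sketch of the regularized update performed by UpdateWeightsS:
//   1) fold L2 into the gradient:  g += (l2 * mbSize) * w
//   2) take the SGD step:          w -= lr * g
//   3) apply the L1 proximal (soft-threshold) step to the result.
#include <algorithm>
#include <cmath>
#include <cstddef>

double updateWeight(double w, double g, double lr, size_t mbSize,
                    double l2, double l1)
{
    g += l2 * (double)mbSize * w;               // L2, minibatch-size invariant
    w -= lr * g;                                // per-sample learning-rate step
    const double t = lr * l1 * (double)mbSize;  // L1 proximal threshold
    return std::copysign(std::max(std::fabs(w) - t, 0.0), w); // soft-threshold
}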
future setup should use gradUpdateType=AdaGrad, instead of /// useAdagrad=true @@ -213,7 +214,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { loadBestModel, numMiniBatch4LRSearch, numPrevLearnRates, numBestSearchEpoch, traceLevel, numMBsToShowResult, maxTempMemSizeInSamplesForCNN, gUpdateInfo, keepCheckPointFiles, adaptationRegType, adaptationRegWeight, trainCriterionNodeName, evalCriterionNodeName, doGradientCheck, gradientCheckSigDigit, validateAfterModelReloading, - rpi, learnRateAdjustInterval, UsingAllDataForPreComputedNode, needAveMultiplier, L2RegWeight); + rpi, learnRateAdjustInterval, UsingAllDataForPreComputedNode, needAveMultiplier, L2RegWeight, L1RegWeight); } void setMomentum(float momentum) @@ -235,7 +236,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { const GradientUpdateInfo gradUpdateType = GradientUpdateInfo(), const bool keepCheckPointFiles=false, const AdaptationRegType adaptationRegType = AdaptationRegType::None, const ElemType adaptationRegWeight = 0.0f, const wstring trainCriterionNodeName= L"", const wstring evalCriterionNodeName=L"", const bool doGradientCheck = false, const ElemType gradientCheckSigDigit = 6, const bool validateAfterModelReloading = true, - RMSPropInfo rpi = RMSPropInfo(), size_t learnRateAdjustInterval = 1, const bool UsingAllDataForPreComputed = true, const bool needAveMultiplier = true, const ElemType L2RegWeight = 0) + RMSPropInfo rpi = RMSPropInfo(), size_t learnRateAdjustInterval = 1, const bool UsingAllDataForPreComputed = true, const bool needAveMultiplier = true, const ElemType L2RegWeight = 0, const ElemType L1RegWeight = 0) { m_numPrevLearnRates = numPrevLearnRates; m_mbSize=mbSize; @@ -276,6 +277,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_needAveMultiplier = needAveMultiplier; m_L2RegWeight = L2RegWeight; + m_L1RegWeight = L1RegWeight; for (size_t i=0; i& smoothedGradient = (*smoothedGradientIter); - UpdateWeights(node, smoothedGradient, learnRatePerSample, actualMBSize, m_mbSize[epochNumber], m_L2RegWeight, m_needAveMultiplier); + UpdateWeights(node, smoothedGradient, learnRatePerSample, actualMBSize, m_mbSize[epochNumber], m_L2RegWeight, m_L1RegWeight, m_needAveMultiplier); } } @@ -1103,7 +1105,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } public: // UpdateWeightsS - static version of UpdateWeights() - static void UpdateWeightsS(const SGD* sgd, Matrix& functionValues, Matrix& gradientValues, Matrix& smoothedGradient, const ElemType learnRatePerSample, size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const bool needAveMultiplier) + static void UpdateWeightsS(const SGD* sgd, Matrix& functionValues, Matrix& gradientValues, Matrix& smoothedGradient, const ElemType learnRatePerSample, size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const ElemType L1RegWeight, const bool needAveMultiplier) { #if DUMPOUTPUT fprintf(stderr, "learnRatePerSample=%0.8f, actualMBSize=%ld, expectedMBSize=%ld\n",learnRatePerSample, actualMBSize, expectedMBSize); @@ -1128,8 +1130,8 @@ public: } // L2 regularizer - if (L2RegWeight > 0) - Matrix::ScaleAndAdd(L2RegWeight, functionValues, gradientValues); + if (L2RegWeight > 0) //*actualMBSize so that it's invariant to minibatch size since learning rate is per sample + Matrix::ScaleAndAdd(L2RegWeight*actualMBSize, functionValues, gradientValues); if (adpType == GradientsUpdateType::None) { @@ -1155,18 +1157,23 @@ public: { Matrix::ScaleAndAdd(1.0, sgdUpdateNoise, functionValues); } + + // L1 regularizer 
with proximal gradient descent method + if (L1RegWeight > 0) //*actualMBSize so that it's invariant to minibatch size since learning rate is per sample + functionValues.InplaceSoftThreshold(learnRatePerSample*L1RegWeight*actualMBSize); + #if DUMPOUTPUT functionValues.Print("Parameter Update"); #endif } protected: // UpdateWeights - update the weights in - void UpdateWeights(const ComputationNodePtr node, Matrix& smoothedGradient, const ElemType learnRatePerSample, const size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const bool needAveMultiplier) const + void UpdateWeights(const ComputationNodePtr node, Matrix& smoothedGradient, const ElemType learnRatePerSample, const size_t actualMBSize, const size_t expectedMBSize, const ElemType L2RegWeight, const ElemType L1RegWeight, const bool needAveMultiplier) const { #if DUMPOUTPUT fprintf(stderr, "Update_%ls\n",node->NodeName().c_str()); #endif - UpdateWeightsS(this, node->FunctionValues(), node->GradientValues(), smoothedGradient, learnRatePerSample, actualMBSize, expectedMBSize, L2RegWeight, needAveMultiplier); + UpdateWeightsS(this, node->FunctionValues(), node->GradientValues(), smoothedGradient, learnRatePerSample, actualMBSize, expectedMBSize, L2RegWeight, L1RegWeight, needAveMultiplier); node->UpdateEvalTimeStamp(); } @@ -1357,9 +1364,6 @@ protected: irow = max(0, irow); icol = max(0, icol); - if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE) - continue; - fprintf(stderr, "\n###### d%ls######\n", node->NodeName().c_str()); // node->FunctionValues().Print(); ElemType eOrg = node->FunctionValues()(irow, icol); @@ -1368,6 +1372,10 @@ protected: node->UpdateEvalTimeStamp(); net.ComputeGradient(criterionNodes[npos]); //use only the first criterion. Is +// if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE && node->GradientValues().GetDeviceId() != CPUDEVICE) + if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE) + break; + //ElemType mbEvalCri = criterionNodes[npos]->FunctionValues().Get00Element(); //criterionNode should be a scalar ElemType eGradErr = node->GradientValues()(irow, icol); @@ -1473,6 +1481,7 @@ protected: bool m_needAveMultiplier; ElemType m_L2RegWeight; + ElemType m_L1RegWeight; }; template class SGD; template class SGD; diff --git a/MachineLearning/cn/SimpleNetworkBuilder.cpp b/MachineLearning/cn/SimpleNetworkBuilder.cpp index 19b09d52e..6a4a2293c 100644 --- a/MachineLearning/cn/SimpleNetworkBuilder.cpp +++ b/MachineLearning/cn/SimpleNetworkBuilder.cpp @@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numHiddenLayers > 0) { //TODO: to figure out sparse matrix size - u = m_net->CreateSparseLearnableParameter(L"U0", m_layerSizes[1], m_layerSizes[0], 0); + u = m_net->CreateLearnableParameter(L"U0", m_layerSizes[1], m_layerSizes[0]); m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale); if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == 1) @@ -76,7 +76,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int i=1; iCreateSparseLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i], 0); + u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i]); m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale); if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1) @@ -227,7 +227,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { clsweight = 
m_net->CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); - clslogpostprob = m_net->LogSoftmax(m_net->Times(clsweight, input), L"ClassPostProb"); + clslogpostprob = m_net->Times(clsweight, input, L"ClassPostProb"); output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy", clslogpostprob); @@ -770,7 +770,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template - ComputationNode* SimpleNetworkBuilder::BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr inputObs, bool inputWeightSparse) + ComputationNode* SimpleNetworkBuilder::BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr inputObs) { size_t numHiddenLayers = m_layerSizes.size()-2; @@ -784,20 +784,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr bit=nullptr, bft=nullptr, bct=nullptr; input = inputObs; - if(inputWeightSparse) - { - Wxo = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXO%d", iLayer), outputDim, inputDim); - Wxi = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXI%d", iLayer), outputDim, inputDim); - Wxf = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXF%d", iLayer), outputDim, inputDim); - Wxc = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"WXC%d", iLayer), outputDim, inputDim); - } - else - { - Wxo = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXO%d", iLayer), outputDim, inputDim); - Wxi = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXI%d", iLayer), outputDim, inputDim); - Wxf = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXF%d", iLayer), outputDim, inputDim); - Wxc = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXC%d", iLayer), outputDim, inputDim); - } + Wxo = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXO%d", iLayer), outputDim, inputDim); + Wxi = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXI%d", iLayer), outputDim, inputDim); + Wxf = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXF%d", iLayer), outputDim, inputDim); + Wxc = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WXC%d", iLayer), outputDim, inputDim); + m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale); m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale); m_net->InitLearnableParameters(Wxf, m_uniformInit, randomSeed++, m_initValueScale); @@ -1082,10 +1073,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, e=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr; ComputationNodePtr Wxo = nullptr, Who=nullptr, Wco=nullptr, bo = nullptr, Wxi=nullptr, Whi=nullptr, Wci=nullptr, bi=nullptr; - ComputationNodePtr Wxf=nullptr, Whf=nullptr, Wcf=nullptr, bf=nullptr, Wxc=nullptr, Whc=nullptr, bc=nullptr; - ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, ct=nullptr, ht=nullptr; - ComputationNodePtr delayHI = nullptr, delayCI = nullptr, delayHO = nullptr, delayHF = nullptr, delayHC=nullptr, delayCF=nullptr, delayCC=nullptr; - ComputationNodePtr directWIO = nullptr, directInput=nullptr, directOutput=nullptr; + 
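The change above replaces the explicit LogSoftmax node with a plain Times(clsweight, input): the class log-posterior is now computed inside the criterion node itself (see the TrainingCriterionNode hunk below, which calls InplaceLogSoftmax once per minibatch). The underlying factorization is log P(w|h) = log P(c(w)|h) + log P(w|c(w),h), a class term over 50 classes plus a within-class term over the words of that class. A scalar sketch of combining the two terms (names illustrative):

// Class-based cross entropy: log P(w | h) decomposes into a class log-softmax
// and a within-class log-softmax, each over a much smaller set than the vocab.
#include <algorithm>
#include <cmath>
#include <vector>

double classBasedLogProb(const std::vector<double>& classScores,   // class logits
                         const std::vector<double>& inClassScores, // logits of words in c(w)
                         int cls, int idxInClass)
{
    auto logSoftmax = [](const std::vector<double>& s, int i) {
        double mx = s[0];
        for (double v : s) mx = std::max(mx, v);   // stabilize against overflow
        double z = 0;
        for (double v : s) z += std::exp(v - mx);
        return s[i] - mx - std::log(z);
    };
    return logSoftmax(classScores, cls) + logSoftmax(inClassScores, idxInClass);
}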
ComputationNodePtr clslogpostprob = nullptr; + ComputationNodePtr clsweight = nullptr; ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = {nullptr}; input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); @@ -1120,7 +1109,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int offset = m_lookupTableOrder > 0? 1 : 0; if (numHiddenLayers > 0) { - output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset]*(offset?m_lookupTableOrder:1), m_layerSizes[offset+1], input, true); + output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset]*(offset?m_lookupTableOrder:1), m_layerSizes[offset+1], input); input = output; outputFromEachLayer[offset+1] = input; @@ -1157,20 +1146,25 @@ namespace Microsoft { namespace MSR { namespace CNTK { input = output; } - // TODO: verify the change is okay - // w = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers]*(MAX_WORDS_PER_CLASS+MAX_CLASSES)*mbSize*NUM_UTTS_IN_RECURRENT_ITER); - w = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]); + /// need to have [input_dim x output_dim] matrix + /// e.g., [200 x 10000], where 10000 is the vocabulary size + /// this is for speed-up issue as per word matrix can be simply obtained using column slice + w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - // TODO: verify the change is okay - //label = m_net->CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize, 2*mbSize*NUM_UTTS_IN_RECURRENT_ITER); - label = m_net->CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize); - - AddTrainAndEvalCriterionNodes(input, label, w); - - output = m_net->Times(w, input, L"outputs"); - + + /// the label is a dense matrix. 
each element is the word index + label = m_net->CreateInputNode(L"labels", 4, mbSize); + + clsweight = m_net->CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); + m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); + clslogpostprob = m_net->Times(clsweight, input, L"ClassPostProb"); + + output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy", + clslogpostprob); + m_net->OutputNodes().push_back(output); + if (m_needPrior) { prior = m_net->Mean(label); diff --git a/MachineLearning/cn/SimpleNetworkBuilder.h b/MachineLearning/cn/SimpleNetworkBuilder.h index bdf97a2f1..ec0d61ca7 100644 --- a/MachineLearning/cn/SimpleNetworkBuilder.h +++ b/MachineLearning/cn/SimpleNetworkBuilder.h @@ -330,7 +330,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork& BuildClassEntropyNetwork(size_t mbSize = 1); - ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, bool inputWeightSparse = false); + ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input); ComputationNode* BuildDirectConnect(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode); diff --git a/MachineLearning/cn/TrainingCriterionNode.h b/MachineLearning/cn/TrainingCriterionNode.h index 217a9a233..8284529e5 100644 --- a/MachineLearning/cn/TrainingCriterionNode.h +++ b/MachineLearning/cn/TrainingCriterionNode.h @@ -1041,6 +1041,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { clsLogSoftmax.Resize(input_cls_log_post_prob.GetNumRows(), nT); clsSoftmax.Resize(input_cls_log_post_prob.GetNumRows(), nT); + clsLogSoftmax = input_cls_log_post_prob; + clsLogSoftmax.InplaceLogSoftmax(true); /// 50 x nT + clsSoftmax.AssignExpOf(clsLogSoftmax); + /// loop over time functionValues.SetValue(0); sz = 0; @@ -1075,12 +1079,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0); /// add the class log posterior probability - Matrix clsLogSoftmax_t = clsLogSoftmax.ColumnSlice(t, 1); - clsLogSoftmax_t.SetValue(input_cls_log_post_prob.ColumnSlice(t, 1)); - clsLogSoftmax_t.InplaceLogSoftmax(true); /// 50 x 1 - Matrix clsSoftmax_t = clsSoftmax.ColumnSlice(t, 1); - clsSoftmax_t.AssignExpOf(clsLogSoftmax_t); - Matrix::AddElementToElement(clsLogSoftmax_t, c_t, 0, functionValues, 0, 0); + Matrix::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0); sz += nbr_wrd; } diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 2fe2083bc..7c953051d 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -2296,7 +2296,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CPUMatrix& CPUMatrix::InplaceTruncate (const ElemType threshold) { if (IsEmpty()) - throw std::logic_error("InplaceTruncateBottom: Matrix is empty."); + throw std::logic_error("InplaceTruncate: Matrix is empty."); auto& us=*this; ElemType locThresholdPos = abs(threshold); @@ -2342,6 +2342,60 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } + //x= x-threshold if x>threshold, x+threshold if x<-threshold, 0 otherwise + template + CPUMatrix& CPUMatrix::InplaceSoftThreshold(const ElemType threshold) 
+ { + if (IsEmpty()) + throw std::logic_error("InplaceTruncate: Matrix is empty."); + + long m = (long)GetNumElements(); + +#pragma omp parallel for + for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling + { + if (m_pArray[i] > threshold) + m_pArray[i] -= threshold; + else if (m_pArray[i] < -threshold) + m_pArray[i] += threshold; + else + m_pArray[i] = 0; + + if (m_pArray[i+1] > threshold) + m_pArray[i+1] -= threshold; + else if (m_pArray[i+1] < -threshold) + m_pArray[i+1] += threshold; + else + m_pArray[i+1] = 0; + + if (m_pArray[i+2] > threshold) + m_pArray[i+2] -= threshold; + else if (m_pArray[i+2] < -threshold) + m_pArray[i+2] += threshold; + else + m_pArray[i+2] = 0; + + if (m_pArray[i+3] > threshold) + m_pArray[i+3] -= threshold; + else if (m_pArray[i+3] < -threshold) + m_pArray[i+3] += threshold; + else + m_pArray[i+3] = 0; + } + //handle remaining stuffs + for (long i = m & ~3; i threshold) + m_pArray[i] -= threshold; + else if (m_pArray[i] < -threshold) + m_pArray[i] += threshold; + else + m_pArray[i] = 0; + } + + return *this; + } + //Threshold truncating: this[i] = max( a[i], threshold ) template CPUMatrix& CPUMatrix::AssignTruncateBottomOf (const CPUMatrix& a, const ElemType threshold) diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index 6141b828e..7f4ada76c 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -197,6 +197,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CPUMatrix& InplaceTruncateTop (const ElemType threshold); CPUMatrix& AssignTruncateTopOf (const CPUMatrix& a, const ElemType threshold); CPUMatrix& InplaceTruncate (const ElemType threshold); + CPUMatrix& InplaceSoftThreshold(const ElemType threshold); CPUMatrix& SetToZeroIfAbsLessThan (const ElemType threshold); diff --git a/Math/Math/CPUSparseMatrix.cpp b/Math/Math/CPUSparseMatrix.cpp index 3b2139138..91438b7e2 100644 --- a/Math/Math/CPUSparseMatrix.cpp +++ b/Math/Math/CPUSparseMatrix.cpp @@ -25,6 +25,7 @@ #include "basetypes.h" #include "fileutil.h" +#pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this #ifndef USE_MKL // use ACML as default. 
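InplaceSoftThreshold above is the proximal operator of the L1 norm, prox_{t||.||_1}(x) = sign(x) * max(|x| - t, 0), which is what the proximal-gradient L1 step in SGD.h relies on. The four-way unrolled loop is purely a throughput optimization; it must agree elementwise with this scalar reference version:

// Reference (unoptimized) soft-threshold; the unrolled loop in
// CPUMatrix::InplaceSoftThreshold applies this to every element.
#include <algorithm>
#include <cmath>

double softThreshold(double x, double threshold)
{
    return std::copysign(std::max(std::fabs(x) - threshold, 0.0), x);
}
// softThreshold( 2.5, 1.0) ==  1.5
// softThreshold(-0.3, 1.0) ==  0.0   (values inside [-t, t] are zeroed)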
@@ -703,38 +704,244 @@ namespace Microsoft { namespace MSR { namespace CNTK { return 1; } + template + CPUSparseMatrix& CPUSparseMatrix::InplaceTruncateTop(const ElemType threshold) + { + long m = (long)NzCount(); + ElemType *nzValues = NzValues(); + +#pragma omp parallel for + for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling + { + if (nzValues[i] > threshold) + nzValues[i] = threshold; + + if (nzValues[i+1] > threshold) + nzValues[i+1] = threshold; + + if (nzValues[i+2] > threshold) + nzValues[i+2] = threshold; + + if (nzValues[i+3] > threshold) + nzValues[i+3] = threshold; + + } + //handle remaining stuffs + for (long i = m & ~3; i threshold) + nzValues[i] = threshold; + } + + return *this; + } + + template + CPUSparseMatrix& CPUSparseMatrix::InplaceTruncateBottom(const ElemType threshold) + { + long m = (long)NzCount(); + ElemType *nzValues = NzValues(); + +#pragma omp parallel for + for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling + { + if (nzValues[i] < threshold) + nzValues[i] = threshold; + + if (nzValues[i + 1] < threshold) + nzValues[i + 1] = threshold; + + if (nzValues[i + 2] < threshold) + nzValues[i + 2] = threshold; + + if (nzValues[i + 3] < threshold) + nzValues[i + 3] = threshold; + + } + //handle remaining stuffs + for (long i = m & ~3; i CPUSparseMatrix& CPUSparseMatrix::InplaceTruncate (const ElemType threshold) { - if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow) - { - ElemType locThresholdPos = abs(threshold); - ElemType locTHresholdNeg = -locThresholdPos; + ElemType locThresholdPos = abs(threshold); + ElemType locTHresholdNeg = -locThresholdPos; - for(size_t j = 0; j < m_blockSize; j++) - { - size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? 
GetNumRows() : GetNumCols(); - size_t start = j* len; - for (size_t p = start; p < start+len; p++) - { - if (m_pArray[p] > locThresholdPos) - { - m_pArray[p] = locThresholdPos; - } - else if (m_pArray[p] < locTHresholdNeg) - { - m_pArray[p] = locTHresholdNeg; - } - } - } - } - else + long m = (long)NzCount(); + ElemType *nzValues = NzValues(); + +#pragma omp parallel for + for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling { - throw std::runtime_error("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix"); + if (nzValues[i] > locThresholdPos) + nzValues[i] = locThresholdPos; + else if (nzValues[i] < locTHresholdNeg) + nzValues[i] = locTHresholdNeg; + + if (nzValues[i+1] > locThresholdPos) + nzValues[i+1] = locThresholdPos; + else if (nzValues[i+1] < locTHresholdNeg) + nzValues[i+1] = locTHresholdNeg; + + if (nzValues[i+2] > locThresholdPos) + nzValues[i+2] = locThresholdPos; + else if (nzValues[i+2] < locTHresholdNeg) + nzValues[i+2] = locTHresholdNeg; + + if (nzValues[i+3] > locThresholdPos) + nzValues[i+3] = locThresholdPos; + else if (nzValues[i+3] < locTHresholdNeg) + nzValues[i+3] = locTHresholdNeg; } + //handle remaining stuffs + for (long i = m & ~3; i locThresholdPos) + nzValues[i] = locThresholdPos; + else if (nzValues[i] < locTHresholdNeg) + nzValues[i] = locTHresholdNeg; + } + return *this; } + template + CPUSparseMatrix& CPUSparseMatrix::InplaceSoftThreshold(const ElemType threshold) + { + long m = (long)NzCount(); + ElemType *nzValues = NzValues(); + +#pragma omp parallel for + for (long i = 0; i<(m & ~3); i += 4) //four-way unrolling + { + if (nzValues[i] > threshold) + nzValues[i] -= threshold; + else if (nzValues[i] < -threshold) + nzValues[i] += threshold; + else + nzValues[i] = 0; + + if (nzValues[i + 1] > threshold) + nzValues[i + 1] -= threshold; + else if (nzValues[i + 1] < -threshold) + nzValues[i + 1] += threshold; + else + nzValues[i + 1] = 0; + + if (nzValues[i + 2] > threshold) + nzValues[i + 2] -= threshold; + else if (nzValues[i + 2] < -threshold) + nzValues[i + 2] += threshold; + else + nzValues[i + 2] = 0; + + if (nzValues[i + 3] > threshold) + nzValues[i + 3] -= threshold; + else if (nzValues[i + 3] < -threshold) + nzValues[i + 3] += threshold; + else + nzValues[i + 3] = 0; + } + //handle remaining stuffs + for (long i = m & ~3; i threshold) + nzValues[i] -= threshold; + else if (nzValues[i] < -threshold) + nzValues[i] += threshold; + else + nzValues[i] = 0; + } + return *this; + } + + template + ElemType CPUSparseMatrix::FrobeniusNorm() const + { + if (IsEmpty()) + throw std::logic_error("FrobeniusNorm: Matrix is empty."); + + ElemType v = 0; + + long m = (long)NzCount(); + const ElemType *nzValues = NzValues(); + + //four-way unrolling +#pragma omp parallel for reduction(+:v) + for (long i = 0; i<(m & ~3); i += 4) + { + v += nzValues[i] * nzValues[i] + nzValues[i + 1] * nzValues[i + 1] + nzValues[i + 2] * nzValues[i + 2] + nzValues[i + 3] * nzValues[i + 3]; + } + //handle remaining stuffs + for (long i = m & ~3; i + ElemType CPUSparseMatrix::SumOfAbsElements() const + { + if (IsEmpty()) + throw std::logic_error("SumOfAbsElements: Matrix is empty."); + + if (sizeof(ElemType) == sizeof(double)) + { +#ifndef USE_MKL + return (ElemType)dasum((int)NzCount(), reinterpret_cast (m_pArray), 1); +#else + return (ElemType)cblas_dasum((int)NzCount(), reinterpret_cast (m_pArray), 1); +#endif + } + else + { +#pragma warning (suppress: 4244) +#ifndef USE_MKL + return sasum((int)NzCount(), reinterpret_cast (m_pArray), 1); +#else + return 
cblas_sasum((int)NzCount(), reinterpret_cast (m_pArray), 1); +#endif + } + } + + + //sum of all elements + template + ElemType CPUSparseMatrix::SumOfElements() const + { + if (IsEmpty()) + throw std::logic_error("SumOfElements: Matrix is empty."); + + ElemType sum = 0; + + long m = (long)NzCount(); + const ElemType *nzValues = NzValues(); + + //four-way unrolling +#pragma omp parallel for reduction(+:sum) + for (long i = 0; i<(m & ~3); i += 4) + { + sum += nzValues[i] + nzValues[i + 1] + nzValues[i + 2] + nzValues[i + 3]; + } + //handle remaining stuffs + for (long i = m & ~3; i MATH_API File& operator>>(File& stream, CPUSparseMatrix& us) { diff --git a/Math/Math/CPUSparseMatrix.h b/Math/Math/CPUSparseMatrix.h index 8e2b6589e..d0b2e9ff5 100644 --- a/Math/Math/CPUSparseMatrix.h +++ b/Math/Math/CPUSparseMatrix.h @@ -108,10 +108,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { void NormalGrad(CPUMatrix& c, const ElemType momentum); ElemType Adagrad(CPUMatrix& c, const bool needAveMultiplier); - public: - CPUSparseMatrix& InplaceTruncateTop (const ElemType /*threshold*/) { NOT_IMPLEMENTED; } - CPUSparseMatrix& InplaceTruncateBottom (const ElemType /*threshold*/) { NOT_IMPLEMENTED; } - CPUSparseMatrix& InplaceTruncate (const ElemType /*threshold*/); + public: + CPUSparseMatrix& InplaceTruncateTop(const ElemType threshold); + CPUSparseMatrix& InplaceTruncateBottom(const ElemType threshold); + CPUSparseMatrix& InplaceTruncate (const ElemType threshold); + CPUSparseMatrix& InplaceSoftThreshold(const ElemType threshold); + + ElemType FrobeniusNorm() const; //useful for comparing CPU and GPU results + + ElemType SumOfAbsElements() const; //sum of all abs(elements) + ElemType SumOfElements() const; //sum of all elements public: //void Print(const char* /*matrixName*/) const { NOT_IMPLEMENTED; } diff --git a/Math/Math/CommonMatrix.h b/Math/Math/CommonMatrix.h index 724cf5a89..81c35d19e 100644 --- a/Math/Math/CommonMatrix.h +++ b/Math/Math/CommonMatrix.h @@ -11,10 +11,10 @@ #define AUTOPLACEMATRIX 1000 // used in parameters only #define MANAGEDEXTERN -2 // managed externally (i.e. 
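FrobeniusNorm, SumOfAbsElements, and SumOfElements above all iterate only over the stored nonzeros (NzValues/NzCount). That is sufficient because absent entries are exactly zero and contribute nothing to any of the three reductions; SumOfAbsElements additionally delegates to BLAS asum (dasum/cblas_dasum), which is the same absolute-value sum. A plain-loop equivalent for reference:

// Reductions over a sparse matrix only need its stored nonzeros:
// zeros add 0 to the sum, |0| = 0 to the L1 norm, and 0^2 to Frobenius.
#include <cmath>
#include <cstddef>

void sparseNorms(const double* nzValues, size_t nzCount,
                 double& sum, double& sumAbs, double& frobenius)
{
    sum = sumAbs = 0;
    double sq = 0;
    for (size_t i = 0; i < nzCount; i++)
    {
        sum    += nzValues[i];                // SumOfElements
        sumAbs += std::fabs(nzValues[i]);     // SumOfAbsElements (== asum)
        sq     += nzValues[i] * nzValues[i];
    }
    frobenius = std::sqrt(sq);                // FrobeniusNorm
}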
diff --git a/Math/Math/CommonMatrix.h b/Math/Math/CommonMatrix.h
index 724cf5a89..81c35d19e 100644
--- a/Math/Math/CommonMatrix.h
+++ b/Math/Math/CommonMatrix.h
@@ -11,10 +11,10 @@
 #define AUTOPLACEMATRIX 1000 // used in parameters only
 #define MANAGEDEXTERN -2 // managed externally (i.e. PTask)
 #define CPUDEVICE -1 // device is the CPU
-#define EPS_IN_INVERSE 1e-30f // min float is 1.4e-45 and max float is 3.4e-38
-#define EPS_IN_LOG 1e-40f
-#define LOG_OF_EPS_IN_LOG -92.1f // log(EPS_IN_LOG)
-#define LOG10_OF_EPS_IN_LOG -40 // log_10(EPS_IN_LOG)
+#define EPS_IN_INVERSE 1e-30f // 1e-37 is the only guaranteed precision
+#define EPS_IN_LOG 1e-37f // 1e-37 is the only guaranteed precision
+#define LOG_OF_EPS_IN_LOG -85.1f // log(EPS_IN_LOG)
+#define LOG10_OF_EPS_IN_LOG -37 // log_10(EPS_IN_LOG)
 #define LZERO -10e10
 #define MINLOGEXP -9.2103
 #define LSMALL -0.5E10
diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu
index 432b40913..8b671c63a 100644
--- a/Math/Math/GPUMatrix.cu
+++ b/Math/Math/GPUMatrix.cu
@@ -2003,6 +2003,42 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return *this;
     }
+
+    template<class ElemType>
+    GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceTruncate(const ElemType threshold)
+    {
+        if (IsEmpty())
+            throw std::logic_error("InplaceTruncate: Matrix is empty.");
+
+        LONG64 N = (LONG64)GetNumElements();
+        int blocksPerGrid = (int)ceil(N*1.0 / threadsPerBlock);
+        PrepareDevice();
+        cudaEvent_t done = nullptr;
+        if (do_sync)    CUDA_CALL(cudaEventCreate(&done));
+        _inplaceTruncate<<<blocksPerGrid, threadsPerBlock>>>(m_pArray, threshold, N);
+        if (do_sync)    CUDA_CALL(cudaEventRecord(done));
+        if (do_sync)    CUDA_CALL(cudaEventSynchronize(done));
+        if (do_sync)    CUDA_CALL(cudaEventDestroy(done));
+        return *this;
+    }
+
+    template<class ElemType>
+    GPUMatrix<ElemType>& GPUMatrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
+    {
+        if (IsEmpty())
+            throw std::logic_error("InplaceSoftThreshold: Matrix is empty.");
+
+        LONG64 N = (LONG64)GetNumElements();
+        int blocksPerGrid = (int)ceil(N*1.0 / threadsPerBlock);
+        PrepareDevice();
+        cudaEvent_t done = nullptr;
+        if (do_sync)    CUDA_CALL(cudaEventCreate(&done));
+        _inplaceSoftThreshold<<<blocksPerGrid, threadsPerBlock>>>(m_pArray, threshold, N);
+        if (do_sync)    CUDA_CALL(cudaEventRecord(done));
+        if (do_sync)    CUDA_CALL(cudaEventSynchronize(done));
+        if (do_sync)    CUDA_CALL(cudaEventDestroy(done));
+        return *this;
+    }

     template<class ElemType>
     GPUMatrix<ElemType>& GPUMatrix<ElemType>::SetToZeroIfAbsLessThan (const ElemType threshold)
     {
diff --git a/Math/Math/GPUMatrix.h b/Math/Math/GPUMatrix.h
index 20682abc2..c4fe537b6 100644
--- a/Math/Math/GPUMatrix.h
+++ b/Math/Math/GPUMatrix.h
@@ -218,6 +218,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         GPUMatrix<ElemType>& AssignTruncateBottomOf (const GPUMatrix<ElemType>& a, const ElemType threshold);
         GPUMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
         GPUMatrix<ElemType>& AssignTruncateTopOf (const GPUMatrix<ElemType>& a, const ElemType threshold);
+        GPUMatrix<ElemType>& InplaceTruncate(const ElemType threshold);
+        GPUMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

         GPUMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu
index 933822626..2698fd4a6 100644
--- a/Math/Math/GPUMatrixCUDAKernels.cu
+++ b/Math/Math/GPUMatrixCUDAKernels.cu
@@ -2784,6 +2784,29 @@ __global__ void _inplaceTruncate(
     }
 }

+template<class ElemType>
+__global__ void _inplaceSoftThreshold(
+    ElemType* a,
+    const ElemType threshold,
+    const LONG64 N)
+{
+    LONG64 id = blockDim.x * blockIdx.x + threadIdx.x;
+    if (id >= N)
+        return;
+
+    if (a[id] > threshold)
+    {
+        a[id] -= threshold;
+    }
+    else if (a[id] < -threshold)
+    {
+        a[id] += threshold;
+    }
+    else
+        a[id] = 0;
+}
+
+
 template<class ElemType>
 __global__ void _normalGradForSparseBlock(
     const ElemType momentum,
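All of the GPU code above follows the same one-thread-per-element convention: launch `ceil(N / threadsPerBlock)` blocks, have each thread compute a global index, and return early when the index runs past `N` (the last block is usually only partially full). A self-contained sketch of that convention; the names and block size here are illustrative, not CNTK's:

```cuda
#include <cuda_runtime.h>

__global__ void clampKernel(float* a, float t, long long n)
{
    long long id = (long long)blockDim.x * blockIdx.x + threadIdx.x;
    if (id >= n)             // guard against the partially filled last block
        return;
    if (a[id] > t)       a[id] = t;
    else if (a[id] < -t) a[id] = -t;
}

void launchClamp(float* devPtr, float t, long long n)
{
    const int threadsPerBlock = 512;  // illustrative; CNTK uses its own constant
    int blocks = (int)((n + threadsPerBlock - 1) / threadsPerBlock);  // ceil division
    clampKernel<<<blocks, threadsPerBlock>>>(devPtr, t, n);
}
```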
diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu
index 627bf547b..2e5d2f1a4 100644
--- a/Math/Math/GPUSparseMatrix.cu
+++ b/Math/Math/GPUSparseMatrix.cu
@@ -1108,26 +1108,37 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template<class ElemType>
     GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
     {
-        if(m_format == matrixFormatSparseBlockCol || m_format == matrixFormatSparseBlockRow ||
-            m_format == matrixFormatSparseCSR || m_format == matrixFormatSparseCSC)
-        {
-            long N=(long)GetNumNZElements();
-            int blocksPerGrid =(int)ceil(N*1.0/threadsPerBlock);
-            cudaEvent_t done = nullptr;
-            if (do_sync)    CUDACALL(cudaEventCreate(&done));
-            ElemType * values = NzValues();
-            _inplaceTruncate<<<blocksPerGrid,threadsPerBlock>>>(values,threshold,N);
-            if (do_sync)    CUDACALL(cudaEventRecord(done));
-            if (do_sync)    CUDACALL(cudaEventSynchronize(done));
-            if (do_sync)    CUDACALL(cudaEventDestroy(done));
-        }
-        else
-        {
-            NOT_IMPLEMENTED;
-        }
+        long N=(long)GetNumNZElements();
+
+        long blocksPerGrid = (long)ceil(N*1.0 / threadsPerBlock);
+        cudaEvent_t done = nullptr;
+        if (do_sync)    CUDACALL(cudaEventCreate(&done));
+        ElemType * values = NzValues();
+        _inplaceTruncate<<<blocksPerGrid,threadsPerBlock>>>(values,threshold,N);
+        if (do_sync)    CUDACALL(cudaEventRecord(done));
+        if (do_sync)    CUDACALL(cudaEventSynchronize(done));
+        if (do_sync)    CUDACALL(cudaEventDestroy(done));
+
         return *this;
     }

+    template<class ElemType>
+    GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
+    {
+        long N = (long)GetNumNZElements();
+
+        long blocksPerGrid = (long)ceil(N*1.0 / threadsPerBlock);
+        cudaEvent_t done = nullptr;
+        if (do_sync)    CUDACALL(cudaEventCreate(&done));
+        ElemType * values = NzValues();
+        _inplaceSoftThreshold<<<blocksPerGrid, threadsPerBlock>>>(values, threshold, N);
+        if (do_sync)    CUDACALL(cudaEventRecord(done));
+        if (do_sync)    CUDACALL(cudaEventSynchronize(done));
+        if (do_sync)    CUDACALL(cudaEventDestroy(done));
+
+        return *this;
+    }
+
     // normal update for smoothed gradients c and current gradients (this)
     template<class ElemType>
     void GPUSparseMatrix<ElemType>::NormalGrad(GPUMatrix<ElemType>& c, const ElemType momentum)
diff --git a/Math/Math/GPUSparseMatrix.h b/Math/Math/GPUSparseMatrix.h
index e0b2e4dcc..2edb5679e 100644
--- a/Math/Math/GPUSparseMatrix.h
+++ b/Math/Math/GPUSparseMatrix.h
@@ -232,6 +232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         GPUSparseMatrix<ElemType>& AssignAbsOf (const GPUSparseMatrix<ElemType>& a);

         GPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
+        GPUSparseMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

         GPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
         GPUSparseMatrix<ElemType>& AssignTruncateBottomOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
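Both sparse implementations run their kernels over `NzValues()` alone, treating the nonzero buffer as a flat dense array. That is valid here because truncation and soft thresholding both map zero to zero, so entries the sparse format does not store need no update. A small sketch of the invariant being relied on (`ApplyToNonzeros` is a hypothetical helper, not CNTK API):

```cpp
#include <cassert>

// Any elementwise op f with f(0) == 0 can be applied to a sparse
// matrix by transforming only its stored nonzero values.
template <class T, class F>
void ApplyToNonzeros(T* nzValues, long nzCount, F f)
{
    assert(f((T)0) == (T)0);  // the property that makes this sound
    for (long i = 0; i < nzCount; i++)
        nzValues[i] = f(nzValues[i]);
}
```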
diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp
index bc02191da..67c8bf170 100644
--- a/Math/Math/Matrix.cpp
+++ b/Math/Math/Matrix.cpp
@@ -2440,7 +2440,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     Matrix<ElemType>& Matrix<ElemType>::InplaceTruncate(const ElemType threshold)
     {
         if (IsEmpty())
-            throw std::logic_error("InplaceTruncateBottom: Matrix is empty.");
+            throw std::logic_error("InplaceTruncate: Matrix is empty.");

         if (sizeof(ElemType)==sizeof(float))
         {
@@ -2456,7 +2456,7 @@
         DISPATCH_MATRIX_ON_FLAG(this,
             this,
             this->m_CPUMatrix->InplaceTruncate(threshold),
-            this->m_GPUMatrix->InplaceTruncateTop(fabs(threshold)); this->m_GPUMatrix->InplaceTruncateBottom(-fabs(threshold)),
+            this->m_GPUMatrix->InplaceTruncate(threshold),
             this->m_CPUSparseMatrix->InplaceTruncate(threshold),
             this->m_GPUSparseMatrix->InplaceTruncate(threshold)
             );
@@ -2464,6 +2464,27 @@
         return *this;
     }

+    template<class ElemType>
+    Matrix<ElemType>& Matrix<ElemType>::InplaceSoftThreshold(const ElemType threshold)
+    {
+        assert(threshold >= 0);
+
+        if (IsEmpty())
+            throw std::logic_error("InplaceSoftThreshold: Matrix is empty.");
+
+        if (threshold == 0)
+            return *this;
+
+        DISPATCH_MATRIX_ON_FLAG(this,
+            this,
+            this->m_CPUMatrix->InplaceSoftThreshold(threshold),
+            this->m_GPUMatrix->InplaceSoftThreshold(threshold),
+            this->m_CPUSparseMatrix->InplaceSoftThreshold(threshold),
+            this->m_GPUSparseMatrix->InplaceSoftThreshold(threshold)
+            );
+
+        return *this;
+    }
+
     //Threshold truncating: this[i] = max( this[i], threshold )
     template<class ElemType>
     Matrix<ElemType>& Matrix<ElemType>::InplaceTruncateBottom (const ElemType threshold)
@@ -2486,7 +2507,7 @@
             this,
             this->m_CPUMatrix->InplaceTruncateBottom(threshold),
             this->m_GPUMatrix->InplaceTruncateBottom(threshold),
-            NOT_IMPLEMENTED,
+            this->m_CPUSparseMatrix->InplaceTruncateBottom(threshold),
             this->m_GPUSparseMatrix->InplaceTruncateBottom(threshold)
             );

@@ -2542,18 +2563,18 @@
         {
             if (!isfinite((float)threshold))
                 return *this;
-        }
+        }
         else
         {
             if (!isfinite(threshold))
                 return *this;
-        }
+        }

         DISPATCH_MATRIX_ON_FLAG(this,
             this,
             this->m_CPUMatrix->InplaceTruncateTop(threshold),
             this->m_GPUMatrix->InplaceTruncateTop(threshold),
-            NOT_IMPLEMENTED,
+            this->m_CPUSparseMatrix->InplaceTruncateTop(threshold),
             this->m_GPUSparseMatrix->InplaceTruncateTop(threshold)
             );

@@ -2626,7 +2647,7 @@
             nullptr,
             return this->m_CPUMatrix->SumOfElements(),
             return this->m_GPUMatrix->SumOfElements(),
-            NOT_IMPLEMENTED,
+            return this->m_CPUSparseMatrix->SumOfElements(),
             return this->m_GPUSparseMatrix->SumOfElements()
             );

@@ -2869,7 +2890,7 @@
             nullptr,
             return this->m_CPUMatrix->FrobeniusNorm(),
             return this->m_GPUMatrix->FrobeniusNorm(),
-            NOT_IMPLEMENTED,
+            return this->m_CPUSparseMatrix->FrobeniusNorm(),
             return this->m_GPUSparseMatrix->FrobeniusNorm()
             );
     }
diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h
index 68a3bf021..552ae4510 100644
--- a/Math/Math/Matrix.h
+++ b/Math/Math/Matrix.h
@@ -236,6 +236,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         Matrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
         Matrix<ElemType>& AssignTruncateTopOf (const Matrix<ElemType>& a, const ElemType threshold);
         Matrix<ElemType>& InplaceTruncate (const ElemType threshold);
+        Matrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);

         Matrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
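With the `DISPATCH_MATRIX_ON_FLAG` cases filled in, a caller can invoke the operator without caring whether the matrix lives on CPU or GPU, dense or sparse storage. One use this enables is a proximal-gradient (ISTA-style) update for L1-regularized training; the sketch below is hypothetical glue, assuming CNTK's `Matrix<ElemType>::ScaleAndAdd(alpha, a, c)` computes `c += alpha * a` — only `InplaceSoftThreshold` itself comes from this patch:

```cpp
// Hypothetical L1 proximal step: plain gradient descent, then shrinkage.
template <class ElemType>
void ProximalStep(Matrix<ElemType>& w, const Matrix<ElemType>& grad,
                  ElemType lr, ElemType l1)
{
    Matrix<ElemType>::ScaleAndAdd(-lr, grad, w);  // w -= lr * grad
    w.InplaceSoftThreshold(lr * l1);              // prox of (lr*l1)*||w||_1
}
```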