Removing files and folders as announced (examples and readers)

This commit is contained in:
Philipp Kranen 2016-01-11 13:52:31 +01:00
Parent 3ff2a677a0
Commit 6fdf48d58f
180 changed files: 0 additions and 61704 deletions

View file

@@ -1,40 +0,0 @@
This directory contains the grapheme-to-phoneme experiments reported in the following paper:
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme to phoneme conversion"
submitted to Interspeech 2015
encoder-decoder LSTM: scripts/s36.noreg.log
the best performing system is
s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
unidirectional LSTM: scripts/s23.unidirectional.log
bidirectional LSTM: scripts/s30.bidirectional.log
-----------------------
to score:
suppose the G2P results are in xxx.output
in Python, use the following commands (the outputfn lines below are alternative result files; pick one)
import os; os.chdir('d:/dev/kaisheny/dev/exp/lts/lts/scripts/')
import const as cn
import score
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/reps30bilstm/test_bw1_iter35/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/reps23mb100fw6/test_bw1_iter34/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2/test_bw1_iter43/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/s30rndjointconditionalbilstmn300n300n300/test_bw1_iter35/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2/test_bw1_iter43/output.rec.txt'
lexicon = {}
score.ReadPronunciations(cn._TEST_PRON, lexicon)
score.BleuScore(lexicon, cn._TEST_FN, outputfn, False)
score.CORRECT_RATE(lexicon, cn._TEST_FN, outputfn, False)
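A convenience wrapper for the calls above (a sketch assuming the score and const modules in this scripts directory; score_g2p_output is a hypothetical helper, not part of the setup):

import os
os.chdir('d:/dev/kaisheny/dev/exp/lts/lts/scripts/')  # the score/const modules live here
import const as cn
import score

def score_g2p_output(outputfn):
    # hypothetical helper wrapping the scoring calls shown above
    lexicon = {}
    score.ReadPronunciations(cn._TEST_PRON, lexicon)        # reference pronunciations
    score.BleuScore(lexicon, cn._TEST_FN, outputfn, False)  # BLEU over phoneme sequences
    score.CORRECT_RATE(lexicon, cn._TEST_FN, outputfn, False)

score_g2p_output('//speechstore5/transient/kaishengy/exp/lts/result/reps30bilstm/test_bw1_iter35/output.rec.txt')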

View file

@@ -1,10 +0,0 @@
REM test.s26conditionalhashingbilstmn300n300
set TESTITER=%1
set MdlDir=%2
set BW=%3
set Q_ROOT=rpc://speech-data:6673
set QEXE=\\speechstore5\q
set PATH=\\speechstore5\q;%PATH%
set PATH=\\speechstore5\userdata\kaishengy\bin\DLLS;%PATH%
\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=-1 command=LSTMTest Iter=%1 MdlDir=%2 bw=%3 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
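For reference: the `+` in configFile concatenates the two config files (later values override earlier ones), top-level assignments such as Iter=%1 define variables, and bracketed arguments like LSTMTest=[beamWidth=$bw$] override single keys inside a named section, with $name$ substituted from those variables. A sketch of driving the same command from Python (build_cmd is a hypothetical wrapper, not part of this setup):

import subprocess

SETUPS = r"\\speechstore5\userdata\kaishengy\exp\lts\setups"
CN_EXE = r"\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe"

def build_cmd(iter_no, mdl_dir, bw):
    # mirrors the batch file: variables first, then per-section overrides
    return [CN_EXE,
            f"configFile={SETUPS}\\global.lstm.config+{SETUPS}\\lstm.2streams.lw7.conditional.mb100.fw6.config",
            "DeviceNumber=-1", "command=LSTMTest",
            f"Iter={iter_no}", f"MdlDir={mdl_dir}", f"bw={bw}",
            "LSTMTest=[beamWidth=$bw$]",
            r"LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$]",
            r"ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$"]

subprocess.run(build_cmd(34, r"\\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6", 1))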

View file

@@ -1,11 +0,0 @@
REM test.s26conditionalhashingbilstmn300n300
set TESTITER=%1
set MdlDir=%2
set BW=%3
set Q_ROOT=rpc://speech-data:6673
set QEXE=\\speechstore5\q
set PATH=\\speechstore5\q;%PATH%
set PATH=\\speechstore5\userdata\kaishengy\bin\DLLS;%PATH%
\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=-1 command=LSTMTest Iter=%1 MdlDir=%2 bw=%3 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$

View file

@@ -1,11 +0,0 @@
REM test.s26conditionalhashingbilstmn300n300
set TESTITER=%1
set MdlDir=%2
set BW=%3
set Q_ROOT=rpc://speech-data:6673
set QEXE=\\speechstore5\q
set PATH=\\speechstore5\q;%PATH%
set PATH=\\speechstore5\userdata\kaishengy\bin\DLLS;%PATH%
\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=%1 MdlDir=%2 bw=%3 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$

View file

@@ -1,15 +0,0 @@
# this reproduces the uni-directional model performance
# starting from the best system so far: increase the projection layer dimension and use more forward observations
# reps23mb100fw6
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.05]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6
for %i in (0,1) do q sub -J testbm%is23mb100fw6conditionallstmp400n400lr005 -x \\speechstore5\userdata\kaishengy\exp\lts\exes\job.s23mb100.fw6.bat 34 \\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6 %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=-1 command=LSTMTest Iter=8 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6 bw=1 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
for %i in (0,1) do q sub -J s23bw%i -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s23.bat 35 \\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6 %i
# small scale
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=-1 LSTM=[SGD=[learningRatesPerSample=0.05]] ExpDir=d:\exp\lts\result\reps23mb100fw6conditionallstmp400n400lr005 LSTM=[SGD=[gradientcheck=true]] LSTM=[SGD=[unittest=true]]

View file

@@ -1,31 +0,0 @@
# the following uses one more layer, i.e., two LSTM layers in total, to model directional predictions
# --------- training -------------
# s30jointconditionalbilstmn300n300n300
#local
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reps30bilstm LSTM=[SGD=[gradientcheck=true]]
C:\dev\cntk5\x64\Release\CNTK.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\temp\reps30bilstm
C:\dev\cntk5\x64\Release\CNTK.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.config deviceNumber=-1 command=LSTMTest Iter=39 MdlDir=d:\temp\reps30bilstm bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$ command=LSTMTest
# reps30bilstm
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps30bilstm
# local with NDL
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.ndl.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reps30bilstmndl LSTM=[SGD=[gradientcheck=true]]
# ---------- test ----------------
for %i in (0,1) do q sub -J testbm%ibilstm -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s30.bat 35 \\speechstore5\transient\kaishengy\exp\lts\result\reps30bilstm %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=-1 command=LSTMTest Iter=35 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps30bilstm bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
#************************************************
# test on the previously trained model
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=-1 command=LSTMTest Iter=34 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\s30jointconditionalbilstmn300n300n300 bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
for %i in (0,1) do q sub -J oldmodeltestbm%ibilstm -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s30.bat 35 \\speechstore5\transient\kaishengy\exp\lts\result\s30rndjointconditionalbilstmn300n300n300 %i

View file

@@ -1,49 +0,0 @@
# no regularization but a small learning rate
# run multiple passes over the data
# even larger learning rate
# s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=1 LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
for %i in (0, 1, 5) do q sub -J bm%is36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 -x \\speechstore5\userdata\kaishengy\exp\lts\exes\job.s36.bat 46 \\speechstore5\transient\kaishengy\exp\lts\result\s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 %i
# in the local directory
# runs the unit test
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=0 LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=-1 LSTM=[SGD=[gradientcheck=true]] LSTM=[SGD=[unittest=true]]
# in the local directory
# runs gradient checking on CPU
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=c:\dev\cntk5\examplesetups\g2p\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=Auto LSTM=[SGD=[gradientcheck=true]]
# in the local directory
# runs gradient checking on GPU
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 LSTM=[SGD=[gradientcheck=true]]
# the decoder and encoder networks can be on different devices; for example, the encoder network on GPU and the decoder network on CPU
# interesting, didn't notice this before.
# use more data
# gradient check passed
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 LSTM=[SGD=[gradientcheck=true]] LSTM=[epochSize=70] LSTM=[minibatchSize=30] DataDir=d:/data/lts
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 LSTM=[epochSize=70] LSTM=[minibatchSize=30] DeviceNumber=0
#test
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=0 MdlDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$ DataDir=d:/data/lts
--------------------------- full train/test -----------------------------------
# train on full data
# reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
\\speechstore5\transient\kaishengy\bin\binmay29\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=0
# run locally
C:\dev\cntk5\x64\Release\CNTK.exe configFile=c:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+c:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config LSTM=[SGD=[numMBsToShowResult=100]] ExpDir=d:\temp\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=0
# test
for %i in (0,1) do q sub -J s36bw%i -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s36.bat 43 \\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=46 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 bw=1 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
-------------------- test on the old models to make sure that the decoder and forward pass are right --------------
for %i in (0,1) do q sub -J olds36bw%i -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s36.bat 43 \\speechstore5\transient\kaishengy\exp\lts\result\s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=43 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
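The gradientcheck=true / unittest=true runs above validate the analytic gradients numerically. The idea, as a generic finite-difference sketch (illustration only, not CNTK's implementation):

import numpy as np

def gradient_check(f, grad_f, x, eps=1e-5, tol=1e-4):
    # compare the analytic gradient against central finite differences
    g = grad_f(x)
    g_num = np.zeros_like(x)
    for i in range(x.size):
        d = np.zeros_like(x)
        d.flat[i] = eps
        g_num.flat[i] = (f(x + d) - f(x - d)) / (2 * eps)
    rel_err = np.abs(g - g_num) / np.maximum(np.abs(g) + np.abs(g_num), 1e-12)
    return rel_err.max() < tol

# e.g., the gradient of sum(x^2) is 2x
x = np.random.randn(5)
assert gradient_check(lambda v: (v ** 2).sum(), lambda v: 2 * v, x)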

View file

@@ -1,899 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# during learning, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
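# presumably, with nbruttsineachrecurrentiter=100 parallel sequences (set in the
# readers below), 1000 samples per minibatch corresponds to roughly 10 time steps
# per sequence per update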
# need to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=195:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:87
lookupTableOrderSizes=1:3
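# note: the first layer size 195 = 108 + 87, the sum of the two stream sizes;
# the 87 presumably comes from a 29-letter vocabulary times the lookup table order
# of 3 for the letter stream (lookupTableOrderSizes=1:3, wordContext=0:1:2)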
rnnType=JOINTCONDITIONALBILSTMSTREAMS
# rnnType=UNIDIRECTIONALLSTMWITHPASTPREDICTION
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has not improved, fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
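# net effect of the settings above: the learning rate is halved only when the
# validation criterion degrades, and it is never raised (the increase threshold is
# effectively unreachable, and a factor of 1.0 would leave the rate unchanged anyway)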
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=false
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
# change the ordering of test sentences
LSTMTest2=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s30.02.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s30.02.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
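The LSTMTest sections above decode with action=beamSearch, keeping the beamWidth best partial hypotheses at each step. A generic sketch of that idea over per-step token log-probabilities (illustration only, not the CNTK decoder; the toy step function below is hypothetical):

import math

def beam_search(step_logprobs, beam_width, eos, max_len=10):
    # step_logprobs(prefix) -> dict mapping next token to its log-probability
    beams = [((), 0.0)]            # (token prefix, cumulative log-prob)
    finished = []
    for _ in range(max_len):
        candidates = []
        for prefix, lp in beams:
            for tok, tok_lp in step_logprobs(prefix).items():
                hyp = (prefix + (tok,), lp + tok_lp)
                (finished if tok == eos else candidates).append(hyp)
        if not candidates:
            break
        # prune to the beam_width best partial hypotheses
        beams = sorted(candidates, key=lambda h: h[1], reverse=True)[:beam_width]
    return max(finished + beams, key=lambda h: h[1])

# toy usage: a model that always prefers 'a' but may stop
print(beam_search(lambda p: {'a': math.log(0.6), '</s>': math.log(0.4)}, 2, '</s>'))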

View file

@@ -1,899 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# during learning, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
# need to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=195:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:87
lookupTableOrderSizes=1:3
rnnType=JOINTCONDITIONALBILSTMSTREAMS
# rnnType=UNIDIRECTIONALLSTMWITHPASTPREDICTION
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has not improved, fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
# change the ordering of test sentences
LSTMTest2=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s30.02.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s30.02.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]

View file

@@ -1,872 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# during learning, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
# need to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# NDLNetworkBuilder is used in this setup (the SimpleNetworkBuilder section is omitted)
NDLNetworkBuilder=[
networkDescription=$NdlDir$\lstm.ndl
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has not improved, fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=29
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=29
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=29
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
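Both test commands run action=beamSearch with beamWidth=1, i.e. greedy decoding over candidate symbols proposed through the isproposal/proposalSymbolList section. A rough Python sketch of the generic beam loop this configures (step is a hypothetical scorer returning per-symbol log-probabilities; this illustrates the search, not the CNTK decoder):

import heapq

def beam_search(step, start, end, beam_width=1, max_len=100):
    # step(prefix) -> list of (symbol, logprob)
    beams = [(0.0, [start])]
    for _ in range(max_len):
        candidates = []
        for score, prefix in beams:
            if prefix[-1] == end:                 # finished hypotheses survive as-is
                candidates.append((score, prefix))
                continue
            for sym, lp in step(prefix):
                candidates.append((score + lp, prefix + [sym]))
        beams = heapq.nlargest(beam_width, candidates, key=lambda c: c[0])
        if all(p[-1] == end for _, p in beams):
            break
    return beams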
# change the ordering of test sentences
LSTMTest2=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s30.02.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s30.02.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
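Throughout these readers, nbruttsineachrecurrentiter sets how many utterances are laid out in parallel in each recurrent minibatch (100 for training, 1 for beam-search testing), while minibatchSize bounds the number of samples. A toy sketch of that packing, padding each group to its longest sequence (illustrative only; the LUSequenceReader internals differ):

def pack_minibatch(utterances, nbrutts=100, pad="</s>"):
    # group utterances into parallel streams and pad to equal length
    batches = []
    for i in range(0, len(utterances), nbrutts):
        group = utterances[i:i + nbrutts]
        T = max(len(u) for u in group)
        batches.append([u + [pad] * (T - len(u)) for u in group])
    return batches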

View file

@@ -1,12 +0,0 @@
#WorkDir=//speechstore5/transient/kaishengy/data/lts/Data/CNTK
WorkDir=d:/exp/lts
DataDir=d:/data/lts
#DataDir=d:/data/ltsdbg
NdlDir=c:/dev/cntk5/ExampleSetups/G2P/setups
PredictionModelFeatureDir=\\speechstore5\transient\kaishengy\exp\lts\result\expbilstmce300n\s4
ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\explstm
OutDir=$ExpDir$
LogDir=$ExpDir$\log
DeviceNumber=0
MdlDir=$ExpDir$
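These definitions are spliced into the configs through $Name$ references, and a value can itself contain a reference (OutDir=$ExpDir$). A tiny Python resolver approximating that substitution (an illustration, not the actual CNTK config parser):

import re

def resolve(vars_):
    # repeatedly expand $Name$ references until every value is literal
    pattern = re.compile(r"\$(\w+)\$")
    changed = True
    while changed:
        changed = False
        for key, val in vars_.items():
            new = pattern.sub(lambda m: vars_[m.group(1)], val)
            if new != val:
                vars_[key], changed = new, True
    return vars_

# e.g. resolve({"ExpDir": r"d:\exp\lts", "OutDir": "$ExpDir$", "LogDir": r"$ExpDir$\log"})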

View file

@@ -1,680 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# In the learning process, we split an input sequence into a vector of subsequences of size T_bptt.
minibatchSize=1000
# needs to be small since models are updated after each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=282:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:174
lookupTableOrderSizes=1:6
rnnType=TRANSDUCER
lookupTableOrder=6
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
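With wordContext=0:1:2:3:4:5 and lookupTableOrder=6, every position in the letter stream sees a 6-symbol window, so a 29-entry letter vocabulary yields an input width of 29 * 6 = 174, which matches streamSizes=108:174 above and dim=174 in the reader sections below. A sketch of building such a stacked one-hot window (illustrative, not the reader implementation):

def context_window(ids, vocab_size=29, offsets=(0, 1, 2, 3, 4, 5)):
    # stack one-hot vectors of ids[t+k] for each context offset k
    T, width = len(ids), vocab_size * len(offsets)
    frames = []
    for t in range(T):
        vec = [0.0] * width
        for j, k in enumerate(offsets):
            if 0 <= t + k < T:                    # out-of-range positions stay zero
                vec[j * vocab_size + ids[t + k]] = 1.0
        frames.append(vec)
    return frames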
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has no improvement, then fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]

View file

@@ -1,680 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# In the learning process, we split an input sequence into a vector of subsequences of size T_bptt.
minibatchSize=1000
# needs to be small since models are updated after each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=282:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:174
lookupTableOrderSizes=1:6
rnnType=TRANSDUCER
lookupTableOrder=6
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has no improvement, then fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]

View file

@@ -1,186 +0,0 @@
load=ndlMacroDefine
run=ndlBiLSTMCreateNetwork
ndlMacroDefine=[
# Macro definitions
DelayNode(x)
{
D=Delay(x, delayInput=Dout, delayTime=1)
}
Lookup(x, indim, outdim)
[
E = Parameter(outdim, indim)
Lookup=Times(E, x)
]
MeanVarNorm(x)
{
xMean = Mean(x)
xStdDev = InvStdDev(x)
xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
}
LogPrior(labels)
{
Prior=Mean(labels)
LogPrior=Log(Prior)
}
LSTMComponent(inputDim, outputDim, inputVal)
{
Wxo = Parameter(outputDim, inputDim)
Wxi = Parameter(outputDim, inputDim)
Wxf = Parameter(outputDim, inputDim)
Wxc = Parameter(outputDim, inputDim)
bo = Parameter(outputDim, init=fixedvalue, value=-1.0)
bc = Parameter(outputDim, init=fixedvalue, value=0.0)
bi = Parameter(outputDim, init=fixedvalue, value=-1.0)
bf = Parameter(outputDim, init=fixedvalue, value=-1.0)
Whi = Parameter(outputDim, outputDim)
Wci = Parameter(outputDim)
Whf = Parameter(outputDim, outputDim)
Wcf = Parameter(outputDim)
Who = Parameter(outputDim, outputDim)
Wco = Parameter(outputDim)
Whc = Parameter(outputDim, outputDim)
delayHI = Delay(outputDim, output, delayTime=1)
delayHF = Delay(outputDim, output, delayTime=1)
delayHO = Delay(outputDim, output, delayTime=1)
delayHC = Delay(outputDim, output, delayTime=1)
delayCI = Delay(outputDim, ct, delayTime=1)
delayCF = Delay(outputDim, ct, delayTime=1)
delayCC = Delay(outputDim, ct, delayTime=1)
WxiInput = Times(Wxi, inputVal)
WhidelayHI = Times(Whi, delayHI)
WcidelayCI = DiagTimes(Wci, delayCI)
it = Sigmoid (Plus ( Plus (Plus (WxiInput, bi), WhidelayHI), WcidelayCI))
WxcInput = Times(Wxc, inputVal)
WhcdelayHC = Times(Whc, delayHC)
bit = ElementTimes(it, Tanh( Plus(WxcInput, Plus(WhcdelayHC, bc))))
Wxfinput = Times(Wxf, inputVal)
WhfdelayHF = Times(Whf, delayHF)
WcfdelayCF = DiagTimes(Wcf, delayCF)
ft = Sigmoid( Plus (Plus (Plus(Wxfinput, bf), WhfdelayHF), WcfdelayCF))
bft = ElementTimes(ft, delayCC)
ct = Plus(bft, bit)
Wxoinput = Times(Wxo, inputVal)
WhodelayHO = Times(Who, delayHO)
Wcoct = DiagTimes(Wco, ct)
ot = Sigmoid( Plus( Plus( Plus(Wxoinput, bo), WhodelayHO), Wcoct))
output = ElementTimes(ot, Tanh(ct))
}
LSTMNodeComponent(outputDim, colDim1, colDim2, inputVal)
{
inputGate = Parameter(outputDim, colDim1)
forgetGate = Parameter(outputDim, colDim1)
outputGate = Parameter(outputDim, colDim1)
memoryCell = Parameter(outputDim, colDim2)
LSTMNodeComponent = LSTM(inputVal, inputGate, forgetGate, outputGate, memoryCell)
}
]
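LSTMComponent above writes out a peephole LSTM cell: the DiagTimes terms are elementwise peepholes from the previous (for the input and forget gates) or current (for the output gate) cell state, and the Delay nodes feed back the output and cell state from the prior step. The same step in NumPy, mirroring the macro's equations (a sketch; parameter creation and initialization omitted):

import numpy as np

def lstm_step(x, h_prev, c_prev, p):
    # p maps names to arrays: Wx*/Wh* matrices, b* biases, Wc* peephole vectors
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    i = sigmoid(p["Wxi"] @ x + p["bi"] + p["Whi"] @ h_prev + p["Wci"] * c_prev)
    f = sigmoid(p["Wxf"] @ x + p["bf"] + p["Whf"] @ h_prev + p["Wcf"] * c_prev)
    c = f * c_prev + i * np.tanh(p["Wxc"] @ x + p["Whc"] @ h_prev + p["bc"])
    o = sigmoid(p["Wxo"] @ x + p["bo"] + p["Who"] @ h_prev + p["Wco"] * c)
    return o * np.tanh(c), c                      # (h_t, c_t)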
ndlCreateNetwork=[
#define basic i/o
featDim=72
labelDim=183
hiddenDim=1024
features=Input(featDim, tag=feature)
labels=Input(labelDim, tag=label)
# define network
featNorm = MeanVarNorm(features)
LSTMoutput = LSTMComponent(featDim, hiddenDim, featNorm)
W1 = Parameter(labelDim, hiddenDim)
LSTMoutputW1 = Times(W1, LSTMoutput)
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Criteria)
Err = ErrorPrediction(labels,LSTMoutputW1,tag=Eval)
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW1,logPrior,tag=Output)
]
ndlBiLSTMCreateNetwork=[
#define basic i/o
ltrDim=29
labelDim=108
embDim=50
hiddenDim=300
ltrForward=Input(ltrDim, tag=feature)
featureDelayedTarget=Input(labelDim, tag=feature)
labels=Input(labelDim, tag=label)
# projection
Wxo = Parameter(embDim, ltrDim)
Wxi = Parameter(embDim, labelDim)
ltrEmb = Times(Wxo, ltrForward)
prnEmb = Times(Wxi, featureDelayedTarget)
# first layer of LSTM
# 50 + 300 + 2
lstmCol1L1=352
# 50 + 300 + 1
lstmCol2L1=351
ltrLSTM = LSTMNodeComponent(hiddenDim, lstmCol1L1, lstmCol2L1, ltrEmb)
prnLSTM = LSTMNodeComponent(hiddenDim, lstmCol1L1, lstmCol2L1, prnEmb)
#backward direction
ltrBackward = TimeReverse(ltrLSTM)
# depth 2
forwardParallelLayer2 = Parallel(ltrLSTM, prnLSTM)
# 600 + 300 + 2
lstmCol3L2=902
# 600 + 300 + 1
lstmCol4L2=901
forwardLayer2 = LSTMNodeComponent(hiddenDim, lstmCol3L2, lstmCol4L2, forwardParallelLayer2)
# 300 + 300 + 2
lstmCol1L2=602
# 300 + 300 + 1
lstmCol2L2=601
backwardLayer2 = LSTMNodeComponent(hiddenDim, lstmCol1L2, lstmCol2L2, ltrBackward)
# depth 3
backwardLayer3 = TimeReverse(backwardLayer2)
depth3activity = Parallel(forwardLayer2, backwardLayer3)
# 600 + 300 + 2
lstmCol3L3=902
# 600 + 300 + 1
lstmCol4L3=901
LSTMoutput = LSTMNodeComponent(hiddenDim, lstmCol3L3, lstmCol4L3, depth3activity)
W1 = Parameter(labelDim, hiddenDim)
LSTMoutputW1 = Times(W1, LSTMoutput)
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Criteria)
Err = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Eval)
outputs = Softmax(LSTMoutputW1, tag=Output)
]
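ndlBiLSTMCreateNetwork composes directions by running an LSTM over a TimeReverse of the sequence, stacking streams with Parallel, and reversing back at the next depth. The pattern, schematically (run_fwd and run_bwd are hypothetical per-direction recurrences over a list of per-step feature vectors):

def bidirectional_layer(run_fwd, run_bwd, xs):
    fwd = run_fwd(xs)                        # forward pass over t = 0..T-1
    bwd = run_bwd(xs[::-1])[::-1]            # TimeReverse -> LSTM -> TimeReverse
    return [f + b for f, b in zip(fwd, bwd)] # Parallel: concatenate the streams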

View file

@@ -1,800 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM
type=float
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=trainEncoderDecoder
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# In the learning process, we split an input sequence into a vector of subsequences of size T_bptt.
minibatchSize=1000
# needs to be small since models are updated after each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
# for each epoch, maximum number of input words is set below
epochSize=486085
EncoderNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=84:500:500:500
# the letter stream doesn't support context-dependent inputs
streamSizes=84
lookupTableOrderSizes=3
rnnType=LSTMENCODER
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
DecoderNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
# the second layer must have the same dimension as the first layer
# because 40 is matched to the output-layer dimension of the encoder network
layerSizes=40:500:500:500:40
recurrentLayer=2:3
# the letter stream doesn't support context-dependent inputs
streamSizes=40
lookupTableOrderSizes=1
rnnType=LSTM
lookupTableOrder=1
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.01
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# use hidden states for encoder-decoder training (a sketch of the handoff follows the SGD block)
useHiddenStates=true
encoderNodes="LSTM0:LSTM2"
decoderNodes="LSTM0:LSTM2"
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
#gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has no improvement, then fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
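With useHiddenStates=true, trainEncoderDecoder carries the hidden and cell states of the listed encoderNodes ("LSTM0:LSTM2") into the matching decoderNodes at the sequence boundary; this is also why the decoder readers below set ignoresentencebegintag=true. A schematic sketch of the handoff (enc_step and dec_step are hypothetical stand-ins):

def encode_decode(enc_step, dec_step, src, tgt_in, state0):
    state = state0
    for x in src:                         # encoder consumes the letter sequence
        state = enc_step(x, state)
    outputs = []
    for y in tgt_in:                      # decoder starts from the encoder state,
        out, state = dec_step(y, state)   # not from a fresh sentence-begin state
        outputs.append(out)
    return outputs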
encoderReader=[
# reader to use for the encoder
# this is letter-only observations
readerType=LUSequenceReader
ioNodeNames=letterInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.encoder.input.map
file=$DataDir$\s31.s2s.encoder.train.txt
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2
randomize=Auto
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=84
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.input.txt
labelType=Category
beginSequence="BOS"
usewordmap=true
# input word list
token=$DataDir$\s31.encoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="<EOS>"
# output token list
token=$DataDir$\s31.decoder.input.lst
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
decoderReader=[
# reader to use for the decoder
# this is phone-only observations
readerType=LUSequenceReader
ioNodeNames=phnInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
dataMultiPass=true
### set to true so that it can use state activities from an encoder network
ignoresentencebegintag=true
phnInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.decoder.input.map
file=$DataDir$\s31.s2s.decoder.train.txt
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=40
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.output.txt
labelType=Category
beginSequence="<EOS>"
usewordmap=true
# input word list
token=$DataDir$\s31.decoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
# output token list
token=$DataDir$\s31.decoder.input.lst
endSequence="<EOS>"
labelMappingFile=$ExpDir$\sentencePhnfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
encoderCVReader=[
# reader to use for the encoder
# this is letter-only observations
readerType=LUSequenceReader
ioNodeNames=letterInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.encoder.input.map
file=$DataDir$\s31.s2s.encoder.validation.txt
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=84
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.input.txt
labelType=Category
beginSequence="BOS"
usewordmap=true
# input word list
token=$DataDir$\s31.encoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="<EOS>"
# output token list
token=$DataDir$\s31.decoder.input.lst
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
decoderCVReader=[
# reader to use for the decoder
# these are phoneme-only observations
readerType=LUSequenceReader
ioNodeNames=phnInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
### set to true so that it can use state activities from an encoder network
ignoresentencebegintag=true
phnInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.decoder.input.map
file=$DataDir$\s31.s2s.decoder.validation.txt
#typedef argvector<size_t> intargvector, which is not compatible with negative numbers
wordContext=0
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=40
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.output.txt
labelType=Category
beginSequence="<EOS>"
usewordmap=true
# input word list
token=$DataDir$\s31.decoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
# output token list
token=$DataDir$\s31.decoder.input.lst
endSequence="<EOS>"
labelMappingFile=$ExpDir$\sentencePhnfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set the output file paths
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=testEncoderDecoder
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=1
deviceId=$DeviceNumber$
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=PosteriorProb
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
encoderNodes="LSTM0:LSTM2"
decoderNodes="LSTM0:LSTM2"
encoderReader=[
# reader to use for the encoder
# these are letter-only observations
readerType=LUSequenceReader
ioNodeNames=letterInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.encoder.input.map
file=$DataDir$\s31.s2s.encoder.test.txt
#typedef argvector<size_t> intargvector, which is not compatible with negative numbers
wordContext=0:1:2
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
features=[
dim=84
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.input.txt
labelType=Category
beginSequence="BOS"
usewordmap=true
# input word list
token=$DataDir$\s31.encoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="<EOS>"
# output token list
token=$DataDir$\s31.decoder.input.lst
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
decoderReader=[
# reader to use for the decoder
# these are phoneme-only observations
readerType=LUSequenceReader
ioNodeNames=phnInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
### set to true so that it can use state activities from an encoder network
ignoresentencebegintag=true
phnInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.decoder.input.map
file=$DataDir$\s31.s2s.decoder.test.txt
TestEncodingForDecoding=false
#typedef argvector<size_t> intargvector, which is not compatible with negative numbers
wordContext=0
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
features=[
dim=40
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.output.txt
labelType=Category
beginSequence="<EOS>"
usewordmap=true
isproposal=true
proposalSymbolList=$DataDir$\s31.decoder.input.proposal.lst
# input word list
token=$DataDir$\s31.decoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
# output token list
token=$DataDir$\s31.decoder.input.lst
endSequence="<EOS>"
labelMappingFile=$ExpDir$\sentencePhnfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\s31.decoder.input.lst
]
]
]
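With beamWidth=1 the LSTMTest section above performs greedy decoding: the encoder consumes the letter sequence, and the decoder, seeded with the <EOS> symbol, emits at most maxNbrTokens phonemes. A minimal Python sketch of that loop, where encode and decoder_step are hypothetical stand-ins for the CNTK evaluation engine, not real APIs:

# Greedy (beam width 1) decoding loop, as configured by beamWidth=1 and
# maxNbrTokens=10 above. encode and decoder_step are illustrative
# placeholders for the encoder/decoder network evaluations.
def greedy_decode(encode, decoder_step, letters, eos_id, max_tokens=10):
    state = encode(letters)            # encoder state initializes the decoder
    token, output = eos_id, []         # decoding starts from the <EOS> symbol
    for _ in range(max_tokens):        # maxNbrTokens caps the hypothesis length
        scores, state = decoder_step(token, state)
        token = max(range(len(scores)), key=scores.__getitem__)  # best next token
        if token == eos_id:            # stop once <EOS> is emitted again
            break
        output.append(token)
    return output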

View file

@ -1,3 +0,0 @@
# News comments setup
# steps are under the steps directory

View file

@ -1,92 +0,0 @@
# python scripts
'''
add silence symbols to the beginning and end of a sentence;
the silence begin/end symbol is </s>
example:
add_silence_ending('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt')
'''
def add_silence_ending(fn, fnout):
    outfile = open(fnout, 'wt')
    with open(fn) as infile:
        for line in infile:
            line = line.strip()
            newline = '</s> ' + line + ' </s>'
            outfile.write(newline + '\n')
    outfile.close()

'''
create validation (first 10% of lines), test (last 10%) and training (remaining) splits
example:
split_data_into_train_valid_test('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.train.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.valid.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.test.txt')
'''
def split_data_into_train_valid_test(fn, fntrain, fnvalid, fntest):
    outfile_train = open(fntrain, 'wt')
    outfile_valid = open(fnvalid, 'wt')
    outfile_test = open(fntest, 'wt')
    # first count the total number of lines
    totalln = 0
    with open(fn) as infile:
        for ln in infile:
            totalln += 1
    linenbr = 0
    with open(fn) as infile:
        for line in infile:
            if linenbr < 0.1 * totalln:
                outfile_valid.write(line)
            elif linenbr > 0.9 * totalln:
                outfile_test.write(line)
            else:
                outfile_train.write(line)
            linenbr += 1
    outfile_train.close()
    outfile_test.close()
    outfile_valid.close()

'''
convert to an ASCII file (Python 2: decode each line from UTF-8, then drop non-ASCII characters)
example:
util.convert2ascii('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.ascii.txt')
'''
def convert2ascii(fn, fnout):
    of = open(fnout, 'wt')
    with open(fn) as infile:
        for line in infile:
            line = line.strip()
            if len(line) > 0:
                lineu = line.decode('utf8')
                of.write(lineu.encode("ASCII", 'ignore'))
                of.write('\n')
    of.close()

'''
remove the leading news-agency tag (everything up to ') -' or ') ') from each line
example:
util.removeagency('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.ascii.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.ascii.noagency.txt')
'''
def removeagency(fn, fnout):
    of = open(fnout, 'wt')
    with open(fn) as infile:
        for line in infile:
            line = line.strip()
            agency_index = 0
            if ') -' in line:
                agency_index = line.find(') -') + 4
            if agency_index == 0 and ') ' in line:
                agency_index = line.find(') ') + 3
            nline = line[agency_index:]
            of.write(nline)
            of.write('\n')
    of.close()
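The helpers above are meant to be chained when preparing the news-comments corpus. A minimal usage sketch, assuming the module is saved as util.py (the docstrings invoke it as util.*); the short file names below are placeholders for the full share paths shown in the examples:

import util

raw_fn = 'comments.txt'            # placeholder paths; the docstring examples
cntk_fn = 'comments.cntk.txt'      # use the full \\speechstore5 share paths

util.add_silence_ending(raw_fn, cntk_fn)   # wrap every line in </s> ... </s>
util.split_data_into_train_valid_test(
    cntk_fn,
    'comments.cntk.train.txt',             # remaining ~80% of the lines
    'comments.cntk.valid.txt',             # first 10% of the lines
    'comments.cntk.test.txt')              # last 10% of the lines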

View file

@ -1,5 +0,0 @@
ExpDir=\\speechstore5\transient\kaishengy\exp\News\steps1\
ConfigDir=D:\dev\cntkcodeplex\ExampleSetups\News\setups\
#DataDir=\\speechstore5\transient\kaishengy\data\newscomments\2015\03-23
DataDir=d:\data\newscomments\2015\03-23
DEVICE=-1

View file

@ -1,28 +0,0 @@
#WorkDir=//speechstore5/transient/kaishengy/data/lts/Data/CNTK
WorkDir=d:/exp/smt
DataDir=d:\data\newscomments\2015\03-23
#DataDir=//speechstore5/transient/kaishengy/data/newscomments/2015/03-23
ExpDir=d:\exp\smt\result\expcsen
OutDir=$ExpDir$
LogDir=$ExpDir$\log
DeviceNumber=Auto
MdlDir=$ExpDir$
# source side info
SRCFEATDIM=160001
SRCBEGINSYMBOL=BOS
# target side info
TGTBEGINSYMBOL=<EOS>
TGTENDSYMBOL=<EOS>
#TGTFEATDIM=82922
TGTFEATDIM=80002
# dimensionality
VLSTMDIM=100
VEMBDIM=50
TRAINSRCFILE=news.cntk.train.txt
TRAINTGTFILE=comments.cntk.train.txt
VALIDATESRCFILE=news.cntk.valid.txt
VALIDATETGTFILE=comments.cntk.valid.txt
TESTSRCFILE=news.cntk.test.txt
TESTTGTFILE=comments.cntk.test.txt
VOCABSIZE=2264
CLASSSIZE=100

View file

@ -1,457 +0,0 @@
# configuration file for class-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
command=writeWordAndClassInfo:train
#command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NBR=2
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$ExpFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLSTM
SimpleNetworkBuilder=[
trainingCriterion=classcrossentropywithsoftmax
evalCriterion=classcrossentropywithsoftmax
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readers=textIn:binaryIn
# readers=textIn
randomize=None
nbruttsineachrecurrentiter=$NBR$
textIn=[
readerType=LMSequenceReader
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# feature dimension (zero when the sentence stream carries no features)
dim=$VOCABSIZE$
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
binaryIn=[
readerType=UCIFastReader
onelineperfile=true
binaryFeature=[
onelineperfile=true
# onelineperfile: each line has all data for a file
dim=20
start=0
file=$DataFolder$\train.lda.dat
]
]
]
cvReader=[
readers=textIn:binaryIn
# readers=textIn
randomize=None
nbruttsineachrecurrentiter=6
textIn=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
binaryIn=[
readerType=UCIFastReader
onelineperfile=true
binaryFeature=[
onelineperfile=true
# onelineperfile: each line has all data for a file
dim=20
start=0
file=$DataFolder$\validate.lda.dat
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeClassBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
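The config above trains with trainingCriterion=classcrossentropywithsoftmax: writeWordAndClass first assigns each of the $VOCABSIZE$ words to one of $CLASSSIZE$ classes, and the output softmax is then factored as P(w|h) = P(class(w)|h) * P(w|class(w),h), so normalization runs over one class plus its members rather than the whole vocabulary. A rough numpy sketch of that factorization; all names and shapes are illustrative, not CNTK internals:

import numpy as np

def class_softmax_logprob(h, W_cls, W_word, word2cls, cls_members, w):
    # class part: softmax over the nbrClass classes
    cls_scores = W_cls @ h
    cls_logp = cls_scores - np.logaddexp.reduce(cls_scores)
    # word part: softmax only over the words sharing w's class
    c = word2cls[w]
    members = cls_members[c]                  # list of word ids in class c
    word_scores = W_word[members] @ h
    word_logp = word_scores - np.logaddexp.reduce(word_scores)
    return cls_logp[c] + word_logp[members.index(w)]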

View file

@ -1,424 +0,0 @@
# configuration file for class-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
#command=writeWordAndClassInfo
command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$ExpFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
SimpleNetworkBuilder=[
trainingCriterion=classcrossentropywithsoftmax
evalCriterion=classcrossentropywithsoftmax
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=10
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
cvReader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeClassBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]

View file

@ -1,424 +0,0 @@
# configuration file for class-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
#command=writeWordAndClassInfo
command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$ExpFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
SimpleNetworkBuilder=[
trainingCriterion=classcrossentropywithsoftmax
evalCriterion=classcrossentropywithsoftmax
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=10
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
cvReader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeClassBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]

View file

@ -1,421 +0,0 @@
# configuration file for NCE-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
command=writeWordAndClassInfo
#command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$DataFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=1
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=NCELSTM
SimpleNetworkBuilder=[
trainingCriterion=NoiseContrastiveEstimationNode
evalCriterion=NoiseContrastiveEstimationNode
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
#nbrClass=$CLASSSIZE$
noise_number=$NOISE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=10
# word class info
wordclass=$DataFolder$\vocab.txt
noise_number=$NOISE$
mode=nce
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
cvReader=[
# reader to use
readerType=LMSequenceReader
randomize=None
mode=softmax
# word class info
wordclass=$DataFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=1
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=NCELSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeNCEBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
mode=softmax
# word class info
wordclass=$DataFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
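This variant swaps the class-based criterion for NoiseContrastiveEstimationNode (rnnType=NCELSTM): during training (mode=nce) each observed word is contrasted against noise_number sampled words, so the full softmax is never normalized, while evaluation falls back to mode=softmax. A rough sketch of the per-word NCE loss; score_fn and noise_logprob are illustrative stand-ins for the model score and the noise distribution:

import numpy as np

def nce_loss(score_fn, noise_logprob, target, noise_samples, k):
    # posterior that w came from the data rather than the noise distribution:
    # log P(data|w) = s(w) - logaddexp(s(w), log(k) + log q(w))
    def log_posteriors(w):
        s = score_fn(w)
        log_kq = np.log(k) + noise_logprob(w)
        norm = np.logaddexp(s, log_kq)
        return s - norm, log_kq - norm
    loss = -log_posteriors(target)[0]         # observed word should look like data
    for w in noise_samples:                   # k sampled words should look like noise
        loss -= log_posteriors(w)[1]
    return loss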

View file

@ -1,792 +0,0 @@
# configuration file for CNTK encoder-decoder (sequence-to-sequence) training on the news-comments data
stderr=$LogDir$\SMT\log
command=writeEncoderWordAndClassInfo:writeDecoderWordAndClassInfo:LSTM
LSTMDIM=$VLSTMDIM$
EMBDIM=$VEMBDIM$
NBRUTT=1
type=double
writeEncoderWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataDir$\$TRAINSRCFILE$
outputVocabFile=$ExpDir$\vocab.src.txt
outputWord2Cls=$ExpDir$\word2cls.src.txt
outputCls2Index=$ExpDir$\cls2idx.src.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
writeDecoderWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataDir$\$TRAINTGTFILE$
outputVocabFile=$ExpDir$\vocab.tgt.txt
outputWord2Cls=$ExpDir$\word2cls.tgt.txt
outputCls2Index=$ExpDir$\cls2idx.tgt.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=trainEncoderDecoder
makeMode=true
# recurrent networks are trained with minibatches;
# the minibatch size, for example in language modeling, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# in the learning process, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
# needs to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
# for each epoch, maximum number of input words is set below
# epochSize=12075604 sentences
# number of words
epochSize=315956058
# half pass of data
# epochSize=6075604
# 1% of training data
# epochSize=3159560
EncoderNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
sparseinput=true
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=$VOCABSIZE$:$EMBDIM$:$LSTMDIM$
# this input stream doesn't support context-dependent inputs
streamSizes=$SRCFEATDIM$
lookupTableOrderSizes=1
rnnType=LSTMENCODER
lookupTableOrder=1
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
DecoderNetworkBuilder=[
trainingCriterion=ClassCrossEntropyWithSoftmax
evalCriterion=ClassCrossEntropyWithSoftmax
sparseinput=true
nbrClass=79
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
# the second layer must have the same dimension as the first layer
# because 40 must match the output-layer dimension of the encoder network
layerSizes=$VOCABSIZE$:$EMBDIM$:$LSTMDIM$:$VOCABSIZE$
recurrentLayer=2
# this input stream doesn't support context-dependent inputs
streamSizes=40
lookupTableOrderSizes=1
rnnType=ALIGNMENTSIMILARITYGENERATOR
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.0001
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# use hidden states for encoder-decoder training
useHiddenStates=true
encoderNodes="LSTM0"
decoderNodes="LSTM0"
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for information purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
#gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\smt.lstm
# if validation shows no improvement, back up to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
encoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.src.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.src.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$TRAINSRCFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
decoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.tgt.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.tgt.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$TRAINTGTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
encoderCVReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.src.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.src.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$VALIDATESRCFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
decoderCVReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.tgt.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.tgt.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$VALIDATETGTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
]
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=testEncoderDecoder
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=1
deviceId=$DeviceNumber$
epochSize=5000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
encoderModelPath=$MdlDir$\smt.lstm.encoder
decoderModelPath=$MdlDir$\smt.lstm.decoder
# this is the node to evaluate scores
evalNodeNames=PosteriorProb
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
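# beamWidth=1 amounts to greedy decoding; maxNbrTokens presumably caps the number of tokens
# generated per output sequence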
minibatchSize=1000
encoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.src.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.src.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$TESTSRCFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
decoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.tgt.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.tgt.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
#file=$DataDir$\$TESTTGTFILE$
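# (no target-side input at test time; the decoder generates the output sequence)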
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
writer=[
writerType=LMSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$ExpDir$\vocab.tgt.txt
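# decoded hypotheses are written to output.rec.txt; the token file presumably supplies the
# index-to-word mapping for the target vocabulary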
]
]
]


@ -1,71 +0,0 @@
# step 1
# generate LSTM LM on news comments
# -----------------------------
# preparing data
# data is also saved at
# \\speechstore5\transient\kaishengy\data\newscomments\2015\03-23
# -----------------------------
# first add </s> at the beginning and end of each sentence
# go to ../scripts directory
python
>>> import util
>>> util.add_silence_ending('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt')
# the output sentences look like
# </s> god speed gentlemen . </s>
# convert data to ASCII format
>>> util.convert2ascii('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.ascii.txt')
# create test (last 10%), validation (first 10%) data, and train (remaining 80%) data
>>> util.split_data_into_train_valid_test('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.ascii.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.train.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.valid.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.test.txt')
# prepare source side data
>>> import util
>>> util.add_silence_ending('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.txt')
>>> util.add_silence_ending('d:/data/newscomments/2015/03-23/news.txt', 'd:/data/newscomments/2015/03-23/news.cntk.txt')
# the output sentences look like
# </s> god speed gentlemen . </s>
# convert data to ASCII format
>>> util.convert2ascii('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.ascii.txt')
>>> util.convert2ascii('d:/data/newscomments/2015/03-23/news.cntk.txt', 'd:/data/newscomments/2015/03-23/news.cntk.ascii.txt')
# create test (last 10%), validation (first 10%) data, and train (remaining 80%) data
>>> util.split_data_into_train_valid_test('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.ascii.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.train.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.valid.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.test.txt')
>>> util.split_data_into_train_valid_test('d:/data/newscomments/2015/03-23/news.cntk.ascii.txt', 'd:/data/newscomments/2015/03-23/news.cntk.train.txt', 'd:/data/newscomments/2015/03-23/news.cntk.valid.txt', 'd:/data/newscomments/2015/03-23/news.cntk.test.txt')
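# note: util.py itself is not included in this commit; a minimal sketch of what
# add_silence_ending presumably does, judging from the example output above:
def add_silence_ending(infn, outfn):
    # wrap each sentence in the </s> boundary token expected by the CNTK readers
    with open(infn) as fin, open(outfn, 'w') as fout:
        for line in fin:
            fout.write('</s> ' + line.strip() + ' </s>\n')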
# -----------------------------
# create word cluster
# -----------------------------
# go to the ../steps directory
# use cutoff = 2
mkdir \\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments
D:\dev\cntkcodeplex\x64\Debug\CNTK.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments command=writeWordAndClassInfo
# check $ExpDir$ and $DataFolder$ for outputs
# -----------------------------
# Train LSTM LM using NCE criterion
# -----------------------------
# the vocabulary size is 2263
# locally
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments command=train
# on server
\\speechstore5\userdata\kaishengy\bin\binmay26\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments command=train
# test PPL: 61.31 with 1 LSTM layer of hidden dimension 200
\\speechstore5\userdata\kaishengy\bin\binmay26\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments test=[modelPath=$ExpFolder$\modelRnnCNTK] command=test
# ---- 2 layers of LSTM --------
# locally, with 2 layers of LSTM
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.nce.config.txt ExpDir=d:\exp\news\s1.lstmlm.comments.2layers command=train train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:200:200:$VOCABSIZE$]] DataDir=d:\data\newscomments\2015\03-23
# on server
\\speechstore5\userdata\kaishengy\bin\binmay26\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments.2layers command=train train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:200:200:$VOCABSIZE$]]
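# layerSizes runs input:hidden(s):output, so each additional ":200" adds one more LSTM layer
# (compare the 3-layer variant below)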
# test
# PPL = 36.1999
\\speechstore5\userdata\kaishengy\bin\binmay28\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments.2layers test=[modelPath=$ExpFolder$\modelRnnCNTK] command=test
# with 3 layers of LSTM: PPL = 52.85
\\speechstore5\userdata\kaishengy\bin\binmay28\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments.3layers train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:200:200:200:$VOCABSIZE$]] test=[modelPath=$ExpFolder$\modelRnnCNTK] DEVICE=0


@ -1,40 +0,0 @@
# this uses a Kneser-Ney (KN) n-gram model to construct a language model
set BINDIR=\\speechstore5\transient\kaishengy\tools\SRILM\SRILM\bin\msvc64
set DATADIR=\\speechstore5\transient\kaishengy\data\newscomments\2015\03-23
set TRAINFILE=%DATADIR%\comments.cntk.train.txt
set TESTFILE=%DATADIR%\comments.cntk.test.txt
set EXPDIR=\\speechstore5\transient\kaishengy\exp\news\s2.knlm.comments
set OUTLMFN=%EXPDIR%\kn3.lm
# ----------------------
# 3-gram
# ----------------------
%BINDIR%\ngram-count.exe -no-sos -no-eos -text %TRAINFILE% -lm %OUTLMFN% -unk
# test PPL
%BINDIR%\ngram.exe -lm %OUTLMFN% -ppl %TESTFILE% > %EXPDIR%\ppl_3gm.ppl
# results from KN 3-gram model
# D:\dev\cntkcodeplex\ExampleSetups\News\steps>more %EXPDIR%\ppl_3gm.ppl
# file \\speechstore5\transient\kaishengy\data\newscomments\2015\03-23\comments.cntk.test.txt: 153 sentences, 8848 words, 0 OOVs
# 0 zeroprobs, logprob= -17617.4 ppl= 90.6291 ppl1= 97.9744
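# for reference, SRILM computes ppl = 10^(-logprob/(words+sentences)) and ppl1 = 10^(-logprob/words):
# 10^(17617.4/9001) ≈ 90.63 and 10^(17617.4/8848) ≈ 97.97, matching the numbers above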
# ----------------------
# class-based LM
# ----------------------
set CLASSCNTFILE=%EXPDIR%\class.cnt
set CLASSFILE=%EXPDIR%\class.txt
%BINDIR%\ngram-class.exe -numclasses 50 -class-counts %CLASSCNTFILE% -classes %CLASSFILE% -text %TRAINFILE%
# test PPL
%BINDIR%\ngram.exe -lm %OUTLMFN% -classes %CLASSFILE% -ppl %TESTFILE% > %EXPDIR%\ppl_3gm.50classes.ppl
# results from the 50-class 3-gram LM
# D:\dev\cntkcodeplex\ExampleSetups\News\steps>more %EXPDIR%\ppl_3gm.50classes.ppl
# file \\speechstore5\transient\kaishengy\data\newscomments\2015\03-23\comments.cntk.test.txt: 153 sentences, 8848 words, 0 OOVs
# 0 zeroprobs, logprob= -17617.4 ppl= 90.629 ppl1= 97.9744


@ -1,14 +0,0 @@
# -----------------------------
# Train LSTM LM using class-based criterion
# -----------------------------
# locally
C:\dev\cntk5\x64\Release\CNTK.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.classlm.config ExpDir=d:\exp\news\s1.lstmclslm.comments
# ------------ use LSTM components --------------
# 1-layer Perplexity = 29.668369
\\speechstore5\userdata\kaishengy\bin\binmay28a\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.classlm.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.clstmcls100lm CLASSSIZE=100 train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$]] DEVICE=0
# 2-layers PPL
\\speechstore5\userdata\kaishengy\bin\binmay28a\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.classlm.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.clstmcls100lm.2layer CLASSSIZE=100 train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:100:200:200:$VOCABSIZE$]] DEVICE=1
\\speechstore5\userdata\kaishengy\bin\binmay28a\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.classlm.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.clstmcls100lm.2layer test=[modelPath=$ExpFolder$\modelRnnCNTK] command=test


@ -1,8 +0,0 @@
# -----------------------------
# Train LSTM LM using class-based criterion
# -----------------------------
# locally
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=d:\dev\cntkcodeplex\examplesetups\news\setups\global.config+d:\dev\cntkcodeplex\examplesetups\news\setups\lstmlm.gpu.classlm.2stream.config ExpDir=d:\exp\news\s4.lstmclslm.dbg command=train train=[SGD=[gradientcheck=true]] DEVICE=-1
# debug without multi-streams to make sure it is backward compatible
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.classlm.config ExpDir=d:\exp\news\s4.lstmclslm.dbg command=writeWordAndClassInfo:train


@ -1,10 +0,0 @@
# this uses a smaller learning rate
d:\exp\bin\cntk\cntk.exe configFile=..\setups\global.s2s.config+..\setups\s2s.class.alignment.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\news\s2sryrun LSTM=[SGD=[learningRatesPerSample=0.00001]] LSTM=[SGD=[gradientcheck=TRUE]]
# with 1 layer of LSTM, embedding dimension 50, LSTM dimension 100
d:\bin\cntkjuly7\cntk.exe configFile=..\setups\global.s2s.config+..\setups\s2s.class.alignment.config DeviceNumber=1 LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\news\attention_dryrun LSTM=[SGD=[learningRatesPerSample=0.001]]
# test
\\speechstore5\transient\kaishengy\bin\binjuly2\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\smt\setups\global.cs-en.config+\\speechstore5\userdata\kaishengy\exp\smt\setups\s2s.class.alignment.config LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\smt\c2elr1e3cs200emb50lstm100 MdlDir=//speechstore5/transient/kaishengy/exp/smt/c2elr1e3cs200emb50lstm100 LSTMTest=[encoderModelPath=$MdlDir$\smt.lstm.encoder.27] LSTMTest=[decoderModelPath=$MdlDir$\smt.lstm.decoder.27] NBRUTT=1 command=LSTMTest DeviceNumber=-1 LSTMTest=[beamWidth=0.05]
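# note the fractional beamWidth=0.05: beamWidth here presumably acts as a pruning threshold
# on probability mass rather than as an n-best count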


@ -1,33 +0,0 @@
//
// <copyright file="DataReaderClient.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// DataReaderClient.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "DataReader.h"
using namespace std;
using namespace Microsoft::MSR::CNTK;
int _tmain(int argc, _TCHAR* argv[])
{
size_t vdim = 429;
size_t udim = 1504;
vector<wstring> filepaths;
filepaths.push_back( wstring(L"C:\\speech\\swb300h\\data\\archive.swb_mini.52_39.notestspk.dev.small.scplocal"));
filepaths.push_back( wstring(L"C:\\speech\\swb300h\\data\\swb_mini.1504.align.small.statemlf"));
DataReader<float> dataReader(vdim, udim, filepaths, wstring(L""), 4096);
Matrix<float> features;
Matrix<float> labels;
dataReader.StartMinibatchLoop(256, 0);
int i = 0;
while (dataReader.GetMinibatch(features, labels))
{
fprintf(stderr,"%4d: features dim: %d x %d - [%.8g, %.8g, ...] label dim: %d x %d - [%d, %d, ...]\n", i++, features.GetNumRows(), features.GetNumCols(), features(0,0), features(0,1), labels.GetNumRows(), labels.GetNumCols(), (int)labels(0,0), (int)labels(1,0));
}
return 0;
}


@ -1,194 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{89A1BB74-345D-4CB4-BC56-2C05D8076183}</ProjectGuid>
<SccProjectName>SAK</SccProjectName>
<SccAuxPath>SAK</SccAuxPath>
<SccLocalPath>SAK</SccLocalPath>
<SccProvider>SAK</SccProvider>
<Keyword>Win32Proj</Keyword>
<RootNamespace>DataReaderClient</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSDK_LibraryPath_x86);</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IncludePath>..\..\common\include;..\..\Source\Math;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSDK_LibraryPath_x86);</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>..\..\common\include;..\..\Source\Math;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);</LibraryPath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>..;..\..\Source\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)/lib/*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
<AdditionalDependencies>Math.lib;HTKMLFReader.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)\lib\*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>Use</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>..;..\..\Source\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)/lib/*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>Use</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
<AdditionalDependencies>Math.lib;HTKMLFReader.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)\lib\*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
<Text Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="DataReaderClient.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>


@ -1,40 +0,0 @@
========================================================================
CONSOLE APPLICATION : DataReaderClient Project Overview
========================================================================
AppWizard has created this DataReaderClient application for you.
This file contains a summary of what you will find in each of the files that
make up your DataReaderClient application.
DataReaderClient.vcxproj
This is the main project file for VC++ projects generated using an Application Wizard.
It contains information about the version of Visual C++ that generated the file, and
information about the platforms, configurations, and project features selected with the
Application Wizard.
DataReaderClient.vcxproj.filters
This is the filters file for VC++ projects generated using an Application Wizard.
It contains information about the association between the files in your project
and the filters. This association is used in the IDE to show grouping of files with
similar extensions under a specific node (e.g. ".cpp" files are associated with the
"Source Files" filter).
DataReaderClient.cpp
This is the main application source file.
/////////////////////////////////////////////////////////////////////////////
Other standard files:
StdAfx.h, StdAfx.cpp
These files are used to build a precompiled header (PCH) file
named DataReaderClient.pch and a precompiled types file named StdAfx.obj.
/////////////////////////////////////////////////////////////////////////////
Other notes:
AppWizard uses "TODO:" comments to indicate parts of the source code you
should add to or customize.
/////////////////////////////////////////////////////////////////////////////


@ -1,13 +0,0 @@
//
// <copyright file="stdafx.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.cpp : source file that includes just the standard includes
// DataReaderClient.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file


@ -1,20 +0,0 @@
//
// <copyright file="stdafx.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "targetver.h"
#include <stdio.h>
#include <tchar.h>
// TODO: reference additional headers your program requires here


@ -1,13 +0,0 @@
//
// <copyright file="targetver.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>


@ -1,84 +0,0 @@
//
// <copyright file="DataReaderUnitTest.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#include "CppUnitTest.h"
#include "DataReader.h"
using namespace std;
using namespace Microsoft::MSR::CNTK;
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
namespace DataReaderTest
{
TEST_CLASS(UCIDataReaderUnitTest)
{
public:
// StandardLoopTest
// Test of the DataReader loop
TEST_METHOD(TestMode)
{
size_t vdim = 785;
size_t udim = 10;
size_t epochSize = 500;
size_t mbSize = 256;
size_t epochs = 2;
vector<wstring> filepaths;
filepaths.push_back( wstring(L"C:\\speech\\mnist\\mnist_test.txt"));
DataReader<float, int> dataReader(vdim, udim, filepaths, wstring(L"-label:none -minibatchmode:partial "), randomizeNone); //-labels:regression
Matrix<float> features;
Matrix<float> labels;
for (int epoch = 0; epoch < epochs; epoch++)
{
dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
for (int cnt = 0;dataReader.GetMinibatch(features, labels);cnt++)
{
Assert::IsTrue(labels.GetNumRows() == 0);
Assert::IsTrue(features.GetNumRows() == 785);
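// epochSize=500 with mbSize=256 leaves a partial second minibatch of 500-256=244 frames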
Assert::IsTrue(features.GetNumCols() == (cnt?244:mbSize));
for (int i = 1;i < features.GetNumCols();i++)
{
// really labels, these should be in order
Assert::IsTrue(features(0,i-1) <= features(0,i));
}
}
}
}
TEST_METHOD(Partial)
{
size_t vdim = 784;
size_t udim = 10;
size_t epochSize = 500;
size_t mbSize = 256;
size_t epochs = 2;
vector<wstring> filepaths;
filepaths.push_back( wstring(L"C:\\speech\\mnist\\mnist_test.txt"));
DataReader<float, int> dataReader(vdim, udim, filepaths, wstring(L"-label:first -labeltype:category -minibatchmode:partial "), randomizeNone); //-labels:regression
Matrix<float> features;
Matrix<float> labels;
for (int epoch = 0; epoch < epochs; epoch++)
{
dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
for (int cnt = 0;dataReader.GetMinibatch(features, labels);cnt++)
{
Assert::IsTrue(labels.GetNumRows() == udim);
Assert::IsTrue(features.GetNumRows() == 785);
Assert::IsTrue(features.GetNumCols() == (cnt?244:mbSize));
for (int i = 1;i < features.GetNumCols();i++)
{
// really labels, these should be in order
Assert::IsTrue(features(0,i-1) <= features(0,i));
}
}
}
}
};
}


@ -1,13 +0,0 @@
//
// <copyright file="stdafx.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.cpp : source file that includes just the standard includes
// DataReaderTest.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file


@ -1,18 +0,0 @@
//
// <copyright file="stdafx.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "targetver.h"
// Headers for CppUnitTest
#include "CppUnitTest.h"
// TODO: reference additional headers your program requires here


@ -1,13 +0,0 @@
//
// <copyright file="targetver.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>


@ -1,73 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HTKMLFReader", "HTKMLFReader\HTKMLFReader.vcxproj", "{33D2FD22-DEF2-4507-A58A-368F641AEBE5}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMath", "..\Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DataReaderClient", "DataReaderClient\DataReaderClient.vcxproj", "{89A1BB74-345D-4CB4-BC56-2C05D8076183}"
ProjectSection(ProjectDependencies) = postProject
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Global
GlobalSection(TeamFoundationVersionControl) = preSolution
SccNumberOfProjects = 3
SccEnterpriseProvider = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccTeamFoundationServer = http://vstfcodebox:8080/tfs/kappa
SccProjectUniqueName0 = ..\\Math\\Math\\Math.vcxproj
SccProjectName0 = ../Math/Math
SccAuxPath0 = http://vstfcodebox:8080/tfs/kappa
SccLocalPath0 = ..\\Math\\Math
SccProvider0 = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccProjectUniqueName1 = DataReaderClient\\DataReaderClient.vcxproj
SccProjectName1 = DataReaderClient
SccAuxPath1 = http://vstfcodebox:8080/tfs/kappa
SccLocalPath1 = DataReaderClient
SccProvider1 = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccProjectUniqueName2 = HTKMLFReader\\HTKMLFReader.vcxproj
SccProjectName2 = HTKMLFReader
SccAuxPath2 = http://vstfcodebox:8080/tfs/kappa
SccLocalPath2 = HTKMLFReader
SccProvider2 = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Win32.ActiveCfg = Debug|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Win32.Build.0 = Debug|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.ActiveCfg = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.Build.0 = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Win32.ActiveCfg = Release|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Win32.Build.0 = Release|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.ActiveCfg = Release|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.Build.0 = Release|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Win32.ActiveCfg = Debug|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Win32.Build.0 = Debug|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.ActiveCfg = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.Build.0 = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Win32.ActiveCfg = Release|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Win32.Build.0 = Release|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.ActiveCfg = Release|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.Build.0 = Release|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|Win32.ActiveCfg = Debug|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|Win32.Build.0 = Debug|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|x64.ActiveCfg = Debug|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|x64.Build.0 = Debug|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|Win32.ActiveCfg = Release|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|Win32.Build.0 = Release|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|x64.ActiveCfg = Release|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal


@ -1,66 +0,0 @@
//
// <copyright file="DataReader.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// DataReader.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#ifdef _WIN32
#include <objbase.h>
#endif
#include "basetypes.h"
#include "htkfeatio.h" // for reading HTK features
//#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
//#include "simplesenonehmm.h" // for MMI scoring
//#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
#include "rollingwindowsource.h" // minibatch sources
#include "utterancesource.h"
//#include "readaheadsource.h"
#include "chunkevalsource.h"
#define DATAREADER_EXPORTS
#include "DataReader.h"
#include "HTKMLFReader.h"
#include "Config.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void DATAREADER_API GetReader(IDataReader<ElemType>** preader)
{
*preader = new HTKMLFReader<ElemType>();
}
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
{
GetReader(preader);
}
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
{
GetReader(preader);
}
// Utility function, in ConfigFile.cpp, but HTKMLFReader doesn't need that code...
// Trim - trim white space off the start and end of the string
// str - string to trim
// NOTE: if the entire string is empty, then the string will be set to an empty string
/* void Trim(std::string& str)
{
auto found = str.find_first_not_of(" \t");
if (found == npos)
{
str.erase(0);
return;
}
str.erase(0, found);
found = str.find_last_not_of(" \t");
if (found != npos)
str.erase(found+1);
}*/
}}}


@ -1,104 +0,0 @@
//
// <copyright file="DataWriter.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// DataWriter.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#include "basetypes.h"
#include "htkfeatio.h" // for reading HTK features
#define DATAWRITER_EXPORTS
#include "DataWriter.h"
#include "HTKMLFWriter.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void DATAWRITER_API GetWriter(IDataWriter<ElemType>** pwriter)
{
*pwriter = new HTKMLFWriter<ElemType>();
}
extern "C" DATAWRITER_API void GetWriterF(IDataWriter<float>** pwriter)
{
GetWriter(pwriter);
}
extern "C" DATAWRITER_API void GetWriterD(IDataWriter<double>** pwriter)
{
GetWriter(pwriter);
}
template<class ElemType>
template<class ConfigRecordType> void DataWriter<ElemType>::InitFromConfig(const ConfigRecordType & writerConfig)
{
m_dataWriter = new HTKMLFWriter<ElemType>();
m_dataWriter->Init(writerConfig);
}
// Destroy - cleanup and remove this class
// NOTE: this destroys the object, and it can't be used past this point
template<class ElemType>
void DataWriter<ElemType>::Destroy()
{
delete m_dataWriter;
m_dataWriter = NULL;
}
// DataWriter Constructor
// config - [in] configuration data for the data writer
template<class ElemType>
template<class ConfigRecordType> DataWriter<ElemType>::DataWriter(const ConfigRecordType & config)
{
Init(config);
}
// destructor - cleanup temp files, etc.
template<class ElemType>
DataWriter<ElemType>::~DataWriter()
{
delete m_dataWriter;
m_dataWriter = NULL;
}
// GetSections - Get the sections of the file
// sections - a map of section name to section. Data specifications from the config file will be used to determine where and how to save data
template<class ElemType>
void DataWriter<ElemType>::GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections)
{
m_dataWriter->GetSections(sections);
}
// SaveData - save data in the file/files
// recordStart - Starting record number
// matrices - a map of section name (section:subsection) to data pointer. Data specifications from the config file will be used to determine where and how to save data
// numRecords - number of records we are saving, can be zero if not applicable
// datasetSize - Size of the dataset
// byteVariableSized - for variable sized data, size of current block to be written, zero when not used, or ignored if not variable sized data
template<class ElemType>
bool DataWriter<ElemType>::SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized)
{
return m_dataWriter->SaveData(recordStart, matrices, numRecords, datasetSize, byteVariableSized);
}
// SaveMapping - save a map into the file
// saveId - name of the section to save into (section:subsection format)
// labelMapping - map we are saving to the file
template<class ElemType>
void DataWriter<ElemType>::SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& labelMapping)
{
m_dataWriter->SaveMapping(saveId, labelMapping);
}
// explicit instantiations
template class DataWriter<double>;
template class DataWriter<float>;
}}}

(file diff not shown because it is too large)


@ -1,135 +0,0 @@
//
// <copyright file="HTKMLFReader.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFReader.h - Include file for the MTK and MLF format of features and samples
#pragma once
#include "DataReader.h"
#include "Config.h" // for intargvector
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class HTKMLFReader : public IDataReader<ElemType>
{
private:
msra::dbn::minibatchiterator* m_mbiter;
msra::dbn::minibatchsource* m_frameSource;
//msra::dbn::minibatchreadaheadsource* m_readAheadSource;
msra::dbn::FileEvalSource* m_fileEvalSource;
msra::dbn::latticesource* m_lattices;
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
vector<bool> m_sentenceEnd;
bool m_readAhead;
bool m_truncated;
bool m_framemode;
vector<size_t> m_processedFrame;
size_t m_numberOfuttsPerMinibatch;
size_t m_actualnumberOfuttsPerMinibatch;
size_t m_mbSize;
vector<size_t> m_toProcess;
vector<size_t> m_switchFrame;
bool m_noData;
bool m_trainOrTest; // if false, in file writing mode
using LabelType = typename IDataReader<ElemType>::LabelType;
using LabelIdType = typename IDataReader<ElemType>::LabelIdType;
std::map<LabelIdType, LabelType> m_idToLabelMap;
bool m_partialMinibatch; // allow partial minibatches?
std::vector<ElemType*> m_featuresBufferMultiUtt;
std::vector<size_t> m_featuresBufferAllocatedMultiUtt;
std::vector<ElemType*> m_labelsBufferMultiUtt;
std::vector<size_t> m_labelsBufferAllocatedMultiUtt;
std::vector<size_t> m_featuresStartIndexMultiUtt;
std::vector<size_t> m_labelsStartIndexMultiUtt;
std::vector<ElemType*> m_featuresBufferMultiIO;
std::vector<size_t> m_featuresBufferAllocatedMultiIO;
std::vector<ElemType*> m_labelsBufferMultiIO;
std::vector<size_t> m_labelsBufferAllocatedMultiIO;
std::map<std::wstring,size_t> m_featureNameToIdMap;
std::map<std::wstring,size_t> m_labelNameToIdMap;
std::map<std::wstring,size_t> m_nameToTypeMap;
std::map<std::wstring,size_t> m_featureNameToDimMap;
std::map<std::wstring,size_t> m_labelNameToDimMap;
// for writing outputs to files (standard single input/output network) - deprecate eventually
bool m_checkDictionaryKeys;
bool m_convertLabelsToTargets;
std::vector <bool> m_convertLabelsToTargetsMultiIO;
std::vector<std::vector<std::wstring>> m_inputFilesMultiIO;
size_t m_inputFileIndex;
std::vector<size_t> m_featDims;
std::vector<size_t> m_labelDims;
std::vector<std::vector<std::vector<ElemType>>>m_labelToTargetMapMultiIO;
void PrepareForTrainingOrTesting(const ConfigParameters& config);
void PrepareForWriting(const ConfigParameters& config);
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
void StartMinibatchLoopToWrite(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
bool ReNewBufferForMultiIO(size_t i);
size_t GetNumParallelSequences() { return m_numberOfuttsPerMinibatch; }
void SetNumParallelSequences(const size_t) { };
size_t NumberSlicesInEachRecurrentIter() { return m_numberOfuttsPerMinibatch ;}
void SetNbrSlicesEachRecurrentIter(const size_t) { };
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels);
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
enum InputOutputTypes
{
real,
category,
};
public:
MBLayoutPtr m_pMBLayout;
/// by default it is false
/// if true, the reader sets SEQUENCE_MIDDLE for time positions that originally correspond to SEQUENCE_START,
/// so that the current minibatch can use state activities from the previous minibatch.
/// the default gives truncated BPTT, which only does BPTT inside a minibatch
bool mIgnoreSentenceBeginTag;
HTKMLFReader() : m_pMBLayout(make_shared<MBLayout>())
{
}
template<class ConfigRecordType> void InitFromConfig(const ConfigRecordType &);
virtual void Init(const ConfigParameters & config) override { InitFromConfig(config); }
virtual void Init(const ScriptableObjects::IConfigRecord & config) override { InitFromConfig(config); }
virtual void Destroy() {delete this;}
virtual ~HTKMLFReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool DataEnd(EndDataType endDataType);
void CopyMBLayoutTo(MBLayoutPtr);
void SetSentenceEndInBatch(vector<size_t> &/*sentenceEnd*/);
void SetSentenceEnd(int /*actualMbSize*/){};
void SetRandomSeed(int) { NOT_IMPLEMENTED };
bool RequireSentenceSeg() { return !m_framemode; };
};
}}}


@ -1,280 +0,0 @@
//
// <copyright file="HTKMLFWriter.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFReader.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#include "basetypes.h"
#include "htkfeatio.h" // for reading HTK features
//#ifndef __unix__
#include "ssematrix.h"
//#endif
//#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
//#include "simplesenonehmm.h" // for MMI scoring
//#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
//#include "rollingwindowsource.h" // minibatch sources
//#include "utterancesource.h"
//#include "readaheadsource.h"
//#include "chunkevalsource.h"
//#include "minibatchiterator.h"
#define DATAWRITER_EXPORTS // creating the exports here
#include "DataWriter.h"
#include "Config.h"
#include "HTKMLFWriter.h"
#include "Config.h"
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif
namespace Microsoft { namespace MSR { namespace CNTK {
// Create a Data Writer
//DATAWRITER_API IDataWriter* DataWriterFactory(void)
template<class ElemType>
void HTKMLFWriter<ElemType>::Init(const ConfigParameters& writerConfig)
{
m_tempArray = nullptr;
m_tempArraySize = 0;
vector<wstring> scriptpaths;
vector<wstring> filelist;
size_t numFiles;
size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing
ConfigArray outputNames = writerConfig(L"outputNodeNames","");
if (outputNames.size()<1)
RuntimeError("writer needs at least one outputNodeName specified in config");
foreach_index(i, outputNames) // outputNames should map to node names
{
ConfigParameters thisOutput = writerConfig(outputNames[i]);
if (thisOutput.Exists("dim"))
udims.push_back(thisOutput(L"dim"));
else
RuntimeError("HTKMLFWriter::Init: writer need to specify dim of output");
if (thisOutput.Exists("file"))
scriptpaths.push_back(thisOutput(L"file"));
else if (thisOutput.Exists("scpFile"))
scriptpaths.push_back(thisOutput(L"scpFile"));
else
RuntimeError("HTKMLFWriter::Init: writer needs to specify scpFile for output");
if (thisOutput.Exists("Kaldicmd"))
{
kaldicmd.push_back(thisOutput(L"Kaldicmd"));
}
outputNameToIdMap[outputNames[i]]= i;
outputNameToDimMap[outputNames[i]]=udims[i];
wstring type = thisOutput(L"type","Real");
if (type == L"Real")
{
outputNameToTypeMap[outputNames[i]] = OutputTypes::outputReal;
}
else
{
throw std::runtime_error ("HTKMLFWriter::Init: output type for writer output expected to be Real");
}
}
numFiles=0;
foreach_index(i,scriptpaths)
{
filelist.clear();
std::wstring scriptPath = scriptpaths[i];
fprintf(stderr, "HTKMLFWriter::Init: reading output script file %S ...", scriptPath.c_str());
size_t n = 0;
for (msra::files::textreader reader(scriptPath); reader && filelist.size() <= firstfilesonly/*optimization*/; )
{
filelist.push_back (reader.wgetline());
n++;
}
fprintf (stderr, " %zd entries\n", n);
if (i==0)
numFiles=n;
else
if (n!=numFiles)
throw std::runtime_error (msra::strfun::strprintf ("HTKMLFWriter::Init: number of files in each scriptfile inconsistent (%d vs. %d)", (int)numFiles, (int)n));
outputFiles.push_back(filelist);
}
outputFileIndex=0;
sampPeriod=100000;
}
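// For reference, Init (above) expects the following from writerConfig:
// an "outputNodeNames" array naming at least one output; then, per named
// output, a sub-config with a required "dim", a required "file" or "scpFile"
// (a script file listing one output path per SaveData call), an optional
// "Kaldicmd" (a Kaldi write specifier that overrides the default ark path),
// and an optional "type", which must be "Real" (the only supported value).
// All script files must contain the same number of entries.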
template<class ElemType>
void HTKMLFWriter<ElemType>::Destroy()
{
delete [] m_tempArray;
m_tempArray = nullptr;
m_tempArraySize = 0;
}
template<class ElemType>
void HTKMLFWriter<ElemType>::GetSections(std::map<std::wstring, SectionType, nocase_compare>& /*sections*/)
{
}
template<class ElemType>
bool HTKMLFWriter<ElemType>::SaveData(size_t /*recordStart*/, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t /*numRecords*/, size_t /*datasetSize*/, size_t /*byteVariableSized*/)
{
if (kaldicmd.size() == 0)
{
//std::map<std::wstring, void*, nocase_compare>::iterator iter;
if (outputFileIndex>=outputFiles[0].size())
RuntimeError("index for output scp file out of range...");
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
size_t id = outputNameToIdMap[outputName];
size_t dim = outputNameToDimMap[outputName];
wstring outFile = outputFiles[id][outputFileIndex];
assert(outputData.GetNumRows()==dim); dim; // reference dim to silence the unused-variable warning in release builds
SaveToKaldiFile(outFile,outputData);
}
outputFileIndex++;
} else
{
if (outputFileIndex>=outputFiles[0].size())
RuntimeError("index for output scp file out of range...");
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
size_t id = outputNameToIdMap[outputName];
size_t dim = outputNameToDimMap[outputName];
wstring outFile = outputFiles[id][outputFileIndex];
string wfea = "ark:" + msra::strfun::utf8(outFile);
wfea = msra::strfun::utf8(kaldicmd[0]); // kaldicmd[0] supplies the full Kaldi write specifier, overriding the default "ark:" path above
kaldi::BaseFloatMatrixWriter feature_writer(wfea);
kaldi::Matrix<kaldi::BaseFloat> nnet_out_host;
assert(outputData.GetNumRows()==dim); dim; // reference dim to silence the unused-variable warning in release builds
const std::string outputPath = msra::strfun::utf8(outFile);
const std::string file_key = removeExtension(basename(outputPath));
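// CNTK matrices hold one frame per column, while a Kaldi feature matrix
// holds one frame per row, so the copy below transposes as it goes
// (Kaldi row j <- CNTK column j).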
nnet_out_host.Resize(outputData.GetNumCols(), outputData.GetNumRows());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
nnet_out_host(j,i) = (float)*pValue++;
if (nnet_out_host(j,i) > 50)
{
nnet_out_host(j,i) = -(float)log(1.0/outputData.GetNumCols());
}
}
}
fprintf (stderr, "evaluate: writing %zd frames of %s\n", outputData.GetNumCols(), wfea.c_str());
feature_writer.Write(file_key, nnet_out_host);
}
outputFileIndex++;
}
return true;
}
template<class ElemType>
void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
output(i,j) = (float)*pValue++;
}
}
const size_t nansinf = output.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outputFile.c_str(), (int) output.cols());
// save it
msra::files::make_intermediate_dirs (outputFile);
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outputFile, "USER", this->sampPeriod, output);
});
fprintf (stderr, "evaluate: writing %zd frames of %S\n", output.cols(), outputFile.c_str());
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
output(i,j) = (float)*pValue++;
}
}
const size_t nansinf = output.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outputFile.c_str(), (int) output.cols());
// save it
msra::files::make_intermediate_dirs (outputFile);
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::writeKaldi (outputFile, "USER", this->sampPeriod, output, sizeof(ElemType));
});
fprintf (stderr, "evaluate: writing %zd frames of %S\n", output.cols(), outputFile.c_str());
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& /*labelMapping*/)
{
}
template class HTKMLFWriter<float>;
template class HTKMLFWriter<double>;
}}}


@ -1,52 +0,0 @@
//
// <copyright file="HTKMLFWriter.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFWriter.h - Include file for the HTK and MLF format of features and samples
#pragma once
#include "DataWriter.h"
#include <map>
#include <vector>
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class HTKMLFWriter : public IDataWriter<ElemType>
{
private:
std::vector<size_t> outputDims;
std::vector<std::vector<std::wstring>> outputFiles;
std::vector<std::wstring> kaldicmd;
std::vector<size_t> udims;
std::map<std::wstring,size_t> outputNameToIdMap;
std::map<std::wstring,size_t> outputNameToDimMap;
std::map<std::wstring,size_t> outputNameToTypeMap;
unsigned int sampPeriod;
size_t outputFileIndex;
void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
ElemType * m_tempArray;
size_t m_tempArraySize;
enum OutputTypes
{
outputReal,
outputCategory,
};
public:
using LabelType = typename IDataWriter<ElemType>::LabelType;
using LabelIdType = typename IDataWriter<ElemType>::LabelIdType;
template<class ConfigRecordType>
void InitFromConfig(const ConfigRecordType & writerConfig);
virtual void Init(const ConfigParameters & config) { InitFromConfig(config); }
virtual void Init(const ScriptableObjects::IConfigRecord & config) { InitFromConfig(config); }
virtual void Destroy();
virtual void GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections);
virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized);
virtual void SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& labelMapping);
};
}}}
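
A matching sketch for the writer side (again illustrative: the node name, config object, and matrix contents are assumptions):

HTKMLFWriter<float> writer;
writer.Init(config);                                  // config: a pre-populated ConfigParameters (assumed)
Matrix<float> outputMatrix(-1);                       // evaluated network output, dim x frames (assumed)
std::map<std::wstring, void*, nocase_compare> matrices;
matrices[L"ScaledLogLikelihood"] = &outputMatrix;     // key must match an outputNodeNames entry
size_t numFrames = outputMatrix.GetNumCols();
// Each SaveData call writes one utterance and consumes the next scp entry.
writer.SaveData(0, matrices, numFrames, numFrames, 0);
writer.Destroy();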


@ -1,219 +0,0 @@
// base/io-funcs-inl.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian; Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_IO_FUNCS_INL_H_
#define KALDI_BASE_IO_FUNCS_INL_H_ 1
// Do not include this file directly. It is included by base/io-funcs.h
#include <limits>
#include <vector>
namespace kaldi {
// Template that covers integers.
template<class T> void WriteBasicType(std::ostream &os,
bool binary, T t) {
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1)
* static_cast<char>(sizeof(t));
os.put(len_c);
os.write(reinterpret_cast<const char *>(&t), sizeof(t));
} else {
if (sizeof(t) == 1)
os << static_cast<int16>(t) << " ";
else
os << t << " ";
}
if (os.fail()) {
throw std::runtime_error("Write failure in WriteBasicType.");
}
}
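// Worked example of the convention above, assuming a little-endian machine:
// writing int32 t = 5 in binary mode emits five bytes, 0x04 (signed, size 4)
// followed by 05 00 00 00; an unsigned 4-byte type would emit 0xFC (i.e. -4)
// as its length byte instead.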
// Template that covers integers.
template<class T> inline void ReadBasicType(std::istream &is,
bool binary, T *t) {
KALDI_PARANOID_ASSERT(t != NULL);
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
int len_c_in = is.get();
if (len_c_in == -1)
KALDI_ERR << "ReadBasicType: encountered end of stream.";
char len_c = static_cast<char>(len_c_in), len_c_expected
= (std::numeric_limits<T>::is_signed ? 1 : -1)
* static_cast<char>(sizeof(*t));
if (len_c != len_c_expected) {
KALDI_ERR << "ReadBasicType: did not get expected integer type, "
<< static_cast<int>(len_c)
<< " vs. " << static_cast<int>(len_c_expected)
<< ". You can change this code to successfully"
<< " read it later, if needed.";
// insert code here to read "wrong" type. Might have a switch statement.
}
is.read(reinterpret_cast<char *>(t), sizeof(*t));
} else {
if (sizeof(*t) == 1) {
int16 i;
is >> i;
*t = i;
} else {
is >> *t;
}
}
if (is.fail()) {
KALDI_ERR << "Read failure in ReadBasicType, file position is "
<< is.tellg() << ", next char is " << is.peek();
}
}
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
const std::vector<T> &v) {
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
char sz = sizeof(T); // this is currently just a check.
os.write(&sz, 1);
int32 vecsz = static_cast<int32>(v.size());
KALDI_ASSERT((size_t)vecsz == v.size());
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
if (vecsz != 0) {
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
}
} else {
// focus here is on prettiness of text form rather than
// efficiency of reading-in.
// reading-in is dominated by low-level operations anyway:
// for efficiency use binary.
os << "[ ";
typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
for (; iter != end; ++iter) {
if (sizeof(T) == 1)
os << static_cast<int16>(*iter) << " ";
else
os << *iter << " ";
}
os << "]\n";
}
if (os.fail()) {
throw std::runtime_error("Write failure in WriteIntegerType.");
}
}
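// Binary layout produced above: one byte holding sizeof(T), an int32 element
// count, then the raw elements. E.g. std::vector<int32>{7} becomes (on a
// little-endian machine): 04 | 01 00 00 00 | 07 00 00 00.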
template<class T> inline void ReadIntegerVector(std::istream &is,
bool binary,
std::vector<T> *v) {
KALDI_ASSERT_IS_INTEGER_TYPE(T);
KALDI_ASSERT(v != NULL);
if (binary) {
int sz = is.peek();
if (sz == sizeof(T)) {
is.get();
} else { // this is currently just a check.
KALDI_ERR << "ReadIntegerVector: expected to see type of size "
<< sizeof(T) << ", saw instead " << sz << ", at file position "
<< is.tellg();
}
int32 vecsz;
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
if (is.fail() || vecsz < 0) goto bad;
v->resize(vecsz);
if (vecsz > 0) {
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
}
} else {
std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
// due to resizing.
is >> std::ws;
if (is.peek() != static_cast<int>('[')) {
KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
<< is.peek() << ", at file position " << is.tellg();
}
is.get(); // consume the '['.
is >> std::ws; // consume whitespace.
while (is.peek() != static_cast<int>(']')) {
if (sizeof(T) == 1) { // read/write chars as numbers.
int16 next_t;
is >> next_t >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back((T)next_t);
} else {
T next_t;
is >> next_t >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back(next_t);
}
}
is.get(); // get the final ']'.
*v = tmp_v; // could use std::swap to use less temporary memory, but this
// uses less permanent memory.
}
if (!is.fail()) return;
bad:
KALDI_ERR << "ReadIntegerVector: read failure at file position "
<< is.tellg();
}
// Initialize an opened stream for writing by writing an optional binary
// header and modifying the floating-point precision.
inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
// This does not throw exceptions (does not check for errors).
if (binary) {
os.put('\0');
os.put('B');
}
// Note, in non-binary mode we may at some point want to mess with
// the precision a bit.
// 7 is a bit more than the precision of float..
if (os.precision() < 7)
os.precision(7);
}
/// Initialize an opened stream for reading by detecting the binary header and
/// setting the "binary" value appropriately.
inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
// Sets the 'binary' variable.
// Throws exception in the very unusual situation that stream
// starts with '\0' but not then 'B'.
if (is.peek() == '\0') { // seems to be binary
is.get();
if (is.peek() != 'B') {
return false;
}
is.get();
*binary = true;
return true;
} else {
*binary = false;
return true;
}
}
} // end namespace kaldi.
#endif // KALDI_BASE_IO_FUNCS_INL_H_


@ -1,137 +0,0 @@
// base/io-funcs-test.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
namespace kaldi {
void UnitTestIo(bool binary) {
{
const char *filename = "tmpf";
std::ofstream outfile(filename, std::ios_base::out | std::ios_base::binary);
InitKaldiOutputStream(outfile, binary);
if (!binary) outfile << "\t";
int64 i1 = rand() % 10000;
WriteBasicType(outfile, binary, i1);
uint16 i2 = rand() % 10000;
WriteBasicType(outfile, binary, i2);
if (!binary) outfile << "\t";
char c = rand();
WriteBasicType(outfile, binary, c);
if (!binary && rand()%2 == 0) outfile << " \n";
std::vector<int32> vec1;
WriteIntegerVector(outfile, binary, vec1);
if (!binary && rand()%2 == 0) outfile << " \n";
std::vector<uint16> vec2;
for (size_t i = 0; i < 10; i++) vec2.push_back(rand()%100 - 10);
WriteIntegerVector(outfile, binary, vec2);
if (!binary) outfile << " \n";
std::vector<char> vec3;
for (size_t i = 0; i < 10; i++) vec3.push_back(rand()%100);
WriteIntegerVector(outfile, binary, vec3);
if (!binary && rand()%2 == 0) outfile << " \n";
const char *token1 = "Hi";
WriteToken(outfile, binary, token1);
if (!binary) outfile << " \n";
std::string token2 = "There.";
WriteToken(outfile, binary, token2);
if (!binary && rand()%2 == 0) outfile << " \n";
std::string token3 = "You.";
WriteToken(outfile, binary, token3);
if (!binary && rand()%2 == 0) outfile << " ";
float f1 = RandUniform();
WriteBasicType(outfile, binary, f1);
if (!binary && rand()%2 == 0) outfile << "\t";
float f2 = RandUniform();
WriteBasicType(outfile, binary, f2);
double d1 = RandUniform();
WriteBasicType(outfile, binary, d1);
if (!binary && rand()%2 == 0) outfile << "\t";
double d2 = RandUniform();
WriteBasicType(outfile, binary, d2);
if (!binary && rand()%2 == 0) outfile << "\t";
outfile.close();
{
std::ifstream infile(filename, std::ios_base::in | std::ios_base::binary);
bool binary_in;
InitKaldiInputStream(infile, &binary_in);
int64 i1_in;
ReadBasicType(infile, binary_in, &i1_in);
KALDI_ASSERT(i1_in == i1);
uint16 i2_in;
ReadBasicType(infile, binary_in, &i2_in);
KALDI_ASSERT(i2_in == i2);
char c_in;
ReadBasicType(infile, binary_in, &c_in);
KALDI_ASSERT(c_in == c);
std::vector<int32> vec1_in;
ReadIntegerVector(infile, binary_in, &vec1_in);
KALDI_ASSERT(vec1_in == vec1);
std::vector<uint16> vec2_in;
ReadIntegerVector(infile, binary_in, &vec2_in);
KALDI_ASSERT(vec2_in == vec2);
std::vector<char> vec3_in;
ReadIntegerVector(infile, binary_in, &vec3_in);
KALDI_ASSERT(vec3_in == vec3);
std::string token1_in, token2_in;
KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
KALDI_ASSERT(PeekToken(infile, binary_in) == (int)*token1); // Note:
// the stuff with skipping over '<' is tested in ../util/kaldi-io-test.cc,
// since we need to make sure it works with pipes.
ReadToken(infile, binary_in, &token1_in);
KALDI_ASSERT(token1_in == std::string(token1));
ReadToken(infile, binary_in, &token2_in);
KALDI_ASSERT(token2_in == std::string(token2));
if (rand() % 2 == 0)
ExpectToken(infile, binary_in, token3.c_str());
else
ExpectToken(infile, binary_in, token3);
float f1_in; // same type.
ReadBasicType(infile, binary_in, &f1_in);
AssertEqual(f1_in, f1);
double f2_in; // wrong type.
ReadBasicType(infile, binary_in, &f2_in);
AssertEqual(f2_in, f2);
double d1_in; // same type.
ReadBasicType(infile, binary_in, &d1_in);
AssertEqual(d1_in, d1);
float d2_in; // wrong type.
ReadBasicType(infile, binary_in, &d2_in);
AssertEqual(d2_in, d2);
KALDI_ASSERT(Peek(infile, binary_in) == -1);
KALDI_ASSERT(PeekToken(infile, binary_in) == -1);
}
}
}
} // end namespace kaldi.
int main() {
using namespace kaldi;
for (size_t i = 0; i < 10; i++) {
UnitTestIo(false);
UnitTestIo(true);
}
KALDI_ASSERT(1); // just wanted to check that KALDI_ASSERT does not fail for 1.
return 0;
}


@ -1,206 +0,0 @@
// base/io-funcs.cc
// Copyright 2009-2011 Microsoft Corporation; Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
namespace kaldi {
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b) {
os << (b ? "T":"F");
if (!binary) os << " ";
if (os.fail())
KALDI_ERR << "Write failure in WriteBasicType<bool>";
}
template<>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b) {
KALDI_PARANOID_ASSERT(b != NULL);
if (!binary) is >> std::ws; // eat up whitespace.
char c = is.peek();
if (c == 'T') {
*b = true;
is.get();
} else if (c == 'F') {
*b = false;
is.get();
} else {
KALDI_ERR << "Read failure in ReadBasicType<bool>, file position is "
<< is.tellg() << ", next char is " << CharToString(c);
}
}
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f) {
if (binary) {
char c = sizeof(f);
os.put(c);
os.write(reinterpret_cast<const char *>(&f), sizeof(f));
} else {
os << f << " ";
}
}
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f) {
if (binary) {
char c = sizeof(f);
os.put(c);
os.write(reinterpret_cast<const char *>(&f), sizeof(f));
} else {
os << f << " ";
}
}
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f) {
KALDI_PARANOID_ASSERT(f != NULL);
if (binary) {
double d;
int c = is.peek();
if (c == sizeof(*f)) {
is.get();
is.read(reinterpret_cast<char*>(f), sizeof(*f));
} else if (c == sizeof(d)) {
ReadBasicType(is, binary, &d);
*f = d;
} else {
KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
<< ", at file position " << is.tellg();
}
} else {
is >> *f;
}
if (is.fail()) {
KALDI_ERR << "ReadBasicType: failed to read, at file position "
<< is.tellg();
}
}
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *d) {
KALDI_PARANOID_ASSERT(d != NULL);
if (binary) {
float f;
int c = is.peek();
if (c == sizeof(*d)) {
is.get();
is.read(reinterpret_cast<char*>(d), sizeof(*d));
} else if (c == sizeof(f)) {
ReadBasicType(is, binary, &f);
*d = f;
} else {
KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
<< ", at file position " << is.tellg();
}
} else {
is >> *d;
}
if (is.fail()) {
KALDI_ERR << "ReadBasicType: failed to read, at file position "
<< is.tellg();
}
}
void CheckToken(const char *token) {
KALDI_ASSERT(*token != '\0'); // check it's nonempty.
while (*token != '\0') {
KALDI_ASSERT(!::isspace(*token));
token++;
}
}
void WriteToken(std::ostream &os, bool binary, const char *token) {
// binary mode is ignored;
// we use space as termination character in either case.
KALDI_ASSERT(token != NULL);
CheckToken(token); // make sure it's valid (can be read back)
os << token << " ";
if (os.fail()) {
throw std::runtime_error("Write failure in WriteToken.");
}
}
int Peek(std::istream &is, bool binary) {
if (!binary) is >> std::ws; // eat up whitespace.
return is.peek();
}
void WriteToken(std::ostream &os, bool binary, const std::string & token) {
WriteToken(os, binary, token.c_str());
}
void ReadToken(std::istream &is, bool binary, std::string *str) {
KALDI_ASSERT(str != NULL);
if (!binary) is >> std::ws; // consume whitespace.
is >> *str;
if (is.fail()) {
KALDI_ERR << "ReadToken, failed to read token at file position "
<< is.tellg();
}
if (!isspace(is.peek())) {
KALDI_ERR << "ReadToken, expected space after token, saw instead "
<< static_cast<char>(is.peek())
<< ", at file position " << is.tellg();
}
is.get(); // consume the space.
}
int PeekToken(std::istream &is, bool binary) {
if (!binary) is >> std::ws; // consume whitespace.
bool read_bracket;
if (static_cast<char>(is.peek()) == '<') {
read_bracket = true;
is.get();
} else {
read_bracket = false;
}
int ans = is.peek();
if (read_bracket) {
if (!is.unget())
KALDI_WARN << "Error ungetting '<' in PeekToken";
}
return ans;
}
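// Example: with the stream positioned at "<Nnet> ...", Peek() returns '<'
// whereas PeekToken() skips the '<', peeks 'N', then ungets the '<', so the
// stream position is unchanged either way.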
void ExpectToken(std::istream &is, bool binary, const char *token) {
int pos_at_start = is.tellg();
KALDI_ASSERT(token != NULL);
CheckToken(token); // make sure it's valid (can be read back)
if (!binary) is >> std::ws; // consume whitespace.
std::string str;
is >> str;
is.get(); // consume the space.
if (is.fail()) {
KALDI_ERR << "Failed to read token [started at file position "
<< pos_at_start << "], expected " << token;
}
if (strcmp(str.c_str(), token) != 0) {
KALDI_ERR << "Expected token \"" << token << "\", got instead \""
<< str <<"\".";
}
}
void ExpectToken(std::istream &is, bool binary, const std::string &token) {
ExpectToken(is, binary, token.c_str());
}
} // end namespace kaldi


@ -1,231 +0,0 @@
// base/io-funcs.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_IO_FUNCS_H_
#define KALDI_BASE_IO_FUNCS_H_
// This header only contains some relatively low-level I/O functions.
// The full Kaldi I/O declarations are in ../util/kaldi-io.h
// and ../util/kaldi-table.h
// They were put in util/ in order to avoid making the Matrix library
// dependent on them.
#include <cctype>
#include <vector>
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
/*
This comment describes the Kaldi approach to I/O. All objects can be written
and read in two modes: binary and text. In addition we want to make the I/O
work if we redefine the typedef "BaseFloat" between floats and doubles.
We also want to have control over whitespace in text mode without affecting
the meaning of the file, for pretty-printing purposes.
Errors are handled by throwing an exception (std::runtime_error).
For integer and floating-point types (and boolean values):
WriteBasicType(std::ostream &, bool binary, const T&);
ReadBasicType(std::istream &, bool binary, T*);
and we expect these functions to be defined in such a way that they work when
the type T changes between float and double, so you can read float into double
and vice versa. Note that for efficiency and space-saving reasons, the Vector
and Matrix classes do not use these functions [but they preserve the type
interchangeability in their own way]
For a class (or struct) C:
class C {
..
Write(std::ostream &, bool binary, [possibly extra optional args for specific classes]) const;
Read(std::istream &, bool binary, [possibly extra optional args for specific classes]);
..
}
NOTE: The only actual optional args we used are the "add" arguments in
Vector/Matrix classes, which specify whether we should sum the data already
in the class with the data being read.
For types which are typedefs involving STL classes, I/O is as follows:
typedef std::vector<std::pair<A, B> > MyTypedefName;
The user should define something like:
WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t);
ReadMyTypedefName(std::istream &, bool binary, MyTypedefName *t);
The user would have to write these functions.
For a type std::vector<T>:
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector<T> &v);
void ReadIntegerVector(std::istream &is, bool binary, std::vector<T> *v);
For other types, e.g. vectors of pairs, the user should create a routine of the
type WriteMyTypedefName. This is to avoid introducing confusing templated functions;
we could easily create templated functions to handle most of these cases but they
would have to share the same name.
It also often happens that the user needs to write/read special tokens as part
of a file. These might be class headers, or separators/identifiers in the class.
We provide special functions for manipulating these. These special tokens must
be nonempty and must not contain any whitespace.
void WriteToken(std::ostream &os, bool binary, const char*);
void WriteToken(std::ostream &os, bool binary, const std::string & token);
int Peek(std::istream &is, bool binary);
void ReadToken(std::istream &is, bool binary, std::string *str);
int PeekToken(std::istream &is, bool binary);
WriteToken writes the token and one space (whether in binary or text mode).
Peek returns the first character of the next token, by consuming whitespace
(in text mode) and then returning the peek() character. It returns -1 at EOF;
it doesn't throw. It's useful if a class can have various forms based on
typedefs and virtual classes, and wants to know which version to read.
ReadToken allows the caller to obtain the next token. PeekToken returns the
first character of the next token (skipping over a leading '<') without
consuming anything, so a subsequent call to ReadToken still reads the whole
token. This is useful when different object types are written to the same
file; using PeekToken one can decide which of the objects to read.
There is currently no special functionality for writing/reading strings (where the strings
contain data rather than "special tokens" that are whitespace-free and nonempty). This is
because Kaldi is structured in such a way that strings don't appear, except as OpenFst symbol
table entries (and these have their own format).
NOTE: you should not call ReadBasicType and WriteBasicType with types,
such as int and size_t, that are machine-dependent -- at least not
if you want your file formats to port between machines. Use int32 and
int64 where necessary. There is no way to detect this using compile-time
assertions because C++ only keeps track of the internal representation of
the type.
*/
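// A minimal sketch (illustrative only, not part of the Kaldi API) of a class
// following the conventions described above:
//
//   class Token {
//    public:
//     int32 count_;
//     void Write(std::ostream &os, bool binary) const {
//       WriteToken(os, binary, "<Token>");
//       WriteBasicType(os, binary, count_);
//       WriteToken(os, binary, "</Token>");
//     }
//     void Read(std::istream &is, bool binary) {
//       ExpectToken(is, binary, "<Token>");
//       ReadBasicType(is, binary, &count_);
//       ExpectToken(is, binary, "</Token>");
//     }
//   };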
/// \addtogroup io_funcs_basic
/// @{
/// WriteBasicType is the name of the write function for bool, integer types,
/// and floating-point types. They all throw on error.
template<class T> void WriteBasicType(std::ostream &os, bool binary, T t);
/// ReadBasicType is the name of the read function for bool, integer types,
/// and floating-point types. They all throw on error.
template<class T> void ReadBasicType(std::istream &is, bool binary, T *t);
// Declare specialization for bool.
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b);
template <>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b);
// Declare specializations for float and double.
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f);
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f);
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f);
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *f);
// Define ReadBasicType that accepts an "add" parameter to add to
// the destination. Caution: if used in Read functions, be careful
// to initialize the parameters concerned to zero in the default
// constructor.
template<class T>
inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) {
if (!add) {
ReadBasicType(is, binary, t);
} else {
T tmp = T(0);
ReadBasicType(is, binary, &tmp);
*t += tmp;
}
}
/// Function for writing STL vectors of integer types.
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
const std::vector<T> &v);
/// Function for reading STL vector of integer types.
template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
std::vector<T> *v);
/// The WriteToken functions are for writing nonempty sequences of non-space
/// characters. They are not for general strings.
void WriteToken(std::ostream &os, bool binary, const char *token);
void WriteToken(std::ostream &os, bool binary, const std::string & token);
/// Peek consumes whitespace (if binary == false) and then returns the peek()
/// value of the stream.
int Peek(std::istream &is, bool binary);
/// ReadToken gets the next token and puts it in str (exception on failure).
void ReadToken(std::istream &is, bool binary, std::string *token);
/// PeekToken will return the first character of the next token, or -1 if end of
/// file. It's the same as Peek(), except if the first character is '<' it will
/// skip over it and will return the next character. It will unget the '<' so
/// the stream is where it was before you did PeekToken().
int PeekToken(std::istream &is, bool binary);
/// ExpectToken tries to read in the given token, and throws an exception
/// on failure.
void ExpectToken(std::istream &is, bool binary, const char *token);
void ExpectToken(std::istream &is, bool binary, const std::string & token);
/// ExpectPretty attempts to read the text in "token", but only in non-binary
/// mode. Throws exception on failure. It expects an exact match except that
/// arbitrary whitespace matches arbitrary whitespace.
void ExpectPretty(std::istream &is, bool binary, const char *token);
void ExpectPretty(std::istream &is, bool binary, const std::string & token);
/// @} end "addtogroup io_funcs_basic"
/// InitKaldiOutputStream initializes an opened stream for writing by writing an
/// optional binary header and modifying the floating-point precision; it will
/// typically not be called by users directly.
inline void InitKaldiOutputStream(std::ostream &os, bool binary);
/// InitKaldiInputStream initializes an opened stream for reading by detecting
/// the binary header and setting the "binary" value appropriately;
/// It will typically not be called by users directly.
inline bool InitKaldiInputStream(std::istream &is, bool *binary);
} // end namespace kaldi.
#include "base/io-funcs-inl.h"
#endif // KALDI_BASE_IO_FUNCS_H_


@ -1,41 +0,0 @@
// base/kaldi-common.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_COMMON_H_
#define KALDI_BASE_KALDI_COMMON_H_ 1
#include <cstddef>
#include <cstdlib>
#include <cstring> // C string stuff like strcpy
#include <string>
#include <sstream>
#include <stdexcept>
#include <cassert>
#include <vector>
#include <iostream>
#include <fstream>
#include "base/kaldi-utils.h"
#include "base/kaldi-error.h"
#include "base/kaldi-types.h"
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
#endif // KALDI_BASE_KALDI_COMMON_H_


@ -1,53 +0,0 @@
// base/kaldi-error-test.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
// testing that we get the stack trace.
namespace kaldi {
void MyFunction2() {
KALDI_ERR << "Ignore this error";
}
void MyFunction1() {
MyFunction2();
}
void UnitTestError() {
{
std::cerr << "Ignore next error:\n";
MyFunction1();
}
}
} // end namespace kaldi.
int main() {
kaldi::g_program_name = "/foo/bar/kaldi-error-test";
try {
kaldi::UnitTestError();
KALDI_ASSERT(0); // should not happen.
} catch (std::runtime_error &r) {
std::cout << "UnitTestError: the error we generated was: " << r.what();
}
}


@ -1,184 +0,0 @@
// base/kaldi-error.cc
// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifdef HAVE_EXECINFO_H
#include <execinfo.h> // To get stack trace in error messages.
// If this #include fails there is an error in the Makefile: it does not
// support your platform well. Make sure HAVE_EXECINFO_H is undefined, and the
// code will compile.
#ifdef HAVE_CXXABI_H
#include <cxxabi.h> // For name demangling.
// Useful to decode the stack trace, but only used if we have execinfo.h
#endif // HAVE_CXXABI_H
#endif // HAVE_EXECINFO_H
#include "base/kaldi-common.h"
#include "base/kaldi-error.h"
namespace kaldi {
int32 g_kaldi_verbose_level = 0; // Just initialize this global variable.
const char *g_program_name = NULL;
// If the program name was set (g_program_name != NULL), the function
// GetProgramName returns the program name (without the path) followed by a
// colon, e.g. "gmm-align:". Otherwise it returns the empty string "".
const char *GetProgramName() {
if (g_program_name == NULL) return "";
else return g_program_name;
}
// Given a filename like "/a/b/c/d/e/f.cc", GetShortFileName
// returns "e/f.cc". Does not currently work if backslash is
// the filename separator.
const char *GetShortFileName(const char *filename) {
const char *last_slash = strrchr(filename, '/');
if (!last_slash) { return filename; }
else {
while (last_slash > filename && last_slash[-1] != '/')
last_slash--;
return last_slash;
}
}
#if defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H)
// The function name looks like a macro: it's a macro if we don't have cxxabi.h
inline void KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(std::string &ans,
const char *to_append) {
// at input the string "to_append" looks like:
// ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
// We want to extract the name e.g. '_ZN5kaldi13UnitTestErrorEv",
// demangle it and return it.
int32 status;
const char *paren = strchr(to_append, '(');
const char *plus = (paren ? strchr(paren, '+') : NULL);
if (!plus) { // did not find the '(' or did not find the '+'
// This is a soft failure in case we did not get what we expected.
ans += to_append;
return;
}
std::string stripped(paren+1, plus-(paren+1)); // the bit between ( and +.
char *demangled_name = abi::__cxa_demangle(stripped.c_str(), 0, 0, &status);
// if status != 0 it is an error (demangling failure), but not all names seem
// to demangle, so we don't check it.
if (demangled_name != NULL) {
ans += demangled_name;
free(demangled_name);
} else {
ans += to_append; // add the original string.
}
}
#else // defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H)
#define KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, to_append) ans += to_append
#endif // defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H)
#ifdef HAVE_EXECINFO_H
std::string KaldiGetStackTrace() {
#define KALDI_MAX_TRACE_SIZE 50
#define KALDI_MAX_TRACE_PRINT 10 // must be even.
std::string ans;
void *array[KALDI_MAX_TRACE_SIZE];
size_t size = backtrace(array, KALDI_MAX_TRACE_SIZE);
char **strings = backtrace_symbols(array, size);
if (size <= KALDI_MAX_TRACE_PRINT) {
for (size_t i = 0; i < size; i++) {
KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, strings[i]);
ans += "\n";
}
} else { // print out first+last (e.g.) 5.
for (size_t i = 0; i < KALDI_MAX_TRACE_PRINT/2; i++) {
KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, strings[i]);
ans += "\n";
}
ans += ".\n.\n.\n";
for (size_t i = size - KALDI_MAX_TRACE_PRINT/2; i < size; i++) {
KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, strings[i]);
ans += "\n";
}
if (size == KALDI_MAX_TRACE_SIZE)
ans += ".\n.\n.\n"; // stack was too long, probably a bug.
}
free(strings); // it's all in one big malloc()ed block.
#ifdef HAVE_CXXABI_H // demangle the name, if possible.
#endif // HAVE_CXXABI_H
return ans;
}
#endif
void KaldiAssertFailure_(const char *func, const char *file,
int32 line, const char *cond_str) {
std::cerr << "KALDI_ASSERT: at " << GetProgramName() << func << ':'
<< GetShortFileName(file)
<< ':' << line << ", failed: " << cond_str << '\n';
#ifdef HAVE_EXECINFO_H
std::cerr << "Stack trace is:\n" << KaldiGetStackTrace();
#endif
std::cerr.flush();
abort(); // Will later throw instead if needed.
}
KaldiWarnMessage::KaldiWarnMessage(const char *func, const char *file,
int32 line) {
this->stream() << "WARNING (" << GetProgramName() << func << "():"
<< GetShortFileName(file) << ':' << line << ") ";
}
KaldiLogMessage::KaldiLogMessage(const char *func, const char *file,
int32 line) {
this->stream() << "LOG (" << GetProgramName() << func << "():"
<< GetShortFileName(file) << ':' << line << ") ";
}
KaldiVlogMessage::KaldiVlogMessage(const char *func, const char *file,
int32 line, int32 verbose) {
this->stream() << "VLOG[" << verbose << "] (" << GetProgramName() << func
<< "():" << GetShortFileName(file) << ':' << line << ") ";
}
KaldiErrorMessage::KaldiErrorMessage(const char *func, const char *file,
int32 line) {
this->stream() << "ERROR (" << GetProgramName() << func << "():"
<< GetShortFileName(file) << ':' << line << ") ";
}
KaldiErrorMessage::~KaldiErrorMessage() {
// (1) Print the message to stderr.
std::cerr << ss.str() << '\n';
// (2) Throw an exception with the message, plus traceback info if available.
if (!std::uncaught_exception()) {
#ifdef HAVE_EXECINFO_H
throw std::runtime_error(ss.str() + "\n\n[stack trace: ]\n" +
KaldiGetStackTrace() + "\n");
#else
throw std::runtime_error(ss.str());
#endif
} else {
abort(); // This may be temporary...
}
}
} // end namespace kaldi


@ -1,145 +0,0 @@
// base/kaldi-error.h
// Copyright 2009-2011 Microsoft Corporation; Ondrej Glembek; Lukas Burget;
// Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_ERROR_H_
#define KALDI_BASE_KALDI_ERROR_H_ 1
#include <stdexcept>
#include <string>
#include <cstring>
#include <sstream>
#include <cstdio>
#include "base/kaldi-types.h"
#include "base/kaldi-utils.h"
/* Important that this file does not depend on any other kaldi headers. */
namespace kaldi {
/// \addtogroup error_group
/// @{
/// This is set by util/parse-options.{h, cc} if you set --verbose = ? option
extern int32 g_kaldi_verbose_level;
/// This is set by util/parse-options.{h, cc} (from argv[0]) and used (if set)
/// in error reporting code to display the name of the program (this is because
/// in our scripts, we often mix together the stderr of many programs). It is
/// the base-name of the program (no directory), followed by ':'. We don't use
/// std::string, due to the static initialization order fiasco.
extern const char *g_program_name;
inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; }
/// This should be rarely used; command-line programs set the verbose level
/// automatically from ParseOptions.
inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; }
// Class KaldiLogMessage is invoked from the KALDI_WARN, KALDI_VLOG and
// KALDI_LOG macros. It prints the message to stderr. Note: we avoid
// using cerr, due to problems with thread safety. fprintf is guaranteed
// thread-safe.
// class KaldiWarnMessage is invoked from the KALDI_WARN macro.
class KaldiWarnMessage {
public:
inline std::ostream &stream() { return ss; }
KaldiWarnMessage(const char *func, const char *file, int32 line);
~KaldiWarnMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
private:
std::ostringstream ss;
};
// class KaldiLogMessage is invoked from the KALDI_LOG macro.
class KaldiLogMessage {
public:
inline std::ostream &stream() { return ss; }
KaldiLogMessage(const char *func, const char *file, int32 line);
~KaldiLogMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
private:
std::ostringstream ss;
};
// Class KaldiVlogMessage is invoked from the KALDI_VLOG macro.
class KaldiVlogMessage {
public:
KaldiVlogMessage(const char *func, const char *file, int32 line,
int32 verbose_level);
inline std::ostream &stream() { return ss; }
~KaldiVlogMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
private:
std::ostringstream ss;
};
// class KaldiErrorMessage is invoked from the KALDI_ERROR macro.
// The destructor throws an exception.
class KaldiErrorMessage {
public:
KaldiErrorMessage(const char *func, const char *file, int32 line);
inline std::ostream &stream() { return ss; }
~KaldiErrorMessage(); // defined in kaldi-error.cc
private:
std::ostringstream ss;
};
#ifdef _MSC_VER
#define __func__ __FUNCTION__
#endif
#ifndef NDEBUG
#define KALDI_ASSERT(cond) \
if (!(cond)) kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);
#else
#define KALDI_ASSERT(cond)
#endif
// also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h,
// and KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE,
// also defined there.
#ifdef KALDI_PARANOID // some more expensive asserts only checked if this defined
#define KALDI_PARANOID_ASSERT(cond) \
if (!(cond)) kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);
#else
#define KALDI_PARANOID_ASSERT(cond)
#endif
#define KALDI_ERR kaldi::KaldiErrorMessage(__func__, __FILE__, __LINE__).stream()
#define KALDI_WARN kaldi::KaldiWarnMessage(__func__, __FILE__, __LINE__).stream()
#define KALDI_LOG kaldi::KaldiLogMessage(__func__, __FILE__, __LINE__).stream()
#define KALDI_VLOG(v) if (v <= kaldi::g_kaldi_verbose_level) \
kaldi::KaldiVlogMessage(__func__, __FILE__, __LINE__, v).stream()
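// Typical usage:
//   KALDI_LOG << "processed " << n << " items.";  // prints to stderr
//   KALDI_WARN << "suspicious value " << x;       // prints to stderr
//   KALDI_ERR << "bad dimension " << d;           // prints, then throws std::runtime_error
//   KALDI_VLOG(2) << "detail";                    // printed only if verbose level >= 2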
inline bool IsKaldiError(const std::string &str) {
return(!strncmp(str.c_str(), "ERROR ", 6));
}
void KaldiAssertFailure_(const char *func, const char *file,
int32 line, const char *cond_str);
/// @} end "addtogroup error_group"
} // namespace kaldi
#endif // KALDI_BASE_KALDI_ERROR_H_


@ -1,258 +0,0 @@
// base/kaldi-math-test.cc
// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; Jan Silovsky
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-math.h"
namespace kaldi {
template<class I> void UnitTestGcdTpl() {
for (I a = 1; a < 15; a++) { // a is min gcd.
I b = (I)(rand() % 10);
I c = (I)(rand() % 10);
if (rand()%2 == 0 && std::numeric_limits<I>::is_signed) b = -b;
if (rand()%2 == 0 && std::numeric_limits<I>::is_signed) c = -c;
if (b == 0 && c == 0) continue; // gcd not defined for such numbers.
I g = Gcd(b*a, c*a);
KALDI_ASSERT(g >= a);
KALDI_ASSERT((b*a) % g == 0);
KALDI_ASSERT((c*a) % g == 0);
}
}
void UnitTestRoundUpToNearestPowerOfTwo() {
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(1) == 1);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(2) == 2);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(3) == 4);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(4) == 4);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(7) == 8);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(8) == 8);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(255) == 256);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(256) == 256);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(257) == 512);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(1073700000) == 1073741824 );
}
void UnitTestGcd() {
UnitTestGcdTpl<int>();
UnitTestGcdTpl<char>();
UnitTestGcdTpl<size_t>();
UnitTestGcdTpl<unsigned short>();
}
void UnitTestRand() {
// Testing random-number generation.
using namespace kaldi;
std::cout << "Testing random-number generation. "
<< "If there is an error this may not terminate.\n";
std::cout << "If this does not terminate, look more closely. "
<< "There might be a problem [but might not be]\n";
for (int i = 1; i < 10; i++) {
{ // test RandUniform.
std::cout << "Test RandUniform\n";
KALDI_ASSERT(RandUniform() >= 0 && RandUniform() <= 1);
float sum = RandUniform()-0.5;
for (int j = 0; ; j++) {
sum += RandUniform()-0.5;
if (std::abs(sum) < 0.5*sqrt((double)j)) break;
}
}
{ // test RandGauss.
float sum = RandGauss();
for (int j = 0; ; j++) {
sum += RandGauss();
if (std::abs(sum) < 0.5*sqrt((double)j)) break;
}
}
{ // test poisson_rand().
KALDI_ASSERT(RandPoisson(3.0) >= 0);
KALDI_ASSERT(RandPoisson(0.0) == 0);
std::cout << "Test RandPoisson\n";
float lambda = RandUniform() * 3.0; // between 0 and 3.
double sum = RandPoisson(lambda) - lambda; // expected value is zero.
for (int j = 0; ; j++) {
sum += RandPoisson(lambda) - lambda;
if (std::abs(sum) < 0.5*sqrt((double)j)) break;
}
}
{ // test WithProb().
for (int32 i = 0; i < 10; i++) {
KALDI_ASSERT((WithProb(0.0) == false) && (WithProb(1.0) == true));
}
{
int32 tot = 0, n = 10000;
BaseFloat p = 0.5;
for (int32 i = 0; i < n; i++)
tot += WithProb(p);
KALDI_ASSERT(tot > (n * p * 0.8) && tot < (n * p * 1.2));
}
{
int32 tot = 0, n = 10000;
BaseFloat p = 0.25;
for (int32 i = 0; i < n; i++)
tot += WithProb(p);
KALDI_ASSERT(tot > (n * p * 0.8) && tot < (n * p * 1.2));
}
}
{ // test RandInt().
KALDI_ASSERT(RandInt(0, 3) >= 0 && RandInt(0, 3) <= 3);
std::cout << "Test RandInt\n";
int minint = rand() % 200;
int maxint = minint + 1 + rand() % 20;
float sum = RandInt(minint, maxint) + 0.5*(minint+maxint);
for (int j = 0; ; j++) {
sum += RandInt(minint, maxint) - 0.5*(minint+maxint);
if (std::abs((float)sum) < 0.5*sqrt((double)j)*(maxint-minint)) break;
}
}
{ // test RandPrune in basic way.
KALDI_ASSERT(RandPrune(1.1, 1.0) == 1.1);
KALDI_ASSERT(RandPrune(0.0, 0.0) == 0.0);
KALDI_ASSERT(RandPrune(-1.1, 1.0) == -1.1);
KALDI_ASSERT(RandPrune(0.0, 1.0) == 0.0);
KALDI_ASSERT(RandPrune(0.5, 1.0) >= 0.0);
KALDI_ASSERT(RandPrune(-0.5, 1.0) <= 0.0);
BaseFloat f = RandPrune(-0.5, 1.0);
KALDI_ASSERT(f == 0.0 || f == -1.0);
f = RandPrune(0.5, 1.0);
KALDI_ASSERT(f == 0.0 || f == 1.0);
}
}
}
void UnitTestLogAddSub() {
using namespace kaldi;
for (int i = 0; i < 100; i++) {
double f1 = rand() % 10000, f2 = rand() % 20;
double add1 = exp(LogAdd(log(f1), log(f2)));
double add2 = exp(LogAdd(log(f2), log(f1)));
double add = f1 + f2, thresh = add*0.00001;
KALDI_ASSERT(std::abs(add-add1) < thresh && std::abs(add-add2) < thresh);
try {
double f2_check = exp(LogSub(log(add), log(f1))), thresh = (f2*0.01)+0.001;
KALDI_ASSERT(std::abs(f2_check-f2) < thresh);
} catch(...) {
KALDI_ASSERT(f2 == 0); // It will probably crash for f2=0.
}
}
}
void UnitTestDefines() { // Yes, we even unit-test the preprocessor statements.
KALDI_ASSERT(exp(kLogZeroFloat) == 0.0);
KALDI_ASSERT(exp(kLogZeroDouble) == 0.0);
BaseFloat den = 0.0;
KALDI_ASSERT(KALDI_ISNAN(0.0 / den));
KALDI_ASSERT(!KALDI_ISINF(0.0 / den));
KALDI_ASSERT(!KALDI_ISFINITE(0.0 / den));
KALDI_ASSERT(!KALDI_ISNAN(1.0 / den));
KALDI_ASSERT(KALDI_ISINF(1.0 / den));
KALDI_ASSERT(!KALDI_ISFINITE(1.0 / den));
KALDI_ASSERT(KALDI_ISFINITE(0.0));
KALDI_ASSERT(!KALDI_ISINF(0.0));
KALDI_ASSERT(!KALDI_ISNAN(0.0));
std::cout << 1.0+DBL_EPSILON;
std::cout << 1.0 + 0.5*DBL_EPSILON;
KALDI_ASSERT(1.0 + DBL_EPSILON != 1.0 && 1.0 + (0.5*DBL_EPSILON) == 1.0
&& "If this test fails, you can probably just comment it out-- may mean your CPU exceeds expected floating point precision");
KALDI_ASSERT(1.0f + FLT_EPSILON != 1.0f && 1.0f + (0.5f*FLT_EPSILON) == 1.0f
&& "If this test fails, you can probably just comment it out-- may mean your CPU exceeds expected floating point precision");
KALDI_ASSERT(std::abs(sin(M_PI)) < 1.0e-05 && std::abs(cos(M_PI)+1.0) < 1.0e-05);
KALDI_ASSERT(std::abs(sin(M_2PI)) < 1.0e-05 && std::abs(cos(M_2PI)-1.0) < 1.0e-05);
KALDI_ASSERT(std::abs(sin(exp(M_LOG_2PI))) < 1.0e-05);
KALDI_ASSERT(std::abs(cos(exp(M_LOG_2PI)) - 1.0) < 1.0e-05);
}
void UnitTestAssertFunc() { // Testing the Assert*** functions
using namespace kaldi;
for (int i = 1; i < 100; i++) {
float f1 = rand() % 10000 + 1, f2 = rand() % 20 + 1;
float tmp1 = f1 * f2;
float tmp2 = (1/f1 + 1/f2);
float tmp3 = (1/(f1 - 1.0) + 1/(f2 - 1.0));
float tmp4 = (1/(f1 + 1.0) + 1/(f2 + 1.0));
float add = f1 + f2;
float addeql = tmp1 * tmp2, addgeq = tmp1 * tmp3, addleq = tmp1 * tmp4;
float thresh = 0.00001;
AssertEqual(add, addeql, thresh); // test AssertEqual()
AssertGeq(addgeq, add, thresh); // test AssertGeq()
AssertLeq(addleq, add, thresh); // test AssertLeq()
}
}
template<class I> void UnitTestFactorizeTpl() {
for (int p= 0; p < 100; p++) {
I m = rand() % 100000;
if (m >= 1) {
std::vector<I> factors;
Factorize(m, &factors);
I m2 = 1;
for (size_t i = 0; i < factors.size(); i++) {
m2 *= factors[i];
if (i+1 < factors.size())
KALDI_ASSERT(factors[i+1] >= factors[i]); // check sorted.
}
KALDI_ASSERT(m2 == m); // check correctness.
}
}
}
void UnitTestFactorize() {
UnitTestFactorizeTpl<int>();
UnitTestFactorizeTpl<size_t>();
UnitTestFactorizeTpl<unsigned short>();
}
void UnitTestApproxEqual() {
KALDI_ASSERT(ApproxEqual(1.0, 1.00001));
KALDI_ASSERT(ApproxEqual(1.0, 1.00001, 0.001));
KALDI_ASSERT(!ApproxEqual(1.0, 1.1));
KALDI_ASSERT(!ApproxEqual(1.0, 1.01, 0.001));
KALDI_ASSERT(!ApproxEqual(1.0, 0.0));
KALDI_ASSERT(ApproxEqual(0.0, 0.0));
KALDI_ASSERT(!ApproxEqual(0.0, 0.00001));
KALDI_ASSERT(!ApproxEqual(std::numeric_limits<float>::infinity(),
-std::numeric_limits<float>::infinity()));
KALDI_ASSERT(ApproxEqual(std::numeric_limits<float>::infinity(),
std::numeric_limits<float>::infinity()));
KALDI_ASSERT(ApproxEqual(-std::numeric_limits<float>::infinity(),
-std::numeric_limits<float>::infinity()));
KALDI_ASSERT(!ApproxEqual(-std::numeric_limits<float>::infinity(),
0));
KALDI_ASSERT(!ApproxEqual(-std::numeric_limits<float>::infinity(),
1));
}
} // end namespace kaldi.
int main() {
using namespace kaldi;
UnitTestApproxEqual();
UnitTestGcd();
UnitTestFactorize();
UnitTestDefines();
UnitTestLogAddSub();
UnitTestRand();
UnitTestAssertFunc();
UnitTestRoundUpToNearestPowerOfTwo();
}


@ -1,107 +0,0 @@
// base/kaldi-math.cc
// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian;
// Saarland University; Jan Silovsky
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "base/kaldi-math.h"
namespace kaldi {
// These routines are tested in matrix/matrix-test.cc
int32 RoundUpToNearestPowerOfTwo(int32 n) {
KALDI_ASSERT(n > 0);
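// Decrement, then OR in right-shifted copies so that every bit below the
// highest set bit becomes 1; adding 1 afterwards yields the next power of
// two. E.g. n = 257: 256 = 0b100000000 smears to 511, and 511 + 1 = 512.
// Exact powers of two map to themselves (the initial n-- ensures this).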
n--;
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
return n+1;
}
bool WithProb(BaseFloat prob) {
KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0,
// but we allow slightly larger values that could arise from roundoff in
// previous calculations.
KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128);
if (prob == 0) return false;
else if (prob == 1.0) return true;
else if (prob * RAND_MAX < 128.0) {
// prob is very small but nonzero, and the "main algorithm"
// wouldn't work that well. So: with probability 1/128, we
// return WithProb (prob * 128), else return false.
if (rand() < RAND_MAX / 128) { // i.e. with probability 1/128...
// Note: we know that prob * 128.0 < 1.0, because
// we asserted RAND_MAX > 128 * 128.
return WithProb(prob * 128.0);
} else {
return false;
}
} else {
return (rand() < ((RAND_MAX + static_cast<BaseFloat>(1.0)) * prob));
}
}
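// Sanity check for the small-prob branch: by induction P(WithProb(q)) == q,
// so P(true) = (1/128) * P(WithProb(128 * prob)) = (1/128) * 128 * prob = prob.
// The recursion terminates because prob grows by a factor of 128 per level
// until it reaches the main branch.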
int32 RandInt(int32 min_val, int32 max_val) { // This is not exact.
KALDI_ASSERT(max_val >= min_val);
if (max_val == min_val) return min_val;
#ifdef _MSC_VER
// RAND_MAX is quite small on Windows -> may need to handle larger numbers.
if (RAND_MAX > (max_val-min_val)*8) {
// *8 to avoid large inaccuracies in probability, from the modulus...
return min_val + ((unsigned int)rand() % (unsigned int)(max_val+1-min_val));
} else {
if ((unsigned int)(RAND_MAX*RAND_MAX) > (unsigned int)((max_val+1-min_val)*8)) {
// *8 to avoid inaccuracies in probability, from the modulus...
return min_val + ( (unsigned int)( (rand()+RAND_MAX*rand()))
% (unsigned int)(max_val+1-min_val));
} else {
throw std::runtime_error(std::string()
+"rand_int failed because we do not support "
+"such large random numbers. "
+"(Extend this function).");
}
}
#else
return min_val +
(static_cast<int32>(rand()) % (int32)(max_val+1-min_val));
#endif
}
// Returns poisson-distributed random number.
// Take care: this takes time proportional
// to lambda. Faster algorithms exist but are more complex.
int32 RandPoisson(float lambda) {
// Knuth's algorithm.
KALDI_ASSERT(lambda >= 0);
float L = expf(-lambda), p = 1.0;
int32 k = 0;
do {
k++;
float u = RandUniform();
p *= u;
} while (p > L);
return k-1;
}
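// [Editor's note: explanation added for illustration; not part of the original file.]
// Knuth's algorithm multiplies independent Uniform(0,1) draws until the product
// drops below exp(-lambda); the number of draws minus one is then distributed
// as Poisson(lambda). The expected iteration count is lambda + 1, which is why
// the comment above warns that the running time is proportional to lambda.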
} // end namespace kaldi

View File

@ -1,309 +0,0 @@
// base/kaldi-math.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian;
// Jan Silovsky; Saarland University
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_MATH_H_
#define KALDI_BASE_KALDI_MATH_H_ 1
#ifdef _MSC_VER
#include <float.h>
#endif
#include <cmath>
#include <limits>
#include <vector>
#include "base/kaldi-types.h"
#include "base/kaldi-common.h"
#ifndef DBL_EPSILON
#define DBL_EPSILON 2.2204460492503131e-16
#endif
#ifndef FLT_EPSILON
#define FLT_EPSILON 1.19209290e-7f
#endif
#ifndef M_PI
# define M_PI 3.1415926535897932384626433832795
#endif
#ifndef M_SQRT2
# define M_SQRT2 1.4142135623730950488016887
#endif
#ifndef M_2PI
# define M_2PI 6.283185307179586476925286766559005
#endif
#ifndef M_SQRT1_2
# define M_SQRT1_2 0.7071067811865475244008443621048490
#endif
#ifndef M_LOG_2PI
#define M_LOG_2PI 1.8378770664093454835606594728112
#endif
#ifndef M_LN2
#define M_LN2 0.693147180559945309417232121458
#endif
#ifdef _MSC_VER
# define KALDI_ISNAN _isnan
# define KALDI_ISINF(x) (!_isnan(x) && _isnan(x-x))
# define KALDI_ISFINITE _finite
#else
# define KALDI_ISNAN std::isnan
# define KALDI_ISINF std::isinf
# define KALDI_ISFINITE(x) std::isfinite(x)
#endif
#if !defined(KALDI_SQR)
# define KALDI_SQR(x) ((x) * (x))
#endif
namespace kaldi {
// -infinity
const float kLogZeroFloat = -std::numeric_limits<float>::infinity();
const double kLogZeroDouble = -std::numeric_limits<double>::infinity();
const BaseFloat kBaseLogZero = -std::numeric_limits<BaseFloat>::infinity();
// Big numbers
const BaseFloat kBaseFloatMax = std::numeric_limits<BaseFloat>::max();
// Returns a random integer between min and max inclusive.
int32 RandInt(int32 min, int32 max);
bool WithProb(BaseFloat prob); // Returns true with probability "prob",
// with 0 <= prob <= 1 [we check this].
// Internally calls rand(). This function is carefully implemented so
// that it should work even if prob is very small.
inline float RandUniform() { // random intended to be strictly between 0 and 1.
return static_cast<float>((rand() + 1.0) / (RAND_MAX+2.0));
}
inline float RandGauss() {
return static_cast<float>(sqrt (-2 * std::log(RandUniform()))
* cos(2*M_PI*RandUniform()));
}
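// [Editor's note: added for clarity; not part of the original file.]
// RandGauss() is the Box-Muller transform: for independent u1, u2 ~ Uniform(0,1),
// sqrt(-2 ln u1) * cos(2 pi u2) is distributed as a standard normal N(0, 1).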
// Returns poisson-distributed random number. Uses Knuth's algorithm.
// Take care: this takes time proportional
// to lambda. Faster algorithms exist but are more complex.
int32 RandPoisson(float lambda);
// Also see Vector<float,double>::RandCategorical().
// This is a randomized pruning mechanism that preserves expectations,
// that we typically use to prune posteriors.
template<class Float>
inline Float RandPrune(Float post, BaseFloat prune_thresh) {
KALDI_ASSERT(prune_thresh >= 0.0);
if (post == 0.0 || std::abs(post) >= prune_thresh)
return post;
return (post >= 0 ? 1.0 : -1.0) *
(RandUniform() <= fabs(post)/prune_thresh ? prune_thresh : 0.0);
}
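// [Editor's note: derivation added for illustration; not part of the original file.]
// RandPrune preserves expectations: for 0 < |post| < prune_thresh the output is
// sign(post) * prune_thresh with probability |post| / prune_thresh and 0 otherwise, so
//   E[output] = sign(post) * prune_thresh * |post| / prune_thresh = post.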
static const double kMinLogDiffDouble = std::log(DBL_EPSILON); // negative!
static const float kMinLogDiffFloat = std::log(FLT_EPSILON); // negative!
inline double LogAdd(double x, double y) {
double diff;
if (x < y) {
diff = x - y;
x = y;
} else {
diff = y - x;
}
// diff is negative. x is now the larger one.
if (diff >= kMinLogDiffDouble) {
double res;
#ifdef _MSC_VER
res = x + log(1.0 + exp(diff));
#else
res = x + log1p(exp(diff));
#endif
return res;
} else {
return x; // return the larger one.
}
}
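// [Editor's note: added for clarity; not part of the original file.]
// LogAdd computes log(exp(x) + exp(y)) through the identity
//   log(exp(x) + exp(y)) = x + log1p(exp(y - x))   for x >= y,
// which never exponentiates a positive number and therefore cannot overflow.
// When y - x < log(epsilon), exp(y - x) cannot change the result at machine
// precision, so the larger argument is returned directly.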
inline float LogAdd(float x, float y) {
float diff;
if (x < y) {
diff = x - y;
x = y;
} else {
diff = y - x;
}
// diff is negative. x is now the larger one.
if (diff >= kMinLogDiffFloat) {
float res;
#ifdef _MSC_VER
res = x + logf(1.0 + expf(diff));
#else
res = x + log1pf(expf(diff));
#endif
return res;
} else {
return x; // return the larger one.
}
}
// returns log(exp(x) - exp(y)).
inline double LogSub(double x, double y) {
if (y >= x) { // y == x returns log(0) = -infinity; y > x throws.
if (y == x)
return kLogZeroDouble;
else
KALDI_ERR << "Cannot subtract a larger from a smaller number.";
}
double diff = y - x; // Will be negative.
double res = x + log(1.0 - exp(diff));
// res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
if (KALDI_ISNAN(res))
return kLogZeroDouble;
return res;
}
// returns log(exp(x) - exp(y)).
inline float LogSub(float x, float y) {
if (y >= x) { // y == x returns log(0) = -infinity; y > x throws.
if (y == x)
return kLogZeroFloat;
else
KALDI_ERR << "Cannot subtract a larger from a smaller number.";
}
float diff = y - x; // Will be negative.
float res = x + logf(1.0 - expf(diff));
// res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
if (KALDI_ISNAN(res))
return kLogZeroFloat;
return res;
}
// return (a == b)
static inline bool ApproxEqual(float a, float b,
float relative_tolerance = 0.001) {
// a==b handles infinities.
if (a==b) return true;
float diff = std::abs(a-b);
if (diff == std::numeric_limits<float>::infinity()
|| diff!=diff) return false; // diff is +inf or nan.
return (diff <= relative_tolerance*(std::abs(a)+std::abs(b)));
}
// assert (a == b)
static inline void AssertEqual(float a, float b,
float relative_tolerance = 0.001) {
// a==b handles infinities.
KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance));
}
// assert (a>=b)
static inline void AssertGeq(float a, float b,
float relative_tolerance = 0.001) {
KALDI_ASSERT(a-b >= -relative_tolerance * (std::abs(a)+std::abs(b)));
}
// assert (a<=b)
static inline void AssertLeq(float a, float b,
float relative_tolerance = 0.001) {
KALDI_ASSERT(a-b <= relative_tolerance * (std::abs(a)+std::abs(b)));
}
// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0.
int32 RoundUpToNearestPowerOfTwo(int32 n);
template<class I> I Gcd(I m, I n) {
if (m == 0 || n == 0) {
if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors.
KALDI_ERR << "Undefined GCD since m = 0, n = 0.";
}
return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m));
// return absolute value of whichever is nonzero
}
// could use compile-time assertion
// but involves messing with complex template stuff.
KALDI_ASSERT(std::numeric_limits<I>::is_integer);
while (1) {
m %= n;
if (m == 0) return (n > 0 ? n : -n);
n %= m;
if (n == 0) return (m > 0 ? m : -m);
}
}
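// [Editor's note: worked example added for illustration; not part of the original file.]
// This is Euclid's algorithm with alternating remainders, e.g. Gcd(24, 36):
//   m = 24 % 36 = 24;  n = 36 % 24 = 12;  m = 24 % 12 = 0  ->  returns n = 12.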
template<class I> void Factorize(I m, std::vector<I> *factors) {
// Splits a number into its prime factors, in sorted order from
// least to greatest, with duplication. A very inefficient
// algorithm, which is mainly intended for use in the
// mixed-radix FFT computation (where we assume most factors
// are small).
KALDI_ASSERT(factors != NULL);
KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers.
factors->clear();
I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 };
// First try small factors.
for (I i = 0; i < 10; i++) {
if (m == 1) return; // We're done.
while (m % small_factors[i] == 0) {
m /= small_factors[i];
factors->push_back(small_factors[i]);
}
}
// Next try all odd numbers starting from 31.
for (I j = 31;; j += 2) {
if (m == 1) return;
while (m % j == 0) {
m /= j;
factors->push_back(j);
}
}
}
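// [Editor's note: worked example added for illustration; not part of the original file.]
// Factorize(360, &factors) yields {2, 2, 2, 3, 3, 5}: the small-factor loop
// strips 2^3, 3^2 and 5, leaving m == 1, so the odd-number loop never runs.
// Trial division by odd j >= 31 is sufficient because every smaller prime has
// already been divided out, so any odd j that divides m must itself be prime.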
inline double Hypot(double x, double y) { return hypot(x, y); }
inline float Hypot(float x, float y) { return hypotf(x, y); }
inline double Log1p(double x) { return log1p(x); }
inline float Log1p(float x) { return log1pf(x); }
} // namespace kaldi
#endif // KALDI_BASE_KALDI_MATH_H_

View File

@ -1,61 +0,0 @@
// base/kaldi-types.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_TYPES_H_
#define KALDI_BASE_KALDI_TYPES_H_ 1
namespace kaldi {
// TYPEDEFS ..................................................................
#if (KALDI_DOUBLEPRECISION != 0)
typedef double BaseFloat;
#else
typedef float BaseFloat;
#endif
}
#ifdef _MSC_VER
namespace kaldi {
typedef unsigned __int16 uint16;
typedef unsigned __int32 uint32;
typedef __int16 int16;
typedef __int32 int32;
typedef __int64 int64;
typedef unsigned __int64 uint64;
typedef float float32;
typedef double double64;
}
#else
// we can do this a different way if some platform
// we find in the future lacks stdint.h
#include <stdint.h>
namespace kaldi {
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef float float32;
typedef double double64;
} // end namespace kaldi
#endif
#endif // KALDI_BASE_KALDI_TYPES_H_

View File

@ -1,33 +0,0 @@
// base/kaldi-utils.cc
// Copyright 2009-2011 Karel Vesely; Yanmin Qian; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
std::string CharToString(const char &c) {
char buf[20];
if (std::isprint(c))
sprintf(buf, "\'%c\'", c);
else
sprintf(buf, "[character %d]", (int) c);
return (std::string) buf;
}
} // end namespace kaldi

View File

@ -1,133 +0,0 @@
// base/kaldi-utils.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
// Saarland University; Karel Vesely; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_UTILS_H_
#define KALDI_BASE_KALDI_UTILS_H_ 1
#include <limits>
#include <string>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661)
#define __restrict__
#endif
#ifdef HAVE_POSIX_MEMALIGN
# define KALDI_MEMALIGN(align, size, pp_orig) \
(!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL)
# define KALDI_MEMALIGN_FREE(x) free(x)
#elif defined(HAVE_MEMALIGN)
/* Some systems have memalign() but no declaration for it */
void * memalign(size_t align, size_t size);
# define KALDI_MEMALIGN(align, size, pp_orig) \
(*(pp_orig) = memalign(align, size))
# define KALDI_MEMALIGN_FREE(x) free(x)
#elif defined(_MSC_VER)
# define KALDI_MEMALIGN(align, size, pp_orig) \
(*(pp_orig) = _aligned_malloc(size, align))
# define KALDI_MEMALIGN_FREE(x) _aligned_free(x)
#else
#error Manual memory alignment is no longer supported
#endif
#ifdef __ICC
#pragma warning(disable: 383) // ICPC remark we don't want.
#pragma warning(disable: 810) // ICPC remark we don't want.
#pragma warning(disable: 981) // ICPC remark we don't want.
#pragma warning(disable: 1418) // ICPC remark we don't want.
#pragma warning(disable: 444) // ICPC remark we don't want.
#pragma warning(disable: 869) // ICPC remark we don't want.
#pragma warning(disable: 1287) // ICPC remark we don't want.
#pragma warning(disable: 279) // ICPC remark we don't want.
#endif
namespace kaldi {
// CharToString prints the character in a human-readable form, for debugging.
std::string CharToString(const char &c);
inline int MachineIsLittleEndian() {
int check = 1;
return (*reinterpret_cast<char*>(&check) != 0);
}
}
#define KALDI_SWAP8(a) { \
int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[7]; ((char*)&a)[7]=t;\
t = ((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[6]; ((char*)&a)[6]=t;\
t = ((char*)&a)[2]; ((char*)&a)[2]=((char*)&a)[5]; ((char*)&a)[5]=t;\
t = ((char*)&a)[3]; ((char*)&a)[3]=((char*)&a)[4]; ((char*)&a)[4]=t;}
#define KALDI_SWAP4(a) { \
int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[3]; ((char*)&a)[3]=t;\
t = ((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[2]; ((char*)&a)[2]=t;}
#define KALDI_SWAP2(a) { \
int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[1]; ((char*)&a)[1]=t;}
// Makes copy constructor and operator= private. Same as in compat.h of OpenFst
// toolkit. If using VS, for which this results in compilation errors, we
// do it differently.
#if defined(_MSC_VER)
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \
void operator = (const type&)
#else
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \
type(const type&); \
void operator = (const type&)
#endif
template<bool B> class KaldiCompileTimeAssert { };
template<> class KaldiCompileTimeAssert<true> {
public:
static inline void Check() { }
};
#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check()
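// [Editor's note: usage example added for illustration; not part of the original file.]
// Only the specialization KaldiCompileTimeAssert<true> defines Check(), so
//   KALDI_COMPILE_TIME_ASSERT(sizeof(int32) == 4);  // compiles
//   KALDI_COMPILE_TIME_ASSERT(sizeof(int32) == 8);  // error: Check() is not a member
// a false condition is rejected at compile time rather than at run time.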
#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \
KaldiCompileTimeAssert<std::numeric_limits<I>::is_specialized \
&& std::numeric_limits<I>::is_integer>::Check()
#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \
KaldiCompileTimeAssert<std::numeric_limits<F>::is_specialized \
&& !std::numeric_limits<F>::is_integer>::Check()
#ifdef _MSC_VER
#define KALDI_STRCASECMP _stricmp
#else
#define KALDI_STRCASECMP strcasecmp
#endif
#ifdef _MSC_VER
# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10);
#else
# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10);
#endif
#define KALDI_STRTOD(cur_cstr, end_cstr) strtod(cur_cstr, end_cstr)
#endif // KALDI_BASE_KALDI_UTILS_H_

File diff suppressed because it is too large. Load Diff

View File

@ -1,885 +0,0 @@
// TODO: This is a dup, we should get back to the shared one. But this one has some stuff the other doesn't.
//
// <copyright file="basetypes.old.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#ifndef _BASETYPES_
#define _BASETYPES_
// [kit]: seems SECURE_SCL=0 doesn't work - causes crashes in release mode
// there are some complaints along this line on the web
// so disabled for now
//
//// we have agreed that _SECURE_SCL is disabled for release builds
//// it would be super dangerous to mix projects where this is inconsistent
//// this is one way to detect possible mismatches
//#ifdef NDEBUG
//#if !defined(_CHECKED) && _SECURE_SCL != 0
//#error "_SECURE_SCL should be disabled for release builds"
//#endif
//#endif
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#endif
#pragma warning (push)
#pragma warning (disable: 4793) // caused by varargs
// disable certain parts of basetypes for wince compilation
#ifdef UNDER_CE
#define BASETYPES_NO_UNSAFECRTOVERLOAD // disable unsafe CRT overloads (safe functions don't exist in wince)
#define BASETYPES_NO_STRPRINTF // dependent functions here are not defined for wince
#endif
#ifndef OACR // dummies when we are not compiling under Office
#define OACR_WARNING_SUPPRESS(x, y)
#define OACR_WARNING_DISABLE(x, y)
#define OACR_WARNING_PUSH
#define OACR_WARNING_POP
#endif
#ifndef OACR_ASSUME // this seems to be a different one
#define OACR_ASSUME(x)
#endif
// following oacr warnings are not level1 or level2-security
// in the current stage we want to ignore those warnings
// if necessary this can be fixed at a later stage
// not a bug
OACR_WARNING_DISABLE(EXC_NOT_CAUGHT_BY_REFERENCE, "Not indicating a bug or security threat.");
OACR_WARNING_DISABLE(LOCALDECLHIDESLOCAL, "Not indicating a bug or security threat.");
// not reviewed
OACR_WARNING_DISABLE(MISSING_OVERRIDE, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(EMPTY_DTOR, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(DEREF_NULL_PTR, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(INVALID_PARAM_VALUE_1, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(VIRTUAL_CALL_IN_CTOR, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_security.");
// determine WIN32 api calling convention
// it seems this is normally stdcall?? but when compiling as /clr:pure or /clr:Safe
// this is not supported, so in this case, we need to use the 'default' calling convention
// TODO: can we reuse the #define of WINAPI??
#ifdef _WIN32
#ifdef _M_CEE_SAFE
#define WINAPI_CC __clrcall
#elif _M_CEE
#define WINAPI_CC __clrcall
#else
#define WINAPI_CC __stdcall
#endif
#endif
// fix some warnings in STL
#if !defined(_DEBUG) || defined(_CHECKED) || defined(_MANAGED)
#pragma warning(disable : 4702) // unreachable code
#endif
#include <stdarg.h>
#include <stdio.h>
#include <string.h> // include here because we redefine some names later
#include <string>
#include <vector>
#include <cmath> // for HUGE_VAL
#include <assert.h>
#include <map>
#ifdef __windows__
#include <windows.h> // for CRITICAL_SECTION
#include <strsafe.h> // for strbcpy() etc templates
#endif
#if __unix__
#include <strings.h>
#include <chrono>
#include <thread>
#include <unistd.h>
#include <sys/stat.h>
#include <dlfcn.h>
typedef unsigned char byte;
#endif
#pragma push_macro("STRSAFE_NO_DEPRECATE")
#define STRSAFE_NO_DEPRECATE // deprecation managed elsewhere, not by strsafe
#pragma pop_macro("STRSAFE_NO_DEPRECATE")
// CRT error handling seems to not be included in wince headers
// so we define our own imports
#ifdef UNDER_CE
// TODO: is this true - is GetLastError == errno?? - also this adds a dependency on windows.h
#define errno GetLastError()
// strerror(x) - x here is normally errno - TODO: make this return errno as a string
#define strerror(x) "strerror error but can't report error number sorry!"
#endif
#ifndef __in // dummies for sal annotations if compiler does not support it
#define __in
#define __inout_z
#define __in_count(x)
#define __inout_cap(x)
#define __inout_cap_c(x)
#endif
#ifndef __out_z_cap // non-VS2005 annotations
#define __out_cap(x)
#define __out_z_cap(x)
#define __out_cap_c(x)
#endif
#ifndef __override // and some more non-std extensions required by Office
#define __override virtual
#endif
// disable warnings for which fixing would make code less readable
#pragma warning(disable : 4290) // throw() declaration ignored
#pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data
// ----------------------------------------------------------------------------
// basic macros
// ----------------------------------------------------------------------------
#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
#ifndef assert
#ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h)
extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp);
#define assert(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0))
#else
#define assert assert
#endif
#endif
using namespace std;
// ----------------------------------------------------------------------------
// basic data types
// ----------------------------------------------------------------------------
namespace msra { namespace basetypes {
// class ARRAY -- std::vector with array-bounds checking
// VS 2008 and above do this, so there is no longer a need for this.
template<class _ElemType>
class ARRAY : public std::vector<_ElemType>
{
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
static void throwOutOfBounds()
{ // (moved to separate function hoping to keep inlined code smaller)
OACR_WARNING_PUSH;
OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewed OK. Special trick below to show a message when assertion fails"
"[rogeryu 2006/03/24]");
OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]");
assert (("ARRAY::operator[] out of bounds", false));
OACR_WARNING_POP;
}
#endif
public:
ARRAY() : std::vector<_ElemType> () { }
ARRAY (int size) : std::vector<_ElemType> (size) { }
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
// ------------------------------------------------------------------------
// operator[]: with array-bounds checking
// ------------------------------------------------------------------------
inline _ElemType & operator[] (int index) // writing
{
if (index < 0 || index >= size()) throwOutOfBounds();
return (*(std::vector<_ElemType>*) this)[index];
}
// ------------------------------------------------------------------------
inline const _ElemType & operator[] (int index) const // reading
{
if (index < 0 || index >= size()) throwOutOfBounds();
return (*(std::vector<_ElemType>*) this)[index];
}
#endif
// ------------------------------------------------------------------------
// size(): same as base class, but returning an 'int' instead of 'size_t'
// to allow for more readable code
// ------------------------------------------------------------------------
inline int size() const
{
size_t siz = ((std::vector<_ElemType>*) this)->size();
return (int) siz;
}
};
// overload swap(), otherwise we'd fall back to 3-way assignment & possibly throw
template<class _T> inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw()
{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); }
// class fixed_vector - non-resizable vector
template<class _T> class fixed_vector
{
_T * p; // pointer array
size_t n; // number of elements
void check (int index) const { index; assert (index >= 0 && (size_t) index < n); }
void check (size_t index) const { index; assert (index < n); }
// ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from.
//fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; }
public:
fixed_vector() : n (0), p (NULL) { }
void resize (int size) { clear(); if (size > 0) { p = new _T[size]; n = size; } }
void resize (size_t size) { clear(); if (size > 0) { p = new _T[size]; n = size; } }
fixed_vector (int size) : n (size), p (size > 0 ? new _T[size] : NULL) { }
fixed_vector (size_t size) : n ((int) size), p (size > 0 ? new _T[size] : NULL) { }
~fixed_vector() { delete[] p; }
inline int size() const { return (int) n; }
inline int capacity() const { return (int) n; }
inline bool empty() const { return n == 0; }
void clear() { delete[] p; p = NULL; n = 0; }
_T * begin() { return p; }
const _T * begin() const { return p; }
_T * end() { return p + n; } // note: when n == 0, p is NULL, so the result is NULL
inline _T & operator[] (int index) { check (index); return p[index]; } // writing
inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading
inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing
inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading
inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; }
inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); }
template<class VECTOR> fixed_vector & operator= (const VECTOR & other)
{
int other_n = (int) other.size();
fixed_vector tmp (other_n);
for (int k = 0; k < other_n; k++) tmp[k] = other[k];
swap (tmp);
return *this;
}
fixed_vector & operator= (const fixed_vector & other)
{
int other_n = (int) other.size();
fixed_vector tmp (other_n);
for (int k = 0; k < other_n; k++) tmp[k] = other[k];
swap (tmp);
return *this;
}
template<class VECTOR> fixed_vector (const VECTOR & other) : n (0), p (NULL) { *this = other; }
};
template<class _T> inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) throw() { L.swap (R); }
// class matrix - simple fixed-size 2-dimensional array, access elements as m(i,j)
// stored as concatenation of rows
template<class T> class matrix : fixed_vector<T>
{
size_t numcols;
size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; }
public:
typedef T elemtype;
matrix() : numcols (0) {}
matrix (size_t n, size_t m) { resize (n, m); }
void resize (size_t n, size_t m) { numcols = m; fixed_vector<T>::resize (n * m); }
size_t cols() const { return numcols; }
size_t rows() const { return empty() ? 0 : size() / cols(); }
size_t size() const { return fixed_vector<T>::size(); } // use this for reading and writing... not nice!
bool empty() const { return fixed_vector<T>::empty(); }
T & operator() (size_t i, size_t j) { return (*this)[locate(i,j)]; }
const T & operator() (size_t i, size_t j) const { return (*this)[locate(i,j)]; }
void swap (matrix & other) throw() { std::swap (numcols, other.numcols); fixed_vector<T>::swap (other); }
};
template<class _T> inline void swap (matrix<_T> & L, matrix<_T> & R) throw() { L.swap (R); }
// TODO: get rid of these
typedef std::string STRING;
typedef std::wstring WSTRING;
#ifdef __unix__
typedef wchar_t TCHAR;
#endif
typedef std::basic_string<TCHAR> TSTRING; // wide/narrow character string
// derive from this for noncopyable classes (will get you private unimplemented copy constructors)
// ... TODO: change all of basetypes classes/structs to use this
class noncopyable
{
noncopyable & operator= (const noncopyable &);
noncopyable (const noncopyable &);
public:
noncopyable(){}
};
struct throw_hr
{
const char * msg;
inline throw_hr (const char * msg = NULL) : msg (msg) {}
};
// back-mapping of exceptions to HRESULT codes
// usage pattern: HRESULT COM_function (...) { try { exception-based function body } catch_hr_return; }
#define catch_hr_return \
catch (const bad_alloc &) { return E_OUTOFMEMORY; } \
catch (const bad_hr & e) { return e.hr; } \
catch (const invalid_argument &) { return E_INVALIDARG; } \
catch (const runtime_error &) { return E_FAIL; } \
catch (const logic_error &) { return E_UNEXPECTED; } \
catch (const exception &) { return E_FAIL; } \
return S_OK;
};}; // namespace
#ifndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions
// ----------------------------------------------------------------------------
// overloads for "unsafe" CRT functions used in our code base
// ----------------------------------------------------------------------------
// strlen/wcslen overloads for fixed-buffer size
// Note: Be careful when fixing bugs related to these templates.
// In all attempted experiments, it seems all 6 definitions
// below are required to get the correct behaviour. Be very careful
// not to delete anything without testing that cases 5 & 6 still have "size" deduced.
// 1. char *
// 2. char * const
// 3. const char *
// 4. const char * const
// 5. char (&) [size]
// 6. const char (&) [size]
// the following includes all headers that use strlen() and fail because of the mapping below
// to find those, change #define strlen strlen_ to something invalid e.g. strlen::strlen_
#if _MSC_VER >= 1600 // VS 2010 --TODO: fix this by correct include order instead
#include <intrin.h> // defines strlen() as an intrinsic in VS 2010
#include <typeinfo> // uses strlen()
#include <xlocale> // uses strlen()
#endif
#define strlen strlen_
template<typename _T>
size_t strlen_(_T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); } // should never be called, but needed to keep the compiler happy
template<typename _T> inline size_t strlen_(const _T &s) { return strnlen(static_cast<const char *>(s), SIZE_MAX); }
template<> inline size_t strlen_(char * &s) { return strnlen(s, SIZE_MAX); }
template<> inline size_t strlen_(const char * &s) { return strnlen(s, SIZE_MAX); }
template<size_t n> inline size_t strlen_(const char (&s)[n]) { return (strnlen(s, n)); }
template<size_t n> inline size_t strlen_(char (&s)[n]) { return (strnlen(s, n)); }
#define wcslen wcslen_
template<typename _T>
size_t wcslen_(_T &s) { return wcsnlen_s(static_cast<const wchar_t *>(s), SIZE_MAX); } // should never be called, but needed to keep the compiler happy
template<> inline size_t wcslen_(wchar_t * &s) { return wcsnlen(s, SIZE_MAX); }
template<> inline size_t wcslen_(const wchar_t * &s) { return wcsnlen(s, SIZE_MAX); }
template<size_t n> inline size_t wcslen_(const wchar_t (&s)[n]) { return (wcsnlen(s, n)); }
template<size_t n> inline size_t wcslen_(wchar_t (&s)[n]) { return (wcsnlen(s, n)); }
// xscanf wrappers -- one overload for each actual use case in our code base
// (forward to the _s variants; calling the plain names here would recurse into these same overloads)
static inline int sscanf (const char * buf, const char * format, int * i1) { return sscanf_s (buf, format, i1); }
static inline int sscanf (const char * buf, const char * format, int * i1, int * i2) { return sscanf_s (buf, format, i1, i2); }
static inline int sscanf (const char * buf, const char * format, int * i1, int * i2, int * i3) { return sscanf_s (buf, format, i1, i2, i3); }
static inline int sscanf (const char * buf, const char * format, double * f1) { return sscanf_s (buf, format, f1); }
static inline int swscanf (const wchar_t * buf, const wchar_t * format, int * i1) { return swscanf_s (buf, format, i1); }
static inline int fscanf (FILE * file, const char * format, float * f1) { return fscanf_s (file, format, f1); }
// cacpy -- fixed-size character array (same as original strncpy (dst, src, sizeof (dst)))
// NOTE: THIS FUNCTION HAS NEVER BEEN TESTED. REMOVE THIS COMMENT ONCE IT HAS.
template<class T, size_t n> static inline void cacpy (T (&dst)[n], const T * src)
{ for (size_t i = 0; i < n; i++) { dst[i] = *src; if (*src) src++; } }
// { return strncpy (dst, src, n); } // using original C std lib function
#endif
// ----------------------------------------------------------------------------
// frequently missing string functions
// ----------------------------------------------------------------------------
namespace msra { namespace strfun {
#ifndef BASETYPES_NO_STRPRINTF
template<typename C> struct basic_cstring : public std::basic_string<C>
{
template<typename S> basic_cstring (S p) : std::basic_string<C> (p) { }
operator const C * () const { return this->c_str(); }
};
typedef basic_cstring<char> cstring;
typedef basic_cstring<wchar_t> wcstring;
// [w]strprintf() -- like sprintf() but resulting in a C++ string
template<class _T> struct _strprintf : public std::basic_string<_T>
{ // works for both wchar_t* and char*
_strprintf (const _T * format, ...)
{
va_list args; va_start (args, format); // varargs stuff
size_t n = _cprintf (format, args); // num chars excl. '\0'
const int FIXBUF_SIZE = 128; // incl. '\0'
if (n < FIXBUF_SIZE)
{
_T fixbuf[FIXBUF_SIZE];
this->assign (_sprintf (&fixbuf[0], sizeof (fixbuf)/sizeof (*fixbuf), format, args), n);
}
else // too long: use dynamically allocated variable-size buffer
{
std::vector<_T> varbuf (n + 1); // incl. '\0'
this->assign (_sprintf (&varbuf[0], varbuf.size(), format, args), n);
}
}
private:
// helpers
inline size_t _cprintf (const wchar_t * format, va_list args) { return _vscwprintf (format, args); }
inline size_t _cprintf (const char * format, va_list args) { return _vscprintf (format, args); }
inline const wchar_t * _sprintf (wchar_t * buf, size_t bufsiz, const wchar_t * format, va_list args) { vswprintf_s (buf, bufsiz, format, args); return buf; }
inline const char * _sprintf ( char * buf, size_t bufsiz, const char * format, va_list args) { vsprintf_s (buf, bufsiz, format, args); return buf; }
};
typedef strfun::_strprintf<char> strprintf; // char version
typedef strfun::_strprintf<wchar_t> wstrprintf; // wchar_t version
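// [Editor's note: usage sketch added for illustration; not part of the original
// file; the format strings and variables are hypothetical.]
//   std::string  msg  = msra::strfun::strprintf ("processed %d of %d", i, n);
//   std::wstring wmsg = msra::strfun::wstrprintf (L"file %ls", path.c_str());
// The result derives from std::basic_string, so it can be passed anywhere a
// string is expected; results up to 127 characters use a stack buffer, longer
// ones fall back to a heap-allocated vector.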
#endif
//http://www.nanobit.net/putty/doxy/PUTTY_8H-source.html
#ifndef CP_UTF8
#define CP_UTF8 65001
#endif
// string-encoding conversion functions
#ifdef _WIN32
struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8
{
size_t len = p.length();
if (len == 0) { return;} // empty string
msra::basetypes::fixed_vector<char> buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars
// ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify
std::fill (buf.begin (), buf.end (), 0);
int rc = WideCharToMultiByte (CP_UTF8, 0, p.c_str(), (int) len,
&buf[0], (int) buf.size(), NULL, NULL);
if (rc == 0) throw std::runtime_error ("WideCharToMultiByte");
(*(std::string*)this) = &buf[0];
}};
struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
{
size_t len = p.length();
if (len == 0) { return;} // empty string
msra::basetypes::fixed_vector<wchar_t> buf (len + 1);
// ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify
std::fill (buf.begin (), buf.end (), (wchar_t) 0);
int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len,
&buf[0], (int) buf.size());
if (rc == 0) throw std::runtime_error ("MultiByteToWideChar");
assert (rc < buf.size ());
(*(std::wstring*)this) = &buf[0];
}};
#endif
#pragma warning(push)
#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs)
static inline std::string wcstombs (const std::wstring & p) // output: MBCS
{
size_t len = p.length();
msra::basetypes::fixed_vector<char> buf (2 * len + 1); // max: 1 wchar => 2 mb chars
std::fill (buf.begin (), buf.end (), 0);
::wcstombs (&buf[0], p.c_str(), 2 * len + 1);
return std::string (&buf[0]);
}
static inline std::wstring mbstowcs (const std::string & p) // input: MBCS
{
size_t len = p.length();
msra::basetypes::fixed_vector<wchar_t> buf (len + 1); // max: >1 mb chars => 1 wchar
std::fill (buf.begin (), buf.end (), (wchar_t) 0);
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
::mbstowcs (&buf[0], p.c_str(), len + 1);
return std::wstring (&buf[0]);
}
#pragma warning(pop)
static inline std::string utf8 (const std::wstring & p) { return msra::strfun::wcstombs (p.c_str()); } // output: UTF-8... not really
static inline std::wstring utf16 (const std::string & p) { return msra::strfun::mbstowcs(p.c_str()); } // input: UTF-8... not really
// split and join -- tokenize a string like strtok() would, join() strings together
template<class _T> static inline std::vector<std::basic_string<_T>> split (const std::basic_string<_T> & s, const _T * delim)
{
std::vector<std::basic_string<_T>> res;
for (size_t st = s.find_first_not_of (delim); st != std::basic_string<_T>::npos; )
{
size_t en = s.find_first_of (delim, st +1);
if (en == std::basic_string<_T>::npos) en = s.length();
res.push_back (s.substr (st, en-st));
st = s.find_first_not_of (delim, en +1); // may exceed
}
return res;
}
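// [Editor's note: behavior example added for illustration; not part of the original file.]
//   split (std::string ("a  b,c"), " ,")  ->  {"a", "b", "c"}
// Runs of delimiter characters are collapsed, so empty tokens are never produced;
// join() below is only an exact inverse when no token contains a delimiter.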
template<class _T> static inline std::basic_string<_T> join (const std::vector<std::basic_string<_T>> & a, const _T * delim)
{
std::basic_string<_T> res;
for (int i = 0; i < (int) a.size(); i++)
{
if (i > 0) res.append (delim);
res.append (a[i]);
}
return res;
}
#ifdef _WIN32
// parsing strings to numbers
static inline int toint (const wchar_t * s)
{
return _wtoi (s); // ... TODO: check it
}
#endif
static inline int toint (const char * s)
{
return atoi (s); // ... TODO: check it
}
static inline int toint (const std::wstring & s) { return toint (s.c_str()); }
static inline double todouble (const char * s)
{
char * ep; // will be set to point to first character that failed parsing
double value = strtod (s, &ep);
if (*s == 0 || *ep != 0)
throw std::runtime_error ("todouble: invalid input string");
return value;
}
// TODO: merge this with todouble(const char*) above
static inline double todouble (const std::string & s)
{
s.size(); // just used to remove the unreferenced warning
double value = 0.0;
// stod supposedly exists in VS2010, but some folks have compilation errors
// If this causes errors again, change the #if into the respective one for VS 2010.
#if _MSC_VER > 1400 // VS 2010+
size_t idx = 0;
value = std::stod (s, &idx);
if (idx != s.length()) throw std::runtime_error ("todouble: invalid input string");
#else
char *ep = 0; // will be updated by strtod to point to first character that failed parsing
value = strtod (s.c_str(), &ep);
// strtod documentation says ep points to first unconverted character OR
// return value will be +/- HUGE_VAL for overflow/underflow
if (ep != s.c_str() + s.length() || value == HUGE_VAL || value == -HUGE_VAL)
throw std::runtime_error ("todouble: invalid input string");
#endif
return value;
}
static inline double todouble (const std::wstring & s)
{
wchar_t * endptr;
double value = wcstod (s.c_str(), &endptr);
if (*endptr) throw std::runtime_error ("todouble: invalid input string");
return value;
}
// ----------------------------------------------------------------------------
// tokenizer -- utility for white-space tokenizing strings in a character buffer
// This simple class just breaks a string, but does not own the string buffer.
// ----------------------------------------------------------------------------
class tokenizer : public std::vector<char*>
{
const char * delim;
public:
tokenizer (const char * delim, size_t cap) : delim (delim) { reserve (cap); }
// Usage: tokenizer tokens (delim, capacity); tokens = buf; tokens.size(), tokens[i]
void operator= (char * buf)
{
resize (0);
// strtok_s not available on all platforms - so back off to strtok on those
#ifdef strtok_s
char * context; // for strtok_s()
for (char * p = strtok_s (buf, delim, &context); p; p = strtok_s (NULL, delim, &context))
push_back (p);
#else
for (char * p = strtok (buf, delim); p; p = strtok (NULL, delim))
push_back (p);
#endif
}
};
};}; // namespace
static inline msra::strfun::cstring charpath (const std::wstring & p)
{
#ifdef _WIN32
return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(p);
#else // old version, delete once we know it works
size_t len = p.length();
std::vector<char> buf(2 * len + 1, 0); // max: 1 wchar => 2 mb chars
::wcstombs(buf.data(), p.c_str(), 2 * len + 1);
return msra::strfun::cstring (&buf[0]);
#endif
}
static inline FILE* _wfopen (const wchar_t * path, const wchar_t * mode) { return fopen(charpath(path), charpath(mode)); }
static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono::milliseconds (ms)); }
// ----------------------------------------------------------------------------
// wrappers for some basic types (files, handles, timer)
// ----------------------------------------------------------------------------
namespace msra { namespace basetypes {
// FILE* with auto-close; use auto_file_ptr instead of FILE*.
// Warning: do not pass an auto_file_ptr to a function that calls fclose(),
// except for fclose() itself.
class auto_file_ptr
{
FILE * f;
FILE * operator= (auto_file_ptr &); // can't ref-count: no assignment
auto_file_ptr (auto_file_ptr &);
// implicit close (destructor, assignment): we ignore error
void close() throw() { if (f) try { if (f != stdin && f != stdout && f != stderr) ::fclose (f); } catch (...) { } f = NULL; }
void openfailed (const std::string & path) { throw std::runtime_error ("auto_file_ptr: error opening file '" + path + "': " + strerror (errno)); }
protected:
friend int fclose (auto_file_ptr&); // explicit close (note: may fail)
int fclose() { int rc = ::fclose (f); if (rc == 0) f = NULL; return rc; }
public:
auto_file_ptr() : f (NULL) { }
~auto_file_ptr() { close(); }
auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); }
auto_file_ptr (const wchar_t * wpath, const char * mode) { f = _wfopen (wpath, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (wpath)); }
FILE * operator= (FILE * other) { close(); f = other; return f; }
auto_file_ptr (FILE * other) : f (other) { }
operator FILE * () const { return f; }
FILE * operator->() const { return f; }
void swap (auto_file_ptr & other) throw() { std::swap (f, other.f); }
};
inline int fclose (auto_file_ptr & af) { return af.fclose(); }
};};
namespace msra { namespace files {
// ----------------------------------------------------------------------------
// textreader -- simple reader for text files --we need this all the time!
// Currently reads 8-bit files, but can return as wstring, in which case
// they are interpreted as UTF-8 (without BOM).
// Note: Not suitable for pipes or typed input due to readahead (fixable if needed).
// ----------------------------------------------------------------------------
class textreader
{
msra::basetypes::auto_file_ptr f;
std::vector<char> buf; // read buffer (will only grow, never shrink)
int ch; // next character (we need to read ahead by one...)
char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; }
public:
textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); }
operator bool() const { return ch != EOF; } // true if still a line to read
std::string getline() // get and consume the next line
{
if (ch == EOF) throw std::logic_error ("textreader: attempted to read beyond EOF");
assert (buf.empty());
// get all line's characters --we recognize UNIX (LF), DOS (CRLF), and Mac (CR) convention
while (ch != EOF && ch != '\n' && ch != '\r') buf.push_back (getch());
if (ch != EOF && getch() == '\r' && ch == '\n') getch(); // consume EOLN char
std::string line (buf.begin(), buf.end());
buf.clear();
return line;
}
std::wstring wgetline() { return msra::strfun::utf16 (getline()); }
};
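// [Editor's note: usage sketch added for illustration; not part of the original
// file; the file name is hypothetical.]
//   msra::files::textreader reader (L"input.txt");
//   while (reader)                              // true while a line remains
//   {
//       std::string line = reader.getline();    // CR, LF and CRLF endings are all consumed
//       // ... process 'line' ...
//   }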
};};
// ----------------------------------------------------------------------------
// functional-programming style helper macros (...do this with templates?)
// ----------------------------------------------------------------------------
#define foreach_index(_i,_dat) for (int _i = 0; _i < (int) (_dat).size(); _i++)
#define map_array(_x,_expr,_y) { _y.resize (_x.size()); foreach_index(_i,_x) _y[_i]=_expr(_x[_i]); }
#define reduce_array(_x,_expr,_y) { foreach_index(_i,_x) _y = (_i==0) ? _x[_i] : _expr(_y,_x[_i]); }
// ----------------------------------------------------------------------------
// frequently missing utility functions
// ----------------------------------------------------------------------------
namespace msra { namespace util {
// to (slightly) simplify processing of command-line arguments.
// command_line args (argc, argv);
// while (args.has (1) && args[0][0] == '-') { option = args.shift(); process (option); }
// for (const wchar_t * arg = args.shift(); arg; arg = args.shift()) { process (arg); }
class command_line
{
int num;
const wchar_t * * args;
public:
command_line (int argc, wchar_t * argv[]) : num (argc), args ((const wchar_t **) argv) { shift(); }
inline int size() const { return num; }
inline bool has (int left) { return size() >= left; }
const wchar_t * shift() { if (size() == 0) return NULL; num--; return *args++; }
const wchar_t * operator[] (int i) const { return (i < 0 || i >= size()) ? NULL : args[i]; }
};
// byte-reverse a variable --reverse all bytes (intended for integral types and float)
template<typename T> static inline void bytereverse (T & v) throw()
{ // note: this is more efficient than it looks because sizeof (v[0]) is a constant
char * p = (char *) &v;
const size_t elemsize = sizeof (v);
for (size_t k = 0; k < elemsize / 2; k++) // swap individual bytes
swap (p[k], p[elemsize-1 - k]);
}
// byte-swap an entire array
template<class V> static inline void byteswap (V & v) throw()
{
foreach_index (i, v)
bytereverse (v[i]);
}
// execute a block with retry
// Block must be restartable.
// Use this when writing small files to those unreliable Windows servers.
// TODO: This will fail to compile under VS 2008--we need an #ifdef around this
template<typename FUNCTION> static void attempt (int retries, const FUNCTION & body)
{
for (int attempt = 1; ; attempt++)
{
try
{
body();
if (attempt > 1) fprintf (stderr, "attempt: success after %d retries\n", attempt);
break;
}
catch (const std::exception & e)
{
if (attempt >= retries)
throw; // failed N times --give up and rethrow the error
fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries);
::Sleep (1000); // wait a little, then try again
}
}
}
};}; // namespace
#ifdef _WIN32
// ----------------------------------------------------------------------------
// frequently missing Win32 functions
// ----------------------------------------------------------------------------
// strerror() for Win32 error codes
static inline std::wstring FormatWin32Error (DWORD error)
{
wchar_t buf[1024] = { 0 };
::FormatMessageW (FORMAT_MESSAGE_FROM_SYSTEM, "", error, 0, buf, sizeof (buf)/sizeof (*buf) -1, NULL);
std::wstring res (buf);
// eliminate newlines (and spaces) from the end
size_t last = res.find_last_not_of (L" \t\r\n");
if (last != std::string::npos) res.erase (last +1, res.length());
return res;
}
// we always wanted this!
#pragma warning (push)
#pragma warning (disable: 6320) // Exception-filter expression is the constant EXCEPTION_EXECUTE_HANDLER
#pragma warning (disable: 6322) // Empty _except block
static inline void SetCurrentThreadName (const char* threadName)
{ // from http://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx
::Sleep(10);
#pragma pack(push,8)
struct { DWORD dwType; LPCSTR szName; DWORD dwThreadID; DWORD dwFlags; } info = { 0x1000, threadName, (DWORD) -1, 0 };
#pragma pack(pop)
__try { RaiseException (0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info); }
__except(EXCEPTION_EXECUTE_HANDLER) { }
}
#pragma warning (pop)
// return a string as a CoTaskMemAlloc'ed memory object
// Returns NULL if out of memory (we don't throw because we'd just catch it outside and convert to HRESULT anyway).
static inline LPWSTR CoTaskMemString (const wchar_t * s)
{
size_t n = wcslen (s) + 1; // number of chars to allocate and copy
LPWSTR p = (LPWSTR) ::CoTaskMemAlloc (sizeof (*p) * n);
if (p) for (size_t i = 0; i < n; i++) p[i] = s[i];
return p;
}
template<class S> static inline void ZeroStruct (S & s) { memset (&s, 0, sizeof (s)); }
#endif
// ----------------------------------------------------------------------------
// machine dependent
// ----------------------------------------------------------------------------
#define MACHINE_IS_BIG_ENDIAN (false)
using namespace msra::basetypes; // for compatibility
#pragma warning (pop)
// RuntimeError - throw a std::runtime_error with a formatted error string
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void RuntimeError(const char * format, ...)
{
va_list args;
char buffer[1024];
va_start(args, format);
vsnprintf(buffer, sizeof(buffer), format, args);
va_end(args);
throw std::runtime_error(buffer);
};
// LogicError - throw a std::logic_error with a formatted error string
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void LogicError(const char * format, ...)
{
va_list args;
char buffer[1024];
va_start(args, format);
vsnprintf(buffer, sizeof(buffer), format, args);
va_end(args);
throw std::logic_error(buffer);
};
// ----------------------------------------------------------------------------
// dynamic loading of modules
// ----------------------------------------------------------------------------
#ifdef _WIN32
class Plugin
{
HMODULE m_hModule; // module handle for the writer DLL
std::wstring m_dllName; // name of the writer DLL
public:
Plugin() { m_hModule = NULL; }
template<class STRING> // accepts char (UTF-8) and wide string
FARPROC Load(const STRING & plugin, const std::string & proc)
{
m_dllName = msra::strfun::utf16(plugin);
m_dllName += L".dll";
m_hModule = LoadLibrary(m_dllName.c_str());
if (m_hModule == NULL)
RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName));
// create a variable of each type just to call the proper templated version
return GetProcAddress(m_hModule, proc.c_str());
}
~Plugin() { if (m_hModule) FreeLibrary(m_hModule); }
};
#else
class Plugin
{
public:
template<class STRING> // accepts char (UTF-8) and wide string
void * Load(const STRING & plugin, const std::string & proc)
{
RuntimeError("Plugins not implemented on Linux yet");
return nullptr;
}
};
#endif
#endif // _BASETYPES_

View File

@ -1,123 +0,0 @@
//
// <copyright file="biggrowablevectors.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// biggrowablevectors.h -- big growable vector that uses two layers and optionally a disk backing store for paging
#pragma once
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// growablevectorbase -- helper for two-layer growable random-access array
// This allows both a fully allocated vector (with push_back()), e.g. for uids,
// as well as a partially allocated one (content managed by derived class), for features and lattice blocks.
// TODO:
// - test this (make copy of binary first before full compilation; or rebuild the previous version)
// - fully move in-mem range here, test again
// - then we can move towards paging from archive directly (biggrowablevectorarray gets tossed)
// ---------------------------------------------------------------------------
template<class BLOCKTYPE> class growablevectorbase
{
protected: // fix this later
const size_t elementsperblock;
size_t n; // number of elements
std::vector<std::unique_ptr<BLOCKTYPE>> blocks; // the data blocks
void operator= (const growablevectorbase &); // (non-assignable)
void check (size_t t) const { if (t >= n) throw std::logic_error ("growablevectorbase: out of bounds"); } // bounds check helper
// resize intermediate level, but do not allocate blocks
// (may deallocate if shrinking)
void resize_without_commit (size_t T)
{
blocks.resize ((T + elementsperblock-1) / elementsperblock);
n = T;
// TODO: update allocated range
}
// commit memory
// begin/end must be block boundaries
void commit (size_t begin, size_t end, BLOCKTYPE * blockdata)
{
auto & blockptr = getblockptr (begin, end); // memory leak: if this fails (logic error; should never happen)
blockptr.reset (blockdata); // take ownership of the block
// TODO: update allocated range --also enforce consecutiveness
}
// flush a block
// begin/end must be block boundaries
void flush (size_t begin, size_t end)
{
auto & blockptr = getblockptr (begin, end); // memory leak: if this fails (logic error; should never happen)
blockptr.reset(); // release it
// TODO: update allocated range --also enforce consecutiveness
}
// helper to get a block pointer, with block referenced as its entire range
std::unique_ptr<BLOCKTYPE> & getblockptr (size_t t) // const
{
check (t);
return blocks[t / elementsperblock];
}
// helper to get a block pointer, with block referenced as its entire range
std::unique_ptr<BLOCKTYPE> & getblockptr (size_t begin, size_t end)
{
// BUGBUG: last block may be shorter than elementsperblock
if (end - begin != elementsperblock || getblockt (begin) != 0)
throw std::logic_error ("growablevectorbase: non-block boundaries passed to block-level function");
return getblockptr (begin);
}
public:
growablevectorbase (size_t elementsperblock) : elementsperblock (elementsperblock), n (0) { blocks.reserve (1000); }
size_t size() const { return n; } // number of frames
bool empty() const { return size() == 0; }
// to access an element t -> getblock(t)[getblockt(t)]
BLOCKTYPE & getblock (size_t t) const
{
check (t);
const size_t blockid = t / elementsperblock;
return *blocks[blockid].get();
}
size_t getblockt (size_t t) const
{
check (t);
return t % elementsperblock;
}
};
// ---------------------------------------------------------------------------
// biggrowablevector -- big vector we can push_back to
// ---------------------------------------------------------------------------
template<typename ELEMTYPE> class biggrowablevector : public growablevectorbase<std::vector<ELEMTYPE>>
{
using base_t = growablevectorbase<std::vector<ELEMTYPE>>;
public:
biggrowablevector() : growablevectorbase<std::vector<ELEMTYPE>>::growablevectorbase (65536) { }
template<typename VALTYPE> void push_back (VALTYPE e) // VALTYPE could be an rvalue reference
{
size_t i = base_t::size();
base_t::resize_without_commit (i + 1);
auto & block = base_t::getblockptr (i);
if (block.get() == NULL)
block.reset (new std::vector<ELEMTYPE> (this->elementsperblock));
(*block)[base_t::getblockt (i)] = e;
}
ELEMTYPE & operator[] (size_t t) { return base_t::getblock(t)[base_t::getblockt (t)]; } // get an element
const ELEMTYPE & operator[] (size_t t) const { return base_t::getblock(t)[base_t::getblockt (t)]; } // get an element
void resize (const size_t n)
{
base_t::resize_without_commit (n);
foreach_index (i, this->blocks)
if (this->blocks[i].get() == NULL)
this->blocks[i].reset (new std::vector<ELEMTYPE> (this->elementsperblock));
}
};
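// [Editor's note: usage sketch added for illustration; not part of the original file.]
// Elements live in 65536-element blocks that are allocated on demand, so
// push_back never relocates existing data the way std::vector's reallocation does:
//   msra::dbn::biggrowablevector<float> v;
//   for (size_t t = 0; t < 1000000; t++)
//       v.push_back ((float) t);   // allocates a fresh block every 65536 elements
//   float x = v[123456];           // two-level lookup: block index, then offset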
};};

View File

@ -1,373 +0,0 @@
//
// <copyright file="chunkevalsource.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
//#include <objbase.h>
#include "basetypes.h" // for attempt()
#include "htkfeatio.h" // for reading HTK features
#include "minibatchsourcehelpers.h"
#ifndef __unix__
#include "ssematrix.h"
#endif
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif
namespace msra { namespace dbn {
class chunkevalsource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
msra::dbn::matrix feat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::wstring> outpaths; // [k] and their pathnames
std::vector<unsigned int> sampperiods; // [k] and sample periods (they should really all be the same...)
size_t vdim; // input dimension
size_t udim; // output dimension
bool minibatchready;
void operator=(const chunkevalsource &);
private:
void clear() // empty the FIFO
{
frames.clear();
boundaryflags.clear();
numframes.clear();
outpaths.clear();
sampperiods.clear();
minibatchready=false;
}
void saveandflush(msra::dbn::matrix &pred)
{
const size_t framesinblock = frames.size();
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[k];
unsigned int sampperiod = sampperiods[k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %zd frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;   // touch the variable to avoid an unused-variable warning in release builds
// and we are done --forget the FIFO content & get ready for next chunk
clear();
}
public:
chunkevalsource (size_t numinput, size_t numoutput, size_t chunksize)
:vdim(numinput),udim(numoutput),chunksize(chunksize)
{
frames.reserve (chunksize * 2);
feat.resize(vdim,chunksize); // initialize to size chunksize
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
frames.push_back (v);
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
numframes.push_back (feat.cols());
outpaths.push_back (outpath);
sampperiods.push_back (sampperiod);
}
void createevalminibatch()
{
const size_t framesinblock = frames.size();
feat.resize(vdim, framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (frames, boundaryflags, 0, framesinblock, feat);
minibatchready=true;
}
void writetofiles(msra::dbn::matrix &pred){ saveandflush(pred); }
msra::dbn::matrix chunkofframes() { assert(minibatchready); return feat; }
bool isminibatchready() { return minibatchready; }
size_t currentchunksize() { return frames.size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
};
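// Typical calling sequence, reconstructed from the public interface above.
// 'evaluate' stands in for whatever network produces the predictions; it and the
// parameters here are assumptions for illustration, not part of this header.
template<class EVALFN>
static void chunkevalsource_usage_sketch (chunkevalsource & source, EVALFN evaluate,
                                          const msra::dbn::matrix & feat, const std::string & featkind,
                                          unsigned int sampperiod, const std::wstring & outpath)
{
    source.addfile (feat, featkind, sampperiod, outpath); // queue one utterance (repeat per file)
    source.flushinput();                                  // build the neighbor-augmented minibatch
    if (source.isminibatchready())
    {
        msra::dbn::matrix pred = evaluate (source.chunkofframes());
        source.writetofiles (pred);                       // write HTK feature files and clear the FIFO
    }
}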
class chunkevalsourcemulti // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesmulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags;        // [t] -1 for the first frame, +1 for the last frame, 0 otherwise (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::vector<std::wstring>> outpaths; // [k] and their pathnames
std::vector<std::vector<unsigned int>> sampperiods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
std::vector<size_t> udims; // output dimension
bool minibatchready;
void operator=(const chunkevalsourcemulti &);
private:
void clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesmulti[i].clear();
outpaths[i].clear();
sampperiods[i].clear();
}
boundaryflags.clear();
numframes.clear();
minibatchready=false;
}
void saveandflush(msra::dbn::matrix &pred, size_t index)
{
const size_t framesinblock = framesmulti[index].size();
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[index][k];
unsigned int sampperiod = sampperiods[index][k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %zu frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;   // touch the variable to avoid an unused-variable warning in release builds
// note: unlike chunkevalsource, the FIFO is not cleared here; callers must call reset() before the next chunk
}
public:
chunkevalsourcemulti (std::vector<size_t> vdims, std::vector<size_t> udims, size_t chunksize)
:vdims(vdims),udims(udims),chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesmulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
outpaths.push_back(std::vector<std::wstring>());
sampperiods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesmulti[index].push_back (v);
if (index==0)
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numframes.push_back (feat.cols());
outpaths[index].push_back (outpath);
sampperiods[index].push_back (sampperiod);
}
void createevalminibatch()
{
foreach_index(i, framesmulti)
{
const size_t framesinblock = framesmulti[i].size();
feat[i].resize(vdims[i], framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (framesmulti[i], boundaryflags, 0, framesinblock, feat[i]);
}
minibatchready=true;
}
void writetofiles(msra::dbn::matrix &pred, size_t index){ saveandflush(pred, index); }
msra::dbn::matrix chunkofframes(size_t index) { assert(minibatchready); assert(index<=feat.size()); return feat[index]; }
bool isminibatchready() { return minibatchready; }
size_t currentchunksize() { return framesmulti[0].size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
};
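// The multi-stream variant above is driven per stream index; stream 0 also owns the
// shared boundaryflags/numframes bookkeeping, so every stream must append the same
// number of frames per utterance. A sketch of the call pattern -- the dimensions,
// feature kinds, and matrices are illustrative (100000 is HTK's 100 ns sample-period
// unit for a 10 ms frame shift):
template<class MATRIX>
static void chunkevalsourcemulti_usage_sketch (const MATRIX & mfcc, const MATRIX & plp,
                                               const std::wstring & outpath0, const std::wstring & outpath1)
{
    std::vector<size_t> vdims; vdims.push_back (39); vdims.push_back (52);
    std::vector<size_t> udims; udims.push_back (9304); udims.push_back (9304);
    chunkevalsourcemulti source (vdims, udims, /*chunksize=*/8192);
    source.addfile (mfcc, "MFCC_0_D_A", 100000, outpath0, /*index=*/0); // stream 0 records frame counts
    source.addfile (plp,  "PLP_0_D_A",  100000, outpath1, /*index=*/1);
    source.flushinput();   // augments every stream and marks the minibatch ready
}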
class FileEvalSource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesMulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryFlags;        // [t] -1 for the first frame, +1 for the last frame, 0 otherwise (for augmentneighbors())
std::vector<size_t> numFrames; // [k] number of frames for all appended files
std::vector<std::vector<unsigned int>> sampPeriods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
std::vector<size_t> leftcontext;
std::vector<size_t> rightcontext;
bool minibatchReady;
size_t minibatchSize;
size_t frameIndex;
void operator=(const FileEvalSource &);
private:
void Clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesMulti[i].clear();
sampPeriods[i].clear();
}
boundaryFlags.clear();
numFrames.clear();
minibatchReady=false;
frameIndex=0;
}
public:
FileEvalSource(std::vector<size_t> vdims, std::vector<size_t> leftcontext, std::vector<size_t> rightcontext, size_t chunksize) :vdims(vdims), leftcontext(leftcontext), rightcontext(rightcontext), chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesMulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
sampPeriods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
// append data to chunk
template<class MATRIX> void AddFile (const MATRIX & feat, const string & /*featkind*/, unsigned int sampPeriod, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesMulti[index].push_back (v);
if (index==0)
boundaryFlags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numFrames.push_back (feat.cols());
sampPeriods[index].push_back (sampPeriod);
}
void CreateEvalMinibatch()
{
foreach_index(i, framesMulti)
{
const size_t framesInBlock = framesMulti[i].size();
feat[i].resize(vdims[i], framesInBlock); // input features for whole utt (col vectors)
// augment the features
size_t leftextent, rightextent;
// page in the needed range of frames
if (leftcontext[i] == 0 && rightcontext[i] == 0)
{
leftextent = rightextent = augmentationextent(framesMulti[i][0].size(), vdims[i]);
}
else
{
leftextent = leftcontext[i];
rightextent = rightcontext[i];
}
//msra::dbn::augmentneighbors(framesMulti[i], boundaryFlags, 0, leftcontext[i], rightcontext[i],)
msra::dbn::augmentneighbors (framesMulti[i], boundaryFlags, leftextent, rightextent, 0, framesInBlock, feat[i]);
}
minibatchReady=true;
}
void SetMinibatchSize(size_t mbSize){ minibatchSize=mbSize;}
msra::dbn::matrix ChunkOfFrames(size_t index) { assert(minibatchReady); assert(index<=feat.size()); return feat[index]; }
bool IsMinibatchReady() { return minibatchReady; }
size_t CurrentFileSize() { return framesMulti[0].size(); }
void FlushInput(){CreateEvalMinibatch();}
void Reset() { Clear(); }
};
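// The context rule in CreateEvalMinibatch() above: zero left and right context means
// "infer a symmetric window from the model input dimension"; anything else is used as
// an explicit asymmetric window. A standalone sketch of the symmetric case, assuming
// augmentationextent() computes (modeldim / featdim - 1) / 2 -- an assumption here,
// inferred from its use with framesMulti[i][0].size() and vdims[i]:
static inline size_t inferextent_sketch (size_t featdim, size_t modeldim)
{
    if (featdim == 0 || modeldim % featdim != 0)
        throw std::runtime_error ("inferextent_sketch: model dim must be a multiple of the feature dim");
    return (modeldim / featdim - 1) / 2;    // e.g. 39-dim frames into a 429-dim input -> 5 frames each side
}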
};};

View file

@ -1,24 +0,0 @@
//
// <copyright file="dllmain.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// dllmain.cpp : Defines the entry point for the DLL application.
#include "stdafx.h"
BOOL APIENTRY DllMain( HMODULE /*hModule*/,
DWORD ul_reason_for_call,
LPVOID /*lpReserved*/
)
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}

The diff for this file is not shown because it is too large.

View file

@ -1,620 +0,0 @@
//
// fileutil.h - file I/O with error checking
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
#pragma once
#ifndef _FILEUTIL_
#define _FILEUTIL_
#include "Platform.h"
#include <cstdio>
#ifdef __unix__
#include <sys/types.h>
#include <sys/stat.h>
#endif
#include <algorithm> // for std::find
#include <vector>
#include <map>
#include <functional>
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cassert>
#include <cstring> // for strerror()
using namespace std;
#define SAFE_CLOSE(f) (((f) == NULL) || (fcloseOrDie ((f)), (f) = NULL))
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and the stream
// is switched to binary or text mode if 'b' or 't' is given. If you use this,
// make sure not to fclose() such a handle.
// ----------------------------------------------------------------------------
FILE * fopenOrDie (const string & pathname, const char * mode);
FILE * fopenOrDie (const wstring & pathname, const wchar_t * mode);
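// Usage sketch for the "-" convention documented above (filenames illustrative):
static inline void fopenOrDie_usage_sketch()
{
    FILE * fin  = fopenOrDie (std::string ("-"), "rb");          // aliases stdin, switched to binary mode
    FILE * fout = fopenOrDie (std::wstring (L"out.dat"), L"wb"); // dies with an error message on failure
    fcloseOrDie (fout);     // fine for a real file
    fin;                    // but never fclose() the "-" handle -- it aliases stdin
}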
#ifndef __unix__
// ----------------------------------------------------------------------------
// fsetmode(): set mode to binary or text
// ----------------------------------------------------------------------------
void fsetmode (FILE * f, char type);
#endif
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f);
template<class _T>
void freadOrDie (_T & data, int num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, size_t num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f);
template<class _T>
void fwriteOrDie (const _T & data, FILE * f) // template for vector<>
{ if (data.size() > 0) fwriteOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fprintfOrDie (FILE * f, const char *format, ...);
// ----------------------------------------------------------------------------
// fcloseOrDie(): like fclose() but terminate with err msg in case of error
// not yet implemented, but we should
// ----------------------------------------------------------------------------
#define fcloseOrDie fclose
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie (FILE * f);
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes
// ----------------------------------------------------------------------------
size_t filesize (const wchar_t * pathname);
size_t filesize (FILE * f);
int64_t filesize64 (const wchar_t * pathname);
// ----------------------------------------------------------------------------
// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
// 32-bit offsets only
long fseekOrDie (FILE * f, long offset, int mode = SEEK_SET);
#define ftellOrDie ftell
// ----------------------------------------------------------------------------
// fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
uint64_t fgetpos (FILE * f);
void fsetpos (FILE * f, uint64_t pos);
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie (const std::string & pathname);
void unlinkOrDie (const std::wstring & pathname);
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
void renameOrDie (const std::string & from, const std::string & to);
void renameOrDie (const std::wstring & from, const std::wstring & to);
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists (const char * pathname);
bool fexists (const wchar_t * pathname);
inline bool fexists (const std::string & pathname) { return fexists (pathname.c_str()); }
inline bool fexists (const std::wstring & pathname) { return fexists (pathname.c_str()); }
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode
// ----------------------------------------------------------------------------
bool funicode (FILE * f);
// ----------------------------------------------------------------------------
// fskipspace(): skip space characters
// ----------------------------------------------------------------------------
bool fskipspace (FILE * F);
bool fskipwspace (FILE * F);
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets()), returned buffer is
// always 0-terminated; has second version that returns an STL string instead
// fgetstring(): read a 0-terminated string (terminate if error)
// fgetword(): read a space-terminated token (terminate if error)
// fskipNewLine(): skip all white space until end of line incl. the newline
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string (terminate if error)
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char *);
void fputstring (const HANDLE f, const char * str);
void fputstring (FILE * f, const std::string &);
void fputstring (FILE * f, const wchar_t *);
void fputstring (FILE * f, const std::wstring &);
template<class CHAR> CHAR * fgetline (FILE * f, CHAR * buf, int size);
template<class CHAR, size_t n> CHAR * fgetline (FILE * f, CHAR (& buf)[n]) { return fgetline (f, buf, n); }
string fgetline (FILE * f);
wstring fgetlinew (FILE * f);
void fgetline (FILE * f, std::string & s, std::vector<char> & buf);
void fgetline (FILE * f, std::wstring & s, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<wchar_t> & buf);
const char * fgetstring (FILE * f, char * buf, int size);
template<size_t n> const char * fgetstring (FILE * f, char (& buf)[n]) { return fgetstring (f, buf, n); }
const char * fgetstring (const HANDLE f, char * buf, int size);
template<size_t n> const char * fgetstring (const HANDLE f, char (& buf)[n]) { return fgetstring (f, buf, n); }
const wchar_t * fgetstring (FILE * f, wchar_t * buf, int size);
wstring fgetwstring (FILE * f);
string fgetstring (FILE * f);
const char * fgettoken (FILE * f, char * buf, int size);
template<size_t n> const char * fgettoken (FILE * f, char (& buf)[n]) { return fgettoken (f, buf, n); }
string fgettoken (FILE * f);
const wchar_t * fgettoken (FILE * f, wchar_t * buf, int size);
wstring fgetwtoken (FILE * f);
int fskipNewline (FILE * f, bool skip = true);
int fskipwNewline (FILE * f, bool skip = true);
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string (terminate if error)
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char *);
void fputstring (FILE * f, const std::string &);
void fputstring (FILE * f, const wchar_t *);
void fputstring (FILE * f, const std::wstring &);
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
string fgetTag (FILE * f);
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag (FILE * f, const char * expectedTag);
void fcheckTag_ascii (FILE * f, const string & expectedTag);
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag (const string & readTag, const string & expectedTag);
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag (FILE * f, const char * tag);
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring (FILE * f);
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n);
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte (FILE * f);
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort (FILE * f);
short fgetshort_bigendian (FILE * f);
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24 (FILE * f);
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint (FILE * f);
int fgetint_bigendian (FILE * f);
int fgetint_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetlong(): read a long value
// ----------------------------------------------------------------------------
long fgetlong (FILE * f);
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat (FILE * f);
float fgetfloat_bigendian (FILE * f);
float fgetfloat_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble (FILE * f);
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte (FILE * f, char val);
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort (FILE * f, short val);
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24 (FILE * f, int v);
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint (FILE * f, int val);
// ----------------------------------------------------------------------------
// fputlong(): write a long value
// ----------------------------------------------------------------------------
void fputlong (FILE * f, long val);
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat (FILE * f, float val);
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble (FILE * f, double val);
// template versions of put/get functions for binary files
template <typename T>
void fput(FILE * f, T v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// template versions of put/get functions for binary files
template <typename T>
void fget(FILE * f, T& v)
{
freadOrDie ((void *)&v, sizeof (v), 1, f);
}
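// Round-trip sketch for the binary fput()/fget() templates above (path illustrative):
static inline void fput_fget_usage_sketch()
{
    FILE * f = fopenOrDie (std::string ("scratch.bin"), "wb");
    fput (f, 3.14f);                    // raw bytes via fwriteOrDie()
    fcloseOrDie (f);
    f = fopenOrDie (std::string ("scratch.bin"), "rb");
    float v;
    fget (f, v);                        // reads the bytes back; v == 3.14f
    fcloseOrDie (f);
}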
// GetFormatString - get the format string for a particular type
template <typename T>
const wchar_t* GetFormatString(T /*t*/)
{
// if this assert goes off it means that you are using a type that doesn't have
// a read and/or write routine.
// If the type is a user-defined class, you need to create some global functions that handle file in/out.
// for example:
//File& operator>>(File& stream, MyClass& test);
//File& operator<<(File& stream, MyClass& test);
//
// in your class you will probably want to add these functions as friends so you can access any private members
// friend File& operator>>(File& stream, MyClass& test);
// friend File& operator<<(File& stream, MyClass& test);
//
// if you are using wchar_t* or char* types, these use other methods because they require buffers to be passed
// either use std::string and std::wstring, or use the WriteString() and ReadString() methods
assert(false); // need a specialization
return NULL;
}
// GetFormatString - specializations to get the format string for a particular type
template <> const wchar_t* GetFormatString(char);
template <> const wchar_t* GetFormatString(wchar_t);
template <> const wchar_t* GetFormatString(short);
template <> const wchar_t* GetFormatString(int);
template <> const wchar_t* GetFormatString(long);
template <> const wchar_t* GetFormatString(unsigned short);
template <> const wchar_t* GetFormatString(unsigned int);
template <> const wchar_t* GetFormatString(unsigned long);
template <> const wchar_t* GetFormatString(float);
template <> const wchar_t* GetFormatString(double);
template <> const wchar_t* GetFormatString(size_t);
template <> const wchar_t* GetFormatString(long long);
template <> const wchar_t* GetFormatString(const char*);
template <> const wchar_t* GetFormatString(const wchar_t*);
// GetScanFormatString - get the format string for a particular type
template <typename T>
const wchar_t* GetScanFormatString(T t)
{
assert(false); // need a specialization
return NULL;
}
// GetScanFormatString - specializations to get the format string for a particular type
template <> const wchar_t* GetScanFormatString(char);
template <> const wchar_t* GetScanFormatString(wchar_t);
template <> const wchar_t* GetScanFormatString(short);
template <> const wchar_t* GetScanFormatString(int);
template <> const wchar_t* GetScanFormatString(long);
template <> const wchar_t* GetScanFormatString(unsigned short);
template <> const wchar_t* GetScanFormatString(unsigned int);
template <> const wchar_t* GetScanFormatString(unsigned long);
template <> const wchar_t* GetScanFormatString(float);
template <> const wchar_t* GetScanFormatString(double);
template <> const wchar_t* GetScanFormatString(size_t);
template <> const wchar_t* GetScanFormatString(long long);
// ----------------------------------------------------------------------------
// fgetText(): get a value from a text file
// ----------------------------------------------------------------------------
template <typename T>
void fgetText(FILE * f, T& v)
{
int rc = ftrygetText(f, v);
if (rc == 0)
throw std::runtime_error("error reading value from file (invalid format)");
else if (rc == EOF)
throw std::runtime_error(std::string("error reading from file: ") + strerror(errno));
assert(rc == 1);
}
// version that tries to read a value without throwing if the contents don't match the format
template <typename T>
int ftrygetText(FILE * f, T& v)
{
const wchar_t* formatString = GetScanFormatString<T>(v);
int rc = fwscanf (f, formatString, &v);
assert(rc == 1 || rc == 0);
return rc;
}
template <> int ftrygetText<bool>(FILE * f, bool& v);
// ----------------------------------------------------------------------------
// fgetText() specializations for fwscanf_s differences: get a value from a text file
// ----------------------------------------------------------------------------
void fgetText(FILE * f, char& v);
void fgetText(FILE * f, wchar_t& v);
// ----------------------------------------------------------------------------
// fputText(): write a value out as text
// ----------------------------------------------------------------------------
template <typename T>
void fputText(FILE * f, T v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwprintf(f, formatString, v);
if (rc == 0)
throw std::runtime_error("error writing value to file, no values written");
else if (rc < 0)
throw std::runtime_error(std::string("error writing to file: ") + strerror(errno));
}
// ----------------------------------------------------------------------------
// fputText(): write a bool out as character
// ----------------------------------------------------------------------------
template <> void fputText<bool>(FILE * f, bool v);
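// Text round-trip sketch for fputText()/fgetText(); the format strings come from the
// GetFormatString/GetScanFormatString specializations above (path illustrative):
static inline void fputText_fgetText_usage_sketch()
{
    FILE * f = fopenOrDie (std::string ("values.txt"), "wt");
    fputText (f, 42);                   // the int specialization picks the format string
    fputText (f, '\n');
    fcloseOrDie (f);
    f = fopenOrDie (std::string ("values.txt"), "rt");
    int n;
    fgetText (f, n);                    // throws std::runtime_error on malformed input or EOF
    fcloseOrDie (f);
}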
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile (const wstring & pathname, const std::vector<char> & buffer);
void fputfile (const wstring & pathname, const std::wstring & string);
void fputfile (const wstring & pathname, const std::string & string);
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile (const wstring & pathname, std::vector<char> & buffer);
void fgetfile (FILE * f, std::vector<char> & buffer);
namespace msra { namespace files {
void fgetfilelines (const std::wstring & pathname, vector<char> & readbuffer, std::vector<std::string> & lines);
static inline std::vector<std::string> fgetfilelines (const std::wstring & pathname) { vector<char> buffer; std::vector<std::string> lines; fgetfilelines (pathname, buffer, lines); return lines; }
vector<char*> fgetfilelines (const wstring & pathname, vector<char> & readbuffer);
};};
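// Usage sketch for the convenience overload above (path illustrative):
static inline void fgetfilelines_usage_sketch()
{
    std::vector<std::string> lines = msra::files::fgetfilelines (L"train.list");
    for (size_t i = 0; i < lines.size(); i++)
        fprintf (stderr, "%s\n", lines[i].c_str());     // one entry per input line
}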
// ----------------------------------------------------------------------------
// expand_wildcards() -- expand a path with wildcards (also intermediate ones)
// ----------------------------------------------------------------------------
void expand_wildcards (const wstring & path, vector<wstring> & paths);
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
namespace msra { namespace files {
void make_intermediate_dirs (const wstring & filepath);
};};
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
namespace msra { namespace files {
bool fuptodate (const wstring & target, const wstring & input, bool inputrequired = true);
};};
#if 0
// ----------------------------------------------------------------------------
// simple support for WAV file I/O
// ----------------------------------------------------------------------------
// define the header if we haven't seen it yet
#ifndef _WAVEFORMATEX_
#define _WAVEFORMATEX_
/*
* extended waveform format structure used for all non-PCM formats. this
* structure is common to all non-PCM formats.
*/
typedef unsigned short WORD; // in case not defined yet (i.e. linux)
typedef struct tWAVEFORMATEX
{
WORD wFormatTag; /* format type */
WORD nChannels; /* number of channels (i.e. mono, stereo...) */
DWORD nSamplesPerSec; /* sample rate */
DWORD nAvgBytesPerSec; /* for buffer estimation */
WORD nBlockAlign; /* block size of data */
WORD wBitsPerSample; /* number of bits per sample of mono data */
WORD cbSize; /* the count in bytes of the size of */
/* extra information (after cbSize) */
} WAVEFORMATEX, *PWAVEFORMATEX;
#endif /* _WAVEFORMATEX_ */
typedef struct wavehder{
char riffchar[4];
unsigned int RiffLength;
char wavechar[8];
unsigned int FmtLength;
signed short wFormatTag;
signed short nChannels;
unsigned int nSamplesPerSec;
unsigned int nAvgBytesPerSec;
signed short nBlockAlign;
signed short wBitsPerSample;
char datachar[4];
unsigned int DataLength;
private:
void prepareRest (int SampleCount);
public:
void prepare (unsigned int Fs, int Bits, int Channels, int SampleCount);
void prepare (const WAVEFORMATEX & wfx, int SampleCount);
unsigned int read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample);
void write (FILE * f);
static void update (FILE * f);
} WAVEHEADER;
// ----------------------------------------------------------------------------
// fgetwfx(), fputwfx(): I/O of wave file headers only
// ----------------------------------------------------------------------------
unsigned int fgetwfx (FILE *f, WAVEFORMATEX & wfx);
void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples);
// ----------------------------------------------------------------------------
// fgetraw(): read data of .wav file, and separate data of multiple channels.
// For example, data[i][j]: i is channel index, 0 means the first
// channel. j is sample index.
// ----------------------------------------------------------------------------
void fgetraw (FILE *f,std::vector< std::vector<short> > & data,const WAVEHEADER & wavhd);
#endif
// ----------------------------------------------------------------------------
// temp functions -- clean these up
// ----------------------------------------------------------------------------
// split a pathname into directory and filename
static inline void splitpath (const wstring & path, wstring & dir, wstring & file)
{
size_t pos = path.find_last_of (L"\\:/"); // DOS drives, UNIX, Windows
if (pos == path.npos) // no directory found
{
dir.clear();
file = path;
}
else
{
dir = path.substr (0, pos);
file = path.substr (pos +1);
}
}
// test if a pathname is a relative path
// A relative path is one that can be appended to a directory.
// Drive-relative paths, such as D:file, are considered non-relative.
static inline bool relpath (const wchar_t * path)
{ // this is a wild collection of pathname conventions in Windows
if (path[0] == '/' || path[0] == '\\') // e.g. \WINDOWS
return false;
if (path[0] && path[1] == ':') // drive syntax
return false;
// ... TODO: handle long NT paths
return true; // all others
}
template<class CHAR>
static inline bool relpath (const std::basic_string<CHAR> & s) { return relpath (s.c_str()); }
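// Usage sketch for the path helpers above (paths illustrative):
static inline void path_helpers_usage_sketch()
{
    std::wstring dir, file;
    splitpath (L"d:\\exp\\lts\\output.rec.txt", dir, file); // dir == L"d:\\exp\\lts", file == L"output.rec.txt"
    bool a = relpath (L"scripts\\run.bat");                 // true: can be appended to a directory
    bool b = relpath (L"d:file");                           // false: drive-relative counts as non-relative
    a; b;                                                   // touch to avoid unused-variable warnings
}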
// trim from start
static inline std::string &ltrim(std::string &s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); })); // lambda replaces deprecated std::ptr_fun and avoids UB for negative char values
return s;
}
// trim from end
static inline std::string &rtrim(std::string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
return s;
}
// trim from both ends
static inline std::string &trim(std::string &s) {
return ltrim(rtrim(s));
}
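// Quick check of the trimming helpers above (value illustrative):
static inline void trim_usage_sketch()
{
    std::string s = "  phoneme \t";
    trim (s);       // s == "phoneme"; ltrim/rtrim modify in place and return the same string
}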
vector<string> sep_string(const string & str, const string & sep);
#endif // _FILEUTIL_

View file

@ -1,448 +0,0 @@
// TODO: this is a dup; use the one in Include/ instead
//
// <copyright file="fileutil.old.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#ifndef _FILEUTIL_
#define _FILEUTIL_
#include "basetypes.h"
#include <stdio.h>
#ifdef __WINDOWS__
#include <windows.h> // for mmreg.h and FILETIME
#include <mmreg.h>
#endif
#include <stdint.h>
using namespace std;
#define SAFE_CLOSE(f) (((f) == NULL) || (fcloseOrDie ((f)), (f) = NULL))
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and the stream
// is switched to binary or text mode if 'b' or 't' is given. If you use this,
// make sure not to fclose() such a handle.
// ----------------------------------------------------------------------------
FILE * fopenOrDie (const STRING & pathname, const char * mode);
FILE * fopenOrDie (const WSTRING & pathname, const wchar_t * mode);
#ifndef __unix__ // don't need binary/text distinction on unix
// ----------------------------------------------------------------------------
// fsetmode(): set mode to binary or text
// ----------------------------------------------------------------------------
void fsetmode (FILE * f, char type);
#endif
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f);
void freadOrDie (void * ptr, size_t size, size_t count, const HANDLE f);
template<class _T>
void freadOrDie (_T & data, int num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, size_t num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, int num, const HANDLE f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, size_t num, const HANDLE f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f);
void fwriteOrDie (const void * ptr, size_t size, size_t count, const HANDLE f);
template<class _T>
void fwriteOrDie (const _T & data, FILE * f) // template for vector<>
{ if (data.size() > 0) fwriteOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void fwriteOrDie (const _T & data, const HANDLE f) // template for vector<>
{ if (data.size() > 0) fwriteOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fprintfOrDie (FILE * f, const char *format, ...);
// ----------------------------------------------------------------------------
// fcloseOrDie(): like fclose() but terminate with err msg in case of error
// not yet implemented, but we should
// ----------------------------------------------------------------------------
#define fcloseOrDie fclose
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie (FILE * f);
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes
// ----------------------------------------------------------------------------
size_t filesize (const wchar_t * pathname);
size_t filesize (FILE * f);
int64_t filesize64 (const wchar_t * pathname);
// ----------------------------------------------------------------------------
// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
// 32-bit offsets only
long fseekOrDie (FILE * f, long offset, int mode = SEEK_SET);
#define ftellOrDie ftell
uint64_t fgetpos (FILE * f);
void fsetpos (FILE * f, uint64_t pos);
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie (const std::string & pathname);
void unlinkOrDie (const std::wstring & pathname);
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
void renameOrDie (const std::string & from, const std::string & to);
void renameOrDie (const std::wstring & from, const std::wstring & to);
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists (const char * pathname);
bool fexists (const wchar_t * pathname);
inline bool fexists (const std::string & pathname) { return fexists (pathname.c_str()); }
inline bool fexists (const std::wstring & pathname) { return fexists (pathname.c_str()); }
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode
// ----------------------------------------------------------------------------
bool funicode (FILE * f);
// ----------------------------------------------------------------------------
// fskipspace(): skip space characters
// ----------------------------------------------------------------------------
void fskipspace (FILE * F);
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets()), returned buffer is
// always 0-terminated; has second version that returns an STL string instead
// fgetstring(): read a 0-terminated string (terminate if error)
// fgetword(): read a space-terminated token (terminate if error)
// fskipNewLine(): skip all white space until end of line incl. the newline
// ----------------------------------------------------------------------------
template<class CHAR> CHAR * fgetline (FILE * f, CHAR * buf, int size);
template<class CHAR, size_t n> CHAR * fgetline (FILE * f, CHAR (& buf)[n]) { return fgetline (f, buf, n); }
STRING fgetline (FILE * f);
WSTRING fgetlinew (FILE * f);
void fgetline (FILE * f, std::string & s, std::vector<char> & buf);
void fgetline (FILE * f, std::wstring & s, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<wchar_t> & buf);
const char * fgetstring (FILE * f, char * buf, int size);
template<size_t n> const char * fgetstring (FILE * f, char (& buf)[n]) { return fgetstring (f, buf, n); }
const char * fgetstring (const HANDLE f, char * buf, int size);
template<size_t n> const char * fgetstring (const HANDLE f, char (& buf)[n]) { return fgetstring (f, buf, n); }
wstring fgetwstring (FILE * f);
const char * fgettoken (FILE * f, char * buf, int size);
template<size_t n> const char * fgettoken (FILE * f, char (& buf)[n]) { return fgettoken (f, buf, n); }
STRING fgettoken (FILE * f);
void fskipNewline (FILE * f);
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string (terminate if error)
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char *);
void fputstring (const HANDLE f, const char * str);
void fputstring (FILE * f, const std::string &);
void fputstring (FILE * f, const wchar_t *);
void fputstring (FILE * f, const std::wstring &);
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
STRING fgetTag (FILE * f);
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag (FILE * f, const char * expectedTag);
void fcheckTag (const HANDLE f, const char * expectedTag);
void fcheckTag_ascii (FILE * f, const STRING & expectedTag);
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag (const STRING & readTag, const STRING & expectedTag);
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag (FILE * f, const char * tag);
void fputTag(const HANDLE f, const char * tag);
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring (FILE * f);
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n);
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte (FILE * f);
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort (FILE * f);
short fgetshort_bigendian (FILE * f);
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24 (FILE * f);
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint (FILE * f);
int fgetint (const HANDLE f);
int fgetint_bigendian (FILE * f);
int fgetint_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat (FILE * f);
float fgetfloat_bigendian (FILE * f);
float fgetfloat_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble (FILE * f);
// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file
// ----------------------------------------------------------------------------
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate);
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate);
// ----------------------------------------------------------------------------
// fputwav(): save data into a .wav file
// ----------------------------------------------------------------------------
void fputwav (FILE * f, const vector<short> & wav, int sampleRate, int nChannels = 1);
void fputwav (const wstring & fn, const vector<short> & wav, int sampleRate, int nChannels = 1);
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte (FILE * f, char val);
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort (FILE * f, short val);
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24 (FILE * f, int v);
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint (FILE * f, int val);
void fputint (const HANDLE f, int v);
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat (FILE * f, float val);
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble (FILE * f, double val);
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile (const WSTRING & pathname, const std::vector<char> & buffer);
void fputfile (const WSTRING & pathname, const std::wstring & string);
void fputfile (const WSTRING & pathname, const std::string & string);
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile (const WSTRING & pathname, std::vector<char> & buffer);
void fgetfile (FILE * f, std::vector<char> & buffer);
namespace msra { namespace files {
void fgetfilelines (const std::wstring & pathname, vector<char> & readbuffer, std::vector<std::string> & lines);
static inline std::vector<std::string> fgetfilelines (const std::wstring & pathname) { vector<char> buffer; std::vector<std::string> lines; fgetfilelines (pathname, buffer, lines); return lines; }
vector<char*> fgetfilelines (const wstring & pathname, vector<char> & readbuffer);
};};
// ----------------------------------------------------------------------------
// getfiletime(), setfiletime(): access modification time
// ----------------------------------------------------------------------------
bool getfiletime (const std::wstring & path, FILETIME & time);
void setfiletime (const std::wstring & path, const FILETIME & time);
// ----------------------------------------------------------------------------
// expand_wildcards() -- expand a path with wildcards (also intermediate ones)
// ----------------------------------------------------------------------------
void expand_wildcards (const wstring & path, vector<wstring> & paths);
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
namespace msra { namespace files {
void make_intermediate_dirs (const wstring & filepath);
};};
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
namespace msra { namespace files {
bool fuptodate (const wstring & target, const wstring & input, bool inputrequired = true);
};};
// ----------------------------------------------------------------------------
// simple support for WAV file I/O
// ----------------------------------------------------------------------------
typedef struct wavehder{
char riffchar[4];
unsigned int RiffLength;
char wavechar[8];
unsigned int FmtLength;
signed short wFormatTag;
signed short nChannels;
unsigned int nSamplesPerSec;
unsigned int nAvgBytesPerSec;
signed short nBlockAlign;
signed short wBitsPerSample;
char datachar[4];
unsigned int DataLength;
private:
void prepareRest (int SampleCount);
public:
void prepare (unsigned int Fs, int Bits, int Channels, int SampleCount);
void prepare (const WAVEFORMATEX & wfx, int SampleCount);
unsigned int read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample);
void write (FILE * f);
static void update (FILE * f);
} WAVEHEADER;
// ----------------------------------------------------------------------------
// fgetwfx(), fputwfx(): I/O of wave file headers only
// ----------------------------------------------------------------------------
unsigned int fgetwfx (FILE *f, WAVEFORMATEX & wfx);
void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples);
// ----------------------------------------------------------------------------
// fgetraw(): read data of .wav file, and separate data of multiple channels.
// For example, data[i][j]: i is channel index, 0 means the first
// channel. j is sample index.
// ----------------------------------------------------------------------------
void fgetraw (FILE *f,std::vector< std::vector<short> > & data,const WAVEHEADER & wavhd);
// ----------------------------------------------------------------------------
// temp functions -- clean these up
// ----------------------------------------------------------------------------
// split a pathname into directory and filename
static inline void splitpath (const wstring & path, wstring & dir, wstring & file)
{
size_t pos = path.find_last_of (L"\\:/"); // DOS drives, UNIX, Windows
if (pos == path.npos) // no directory found
{
dir.clear();
file = path;
}
else
{
dir = path.substr (0, pos);
file = path.substr (pos +1);
}
}
// test if a pathname is a relative path
// A relative path is one that can be appended to a directory.
// Drive-relative paths, such as D:file, are considered non-relative.
static inline bool relpath (const wchar_t * path)
{ // this is a wild collection of pathname conventions in Windows
if (path[0] == '/' || path[0] == '\\') // e.g. \WINDOWS
return false;
if (path[0] && path[1] == ':') // drive syntax
return false;
// ... TODO: handle long NT paths
return true; // all others
}
template<class CHAR>
static inline bool relpath (const std::basic_string<CHAR> & s) { return relpath (s.c_str()); }
#endif // _FILEUTIL_

The diff for this file is not shown because it is too large.

View file

@ -1,97 +0,0 @@
// itf/clusterable-itf.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_CLUSTERABLE_ITF_H_
#define KALDI_ITF_CLUSTERABLE_ITF_H_ 1
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
/** \addtogroup clustering_group
@{
A virtual class for clusterable objects; see \ref clustering for an
explanation of its function.
*/
class Clusterable {
public:
/// \name Functions that must be overridden
/// @{
/// Return a copy of this object.
virtual Clusterable *Copy() const = 0;
/// Return the objective function associated with the stats
/// [assuming ML estimation]
virtual BaseFloat Objf() const = 0;
/// Return the normalizer (typically, count) associated with the stats
virtual BaseFloat Normalizer() const = 0;
/// Set stats to empty.
virtual void SetZero() = 0;
/// Add other stats.
virtual void Add(const Clusterable &other) = 0;
/// Subtract other stats.
virtual void Sub(const Clusterable &other) = 0;
/// Scale the stats by a positive number f [not mandatory to supply this].
virtual void Scale(BaseFloat f) {
KALDI_ERR << "This Clusterable object does not implement Scale().";
}
/// Return a string that describes the inherited type.
virtual std::string Type() const = 0;
/// Write data to stream.
virtual void Write(std::ostream &os, bool binary) const = 0;
/// Read data from a stream and return the corresponding object (const
/// function; it's a class member because we need access to the vtable
/// so generic code can read derived types).
virtual Clusterable* ReadNew(std::istream &os, bool binary) const = 0;
virtual ~Clusterable() {}
/// @}
/// \name Functions that have default implementations
/// @{
// These functions have default implementations (but may be overridden for
// speed). Implementations are in tree/clusterable-classes.cc.
/// Return the objective function of the combined object this + other.
virtual BaseFloat ObjfPlus(const Clusterable &other) const;
/// Return the objective function of the subtracted object this - other.
virtual BaseFloat ObjfMinus(const Clusterable &other) const;
/// Return the objective function decrease from merging the two
/// clusters, negated to be a positive number (or zero).
virtual BaseFloat Distance(const Clusterable &other) const;
/// @}
};
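// A minimal sketch (not part of this header's original source) of a
// concrete Clusterable, to illustrate the contract above: the stats are a
// count, a sum, and a sum of squares over scalar samples, and Objf() is the
// negated sum of squared deviations from the mean. The class name and the
// AddSample() helper are illustrative assumptions; the real implementations
// live in tree/clusterable-classes.cc.
class ScalarSketchClusterable: public Clusterable {
 public:
  ScalarSketchClusterable(): count_(0.0), sum_(0.0), sumsq_(0.0) {}
  virtual Clusterable *Copy() const { return new ScalarSketchClusterable(*this); }
  virtual BaseFloat Objf() const {  // negated squared deviation from the mean
    if (count_ == 0.0) return 0.0;
    return -(sumsq_ - sum_ * sum_ / count_);
  }
  virtual BaseFloat Normalizer() const { return count_; }
  virtual void SetZero() { count_ = sum_ = sumsq_ = 0.0; }
  virtual void Add(const Clusterable &other) {
    const ScalarSketchClusterable &o =
        static_cast<const ScalarSketchClusterable&>(other);
    count_ += o.count_; sum_ += o.sum_; sumsq_ += o.sumsq_;
  }
  virtual void Sub(const Clusterable &other) {
    const ScalarSketchClusterable &o =
        static_cast<const ScalarSketchClusterable&>(other);
    count_ -= o.count_; sum_ -= o.sum_; sumsq_ -= o.sumsq_;
  }
  virtual std::string Type() const { return "scalar-sketch"; }
  virtual void Write(std::ostream &os, bool binary) const {  // (binary flag ignored in this sketch)
    os << count_ << ' ' << sum_ << ' ' << sumsq_ << '\n';
  }
  virtual Clusterable *ReadNew(std::istream &is, bool binary) const {
    ScalarSketchClusterable *s = new ScalarSketchClusterable();
    is >> s->count_ >> s->sum_ >> s->sumsq_;
    return s;
  }
  void AddSample(BaseFloat x) { count_ += 1.0; sum_ += x; sumsq_ += x * x; }
 private:
  BaseFloat count_, sum_, sumsq_;
};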
/// @} end of "ingroup clustering_group"
} // end namespace kaldi
#endif // KALDI_ITF_CLUSTERABLE_ITF_H_

View file

@ -1,80 +0,0 @@
// itf/context-dep-itf.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_CONTEXT_DEP_ITF_H_
#define KALDI_ITF_CONTEXT_DEP_ITF_H_
#include "base/kaldi-common.h"
namespace kaldi {
/// @ingroup tree_group
/// @{
/// context-dep-itf.h provides a link between
/// the tree-building code in ../tree/, and the FST code in ../fstext/
/// (particularly, ../fstext/context-dep.h). It is an abstract
/// interface that describes an object that can map from a
/// phone-in-context to a sequence of integer leaf-ids.
class ContextDependencyInterface {
public:
/// ContextWidth() returns the value N (e.g. 3 for triphone models) that says how many phones
/// are considered for computing context.
virtual int ContextWidth() const = 0;
/// Central position P of the phone context, in 0-based numbering, e.g. P = 1 for typical
/// triphone system. We have to see if we can do without this function.
virtual int CentralPosition() const = 0;
/// The "new" Compute interface. For typical topologies,
/// pdf_class would be 0, 1, 2.
/// Returns success or failure; outputs the pdf-id.
///
/// "Compute" is the main function of this interface, that takes a
/// sequence of N phones (and it must be N phones), possibly
/// including epsilons (symbol id zero) but only at positions other
/// than P [these represent unknown phone context due to end or
/// begin of sequence]. We do not insist that Compute must always
/// output (into stateseq) a nonempty sequence of states, but we
/// anticipate that stateseq will always be nonempty at output in
/// typical use cases. "Compute" returns false if expansion somehow
/// failed. Normally the calling code should raise an exception if
/// this happens. We can define a different interface later in
/// order to handle other kinds of information-- the underlying
/// data-structures from event-map.h are very flexible.
virtual bool Compute(const std::vector<int32> &phoneseq, int32 pdf_class,
int32 *pdf_id) const = 0;
/// NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
virtual int32 NumPdfs() const = 0;
virtual ~ContextDependencyInterface() {};
ContextDependencyInterface() {}
/// Returns pointer to new object which is copy of current one.
virtual ContextDependencyInterface *Copy() const = 0;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependencyInterface);
};
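// Usage sketch (illustrative, not part of the original header): for a
// triphone system (ContextWidth() == 3, CentralPosition() == 1), mapping a
// phone window with pdf-class 0 to a pdf-id looks like this:
//   std::vector<int32> window; // the 3 phone ids; 0 (epsilon) allowed except at position P
//   int32 pdf_id;
//   if (!ctx_dep.Compute (window, 0, &pdf_id))
//     KALDI_ERR << "Compute() failed"; // callers normally raise on failure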
/// @}
} // namespace kaldi
#endif

View file

@ -1,61 +0,0 @@
// itf/decodable-itf.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Mirko Hannemann; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_DECODABLE_ITF_H_
#define KALDI_ITF_DECODABLE_ITF_H_ 1
#include "base/kaldi-common.h"
namespace kaldi {
/// @ingroup Interfaces
/// @{
/// decodable-itf.h provides a link between the (acoustic-modeling and
/// feature-processing) code and the decoder. The idea is to make this
/// interface as small as possible, and to make it as agnostic as possible about
/// the form of the acoustic model (e.g. don't assume the probabilities are a
/// function of just a vector of floats), and about the decoder (e.g. don't
/// assume it accesses frames in strict left-to-right order). For normal
/// models, without on-line operation, the "decodable" sub-class will just be a
/// wrapper around a matrix of features and an acoustic model, and it will
/// answer the question 'what is the acoustic likelihood for this index and this
/// frame?'.
/// An interface for a feature-file and model; see \ref decodable_interface
class DecodableInterface {
public:
/// Returns the log likelihood, which will be negated in the decoder.
virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
/// Returns true if this is the last frame. Frames are one-based.
virtual bool IsLastFrame(int32 frame) = 0;
// virtual int32 NumFrames() = 0;
/// Returns the number of indices that the decodable object can accept;
/// Indices are one-based! This is for compatibility with OpenFst.
virtual int32 NumIndices() = 0;
virtual ~DecodableInterface() {}
};
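// A minimal sketch (not part of the original header) of the "wrapper around
// a matrix of likelihoods" described above, using a plain std::vector so the
// example stays self-contained. The class name is an illustrative assumption.
class DecodableMatrixSketch: public DecodableInterface {
 public:
  // loglikes[t][i] = log-likelihood of (one-based) index i+1 at (one-based) frame t+1
  explicit DecodableMatrixSketch(const std::vector<std::vector<BaseFloat> > &loglikes)
      : loglikes_(loglikes) {}
  virtual BaseFloat LogLikelihood(int32 frame, int32 index) {
    return loglikes_[frame - 1][index - 1];  // both arguments are one-based
  }
  virtual bool IsLastFrame(int32 frame) {
    return frame == static_cast<int32>(loglikes_.size());
  }
  virtual int32 NumIndices() {
    return loglikes_.empty() ? 0 : static_cast<int32>(loglikes_[0].size());
  }
 private:
  std::vector<std::vector<BaseFloat> > loglikes_;
};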
/// @}
} // namespace kaldi
#endif // KALDI_ITF_DECODABLE_ITF_H_

View file

@ -1,51 +0,0 @@
// itf/optimizable-itf.h
// Copyright 2009-2011 Go Vivace Inc.; Microsoft Corporation; Georg Stemmer
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_OPTIMIZABLE_ITF_H_
#define KALDI_ITF_OPTIMIZABLE_ITF_H_
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
namespace kaldi {
/// @ingroup Interfaces
/// @{
/// OptimizableInterface provides
/// a virtual class for optimizable objects.
/// E.g. a class that computed a likelihood function and
/// its gradient using some parameter
/// that has to be optimized on data
/// could inherit from it.
template<class Real>
class OptimizableInterface {
public:
/// computes gradient for a parameter params and returns it
/// in gradient_out
virtual void ComputeGradient(const Vector<Real> &params,
Vector<Real> *gradient_out) = 0;
/// computes the function value for a parameter params
/// and returns it
virtual Real ComputeValue(const Vector<Real> &params) = 0;
virtual ~OptimizableInterface() {}
};
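// A minimal sketch (not in the original header) of an optimizable object:
// f(p) = -(p - t)^T (p - t) for a fixed target t, whose gradient is 2 (t - p).
// Names are illustrative assumptions; the Vector operations come from
// matrix/matrix-lib.h included above.
template<class Real>
class QuadraticSketch: public OptimizableInterface<Real> {
 public:
  explicit QuadraticSketch(const Vector<Real> &target): target_(target) {}
  virtual void ComputeGradient(const Vector<Real> &params,
                               Vector<Real> *gradient_out) {
    gradient_out->Resize(params.Dim());
    gradient_out->CopyFromVec(target_);
    gradient_out->AddVec(-1.0, params);  // t - p
    gradient_out->Scale(2.0);            // 2 (t - p)
  }
  virtual Real ComputeValue(const Vector<Real> &params) {
    Vector<Real> diff(params);
    diff.AddVec(-1.0, target_);          // p - t
    return -VecVec(diff, diff);          // -(p - t)^T (p - t)
  }
 private:
  Vector<Real> target_;
};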
/// @} end of "Interfaces"
} // end namespace kaldi
#endif

View file

@ -1,49 +0,0 @@
// itf/options-itf.h
// Copyright 2013 Tanel Alumae, Tallinn University of Technology
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_OPTIONS_ITF_H_
#define KALDI_ITF_OPTIONS_ITF_H_ 1
#include "base/kaldi-common.h"
namespace kaldi {
class OptionsItf {
public:
virtual void Register(const std::string &name,
bool *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
int32 *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
uint32 *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
float *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
double *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
std::string *ptr, const std::string &doc) = 0;
virtual ~OptionsItf() {}
};
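// Usage sketch (illustrative, not from the original header): option structs
// conventionally expose a Register() that binds each member to a
// command-line flag through this interface. The struct and flag names below
// are hypothetical.
struct SketchOptions {
  int32 beam;
  bool binary;
  SketchOptions(): beam(16), binary(true) {}
  void Register(OptionsItf *po) {
    po->Register("beam", &beam, "Decoding beam (hypothetical example flag).");
    po->Register("binary", &binary, "Write output in binary mode.");
  }
};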
} // namespace kaldi
#endif // KALDI_ITF_OPTIONS_ITF_H_

View file

@ -1,743 +0,0 @@
//
// <copyright file="latticearchive.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "stdafx.h"
#include "basetypes.h"
#include "fileutil.h"
#include "htkfeatio.h" // for MLF reading for numer lattices
#include "latticearchive.h"
#include "msra_mgram.h" // for MLF reading for numer lattices
#include <stdio.h>
#include <stdint.h>
#include <vector>
#include <string>
#include <set>
#include <hash_map>
#include <regex>
#pragma warning(disable : 4996)
namespace msra { namespace lattices {
// helper to write a symbol hash (string -> int) to a file
// File has two sections:
// - physicalunitname // line number is mapping, starting with 0
// - logunitname physicalunitname // establishes a mapping; logunitname will get the same numeric index as physicalunitname
template<class UNITMAP>
static void writeunitmap (const wstring & symlistpath, const UNITMAP & unitmap)
{
std::vector<std::string> units;
units.reserve (unitmap.size());
std::vector<std::string> mappings;
mappings.reserve (unitmap.size());
for (auto iter = unitmap.cbegin(); iter != unitmap.cend(); iter++) // why would 'for (auto iter : unitmap)' not work?
{
const std::string label = iter->first;
const size_t unitid = iter->second;
if (units.size() <= unitid)
units.resize (unitid + 1); // we grow it on demand; the result must be compact (all entries filled), we check that later
if (!units[unitid].empty()) // many-to-one mapping: remember the unit; look it up while writing
mappings.push_back (label);
else
units[unitid] = label;
}
auto_file_ptr flist = fopenOrDie (symlistpath, L"wb");
// write (physical) units
foreach_index (k, units)
{
if (units[k].empty())
throw std::logic_error ("build: unitmap has gaps");
fprintfOrDie (flist, "%s\n", units[k].c_str());
}
// write log-phys mappings
foreach_index (k, mappings)
{
const std::string unit = mappings[k]; // logical name
const size_t unitid = unitmap.find (unit)->second; // get its unit id; this indexes the units array
const std::string tounit = units[unitid]; // and get the name from there
fprintfOrDie (flist, "%s %s\n", unit.c_str(), tounit.c_str());
}
fflushOrDie (flist);
}
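// Illustrative .symlist content as written by writeunitmap() (unit names
// here are made up). Physical units come first, one per line, where the
// line number is the numeric index; logical-to-physical mappings follow:
//   sil
//   s-ih+l
//   z-ih+l s-ih+l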
// (little helper to do a map::find() with default value)
template<typename MAPTYPE, typename KEYTYPE, typename VALTYPE>
static size_t tryfind (const MAPTYPE & map, const KEYTYPE & key, VALTYPE deflt)
{
auto iter = map.find (key);
if (iter == map.end())
return deflt;
else
return iter->second;
}
// archive format:
// - output files of build():
// - OUTPATH --the resulting archive (a huge file), simple concatenation of binary blocks
// - OUTPATH.toc --contains keys and offsets; this is how content in archive is found
// KEY=ARCHIVE[BYTEOFFSET] // where ARCHIVE can be empty, meaning same as previous
// - OUTPATH.symlist --list of all unit names encountered, in order of numeric index used in archive (first = index 0)
// This file is suitable as an input to HHEd's AU command.
// - in actual use,
// - .toc files can be concatenated
// - .symlist files must remain paired with the archive file
// - for actual training, user also needs to provide, typically from an HHEd AU run:
// - OUTPATH.tying --map from triphone units to senone sequence by name; get full phone set from .symlist above
// UNITNAME SENONE[2] SENONE[3] SENONE[4]
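// Illustrative .toc content (keys borrowed from the unit-test helper below;
// the archive path and byte offset are made up). An empty ARCHIVE part means
// "same archive as the previous line":
//   sw2001_A_1263622500_1374610000=\\server\lats\train.lats[0]
//   sw2001_A_1391162500_1409287500=[73124]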
/*static*/ void archive::build (const std::vector<std::wstring> & infiles, const std::wstring & outpath,
const std::unordered_map<std::string,size_t> & modelsymmap,
const msra::asr::htkmlfreader<msra::asr::htkmlfentry,msra::lattices::lattice::htkmlfwordsequence> & labels, // non-empty: build numer lattices
const msra::lm::CMGramLM & unigram, const msra::lm::CSymbolSet & unigramsymbols) // for numer lattices
{
#if 0 // little unit test helper for testing the read function
bool test = true;
if (test)
{
archive a;
a.open (outpath + L".toc");
lattice L;
std::hash_map<string,size_t> symmap;
a.getlattice (L"sw2001_A_1263622500_1374610000", L, symmap);
a.getlattice (L"sw2001_A_1391162500_1409287500", L, symmap);
return;
}
#endif
const bool numermode = !labels.empty(); // if labels are passed then we shall convert the MLFs to lattices, and 'infiles' are regular keys
const std::wstring tocpath = outpath + L".toc";
const std::wstring symlistpath = outpath + L".symlist";
// process all files
std::set<std::wstring> seenkeys; // (keep track of seen keys; throw error for duplicate keys)
msra::files::make_intermediate_dirs (outpath);
auto_file_ptr f = fopenOrDie (outpath, L"wb");
auto_file_ptr ftoc = fopenOrDie (tocpath, L"wb");
size_t brokeninputfiles = 0;
foreach_index (i, infiles)
{
const std::wstring & inlatpath = infiles[i];
fprintf (stderr, "build: processing lattice '%S'\n", inlatpath.c_str());
// get key
std::wstring key = regex_replace (inlatpath, wregex (L"=.*"), wstring()); // delete mapping
key = regex_replace (key, wregex (L".*[\\\\/]"), wstring()); // delete path
key = regex_replace (key, wregex (L"\\.[^\\.\\\\/:]*$"), wstring()); // delete extension (or not if none)
if (!seenkeys.insert (key).second)
throw std::runtime_error (msra::strfun::strprintf ("build: duplicate key for lattice '%S'", inlatpath.c_str()));
// we fail all the time due to totally broken HDecode/copy process, OK if not too many files are missing
bool latticeread = false;
try
{
// fetch lattice
lattice L;
if (!numermode)
L.fromhtklattice (inlatpath, modelsymmap); // read HTK lattice
else
L.frommlf (key, modelsymmap, labels, unigram, unigramsymbols); // read MLF into a numerator lattice
latticeread = true;
// write to archive
uint64_t offset = fgetpos (f);
L.fwrite (f);
fflushOrDie (f);
// write reference to TOC file --note: TOC file is a headerless UTF8 file; so don't use fprintf %S format (default code page)
fprintfOrDie (ftoc, "%s=%s[%llu]\n", msra::strfun::utf8 (key).c_str(), ((i - brokeninputfiles) == 0) ? msra::strfun::utf8 (outpath).c_str() : "", offset);
fflushOrDie (ftoc);
fprintf (stderr, "written lattice to offset %llu as '%S'\n", offset, key.c_str());
}
catch (const exception & e)
{
if (latticeread) throw; // write failure
// we ignore read failures
fprintf (stderr, "ERROR: skipping unreadable lattice '%S': %s\n", inlatpath.c_str(), e.what());
brokeninputfiles++;
}
}
// write out the unit map
// TODO: This is sort of redundant now--it gets the symmap from the HMM, i.e. always the same for all archives.
writeunitmap (symlistpath, modelsymmap);
fprintf (stderr, "completed %lu out of %lu lattices (%lu read failures, %.1f%%)\n", infiles.size(), infiles.size()-brokeninputfiles, brokeninputfiles, 100.0f * brokeninputfiles / infiles.size());
}
// helper to set a context value (left, right) with checking of uniqueness
void lattice::nodecontext::setcontext (int & lr, int val)
{
if (lr == unknown)
lr = val;
else if (lr != val)
lr = (signed short) ambiguous;
}
// helper for merge() to determine the unique node contexts
vector<lattice::nodecontext> lattice::determinenodecontexts (const msra::asr::simplesenonehmm & hset) const
{
const size_t spunit = tryfind (hset.getsymmap(), "sp", SIZE_MAX);
const size_t silunit = tryfind (hset.getsymmap(), "sil", SIZE_MAX);
vector<lattice::nodecontext> nodecontexts (nodes.size());
nodecontexts.front().left = nodecontext::start;
nodecontexts.front().right = nodecontext::ambiguous; // (should not happen, but won't harm either)
nodecontexts.back().right = nodecontext::end;
nodecontexts.back().left = nodecontext::ambiguous; // (should not happen--we require !sent_end; but who knows)
size_t multispseen = 0; // bad entries with multi-sp
foreach_index (j, edges)
{
const auto & e = edges[j];
const size_t S = e.S;
const size_t E = e.E;
auto a = getaligninfo (j);
if (a.size() == 0) // !NULL edge
throw std::logic_error ("determinenodecontexts: !NULL edges not allowed in merging, should be removed before");
size_t A = a[0].unit;
size_t Z = a[a.size()-1].unit;
if (Z == spunit)
{
if (a.size() < 2)
throw std::runtime_error ("determinenodecontexts: context-free unit (/sp/) found as a single-phone word");
else
{
Z = a[a.size()-2].unit;
if (Z == spunit) // a buggy lattice --I got this from HVite, to be tracked down
{
// search from end once again, to print a warning
int n;
for (n = (int) a.size() -1; n >= 0; n--)
if (a[n].unit != spunit)
break;
// ends with n = position of furthest non-sp
if (n < 0) // only sp?
throw std::runtime_error ("determinenodecontexts: word consists only of /sp/");
fprintf (stderr, "determinenodecontexts: word with %lu /sp/ at the end found, edge %d\n", a.size() -1 - n, j);
multispseen++;
Z = a[n].unit;
}
}
}
if (A == spunit || Z == spunit)
{
#if 0
fprintf (stderr, "A=%d Z=%d fa=%d j=%d/N=%d L=%d n=%d totalalign=%d ts/te=%d/%d\n", (int) A, (int) Z, (int) e.firstalign,(int) j, (int) edges.size(), (int) nodes.size(), (int) a.size(), (int) align.size(),
nodes[S].t, nodes[E].t);
foreach_index (kk, a)
fprintf (stderr, "a[%d] = %d\n", kk, a[kk].unit);
dump (stderr, [&] (size_t i) { return hset.gethmm (i).getname(); });
#endif
throw std::runtime_error ("determinenodecontexts: context-free unit (/sp/) found as a start phone or second last phone");
}
const auto & Ahmm = hset.gethmm (A);
const auto & Zhmm = hset.gethmm (Z);
int Aid = (int) Ahmm.gettransPindex();
int Zid = (int) Zhmm.gettransPindex();
nodecontexts[S].setright (Aid);
nodecontexts[E].setleft (Zid);
}
if (multispseen > 0)
fprintf (stderr, "determinenodecontexts: %lu broken edges in %lu with multiple /sp/ at the end seen\n", multispseen, edges.size());
// check CI conditions and put in 't'
// We make the hard assumption that there is only one CI phone, /sil/.
const auto & silhmm = hset.gethmm (silunit);
int silid = silhmm.gettransPindex();
foreach_index (i, nodecontexts)
{
auto & nc = nodecontexts[i];
if ((nc.left == nodecontext::unknown) ^ (nc.right == nodecontext::unknown))
throw std::runtime_error ("determinenodecontexts: invalid dead-end node in lattice");
if (nc.left == nodecontext::ambiguous && nc.right != silid && nc.right != nodecontext::end)
throw std::runtime_error ("determinenodecontexts: invalid ambiguous left context (right context is not CI)");
if (nc.right == nodecontext::ambiguous && nc.left != silid && nc.left != nodecontext::start)
throw std::runtime_error ("determinenodecontexts: invalid ambiguous right context (left context is not CI)");
nc.t = nodes[i].t;
}
return nodecontexts; // (will this use a move constructor??)
}
// compar function for sorting and merging
bool lattice::nodecontext::operator< (const nodecontext & other) const
{
// sort by t, left, right, i --sort by i to make i appear before iother, as assumed in merge function
int diff = (int) t - (int) other.t;
if (diff == 0)
{
diff = left - other.left;
if (diff == 0)
{
diff = right - other.right;
if (diff == 0)
return i < other.i; // (cannot use 'diff=' pattern since unsigned but may be SIZE_MAX)
}
}
return diff < 0;
}
// remove that final !NULL edge
// We have that in HAPI lattices, but there can be only one at the end.
void lattice::removefinalnull()
{
const auto & lastedge = edges.back();
// last edge can be !NULL, recognized as having 0 alignment records
if (lastedge.firstalign < align.size()) // has alignment records --not !NULL
return;
if (lastedge.S != nodes.size() -2 || lastedge.E != nodes.size() -1)
throw std::runtime_error ("removefinalnull: malformed final !NULL edge");
edges.resize (edges.size() -1); // remove it
nodes.resize (nodes.size() -1); // its start node is now the new end node
foreach_index (j, edges)
if (edges[j].E >= nodes.size())
throw std::runtime_error ("removefinalnull: cannot have final !NULL edge and other edges connecting to end node at the same time");
}
// merge a secondary lattice into the first
// With lots of caveats:
// - this optimizes lattices to true unigram lattices where the only unique node condition is acoustic context
// - no !NULL edge at the end, call removefinalnull() before
// - this function returns an unsorted edges[] array, i.e. invalid. We sort in uniq'ed representation, which is easier.
// This function is not elegant at all, just hard labor!
void lattice::merge (const lattice & other, const msra::asr::simplesenonehmm & hset)
{
if (!edges2.empty() || !other.edges2.empty())
throw std::logic_error ("merge: lattice(s) must be in non-uniq'ed format (V1)");
if (!info.numframes || !other.info.numframes)
throw std::logic_error ("merge: lattice(s) must have identical number of frames");
// establish node contexts
auto contexts = determinenodecontexts (hset);
auto othercontexts = other.determinenodecontexts (hset);
// create joint node space and node mapping
// This also collapses non-unique nodes.
// Note the edge case sil-sil in one lattice which may be sil-ambiguous or ambiguous-sil on the other.
// We ignore this, keeping such nodes unmerged. That's OK since middle /sil/ words have zero LM, and thus it's OK to keep them non-connected.
foreach_index (i, contexts) contexts[i].i = i;
foreach_index (i, othercontexts) othercontexts[i].iother = i;
contexts.insert (contexts.end(), othercontexts.begin(), othercontexts.end()); // append othercontext
sort (contexts.begin(), contexts.end());
vector<size_t> nodemap (nodes.size(), SIZE_MAX);
vector<size_t> othernodemap (other.nodes.size(), SIZE_MAX);
int j = 0;
foreach_index (i, contexts) // merge identical nodes --this is the critical step
{
if (j == 0 || contexts[j-1].t != contexts[i].t || contexts[j-1].left != contexts[i].left || contexts[j-1].right != contexts[i].right)
contexts[j++] = contexts[i]; // entered a new one
// node map
if (contexts[i].i != SIZE_MAX)
nodemap[contexts[i].i] = j-1;
if (contexts[i].iother != SIZE_MAX)
othernodemap[contexts[i].iother] = j-1;
}
fprintf (stderr, "merge: joint node space uniq'ed to %d from %d\n", j, contexts.size());
contexts.resize (j);
// create a new node array (just copy the contexts[].t fields)
nodes.resize (contexts.size());
foreach_index (inew, nodes)
nodes[inew].t = (unsigned short) contexts[inew].t;
info.numnodes = nodes.size();
// incorporate the alignment records
const size_t alignoffset = align.size();
align.insert (align.end(), other.align.begin(), other.align.end());
// map existing edges' S and E fields, and also 'firstalign'
foreach_index (j, edges)
{
edges[j].S = nodemap[edges[j].S];
edges[j].E = nodemap[edges[j].E];
}
auto otheredges = other.edges;
foreach_index (j, otheredges)
{
otheredges[j].S = othernodemap[otheredges[j].S];
otheredges[j].E = othernodemap[otheredges[j].E];
otheredges[j].firstalign += alignoffset; // that's where they are now
}
// at this point, a new 'nodes' array exists, and the edges already are w.r.t. the new node space and align space
// now we are ready to merge 'other' edges into this, simply by concatenation
edges.insert (edges.end(), otheredges.begin(), otheredges.end());
// remove acoustic scores --they are likely not identical if they come from different decoders
// If we don't do that, this will break the sorting in builduniquealignments()
info.hasacscores = 0;
foreach_index (j, edges)
edges[j].a = 0.0f;
// Note: we have NOT sorted or de-duplicated yet. That is best done after conversion to the uniq'ed format.
}
// remove duplicates
// This must be called in uniq'ed format.
void lattice::dedup()
{
if (edges2.empty())
throw std::logic_error ("dedup: lattice must be in uniq'ed format (V2)");
size_t k = 0;
foreach_index (j, edges2)
{
if (k > 0 && edges2[k-1].S == edges2[j].S && edges2[k-1].E == edges2[j].E && edges2[k-1].firstalign == edges2[j].firstalign)
{
if (edges2[k-1].implysp != edges2[j].implysp)
throw std::logic_error ("dedup: inconsistent 'implysp' flag for otherwise identical edges");
continue;
}
edges2[k++] = edges2[j];
}
fprintf (stderr, "dedup: edges reduced to %d from %d\n", k, edges2.size());
edges2.resize (k);
info.numedges = edges2.size();
edges.clear(); // (should already be, but isn't; make sure we no longer use it)
}
// load all lattices from a TOC file and write them to a new archive
// Use this to
// - upgrade the file format to latest in case of format changes
// - check consistency (read only; don't write out)
// - dump to stdout
// - merge two lattices (for merging numer into denom lattices)
// Input path is an actual TOC path, output is the stem (.TOC will be added). --yes, not nice, maybe fix it later
// Example command:
// convertlatticearchive --latticetocs dummy c:\smbrdebug\sw20_small.den.lats.toc.10 -w c:\smbrdebug\sw20_small.den.lats.converted --cdphonetying c:\smbrdebug\combined.tying --statelist c:\smbrdebug\swb300h.9304.aligned.statelist --transprobs c:\smbrdebug\MMF.9304.transprobs
// How to regenerate from my test lattices:
// buildlatticearchive c:\smbrdebug\sw20_small.den.lats.regenerated c:\smbrdebug\hvitelat\*lat
// We support two special output path syntaxes:
// - empty ("") -> don't output, just check the format
// - dash ("-") -> dump lattice to stdout instead
/*static*/ void archive::convert (const std::wstring & intocpath, const std::wstring & intocpath2, const std::wstring & outpath,
const msra::asr::simplesenonehmm & hset)
{
const auto & modelsymmap = hset.getsymmap();
const std::wstring tocpath = outpath + L".toc";
const std::wstring symlistpath = outpath + L".symlist";
// open input archive
// TODO: I find that HVite emits redundant physical triphones, and even HHEd seems so (in .tying file).
// Thus, we should uniq the units before sorting. We can do that here if we have the .tying file.
// And then use the modelsymmap to map them down.
// Do this directly in the hset module (it will be transparent).
std::vector<std::wstring> intocpaths (1, intocpath); // set of paths consisting of 1
msra::lattices::archive archive (intocpaths, modelsymmap);
// secondary archive for optional merging operation
const bool mergemode = !intocpath2.empty(); // true if merging two lattices
std::vector<std::wstring> intocpaths2;
if (mergemode)
intocpaths2.push_back (intocpath2);
msra::lattices::archive archive2 (intocpaths2, modelsymmap); // (if no merging then this archive2 is empty)
// read the intocpath file once again to get the keys in original order
std::vector<char> textbuffer;
auto toclines = msra::files::fgetfilelines (intocpath, textbuffer);
auto_file_ptr f = NULL;
auto_file_ptr ftoc = NULL;
// process all files
if (outpath != L"" && outpath != L"-") // test for special syntaxes that bypass to actually create an output archive
{
msra::files::make_intermediate_dirs (outpath);
f = fopenOrDie (outpath, L"wb");
ftoc = fopenOrDie (tocpath, L"wb");
}
vector<const char *> invmodelsymmap; // only used for dump() mode
// we must parse the toc file once again to get the keys in original order
size_t skippedmerges = 0;
foreach_index (i, toclines)
{
const char * line = toclines[i];
const char * p = strchr (line, '=');
if (p == NULL)
throw std::runtime_error ("open: invalid TOC line (no = sign): " + std::string (line));
const std::wstring key = msra::strfun::utf16 (std::string (line, p - line));
fprintf (stderr, "convert: processing lattice '%S'\n", key.c_str());
// fetch lattice --this performs any necessary format conversions already
lattice L;
archive.getlattice (key, L);
lattice L2;
if (mergemode)
{
if (!archive2.haslattice (key))
{
fprintf (stderr, "convert: cannot merge because lattice '%S' missing in secondary archive; skipping\n", key.c_str());
skippedmerges++;
continue;
}
archive2.getlattice (key, L2);
// merge it in
// This will connect each node with matching 1-phone context conditions; aimed at merging numer lattices.
L.removefinalnull(); // get rid of that final !NULL headache
L2.removefinalnull();
L.merge (L2, hset);
// note: we are left with dups due to true unigram merging (HTK lattices cannot represent true unigram lattices since id is on the nodes)
}
//L.removefinalnull();
//L.determinenodecontexts (hset);
// convert it --TODO: once we permanently use the new format, do this in fread() for V1
// Note: Merging may have left this in unsorted format; we need to be robust against that.
const size_t spunit = tryfind (modelsymmap, "sp", SIZE_MAX);
L.builduniquealignments (spunit);
if (mergemode)
L.dedup();
if (f && ftoc)
{
// write to archive
uint64_t offset = fgetpos (f);
L.fwrite (f);
fflushOrDie (f);
// write reference to TOC file --note: TOC file is a headerless UTF8 file; so don't use fprintf %S format (default code page)
fprintfOrDie (ftoc, "%s=%s[%llu]\n", msra::strfun::utf8 (key).c_str(), (i == 0) ? msra::strfun::utf8 (outpath).c_str() : "", offset);
fflushOrDie (ftoc);
fprintf (stderr, "written converted lattice to offset %llu as '%S'\n", offset, key.c_str());
}
else if (outpath == L"-")
{
if (invmodelsymmap.empty()) // build this lazily
{
invmodelsymmap.resize (modelsymmap.size());
for (auto iter = modelsymmap.begin(); iter != modelsymmap.end(); iter++)
invmodelsymmap[iter->second] = iter->first.c_str();
}
L.rebuildedges (false);
L.dump (stdout, [&] (size_t i) { return invmodelsymmap[i]; } );
}
} // end for (toclines)
if (skippedmerges > 0)
fprintf (stderr, "convert: %d out of %d merge operations skipped due to secondary lattice missing\n", skippedmerges, toclines.size());
// write out the updated unit map
if (f && ftoc)
writeunitmap (symlistpath, modelsymmap);
fprintf (stderr, "converted %d lattices\n", toclines.size());
}
// ---------------------------------------------------------------------------
// reading lattices from external formats (HTK lat, MLF)
// ---------------------------------------------------------------------------
// read an HTK lattice
// The lattice is expected to be freshly constructed (I did not bother to check).
void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std::string,size_t> & unitmap)
{
vector<char> textbuffer;
auto lines = msra::files::fgetfilelines (path, textbuffer);
if (lines.empty())
throw std::runtime_error ("lattice: mal-formed lattice--empty input file (or all-zeroes)");
auto iter = lines.begin();
// parse out LMF and WP
char dummychar = 0; // dummy for sscanf() end checking
for ( ; iter != lines.end() && strncmp (*iter, "N=", 2); iter++)
{
if (strncmp (*iter, "lmscale=", 8) == 0) // note: HTK sometimes generates extra garbage space at the end of this line
if (sscanf_s (*iter, "lmscale=%f wdpenalty=%f%c", &info.lmf, &info.wp, &dummychar, sizeof (dummychar)) != 2 && dummychar != ' ')
throw std::runtime_error ("lattice: mal-formed lmscale/wdpenalty line in lattice: " + string (*iter));
}
// parse N and L
if (iter != lines.end())
{
unsigned long N, L;
if (sscanf_s (*iter, "N=%lu L=%lu %c", &N, &L, &dummychar, sizeof (dummychar)) != 2)
throw std::runtime_error ("lattice: mal-formed N=/L= line in lattice: " + string (*iter));
info.numnodes = N;
info.numedges = L;
iter++;
}
else
throw std::runtime_error ("lattice: mal-formed before parse N=/L= line in lattice.");
assert(info.numnodes > 0);
nodes.reserve (info.numnodes);
// parse the nodes
for (size_t i = 0; i < info.numnodes; i++, iter++)
{
if (iter == lines.end())
throw std::runtime_error ("lattice: not enough I lines in lattice");
unsigned long itest;
float t;
if (sscanf_s (*iter, "I=%lu t=%f%c", &itest, &t, &dummychar, sizeof (dummychar)) < 2)
throw std::runtime_error ("lattice: mal-formed node line in lattice: " + string (*iter));
if (i != (size_t) itest)
throw std::runtime_error ("lattice: out-of-sequence node line in lattice: " + string (*iter));
nodes.push_back (nodeinfo ((unsigned int) (t / info.frameduration + 0.5)));
info.numframes = max (info.numframes, (size_t) nodes.back().t);
}
// parse the edges
assert(info.numedges > 0);
edges.reserve (info.numedges);
align.reserve (info.numedges * 10); // 10 phones per word on average should be enough
std::string label;
for (size_t j = 0; j < info.numedges; j++, iter++)
{
if (iter == lines.end())
throw std::runtime_error ("lattice: not enough J lines in lattice");
unsigned long jtest;
unsigned long S, E;
float a, l;
char d[1024];
// example:
// J=12 S=1 E=13 a=-326.81 l=-5.090 d=:sil-t:s+k:e,0.03:dh:m-ax:m+sil,0.03:sil,0.02:
int nvals = sscanf_s (*iter, "J=%lu S=%lu E=%lu a=%f l=%f d=%s", &jtest, &S, &E, &a, &l, d, sizeof (d)); // (pass d, not &d: %s expects the char array itself)
if (nvals == 5 && j == info.numedges - 1) // special case: last edge is a !NULL and thus may have the d= record missing
strcpy (d, ":");
else if (nvals != 6)
throw std::runtime_error ("lattice: mal-formed edge line in lattice: " + string (*iter));
if (j != (size_t) jtest)
throw std::runtime_error ("lattice: out-of-sequence edge line in lattice: " + string (*iter));
edges.push_back (edgeinfowithscores (S, E, a, l, align.size()));
// build align array
size_t edgeframes = 0; // (for checking whether the alignment sums up right)
const char * p = d;
if (p[0] != ':' || (p[1] == 0 && j < info.numedges-1)) // last edge may be empty
throw std::runtime_error ("lattice: alignment info must start with a colon and must have at least one entry: " + string (*iter));
p++;
while (*p)
{
// p points to an entry of the form TRIPHONE,DURATION
const char * q = strchr (p, ',');
if (q == NULL)
throw std::runtime_error ("lattice: alignment entry lacking a comma: " + string (*iter));
if (q == p)
throw std::runtime_error ("lattice: alignment entry label empty: " + string (*iter));
label.assign (p, q-p); // the triphone label
q++;
char * ep;
double duration = strtod (q, &ep); // (weird--returns a non-const ptr in ep to a const object)
p = ep;
if (*p != ':')
throw std::runtime_error ("lattice: alignment entry not ending with a colon: " + string (*iter));
p++;
// create the alignment entry
const size_t frames = (unsigned int) (duration / info.frameduration + 0.5);
auto it = unitmap.find (label);
if (it == unitmap.end())
throw std::runtime_error ("lattice: unit in alignment that is not in model: " + label);
const size_t unitid = it->second;
//const size_t unitid = unitmap.insert (make_pair (label, unitmap.size())).first->second; // may create a new entry with index = #entries
align.push_back (aligninfo (unitid, frames));
edgeframes += frames;
}
if (edgeframes != nodes[E].t - (size_t) nodes[S].t)
{
char msg[128];
sprintf (msg, "\n-- where edgeframes=%d != (nodes[E].t - nodes[S].t=%d), the gap is %d.", (int) edgeframes, (int) (nodes[E].t - (size_t) nodes[S].t), (int) (edgeframes + nodes[S].t - nodes[E].t));
throw std::runtime_error ("lattice: alignment info duration mismatches edge duration: " + string (*iter) + msg);
}
}
if (iter != lines.end())
throw std::runtime_error ("lattice: unexpected garbage at end of lattice: " + string (*iter));
checklattice();
// create more efficient storage for alignments
const size_t spunit = tryfind (unitmap, "sp", SIZE_MAX);
builduniquealignments (spunit);
showstats();
}
// construct a numerator lattice from an MLF entry
// The lattice is expected to be freshly constructed (I did not bother to check).
void lattice::frommlf (const wstring & key, const std::unordered_map<std::string,size_t> & unitmap,
const msra::asr::htkmlfreader<msra::asr::htkmlfentry,lattice::htkmlfwordsequence> & labels,
const msra::lm::CMGramLM & unigram, const msra::lm::CSymbolSet & unigramsymbols)
{
const auto & transcripts = labels.allwordtranscripts(); // (TODO: we could just pass the transcripts map--does not really matter)
// get the labels (state and word)
auto iter = transcripts.find (key);
if (iter == transcripts.end())
throw std::runtime_error ("frommlf: no reference word sequence in MLF for lattice with key " + strfun::utf8 (key));
const auto & transcript = iter->second;
if (transcript.words.size() == 0)
throw std::runtime_error ("frommlf: empty reference word sequence for lattice with key " + strfun::utf8 (key));
// determine unigram scores for all words
vector<float> lmscores (transcript.words.size());
size_t silence = unigramsymbols["!silence"];
size_t lmend = unigramsymbols["</s>"];
size_t sentstart = unigramsymbols["!sent_start"];
size_t sentend = unigramsymbols["!sent_end"];
// create the lattice
nodes.resize (transcript.words.size() +1);
edges.resize (transcript.words.size());
align.reserve (transcript.align.size());
size_t numframes = 0;
foreach_index (j, transcript.words)
{
const auto & w = transcript.words[j];
nodes[j].t = w.firstframe;
auto & e = edges[j];
e.unused = 0;
e.S = j;
e.E = j+1;
if (e.E != j+1)
throw std::runtime_error (msra::strfun::strprintf ("frommlf: too many tokens to be represented as edgeinfo::E in label set: %S", key.c_str()));
e.a = 0.0f; // no ac score
// LM score
// !sent_start and !silence are patched to LM score 0
size_t wid = w.wordindex;
if (wid == sentstart)
{
if (j != 0)
throw std::logic_error ("frommlf: found an !sent_start token not at the first position");
}
else if (wid == sentend)
{
if (j != (int) transcript.words.size()-1)
throw std::logic_error ("frommlf: found an !sent_end token not at the end position");
wid = lmend; // use </s> for score lookup
}
const int iwid = (int) wid;
e.l = (wid != sentstart && wid != silence) ? (float) unigram.score (&iwid, 1) : 0.0f;
// alignment
e.implysp = 0;
e.firstalign = align.size();
auto a = transcript.getaligninfo (j);
align.insert (align.end(), a.begin(), a.end());
foreach_index (k, a)
numframes += a[k].frames;
}
nodes[transcript.words.size()].t = (unsigned short) numframes;
if (nodes[transcript.words.size()].t != numframes)
throw std::runtime_error (msra::strfun::strprintf ("frommlf: too many frames to be represented as nodeinfo::t in label set: %S", key.c_str()));
info.lmf = -1.0f; // indicates not set
info.wp = 0.0f; // not set indicated by lmf < 0
info.numedges = edges.size();
info.numnodes = nodes.size();
info.numframes = numframes;
checklattice();
// create more efficient storage for alignments
const size_t spunit = tryfind (unitmap, "sp", SIZE_MAX);
builduniquealignments (spunit);
showstats();
}
};};

View file

@ -1,479 +0,0 @@
// matrix/cblas-wrappers.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey);
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_
#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1
#include <limits>
#include "matrix/sp-matrix.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/matrix-functions.h"
// Do not include this file directly. It is to be included
// by .cc files in this directory.
namespace kaldi {
inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
const int incY) {
cblas_scopy(N, X, incX, Y, incY);
}
inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
const int incY) {
cblas_dcopy(N, X, incX, Y, incY);
}
inline float cblas_Xasum(const int N, const float *X, const int incX) {
return cblas_sasum(N, X, incX);
}
inline double cblas_Xasum(const int N, const double *X, const int incX) {
return cblas_dasum(N, X, incX);
}
inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
const int incY, const float c, const float s) {
cblas_srot(N, X, incX, Y, incY, c, s);
}
inline void cblas_Xrot(const int N, double *X, const int incX, double *Y,
const int incY, const double c, const double s) {
cblas_drot(N, X, incX, Y, incY, c, s);
}
inline float cblas_Xdot(const int N, const float *const X,
const int incX, const float *const Y,
const int incY) {
return cblas_sdot(N, X, incX, Y, incY);
}
inline double cblas_Xdot(const int N, const double *const X,
const int incX, const double *const Y,
const int incY) {
return cblas_ddot(N, X, incX, Y, incY);
}
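// Why the Xfoo overloads above exist (a sketch, not original source): code
// templated on Real in {float, double} can call BLAS without manual
// dispatch, e.g.
//   template<typename Real>
//   Real SimpleDot(const Real *x, const Real *y, int n) { // hypothetical helper
//     return cblas_Xdot(n, x, 1, y, 1); // resolves to cblas_sdot or cblas_ddot
//   }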
inline void cblas_Xaxpy(const int N, const float alpha, const float *X,
const int incX, float *Y, const int incY) {
cblas_saxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_Xaxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY) {
cblas_daxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_Xscal(const int N, const float alpha, float *data,
const int inc) {
cblas_sscal(N, alpha, data, inc);
}
inline void cblas_Xscal(const int N, const double alpha, double *data,
const int inc) {
cblas_dscal(N, alpha, data, inc);
}
inline void cblas_Xspmv(const float alpha, const int num_rows, const float *Mdata,
const float *v, const int v_inc,
const float beta, float *y, const int y_inc) {
cblas_sspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
}
inline void cblas_Xspmv(const double alpha, const int num_rows, const double *Mdata,
const double *v, const int v_inc,
const double beta, double *y, const int y_inc) {
cblas_dspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
}
inline void cblas_Xtpmv(MatrixTransposeType trans, const float *Mdata,
const int num_rows, float *y, const int y_inc) {
cblas_stpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
inline void cblas_Xtpmv(MatrixTransposeType trans, const double *Mdata,
const int num_rows, double *y, const int y_inc) {
cblas_dtpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
// x = alpha * M * y + beta * x
inline void cblas_Xspmv(MatrixIndexT dim, float alpha, const float *Mdata,
const float *ydata, MatrixIndexT ystride,
float beta, float *xdata, MatrixIndexT xstride) {
cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
ydata, ystride, beta, xdata, xstride);
}
inline void cblas_Xspmv(MatrixIndexT dim, double alpha, const double *Mdata,
const double *ydata, MatrixIndexT ystride,
double beta, double *xdata, MatrixIndexT xstride) {
cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
ydata, ystride, beta, xdata, xstride);
}
// Implements A += alpha * (x y' + y x'); A is symmetric matrix.
inline void cblas_Xspr2(MatrixIndexT dim, float alpha, const float *Xdata,
MatrixIndexT incX, const float *Ydata, MatrixIndexT incY,
float *Adata) {
cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
incX, Ydata, incY, Adata);
}
inline void cblas_Xspr2(MatrixIndexT dim, double alpha, const double *Xdata,
MatrixIndexT incX, const double *Ydata, MatrixIndexT incY,
double *Adata) {
cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
incX, Ydata, incY, Adata);
}
// Implements A += alpha * (x x'); A is symmetric matrix.
inline void cblas_Xspr(MatrixIndexT dim, float alpha, const float *Xdata,
MatrixIndexT incX, float *Adata) {
cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
}
inline void cblas_Xspr(MatrixIndexT dim, double alpha, const double *Xdata,
MatrixIndexT incX, double *Adata) {
cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
}
// sgemv, dgemv: y = alpha M x + beta y.
inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, float alpha, const float *Mdata,
MatrixIndexT stride, const float *xdata,
MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
cblas_sgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
}
inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, double alpha, const double *Mdata,
MatrixIndexT stride, const double *xdata,
MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
cblas_dgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
}
// sgbmv, dgbmv: y = alpha M x + beta y.
inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, MatrixIndexT num_below,
MatrixIndexT num_above, float alpha, const float *Mdata,
MatrixIndexT stride, const float *xdata,
MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
cblas_sgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
incX, beta, ydata, incY);
}
inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, MatrixIndexT num_below,
MatrixIndexT num_above, double alpha, const double *Mdata,
MatrixIndexT stride, const double *xdata,
MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
cblas_dgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
incX, beta, ydata, incY);
}
template<typename Real>
inline void Xgemv_sparsevec(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, Real alpha, const Real *Mdata,
MatrixIndexT stride, const Real *xdata,
MatrixIndexT incX, Real beta, Real *ydata,
MatrixIndexT incY) {
if (trans == kNoTrans) {
if (beta != 1.0) cblas_Xscal(num_rows, beta, ydata, incY);
for (MatrixIndexT i = 0; i < num_cols; i++) {
Real x_i = xdata[i * incX];
if (x_i == 0.0) continue;
// Add to ydata, the i'th column of M, times alpha * x_i
cblas_Xaxpy(num_rows, x_i * alpha, Mdata + i, stride, ydata, incY);
}
} else {
if (beta != 1.0) cblas_Xscal(num_cols, beta, ydata, incY);
for (MatrixIndexT i = 0; i < num_rows; i++) {
Real x_i = xdata[i * incX];
if (x_i == 0.0) continue;
// Add to ydata, the i'th row of M, times alpha * x_i
cblas_Xaxpy(num_cols, x_i * alpha,
Mdata + (i * stride), 1, ydata, incY);
}
}
}
inline void cblas_Xgemm(const float alpha,
MatrixTransposeType transA,
const float *Adata,
MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
MatrixTransposeType transB,
const float *Bdata, MatrixIndexT b_stride,
const float beta,
float *Mdata,
MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
cblas_sgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA),
static_cast<CBLAS_TRANSPOSE>(transB),
num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
alpha, Adata, a_stride, Bdata, b_stride,
beta, Mdata, stride);
}
inline void cblas_Xgemm(const double alpha,
MatrixTransposeType transA,
const double *Adata,
MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
MatrixTransposeType transB,
const double *Bdata, MatrixIndexT b_stride,
const double beta,
double *Mdata,
MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
cblas_dgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA),
static_cast<CBLAS_TRANSPOSE>(transB),
num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
alpha, Adata, a_stride, Bdata, b_stride,
beta, Mdata, stride);
}
inline void cblas_Xsymm(const float alpha,
MatrixIndexT sz,
const float *Adata,MatrixIndexT a_stride,
const float *Bdata,MatrixIndexT b_stride,
const float beta,
float *Mdata, MatrixIndexT stride) {
cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
a_stride, Bdata, b_stride, beta, Mdata, stride);
}
inline void cblas_Xsymm(const double alpha,
MatrixIndexT sz,
const double *Adata,MatrixIndexT a_stride,
const double *Bdata,MatrixIndexT b_stride,
const double beta,
double *Mdata, MatrixIndexT stride) {
cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
a_stride, Bdata, b_stride, beta, Mdata, stride);
}
// ger: M += alpha x y^T.
inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, float alpha,
const float *xdata, MatrixIndexT incX, const float *ydata,
MatrixIndexT incY, float *Mdata, MatrixIndexT stride) {
cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
Mdata, stride);
}
inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, double alpha,
const double *xdata, MatrixIndexT incX, const double *ydata,
MatrixIndexT incY, double *Mdata, MatrixIndexT stride) {
cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
Mdata, stride);
}
// syrk: symmetric rank-k update.
// if trans==kNoTrans, then C = alpha A A^T + beta C
// else C = alpha A^T A + beta C.
// note: dim_c is dim(C), other_dim_a is the "other" dimension of A, i.e.
// num-cols(A) if kNoTrans, or num-rows(A) if kTrans.
// We only need the row-major and lower-triangular option of this, and this
// is hard-coded.
inline void cblas_Xsyrk (
const MatrixTransposeType trans, const MatrixIndexT dim_c,
const MatrixIndexT other_dim_a, const float alpha, const float *A,
const MatrixIndexT a_stride, const float beta, float *C,
const MatrixIndexT c_stride) {
cblas_ssyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
}
inline void cblas_Xsyrk(
const MatrixTransposeType trans, const MatrixIndexT dim_c,
const MatrixIndexT other_dim_a, const double alpha, const double *A,
const MatrixIndexT a_stride, const double beta, double *C,
const MatrixIndexT c_stride) {
cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
}
/// matrix-vector multiply using a banded matrix; we always call this
/// with b = 1 meaning we're multiplying by a diagonal matrix. This is used for
/// elementwise multiplication. We leave some of the arguments out of this
/// wrapper.
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const double *A,
const double alpha,
const double *x,
const double beta,
double *y) {
cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const float *A,
const float alpha,
const float *x,
const float beta,
float *y) {
cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could
/// extend this somehow.
inline void mul_elements(
const MatrixIndexT dim,
const double *a,
double *b) { // does b *= a, elementwise.
double c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
inline void mul_elements(
const MatrixIndexT dim,
const float *a,
float *b) { // does b *= a, elementwise.
float c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
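// Note on the 4-way unrolling above (added commentary): the temporaries
// c1..c4 let four multiplies issue before any store, reducing loop overhead
// and exposing instruction-level parallelism; the trailing loop handles the
// dim % 4 leftover elements.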
// add clapack here
#if !defined(HAVE_ATLAS)
inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {
stptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
}
inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *result) {
dtptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
}
//
inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols,
float *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot,
KaldiBlasInt *result) {
sgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
}
inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols,
double *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot,
KaldiBlasInt *result) {
dgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
}
//
inline void clapack_Xgetri2(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
KaldiBlasInt *pivot, float *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
sgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
}
inline void clapack_Xgetri2(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
KaldiBlasInt *pivot, double *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
dgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
}
//
inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
float *sv, float *Vdata, KaldiBlasInt *vstride,
float *Udata, KaldiBlasInt *ustride, float *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
sgesvd_(v, u,
num_cols, num_rows, Mdata, stride,
sv, Vdata, vstride, Udata, ustride,
p_work, l_work, result);
}
inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
double *sv, double *Vdata, KaldiBlasInt *vstride,
double *Udata, KaldiBlasInt *ustride, double *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
dgesvd_(v, u,
num_cols, num_rows, Mdata, stride,
sv, Vdata, vstride, Udata, ustride,
p_work, l_work, result);
}
//
void inline clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata,
KaldiBlasInt *ipiv, float *work, KaldiBlasInt *result) {
ssptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
}
void inline clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata,
KaldiBlasInt *ipiv, double *work, KaldiBlasInt *result) {
dsptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
}
//
void inline clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata,
KaldiBlasInt *ipiv, KaldiBlasInt *result) {
ssptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
}
void inline clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata,
KaldiBlasInt *ipiv, KaldiBlasInt *result) {
dsptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
}
#else
inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
float *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_sgetrf(CblasColMajor, num_rows, num_cols,
Mdata, stride, pivot);
}
inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
double *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_dgetrf(CblasColMajor, num_rows, num_cols,
Mdata, stride, pivot);
}
//
inline int clapack_Xtrtri(int num_rows, float *Mdata, MatrixIndexT stride) {
return clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
Mdata, stride);
}
inline int clapack_Xtrtri(int num_rows, double *Mdata, MatrixIndexT stride) {
return clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
Mdata, stride);
}
//
inline void clapack_Xgetri(MatrixIndexT num_rows, float *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
}
inline void clapack_Xgetri(MatrixIndexT num_rows, double *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
}
#endif
}
// namespace kaldi
#endif

View file

@ -1,558 +0,0 @@
// matrix/compressed-matrix.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Frantisek Skala
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "matrix/compressed-matrix.h"
#include <algorithm>
namespace kaldi {
template<typename Real>
void CompressedMatrix::CopyFromMat(
const MatrixBase<Real> &mat) {
if (data_ != NULL) {
delete [] static_cast<float*>(data_); // call delete [] because was allocated with new float[]
data_ = NULL;
}
if (mat.NumRows() == 0) { return; } // Zero-size matrix stored as zero pointer.
GlobalHeader global_header;
KALDI_COMPILE_TIME_ASSERT(sizeof(global_header) == 16); // otherwise
// something weird is happening and our code probably won't work or
// won't be robust across platforms.
// Below, the point of the "safety_margin" is that the minimum
// and maximum values in the matrix shouldn't coincide with
// the minimum and maximum ranges of the 16-bit range, because
// this could cause certain problems in ComputeColHeader, where
// we need to ensure that the percentile_0 through percentile_100
// are in strictly increasing order.
float min_value = mat.Min(), max_value = mat.Max();
if (max_value == min_value)
max_value = min_value + (1.0 + fabs(min_value)); // ensure it's strictly
// greater than min_value,
// even if matrix is
// constant.
global_header.min_value = min_value;
global_header.range = max_value - min_value;
// We can't compress the matrix if there are inf's or nan's.
// The caller should check for this first.
KALDI_ASSERT(KALDI_ISFINITE(global_header.min_value) &&
KALDI_ISFINITE(global_header.range));
// Avoid division by zero if the matrix is just a constant:
// make sure max_value > min_value.
if (global_header.range <= 0.0)
global_header.range = 1.0e-05;
global_header.num_rows = mat.NumRows();
global_header.num_cols = mat.NumCols();
int32 data_size = DataSize(global_header);
data_ = AllocateData(data_size);
*(reinterpret_cast<GlobalHeader*>(data_)) = global_header;
PerColHeader *header_data =
reinterpret_cast<PerColHeader*>(static_cast<char*>(data_) +
sizeof(GlobalHeader));
unsigned char *byte_data =
reinterpret_cast<unsigned char*>(header_data + global_header.num_cols);
const Real *matrix_data = mat.Data();
for (int32 col = 0; col < global_header.num_cols; col++) {
CompressColumn(global_header,
matrix_data + col, mat.Stride(),
global_header.num_rows,
header_data, byte_data);
header_data++;
byte_data += global_header.num_rows;
}
}
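For reference, the layout that CopyFromMat just wrote into data_ (and that Write, Read and CopyToMat below all assume) is:
// [GlobalHeader: min_value, range, num_rows, num_cols -- 16 bytes]
// [PerColHeader x num_cols -- four uint16 percentiles, 8 bytes per column]
// [num_rows bytes of quantized values for column 0][column 1] ...
This restates what the pointer arithmetic on header_data and byte_data implements; it adds no new behavior.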
// Instantiate the template for float and double.
template
void CompressedMatrix::CopyFromMat(const MatrixBase<float> &mat);
template
void CompressedMatrix::CopyFromMat(const MatrixBase<double> &mat);
template<typename Real>
CompressedMatrix &CompressedMatrix::operator =(const MatrixBase<Real> &mat) {
this->CopyFromMat(mat);
return *this;
}
// Instantiate the template for float and double.
template
CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<float> &mat);
template
CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<double> &mat);
inline uint16 CompressedMatrix::FloatToUint16(
const GlobalHeader &global_header,
float value) {
float f = (value - global_header.min_value) /
global_header.range;
if (f > 1.0) f = 1.0; // Note: this should not happen.
if (f < 0.0) f = 0.0; // Note: this should not happen.
return static_cast<int>(f * 65535 + 0.499); // + 0.499 is to
// round to closest int; avoids bias.
}
inline float CompressedMatrix::Uint16ToFloat(
const GlobalHeader &global_header,
uint16 value) {
// the constant 1.52590218966964e-05 is 1/65535.
return global_header.min_value
+ global_header.range * 1.52590218966964e-05 * value;
}
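FloatToUint16 and Uint16ToFloat quantize a float into 65536 evenly spaced levels across [min_value, min_value + range], so the round-trip error is at most about half a step, range/(2*65535). A self-contained sketch (illustration only, not part of the original file) that checks this numerically:
#include <algorithm>
#include <cmath>
#include <cstdio>
int main() {
  const float min_value = -1.0f, range = 2.0f;  // a GlobalHeader-style (min, range)
  float worst = 0.0f;
  for (float v = -1.0f; v <= 1.0f; v += 0.001f) {
    float f = (v - min_value) / range;  // normalize to [0, 1]
    unsigned short q = static_cast<unsigned short>(f * 65535 + 0.499f);  // FloatToUint16 logic
    float back = min_value + range * 1.52590218966964e-05f * q;  // Uint16ToFloat logic
    worst = std::max(worst, std::fabs(back - v));
  }
  std::printf("worst error %g, half-step %g\n", worst, range / (2.0f * 65535.0f));
  return 0;
}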
template<typename Real> // static
void CompressedMatrix::ComputeColHeader(
const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, CompressedMatrix::PerColHeader *header) {
KALDI_ASSERT(num_rows > 0);
std::vector<Real> sdata(num_rows); // the sorted data.
for (size_t i = 0, size = sdata.size(); i < size; i++)
sdata[i] = data[i*stride];
if (num_rows >= 5) {
int quarter_nr = num_rows/4;
// std::sort(sdata.begin(), sdata.end());
// The elements at positions 0, quarter_nr,
// 3*quarter_nr, and num_rows-1 need to be in sorted order.
std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
// Now, sdata.begin() + quarter_nr contains the element that would appear
// in sorted order, in that position.
std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
// Now, sdata.begin() and sdata.begin() + quarter_nr contain the elements
// that would appear at those positions in sorted order.
std::nth_element(sdata.begin() + quarter_nr + 1,
sdata.begin() + (3*quarter_nr), sdata.end());
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
// 3*quarter_nr, contain the elements that would appear at those positions
// in sorted order.
std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
sdata.end());
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
// 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear
// at those positions in sorted order.
header->percentile_0 = FloatToUint16(global_header, sdata[0]);
header->percentile_25 = std::max<uint16>(
FloatToUint16(global_header, sdata[quarter_nr]),
header->percentile_0 + static_cast<uint16>(1));
header->percentile_75 = std::max<uint16>(
FloatToUint16(global_header, sdata[3*quarter_nr]),
header->percentile_25 + static_cast<uint16>(1));
header->percentile_100 = std::max<uint16>(
FloatToUint16(global_header, sdata[num_rows-1]),
header->percentile_75 + static_cast<uint16>(1));
} else { // handle this pathological case.
std::sort(sdata.begin(), sdata.end());
// Note: we know num_rows is at least 1.
header->percentile_0 = FloatToUint16(global_header, sdata[0]);
if (num_rows > 1)
header->percentile_25 =
std::max<uint16>(FloatToUint16(global_header, sdata[1]),
header->percentile_0 + 1);
else
header->percentile_25 = header->percentile_0 + 1;
if (num_rows > 2)
header->percentile_75 =
std::max<uint16>(FloatToUint16(global_header, sdata[2]),
header->percentile_25 + 1);
else
header->percentile_75 = header->percentile_25 + 1;
if (num_rows > 3)
header->percentile_100 =
std::max<uint16>(FloatToUint16(global_header, sdata[3]),
header->percentile_75 + 1);
else
header->percentile_100 = header->percentile_75 + 1;
}
}
// static
inline unsigned char CompressedMatrix::FloatToChar(
float p0, float p25, float p75, float p100,
float value) {
int ans;
if (value < p25) { // range [ p0, p25 ) covered by
// characters 0 .. 64. We round to the closest int.
float f = (value - p0) / (p25 - p0);
ans = static_cast<int>(f * 64 + 0.5);
// Note: the checks on the next two lines
// are necessary in pathological cases when all the elements in a row
// are the same and the percentile_* values are separated by one.
if (ans < 0) ans = 0;
if (ans > 64) ans = 64;
} else if (value < p75) { // range [ p25, p75 ) covered
// by characters 64 .. 192. We round to the closest int.
float f = (value - p25) / (p75 - p25);
ans = 64 + static_cast<int>(f * 128 + 0.5);
if (ans < 64) ans = 64;
if (ans > 192) ans = 192;
} else { // range [ p75, p100 ] covered by
// characters 192 .. 255. Note: this last range
// has fewer characters than the left range, because
// we go up to 255, not 256.
float f = (value - p75) / (p100 - p75);
ans = 192 + static_cast<int>(f * 63 + 0.5);
if (ans < 192) ans = 192;
if (ans > 255) ans = 255;
}
return static_cast<unsigned char>(ans);
}
// static
inline float CompressedMatrix::CharToFloat(
float p0, float p25, float p75, float p100,
unsigned char value) {
if (value <= 64) {
return p0 + (p25 - p0) * value * (1/64.0);
} else if (value <= 192) {
return p25 + (p75 - p25) * (value - 64) * (1/128.0);
} else {
return p75 + (p100 - p75) * (value - 192) * (1/63.0);
}
}
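FloatToChar and CharToFloat form a piecewise-linear 8-bit code: 65 levels for [p0, p25), 128 for [p25, p75) and 63 for [p75, p100], so resolution is finest in the middle two quartiles where most of the data lies. A standalone round-trip sketch (illustration only; it duplicates the logic above outside the class):
#include <cstdio>
// Standalone copies of the encode/decode logic above, for illustration.
static unsigned char Encode(float p0, float p25, float p75, float p100, float v) {
  int ans;
  if (v < p25) {
    ans = static_cast<int>((v - p0) / (p25 - p0) * 64 + 0.5);
    ans = ans < 0 ? 0 : (ans > 64 ? 64 : ans);
  } else if (v < p75) {
    ans = 64 + static_cast<int>((v - p25) / (p75 - p25) * 128 + 0.5);
    ans = ans < 64 ? 64 : (ans > 192 ? 192 : ans);
  } else {
    ans = 192 + static_cast<int>((v - p75) / (p100 - p75) * 63 + 0.5);
    ans = ans < 192 ? 192 : (ans > 255 ? 255 : ans);
  }
  return static_cast<unsigned char>(ans);
}
static float Decode(float p0, float p25, float p75, float p100, unsigned char c) {
  if (c <= 64) return p0 + (p25 - p0) * c * (1 / 64.0f);
  if (c <= 192) return p25 + (p75 - p25) * (c - 64) * (1 / 128.0f);
  return p75 + (p100 - p75) * (c - 192) * (1 / 63.0f);
}
int main() {
  const float p0 = 0.0f, p25 = 1.0f, p75 = 3.0f, p100 = 4.0f;
  const float tests[] = {0.1f, 1.7f, 3.9f};
  for (float v : tests) {
    unsigned char c = Encode(p0, p25, p75, p100, v);
    std::printf("%.3f -> %3d -> %.4f\n", v, c, Decode(p0, p25, p75, p100, c));
  }
  return 0;
}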
template<typename Real> // static
void CompressedMatrix::CompressColumn(
const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, CompressedMatrix::PerColHeader *header,
unsigned char *byte_data) {
ComputeColHeader(global_header, data, stride,
num_rows, header);
float p0 = Uint16ToFloat(global_header, header->percentile_0),
p25 = Uint16ToFloat(global_header, header->percentile_25),
p75 = Uint16ToFloat(global_header, header->percentile_75),
p100 = Uint16ToFloat(global_header, header->percentile_100);
for (int32 i = 0; i < num_rows; i++) {
Real this_data = data[i * stride];
byte_data[i] = FloatToChar(p0, p25, p75, p100, this_data);
}
}
// static
void* CompressedMatrix::AllocateData(int32 num_bytes) {
KALDI_ASSERT(num_bytes > 0);
KALDI_COMPILE_TIME_ASSERT(sizeof(float) == 4);
// Round up to a whole number of floats. Note: num_bytes/3 floats is
// 4*num_bytes/3 bytes, so this allocates about a third more than needed
// beyond simple rounding; safe, just not tight.
return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);
}
#define DEBUG_COMPRESSED_MATRIX 0 // Must be zero for Kaldi to work; use 1 only
// for debugging.
void CompressedMatrix::Write(std::ostream &os, bool binary) const {
if (binary) { // Binary-mode write:
WriteToken(os, binary, "CM");
if (data_ != NULL) {
GlobalHeader &h = *reinterpret_cast<GlobalHeader*>(data_);
MatrixIndexT size = DataSize(h); // total size of data in data_
os.write(reinterpret_cast<const char*>(data_), size);
} else { // special case: where data_ == NULL, we treat it as an empty
// matrix.
GlobalHeader h;
h.range = h.min_value = 0.0;
h.num_rows = h.num_cols = 0;
os.write(reinterpret_cast<const char*>(&h), sizeof(h));
}
} else {
// In text mode, just use the same format as a regular matrix.
// This is not compressed.
#if DEBUG_COMPRESSED_MATRIX == 0
Matrix<BaseFloat> temp_mat(this->NumRows(), this->NumCols(),
kUndefined);
this->CopyToMat(&temp_mat);
temp_mat.Write(os, binary);
#else
// Text-mode writing. Only really useful for debug, but we'll implement it.
if (data_ == NULL) {
os << 0.0 << ' ' << 0.0 << ' ' << 0 << ' ' << 0 << '\n';
} else {
GlobalHeader &h = *reinterpret_cast<GlobalHeader*>(data_);
KALDI_ASSERT(h.num_cols != 0);
os << h.min_value << ' ' << h.range << ' ' << h.num_rows << ' ' << h.num_cols << '\n';
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(&h + 1);
unsigned char *c = reinterpret_cast<unsigned char*>(per_col_header + h.num_cols);
for (int32 i = 0; i < h.num_cols; i++, per_col_header++) {
os << per_col_header->percentile_0 << ' ' << per_col_header->percentile_25
<< ' ' << per_col_header->percentile_75
<< ' ' << per_col_header->percentile_100 << '\n';
for (int32 j = 0; j < h.num_rows; j++, c++)
os << static_cast<int>(*c) << ' ';
os << '\n';
}
}
#endif
}
if (os.fail())
KALDI_ERR << "Error writing compressed matrix to stream.";
}
void CompressedMatrix::Read(std::istream &is, bool binary) {
if (data_ != NULL) {
delete [] (static_cast<float*>(data_));
data_ = NULL;
}
if (binary) { // Binary-mode read.
// Caution: the following is not backward compatible; if you were using
// CompressedMatrix before, the old format will not be readable.
int peekval = Peek(is, binary);
if (peekval == 'C') {
ExpectToken(is, binary, "CM");
GlobalHeader h;
is.read(reinterpret_cast<char*>(&h), sizeof(h));
if (is.fail())
KALDI_ERR << "Failed to read header";
if (h.num_cols == 0) { // empty matrix.
return;
}
int32 size = DataSize(h), remaining_size = size - sizeof(GlobalHeader);
data_ = AllocateData(size);
*(reinterpret_cast<GlobalHeader*>(data_)) = h;
is.read(reinterpret_cast<char*>(data_) + sizeof(GlobalHeader),
remaining_size);
} else {
// Assume that what we're reading is a regular Matrix. This might be the
// case if you changed your code, making a Matrix into a CompressedMatrix,
// and you want back-compatibility for reading.
Matrix<BaseFloat> M;
M.Read(is, binary); // This will crash if it was not a Matrix. This might happen,
// for instance, if the CompressedMatrix was written using the
// older code where we didn't write the token "CM", we just
// wrote the binary data directly.
this->CopyFromMat(M);
}
} else { // Text-mode read.
#if DEBUG_COMPRESSED_MATRIX == 0
Matrix<BaseFloat> temp;
temp.Read(is, binary);
this->CopyFromMat(temp);
#else
// The old reading code...
GlobalHeader h;
is >> h.min_value >> h.range >> h.num_rows >> h.num_cols;
if (is.fail())
KALDI_ERR << "Failed to read header.";
if (h.num_cols == 0) { // Empty matrix; null data_ pointer.
return;
}
int32 size = DataSize(h);
data_ = AllocateData(size);
*(reinterpret_cast<GlobalHeader*>(data_)) = h;
PerColHeader *per_col_header =
reinterpret_cast<PerColHeader*>(static_cast<char*>(data_)
+ sizeof(GlobalHeader));
unsigned char *c =
reinterpret_cast<unsigned char*>(per_col_header + h.num_cols);
for (int32 i = 0; i < h.num_cols; i++, per_col_header++) {
is >> per_col_header->percentile_0 >> per_col_header->percentile_25
>> per_col_header->percentile_75 >> per_col_header->percentile_100;
for (int32 j = 0; j < h.num_rows; j++, c++) {
int i;
is >> i;
KALDI_ASSERT(i >= 0 && i <= 255);
*c = static_cast<unsigned char>(i);
}
}
#endif
}
if (is.fail())
KALDI_ERR << "Failed to read data.";
}
template<typename Real>
void CompressedMatrix::CopyToMat(MatrixBase<Real> *mat) const {
if (data_ == NULL) {
KALDI_ASSERT(mat->NumRows() == 0);
KALDI_ASSERT(mat->NumCols() == 0);
} else {
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
int32 num_cols = h->num_cols, num_rows = h->num_rows;
KALDI_ASSERT(mat->NumRows() == num_rows);
KALDI_ASSERT(mat->NumCols() == num_cols);
for (int32 i = 0; i < num_cols; i++, per_col_header++) {
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 j = 0; j < num_rows; j++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*mat)(j, i) = f;
}
}
}
}
// Instantiate the template for float and double.
template
void CompressedMatrix::CopyToMat(MatrixBase<float> *mat) const;
template
void CompressedMatrix::CopyToMat(MatrixBase<double> *mat) const;
template<typename Real>
void CompressedMatrix::CopyRowToVec(MatrixIndexT row,
VectorBase<Real> *v) const {
KALDI_ASSERT(row < this->NumRows());
KALDI_ASSERT(row >= 0);
KALDI_ASSERT(v->Dim() == this->NumCols());
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
byte_data += row; // point to first value we are interested in
for (int32 i = 0; i < h->num_cols;
i++, per_col_header++, byte_data+=h->num_rows) {
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*v)(i) = f;
}
}
template<typename Real>
void CompressedMatrix::CopyColToVec(MatrixIndexT col,
VectorBase<Real> *v) const {
KALDI_ASSERT(col < this->NumCols());
KALDI_ASSERT(col >= 0);
KALDI_ASSERT(v->Dim() == this->NumRows());
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
byte_data += col*h->num_rows; // point to first value in the column we want
per_col_header += col;
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 i = 0; i < h->num_rows; i++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*v)(i) = f;
}
}
// instantiate the templates.
template void
CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<double> *) const;
template void
CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<float> *) const;
template void
CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<double> *) const;
template void
CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<float> *) const;
template<typename Real>
void CompressedMatrix::CopyToMat(int32 row_offset,
int32 column_offset,
MatrixBase<Real> *dest) const {
KALDI_PARANOID_ASSERT(row_offset < this->NumRows());
KALDI_PARANOID_ASSERT(column_offset < this->NumCols());
KALDI_PARANOID_ASSERT(row_offset >= 0);
KALDI_PARANOID_ASSERT(column_offset >= 0);
// "<=", not "<": a full-size copy at offset zero must be allowed.
KALDI_ASSERT(row_offset+dest->NumRows() <= this->NumRows());
KALDI_ASSERT(column_offset+dest->NumCols() <= this->NumCols());
// everything is OK
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
int32 num_rows = h->num_rows;
int32 tgt_cols = dest->NumCols(), tgt_rows = dest->NumRows();
unsigned char *start_of_subcol = byte_data+row_offset; // skip appropriate
// number of columns
start_of_subcol += column_offset*num_rows; // skip appropriate number of rows
per_col_header += column_offset; // skip the appropriate number of headers
for (int32 i = 0;
i < tgt_cols;
i++, per_col_header++, start_of_subcol+=num_rows) {
byte_data = start_of_subcol;
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 j = 0; j < tgt_rows; j++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*dest)(j, i) = f;
}
}
}
// instantiate the templates.
template void CompressedMatrix::CopyToMat(int32,
int32,
MatrixBase<float> *dest) const;
template void CompressedMatrix::CopyToMat(int32,
int32,
MatrixBase<double> *dest) const;
void CompressedMatrix::Destroy() {
if (data_ != NULL) {
delete [] static_cast<float*>(data_);
data_ = NULL;
}
}
CompressedMatrix::CompressedMatrix(const CompressedMatrix &mat): data_(NULL) {
*this = mat; // use assignment operator.
}
CompressedMatrix &CompressedMatrix::operator = (const CompressedMatrix &mat) {
Destroy(); // now this->data_ == NULL.
if (mat.data_ != NULL) {
MatrixIndexT data_size = DataSize(*static_cast<GlobalHeader*>(mat.data_));
data_ = AllocateData(data_size);
memcpy(static_cast<void*>(data_),
static_cast<void*>(mat.data_),
data_size);
}
return *this;
}
} // namespace kaldi

View file

@ -1,166 +0,0 @@
// matrix/compressed-matrix.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Frantisek Skala
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_
#define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1
#include "kaldi-matrix.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
/// This class does lossy compression of a matrix. It only
/// supports copying to-from a KaldiMatrix. For large matrices,
/// each element is compressed into about one byte, but there
/// is a little overhead on top of that (globally, and also per
/// column).
/// The basic idea is for each column (in the normal configuration)
/// we work out the values at the 0th, 25th, 75th and 100th percentiles
/// and store them as 16-bit integers; we then encode each value in
/// the column as a single byte, in 3 separate ranges with different
/// linear encodings (0th-25th, 25th-75th, 75th-100th).
class CompressedMatrix {
public:
CompressedMatrix(): data_(NULL) { }
~CompressedMatrix() { Destroy(); }
template<typename Real>
CompressedMatrix(const MatrixBase<Real> &mat): data_(NULL) { CopyFromMat(mat); }
/// This will resize *this and copy the contents of mat to *this.
template<typename Real>
void CopyFromMat(const MatrixBase<Real> &mat);
CompressedMatrix(const CompressedMatrix &mat);
CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator.
template<typename Real>
CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator.
// Note: mat must have the correct size, CopyToMat no longer attempts
// to resize the matrix
template<typename Real>
void CopyToMat(MatrixBase<Real> *mat) const;
void Write(std::ostream &os, bool binary) const;
void Read(std::istream &is, bool binary);
/// Returns number of rows (or zero for empty matrix).
inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 :
(*reinterpret_cast<GlobalHeader*>(data_)).num_rows; }
/// Returns number of columns (or zero for empty matrix).
inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 :
(*reinterpret_cast<GlobalHeader*>(data_)).num_cols; }
/// Copies row #row of the matrix into vector v.
/// Note: v must have same size as #cols.
template<typename Real>
void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const;
/// Copies column #col of the matrix into vector v.
/// Note: v must have same size as #rows.
template<typename Real>
void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const;
/// Copies submatrix of compressed matrix into matrix dest.
/// The submatrix starts at row row_offset and column column_offset, and its
/// size is defined by the size of the provided matrix dest.
template<typename Real>
void CopyToMat(int32 row_offset,
int32 column_offset,
MatrixBase<Real> *dest) const;
void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); }
friend class Matrix<float>;
friend class Matrix<double>;
private:
// allocates data using new [], ensures byte alignment
// sufficient for float.
static void *AllocateData(int32 num_bytes);
struct GlobalHeader {
float min_value;
float range;
int32 num_rows;
int32 num_cols;
};
static MatrixIndexT DataSize(const GlobalHeader &header) {
// Returns size in bytes of the data.
return sizeof(GlobalHeader) +
header.num_cols * (sizeof(PerColHeader) + header.num_rows);
}
struct PerColHeader {
uint16 percentile_0;
uint16 percentile_25;
uint16 percentile_75;
uint16 percentile_100;
};
template<typename Real>
static void CompressColumn(const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, PerColHeader *header,
unsigned char *byte_data);
template<typename Real>
static void ComputeColHeader(const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, PerColHeader *header);
static inline uint16 FloatToUint16(const GlobalHeader &global_header,
float value);
static inline float Uint16ToFloat(const GlobalHeader &global_header,
uint16 value);
static inline unsigned char FloatToChar(float p0, float p25,
float p75, float p100,
float value);
static inline float CharToFloat(float p0, float p25,
float p75, float p100,
unsigned char value);
void Destroy();
void *data_; // first GlobalHeader, then PerColHeader (repeated), then
// the byte data for each column (repeated). Note: don't intersperse
// the byte data with the PerColHeaders, because of alignment issues.
};
/// @} end of \addtogroup matrix_group
} // namespace kaldi
#endif // KALDI_MATRIX_COMPRESSED_MATRIX_H_
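A hedged usage sketch of the interface above (not part of the original sources; it assumes the usual Kaldi headers and the SetRandn() fill helper from kaldi-matrix.h):
#include "matrix/compressed-matrix.h"
void Demo() {
  kaldi::Matrix<float> mat(100, 40);
  mat.SetRandn();  // fill with Gaussian noise (assumed Kaldi helper)
  kaldi::CompressedMatrix cmat(mat);  // lossy compression via CopyFromMat
  kaldi::Matrix<float> out(cmat.NumRows(), cmat.NumCols());
  cmat.CopyToMat(&out);  // note: out must already have the right size
}
For this 100x40 example, DataSize gives 16 + 40*(8 + 100) = 4336 bytes, about 1.08 bytes per element, versus 16000 bytes for the uncompressed floats.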

View file

@ -1,924 +0,0 @@
// matrix/jama-eig.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This file consists of a port and modification of materials from
// JAMA: A Java Matrix Package
// under the following notice: This software is a cooperative product of
// The MathWorks and the National Institute of Standards and Technology (NIST)
// which has been released to the public. This notice and the original code are
// available at http://math.nist.gov/javanumerics/jama/domain.notice
#ifndef KALDI_MATRIX_JAMA_EIG_H_
#define KALDI_MATRIX_JAMA_EIG_H_ 1
#include "matrix/kaldi-matrix.h"
namespace kaldi {
// This class is not to be used externally. See the Eig function in the Matrix
// class in kaldi-matrix.h. This is the external interface.
template<typename Real> class EigenvalueDecomposition {
// This class is based on the EigenvalueDecomposition class from the JAMA
// library (version 1.0.2).
public:
EigenvalueDecomposition(const MatrixBase<Real> &A);
~EigenvalueDecomposition(); // free memory.
void GetV(MatrixBase<Real> *V_out) { // V is what we call P externally; it's the matrix of
// eigenvectors.
KALDI_ASSERT(V_out->NumRows() == static_cast<MatrixIndexT>(n_)
&& V_out->NumCols() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
(*V_out)(i, j) = V(i, j); // V(i, j) is member function.
}
void GetRealEigenvalues(VectorBase<Real> *r_out) {
// returns real part of eigenvalues.
KALDI_ASSERT(r_out->Dim() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
(*r_out)(i) = d_[i];
}
void GetImagEigenvalues(VectorBase<Real> *i_out) {
// returns imaginary part of eigenvalues.
KALDI_ASSERT(i_out->Dim() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
(*i_out)(i) = e_[i];
}
private:
inline Real &H(int r, int c) { return H_[r*n_ + c]; }
inline Real &V(int r, int c) { return V_[r*n_ + c]; }
// complex division
inline static void cdiv(Real xr, Real xi, Real yr, Real yi, Real *cdivr, Real *cdivi) {
Real r, d;
if (std::abs(yr) > std::abs(yi)) {
r = yi/yr;
d = yr + r*yi;
*cdivr = (xr + r*xi)/d;
*cdivi = (xi - r*xr)/d;
} else {
r = yr/yi;
d = yi + r*yr;
*cdivr = (r*xr + xi)/d;
*cdivi = (r*xi - xr)/d;
}
}
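cdiv is the classic scaled complex division (often attributed to Smith): dividing through by whichever of yr, yi is larger in magnitude keeps the intermediates in range and avoids the overflow/underflow of the naive formula. As an illustration of what a call inside Hqr2 computes (cdiv is private to the class):
Real cr, ci;
cdiv(1.0, 2.0, 3.0, 4.0, &cr, &ci);  // (1+2i)/(3+4i) = (11+2i)/25: cr == 0.44, ci == 0.08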
// Nonsymmetric reduction from Hessenberg to real Schur form.
void Hqr2 ();
int n_; // matrix dimension.
Real *d_, *e_; // real and imaginary parts of eigenvalues.
Real *V_; // the eigenvectors (P in our external notation)
Real *H_; // the nonsymmetric Hessenberg form.
Real *ort_; // working storage for nonsymmetric algorithm.
// Symmetric Householder reduction to tridiagonal form.
void Tred2 ();
// Symmetric tridiagonal QL algorithm.
void Tql2 ();
// Nonsymmetric reduction to Hessenberg form.
void Orthes ();
};
template class EigenvalueDecomposition<float>; // force instantiation.
template class EigenvalueDecomposition<double>; // force instantiation.
template<typename Real> void EigenvalueDecomposition<Real>::Tred2() {
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int j = 0; j < n_; j++) {
d_[j] = V(n_-1, j);
}
// Householder reduction to tridiagonal form.
for (int i = n_-1; i > 0; i--) {
// Scale to avoid under/overflow.
Real scale = 0.0;
Real h = 0.0;
for (int k = 0; k < i; k++) {
scale = scale + std::abs(d_[k]);
}
if (scale == 0.0) {
e_[i] = d_[i-1];
for (int j = 0; j < i; j++) {
d_[j] = V(i-1, j);
V(i, j) = 0.0;
V(j, i) = 0.0;
}
} else {
// Generate Householder vector.
for (int k = 0; k < i; k++) {
d_[k] /= scale;
h += d_[k] * d_[k];
}
Real f = d_[i-1];
Real g = std::sqrt(h);
if (f > 0) {
g = -g;
}
e_[i] = scale * g;
h = h - f * g;
d_[i-1] = f - g;
for (int j = 0; j < i; j++) {
e_[j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for (int j = 0; j < i; j++) {
f = d_[j];
V(j, i) = f;
g = e_[j] + V(j, j) * f;
for (int k = j+1; k <= i-1; k++) {
g += V(k, j) * d_[k];
e_[k] += V(k, j) * f;
}
e_[j] = g;
}
f = 0.0;
for (int j = 0; j < i; j++) {
e_[j] /= h;
f += e_[j] * d_[j];
}
Real hh = f / (h + h);
for (int j = 0; j < i; j++) {
e_[j] -= hh * d_[j];
}
for (int j = 0; j < i; j++) {
f = d_[j];
g = e_[j];
for (int k = j; k <= i-1; k++) {
V(k, j) -= (f * e_[k] + g * d_[k]);
}
d_[j] = V(i-1, j);
V(i, j) = 0.0;
}
}
d_[i] = h;
}
// Accumulate transformations.
for (int i = 0; i < n_-1; i++) {
V(n_-1, i) = V(i, i);
V(i, i) = 1.0;
Real h = d_[i+1];
if (h != 0.0) {
for (int k = 0; k <= i; k++) {
d_[k] = V(k, i+1) / h;
}
for (int j = 0; j <= i; j++) {
Real g = 0.0;
for (int k = 0; k <= i; k++) {
g += V(k, i+1) * V(k, j);
}
for (int k = 0; k <= i; k++) {
V(k, j) -= g * d_[k];
}
}
}
for (int k = 0; k <= i; k++) {
V(k, i+1) = 0.0;
}
}
for (int j = 0; j < n_; j++) {
d_[j] = V(n_-1, j);
V(n_-1, j) = 0.0;
}
V(n_-1, n_-1) = 1.0;
e_[0] = 0.0;
}
template<typename Real> void EigenvalueDecomposition<Real>::Tql2() {
// This is derived from the Algol procedures tql2, by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int i = 1; i < n_; i++) {
e_[i-1] = e_[i];
}
e_[n_-1] = 0.0;
Real f = 0.0;
Real tst1 = 0.0;
Real eps = std::numeric_limits<Real>::epsilon();
for (int l = 0; l < n_; l++) {
// Find small subdiagonal element
tst1 = std::max(tst1, std::abs(d_[l]) + std::abs(e_[l]));
int m = l;
while (m < n_) {
if (std::abs(e_[m]) <= eps*tst1) {
break;
}
m++;
}
// If m == l, d_[l] is an eigenvalue,
// otherwise, iterate.
if (m > l) {
int iter = 0;
do {
iter = iter + 1; // (Could check iteration count here.)
// Compute implicit shift
Real g = d_[l];
Real p = (d_[l+1] - g) / (2.0 * e_[l]);
Real r = Hypot(p, static_cast<Real>(1.0)); // This is a Kaldi version of hypot that works with templates.
if (p < 0) {
r = -r;
}
d_[l] = e_[l] / (p + r);
d_[l+1] = e_[l] * (p + r);
Real dl1 = d_[l+1];
Real h = g - d_[l];
for (int i = l+2; i < n_; i++) {
d_[i] -= h;
}
f = f + h;
// Implicit QL transformation.
p = d_[m];
Real c = 1.0;
Real c2 = c;
Real c3 = c;
Real el1 = e_[l+1];
Real s = 0.0;
Real s2 = 0.0;
for (int i = m-1; i >= l; i--) {
c3 = c2;
c2 = c;
s2 = s;
g = c * e_[i];
h = c * p;
r = Hypot(p, e_[i]); // This is a Kaldi version of Hypot that works with templates.
e_[i+1] = s * r;
s = e_[i] / r;
c = p / r;
p = c * d_[i] - s * g;
d_[i+1] = h + s * (c * g + s * d_[i]);
// Accumulate transformation.
for (int k = 0; k < n_; k++) {
h = V(k, i+1);
V(k, i+1) = s * V(k, i) + c * h;
V(k, i) = c * V(k, i) - s * h;
}
}
p = -s * s2 * c3 * el1 * e_[l] / dl1;
e_[l] = s * p;
d_[l] = c * p;
// Check for convergence.
} while (std::abs(e_[l]) > eps*tst1);
}
d_[l] = d_[l] + f;
e_[l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
for (int i = 0; i < n_-1; i++) {
int k = i;
Real p = d_[i];
for (int j = i+1; j < n_; j++) {
if (d_[j] < p) {
k = j;
p = d_[j];
}
}
if (k != i) {
d_[k] = d_[i];
d_[i] = p;
for (int j = 0; j < n_; j++) {
p = V(j, i);
V(j, i) = V(j, k);
V(j, k) = p;
}
}
}
}
template<typename Real>
void EigenvalueDecomposition<Real>::Orthes() {
// This is derived from the Algol procedures orthes and ortran,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutines in EISPACK.
int low = 0;
int high = n_-1;
for (int m = low+1; m <= high-1; m++) {
// Scale column.
Real scale = 0.0;
for (int i = m; i <= high; i++) {
scale = scale + std::abs(H(i, m-1));
}
if (scale != 0.0) {
// Compute Householder transformation.
Real h = 0.0;
for (int i = high; i >= m; i--) {
ort_[i] = H(i, m-1)/scale;
h += ort_[i] * ort_[i];
}
Real g = std::sqrt(h);
if (ort_[m] > 0) {
g = -g;
}
h = h - ort_[m] * g;
ort_[m] = ort_[m] - g;
// Apply Householder similarity transformation
// H = (I-u*u'/h)*H*(I-u*u'/h)
for (int j = m; j < n_; j++) {
Real f = 0.0;
for (int i = high; i >= m; i--) {
f += ort_[i]*H(i, j);
}
f = f/h;
for (int i = m; i <= high; i++) {
H(i, j) -= f*ort_[i];
}
}
for (int i = 0; i <= high; i++) {
Real f = 0.0;
for (int j = high; j >= m; j--) {
f += ort_[j]*H(i, j);
}
f = f/h;
for (int j = m; j <= high; j++) {
H(i, j) -= f*ort_[j];
}
}
ort_[m] = scale*ort_[m];
H(m, m-1) = scale*g;
}
}
// Accumulate transformations (Algol's ortran).
for (int i = 0; i < n_; i++) {
for (int j = 0; j < n_; j++) {
V(i, j) = (i == j ? 1.0 : 0.0);
}
}
for (int m = high-1; m >= low+1; m--) {
if (H(m, m-1) != 0.0) {
for (int i = m+1; i <= high; i++) {
ort_[i] = H(i, m-1);
}
for (int j = m; j <= high; j++) {
Real g = 0.0;
for (int i = m; i <= high; i++) {
g += ort_[i] * V(i, j);
}
// Double division avoids possible underflow
g = (g / ort_[m]) / H(m, m-1);
for (int i = m; i <= high; i++) {
V(i, j) += g * ort_[i];
}
}
}
}
}
template<typename Real> void EigenvalueDecomposition<Real>::Hqr2() {
// This is derived from the Algol procedure hqr2,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
int nn = n_;
int n = nn-1;
int low = 0;
int high = nn-1;
Real eps = std::numeric_limits<Real>::epsilon();
Real exshift = 0.0;
Real p = 0, q = 0, r = 0, s = 0, z = 0, t, w, x, y;
// Store roots isolated by balanc and compute matrix norm
Real norm = 0.0;
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
d_[i] = H(i, i);
e_[i] = 0.0;
}
for (int j = std::max(i-1, 0); j < nn; j++) {
norm = norm + std::abs(H(i, j));
}
}
// Outer loop over eigenvalue index
int iter = 0;
while (n >= low) {
// Look for single small sub-diagonal element
int l = n;
while (l > low) {
s = std::abs(H(l-1, l-1)) + std::abs(H(l, l));
if (s == 0.0) {
s = norm;
}
if (std::abs(H(l, l-1)) < eps * s) {
break;
}
l--;
}
// Check for convergence
// One root found
if (l == n) {
H(n, n) = H(n, n) + exshift;
d_[n] = H(n, n);
e_[n] = 0.0;
n--;
iter = 0;
// Two roots found
} else if (l == n-1) {
w = H(n, n-1) * H(n-1, n);
p = (H(n-1, n-1) - H(n, n)) / 2.0;
q = p * p + w;
z = std::sqrt(std::abs(q));
H(n, n) = H(n, n) + exshift;
H(n-1, n-1) = H(n-1, n-1) + exshift;
x = H(n, n);
// Real pair
if (q >= 0) {
if (p >= 0) {
z = p + z;
} else {
z = p - z;
}
d_[n-1] = x + z;
d_[n] = d_[n-1];
if (z != 0.0) {
d_[n] = x - w / z;
}
e_[n-1] = 0.0;
e_[n] = 0.0;
x = H(n, n-1);
s = std::abs(x) + std::abs(z);
p = x / s;
q = z / s;
r = std::sqrt(p * p+q * q);
p = p / r;
q = q / r;
// Row modification
for (int j = n-1; j < nn; j++) {
z = H(n-1, j);
H(n-1, j) = q * z + p * H(n, j);
H(n, j) = q * H(n, j) - p * z;
}
// Column modification
for (int i = 0; i <= n; i++) {
z = H(i, n-1);
H(i, n-1) = q * z + p * H(i, n);
H(i, n) = q * H(i, n) - p * z;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
z = V(i, n-1);
V(i, n-1) = q * z + p * V(i, n);
V(i, n) = q * V(i, n) - p * z;
}
// Complex pair
} else {
d_[n-1] = x + p;
d_[n] = x + p;
e_[n-1] = z;
e_[n] = -z;
}
n = n - 2;
iter = 0;
// No convergence yet
} else {
// Form shift
x = H(n, n);
y = 0.0;
w = 0.0;
if (l < n) {
y = H(n-1, n-1);
w = H(n, n-1) * H(n-1, n);
}
// Wilkinson's original ad hoc shift
if (iter == 10) {
exshift += x;
for (int i = low; i <= n; i++) {
H(i, i) -= x;
}
s = std::abs(H(n, n-1)) + std::abs(H(n-1, n-2));
x = y = 0.75 * s;
w = -0.4375 * s * s;
}
// MATLAB's new ad hoc shift
if (iter == 30) {
s = (y - x) / 2.0;
s = s * s + w;
if (s > 0) {
s = std::sqrt(s);
if (y < x) {
s = -s;
}
s = x - w / ((y - x) / 2.0 + s);
for (int i = low; i <= n; i++) {
H(i, i) -= s;
}
exshift += s;
x = y = w = 0.964;
}
}
iter = iter + 1; // (Could check iteration count here.)
// Look for two consecutive small sub-diagonal elements
int m = n-2;
while (m >= l) {
z = H(m, m);
r = x - z;
s = y - z;
p = (r * s - w) / H(m+1, m) + H(m, m+1);
q = H(m+1, m+1) - z - r - s;
r = H(m+2, m+1);
s = std::abs(p) + std::abs(q) + std::abs(r);
p = p / s;
q = q / s;
r = r / s;
if (m == l) {
break;
}
if (std::abs(H(m, m-1)) * (std::abs(q) + std::abs(r)) <
eps * (std::abs(p) * (std::abs(H(m-1, m-1)) + std::abs(z) +
std::abs(H(m+1, m+1))))) {
break;
}
m--;
}
for (int i = m+2; i <= n; i++) {
H(i, i-2) = 0.0;
if (i > m+2) {
H(i, i-3) = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for (int k = m; k <= n-1; k++) {
bool notlast = (k != n-1);
if (k != m) {
p = H(k, k-1);
q = H(k+1, k-1);
r = (notlast ? H(k+2, k-1) : 0.0);
x = std::abs(p) + std::abs(q) + std::abs(r);
if (x != 0.0) {
p = p / x;
q = q / x;
r = r / x;
}
}
if (x == 0.0) {
break;
}
s = std::sqrt(p * p + q * q + r * r);
if (p < 0) {
s = -s;
}
if (s != 0) {
if (k != m) {
H(k, k-1) = -s * x;
} else if (l != m) {
H(k, k-1) = -H(k, k-1);
}
p = p + s;
x = p / s;
y = q / s;
z = r / s;
q = q / p;
r = r / p;
// Row modification
for (int j = k; j < nn; j++) {
p = H(k, j) + q * H(k+1, j);
if (notlast) {
p = p + r * H(k+2, j);
H(k+2, j) = H(k+2, j) - p * z;
}
H(k, j) = H(k, j) - p * x;
H(k+1, j) = H(k+1, j) - p * y;
}
// Column modification
for (int i = 0; i <= std::min(n, k+3); i++) {
p = x * H(i, k) + y * H(i, k+1);
if (notlast) {
p = p + z * H(i, k+2);
H(i, k+2) = H(i, k+2) - p * r;
}
H(i, k) = H(i, k) - p;
H(i, k+1) = H(i, k+1) - p * q;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
p = x * V(i, k) + y * V(i, k+1);
if (notlast) {
p = p + z * V(i, k+2);
V(i, k+2) = V(i, k+2) - p * r;
}
V(i, k) = V(i, k) - p;
V(i, k+1) = V(i, k+1) - p * q;
}
} // (s != 0)
} // k loop
} // check convergence
} // while (n >= low)
// Backsubstitute to find vectors of upper triangular form
if (norm == 0.0) {
return;
}
for (n = nn-1; n >= 0; n--) {
p = d_[n];
q = e_[n];
// Real vector
if (q == 0) {
int l = n;
H(n, n) = 1.0;
for (int i = n-1; i >= 0; i--) {
w = H(i, i) - p;
r = 0.0;
for (int j = l; j <= n; j++) {
r = r + H(i, j) * H(j, n);
}
if (e_[i] < 0.0) {
z = w;
s = r;
} else {
l = i;
if (e_[i] == 0.0) {
if (w != 0.0) {
H(i, n) = -r / w;
} else {
H(i, n) = -r / (eps * norm);
}
// Solve real equations
} else {
x = H(i, i+1);
y = H(i+1, i);
q = (d_[i] - p) * (d_[i] - p) + e_[i] * e_[i];
t = (x * s - z * r) / q;
H(i, n) = t;
if (std::abs(x) > std::abs(z)) {
H(i+1, n) = (-r - w * t) / x;
} else {
H(i+1, n) = (-s - y * t) / z;
}
}
// Overflow control
t = std::abs(H(i, n));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
H(j, n) = H(j, n) / t;
}
}
}
}
// Complex vector
} else if (q < 0) {
int l = n-1;
// Last vector component imaginary so matrix is triangular
if (std::abs(H(n, n-1)) > std::abs(H(n-1, n))) {
H(n-1, n-1) = q / H(n, n-1);
H(n-1, n) = -(H(n, n) - p) / H(n, n-1);
} else {
Real cdivr, cdivi;
cdiv(0.0, -H(n-1, n), H(n-1, n-1)-p, q, &cdivr, &cdivi);
H(n-1, n-1) = cdivr;
H(n-1, n) = cdivi;
}
H(n, n-1) = 0.0;
H(n, n) = 1.0;
for (int i = n-2; i >= 0; i--) {
Real ra, sa, vr, vi;
ra = 0.0;
sa = 0.0;
for (int j = l; j <= n; j++) {
ra = ra + H(i, j) * H(j, n-1);
sa = sa + H(i, j) * H(j, n);
}
w = H(i, i) - p;
if (e_[i] < 0.0) {
z = w;
r = ra;
s = sa;
} else {
l = i;
if (e_[i] == 0) {
Real cdivr, cdivi;
cdiv(-ra, -sa, w, q, &cdivr, &cdivi);
H(i, n-1) = cdivr;
H(i, n) = cdivi;
} else {
Real cdivr, cdivi;
// Solve complex equations
x = H(i, i+1);
y = H(i+1, i);
vr = (d_[i] - p) * (d_[i] - p) + e_[i] * e_[i] - q * q;
vi = (d_[i] - p) * 2.0 * q;
if (vr == 0.0 && vi == 0.0) {
vr = eps * norm * (std::abs(w) + std::abs(q) +
std::abs(x) + std::abs(y) + std::abs(z));
}
cdiv(x*r-z*ra+q*sa, x*s-z*sa-q*ra, vr, vi, &cdivr, &cdivi);
H(i, n-1) = cdivr;
H(i, n) = cdivi;
if (std::abs(x) > (std::abs(z) + std::abs(q))) {
H(i+1, n-1) = (-ra - w * H(i, n-1) + q * H(i, n)) / x;
H(i+1, n) = (-sa - w * H(i, n) - q * H(i, n-1)) / x;
} else {
cdiv(-r-y*H(i, n-1), -s-y*H(i, n), z, q, &cdivr, &cdivi);
H(i+1, n-1) = cdivr;
H(i+1, n) = cdivi;
}
}
// Overflow control
t = std::max(std::abs(H(i, n-1)), std::abs(H(i, n)));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
H(j, n-1) = H(j, n-1) / t;
H(j, n) = H(j, n) / t;
}
}
}
}
}
}
// Vectors of isolated roots
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
for (int j = i; j < nn; j++) {
V(i, j) = H(i, j);
}
}
}
// Back transformation to get eigenvectors of original matrix
for (int j = nn-1; j >= low; j--) {
for (int i = low; i <= high; i++) {
z = 0.0;
for (int k = low; k <= std::min(j, high); k++) {
z = z + V(i, k) * H(k, j);
}
V(i, j) = z;
}
}
}
template<typename Real>
EigenvalueDecomposition<Real>::EigenvalueDecomposition(const MatrixBase<Real> &A) {
KALDI_ASSERT(A.NumCols() == A.NumRows() && A.NumCols() >= 1);
n_ = A.NumRows();
V_ = new Real[n_*n_];
d_ = new Real[n_];
e_ = new Real[n_];
H_ = NULL;
ort_ = NULL;
if (A.IsSymmetric(0.0)) {
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
V(i, j) = A(i, j); // Note that V(i, j) is a member function; A(i, j) is an operator
// of the matrix A.
// Tridiagonalize.
Tred2();
// Diagonalize.
Tql2();
} else {
H_ = new Real[n_*n_];
ort_ = new Real[n_];
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
H(i, j) = A(i, j); // as before: H is member function, A(i, j) is operator of matrix.
// Reduce to Hessenberg form.
Orthes();
// Reduce Hessenberg to real Schur form.
Hqr2();
}
}
template<typename Real>
EigenvalueDecomposition<Real>::~EigenvalueDecomposition() {
delete [] d_;
delete [] e_;
delete [] V_;
if (H_) delete [] H_;
if (ort_) delete [] ort_;
}
// see function MatrixBase<Real>::Eig in kaldi-matrix.cc
} // namespace kaldi
#endif // KALDI_MATRIX_JAMA_EIG_H_
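A hedged sketch of how the class above is driven (illustration only; as the comment at the top says, user code should normally go through MatrixBase<Real>::Eig() in kaldi-matrix.h rather than use this directly):
kaldi::Matrix<double> A(4, 4);
A.SetRandn();  // assumed Kaldi helper: random square input
kaldi::EigenvalueDecomposition<double> eig(A);  // runs Tred2/Tql2 or Orthes/Hqr2
kaldi::Matrix<double> P(4, 4);
kaldi::Vector<double> re(4), im(4);
eig.GetV(&P);  // eigenvectors (the matrix called P externally)
eig.GetRealEigenvalues(&re);  // real parts of the eigenvalues
eig.GetImagEigenvalues(&im);  // imaginary parts; zero when A is symmetric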

View file

@ -1,531 +0,0 @@
// matrix/jama-svd.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This file consists of a port and modification of materials from
// JAMA: A Java Matrix Package
// under the following notice: This software is a cooperative product of
// The MathWorks and the National Institute of Standards and Technology (NIST)
// which has been released to the public. This notice and the original code are
// available at http://math.nist.gov/javanumerics/jama/domain.notice
#ifndef KALDI_MATRIX_JAMA_SVD_H_
#define KALDI_MATRIX_JAMA_SVD_H_ 1
#include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/cblas-wrappers.h"
namespace kaldi {
#if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
// using ATLAS as our math library, which doesn't have SVD -> need
// to implement it.
// This routine is a modified form of jama_svd.h, which is part of the TNT
// distribution (it originally comes from JAMA).
/** Singular Value Decomposition.
* <P>
* For an m-by-n matrix A with m >= n, the singular value decomposition is
* an m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and
* an n-by-n orthogonal matrix V so that A = U*S*V'.
* <P>
* The singular values, sigma[k] = S(k, k), are ordered so that
* sigma[0] >= sigma[1] >= ... >= sigma[n-1].
* <P>
* The singular value decomposition always exists, so the constructor will
* never fail. The matrix condition number and the effective numerical
* rank can be computed from this decomposition.
* <p>
* (Adapted from JAMA, a Java Matrix Library, developed jointly
* by The MathWorks and NIST; see http://math.nist.gov/javanumerics/jama).
*/
template<typename Real>
bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
MatrixBase<Real> *U_in,
MatrixBase<Real> *V_in) { // Destructive!
KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
int wantu = (U_in != NULL), wantv = (V_in != NULL);
Matrix<Real> Utmp, Vtmp;
MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
VectorBase<Real> &s = *s_in;
int m = num_rows_, n = num_cols_;
KALDI_ASSERT(m >= n && m != 0 && n != 0);
if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
KALDI_ASSERT((int)s.Dim() == n); // n<=m so n is min.
int nu = n;
U.SetZero(); // make sure all zero.
Vector<Real> e(n);
Vector<Real> work(m);
MatrixBase<Real> &A(*this);
Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
*udata = U.Data(), *vdata = V.Data();
int astride = static_cast<int>(A.Stride()),
ustride = static_cast<int>(U.Stride()),
vstride = static_cast<int>(V.Stride());
int i = 0, j = 0, k = 0;
// Reduce A to bidiagonal form, storing the diagonal elements
// in s and the super-diagonal elements in e.
int nct = std::min(m-1, n);
int nrt = std::max(0, std::min(n-2, m));
for (k = 0; k < std::max(nct, nrt); k++) {
if (k < nct) {
// Compute the transformation for the k-th column and
// place the k-th diagonal in s(k).
// Compute 2-norm of k-th column without under/overflow.
s(k) = 0;
for (i = k; i < m; i++) {
s(k) = hypot(s(k), A(i, k));
}
if (s(k) != 0.0) {
if (A(k, k) < 0.0) {
s(k) = -s(k);
}
for (i = k; i < m; i++) {
A(i, k) /= s(k);
}
A(k, k) += 1.0;
}
s(k) = -s(k);
}
for (j = k+1; j < n; j++) {
if ((k < nct) && (s(k) != 0.0)) {
// Apply the transformation.
Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
adata + astride*k + j, astride);
/*for (i = k; i < m; i++) {
t += adata[i*astride + k]*adata[i*astride + j]; // A(i, k)*A(i, j); // 3
}*/
t = -t/A(k, k);
cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
adata + k*astride + j, astride);
/*for (i = k; i < m; i++) {
adata[i*astride + j] += t*adata[i*astride + k]; // A(i, j) += t*A(i, k); // 5
}*/
}
// Place the k-th row of A into e for the
// subsequent calculation of the row transformation.
e(j) = A(k, j);
}
if (wantu & (k < nct)) {
// Place the transformation in U for subsequent back
// multiplication.
for (i = k; i < m; i++) {
U(i, k) = A(i, k);
}
}
if (k < nrt) {
// Compute the k-th row transformation and place the
// k-th super-diagonal in e(k).
// Compute 2-norm without under/overflow.
e(k) = 0;
for (i = k+1; i < n; i++) {
e(k) = hypot(e(k), e(i));
}
if (e(k) != 0.0) {
if (e(k+1) < 0.0) {
e(k) = -e(k);
}
for (i = k+1; i < n; i++) {
e(i) /= e(k);
}
e(k+1) += 1.0;
}
e(k) = -e(k);
if ((k+1 < m) & (e(k) != 0.0)) {
// Apply the transformation.
for (i = k+1; i < m; i++) {
work(i) = 0.0;
}
for (j = k+1; j < n; j++) {
for (i = k+1; i < m; i++) {
workdata[i] += edata[j] * adata[i*astride + j]; // work(i) += e(j)*A(i, j); // 5
}
}
for (j = k+1; j < n; j++) {
Real t(-e(j)/e(k+1));
cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
adata + (k+1)*astride + j, astride);
/*
for (i = k+1; i < m; i++) {
adata[i*astride + j] += t*workdata[i]; // A(i, j) += t*work(i); // 5
}*/
}
}
if (wantv) {
// Place the transformation in V for subsequent
// back multiplication.
for (i = k+1; i < n; i++) {
V(i, k) = e(i);
}
}
}
}
// Set up the final bidiagonal matrix of order p.
int p = std::min(n, m+1);
if (nct < n) {
s(nct) = A(nct, nct);
}
if (m < p) {
s(p-1) = 0.0;
}
if (nrt+1 < p) {
e(nrt) = A(nrt, p-1);
}
e(p-1) = 0.0;
// If required, generate U.
if (wantu) {
for (j = nct; j < nu; j++) {
for (i = 0; i < m; i++) {
U(i, j) = 0.0;
}
U(j, j) = 1.0;
}
for (k = nct-1; k >= 0; k--) {
if (s(k) != 0.0) {
for (j = k+1; j < nu; j++) {
Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
//for (i = k; i < m; i++) {
// t += udata[i*ustride + k]*udata[i*ustride + j]; // t += U(i, k)*U(i, j); // 8
// }
t = -t/U(k, k);
cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
udata + k*ustride + j, ustride);
/*for (i = k; i < m; i++) {
udata[i*ustride + j] += t*udata[i*ustride + k]; // U(i, j) += t*U(i, k); // 4
}*/
}
for (i = k; i < m; i++ ) {
U(i, k) = -U(i, k);
}
U(k, k) = 1.0 + U(k, k);
for (i = 0; i < k-1; i++) {
U(i, k) = 0.0;
}
} else {
for (i = 0; i < m; i++) {
U(i, k) = 0.0;
}
U(k, k) = 1.0;
}
}
}
// If required, generate V.
if (wantv) {
for (k = n-1; k >= 0; k--) {
if ((k < nrt) & (e(k) != 0.0)) {
for (j = k+1; j < nu; j++) {
Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
vdata + (k+1)*vstride + j, vstride);
/*Real t (0.0);
for (i = k+1; i < n; i++) {
t += vdata[i*vstride + k]*vdata[i*vstride + j]; // t += V(i, k)*V(i, j); // 7
}*/
t = -t/V(k+1, k);
cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
vdata + (k+1)*vstride + j, vstride);
/*for (i = k+1; i < n; i++) {
vdata[i*vstride + j] += t*vdata[i*vstride + k]; // V(i, j) += t*V(i, k); // 7
}*/
}
}
for (i = 0; i < n; i++) {
V(i, k) = 0.0;
}
V(k, k) = 1.0;
}
}
// Main iteration loop for the singular values.
int pp = p-1;
int iter = 0;
// note: -52.0 is from Jama code; the -23 is the extension
// to float, because mantissa length in (double, float)
// is (52, 23) bits respectively.
Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
// Note: the -966 was taken from Jama code, but the -120 is a guess
// of how to extend this to float... the exponent in double goes
// from -1022 .. 1023, and in float from -126..127. I'm not sure
// what the significance of 966 is, so -120 just represents a number
// that's a bit less negative than -126. If we get convergence
// failure in float only, this may mean that we have to make the
// -120 value less negative.
Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 ));
while (p > 0) {
int k = 0;
int kase = 0;
if (iter == 500 || iter == 750) {
KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
eps = pow(static_cast<Real>(0.8), eps);
tiny = pow(static_cast<Real>(0.8), tiny);
}
if (iter > 1000) {
KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
return false;
}
// This section of the program inspects for
// negligible elements in the s and e arrays. On
// completion the variables kase and k are set as follows.
// kase = 1 if s(p) and e(k-1) are negligible and k < p
// kase = 2 if s(k) is negligible and k < p
// kase = 3 if e(k-1) is negligible, k < p, and
// s(k), ..., s(p) are not negligible (qr step).
// kase = 4 if e(p-1) is negligible (convergence).
for (k = p-2; k >= -1; k--) {
if (k == -1) {
break;
}
if (std::abs(e(k)) <=
tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
e(k) = 0.0;
break;
}
}
if (k == p-2) {
kase = 4;
} else {
int ks;
for (ks = p-1; ks >= k; ks--) {
if (ks == k) {
break;
}
Real t( (ks != p ? std::abs(e(ks)) : 0.) +
(ks != k+1 ? std::abs(e(ks-1)) : 0.));
if (std::abs(s(ks)) <= tiny + eps*t) {
s(ks) = 0.0;
break;
}
}
if (ks == k) {
kase = 3;
} else if (ks == p-1) {
kase = 1;
} else {
kase = 2;
k = ks;
}
}
k++;
// Perform the task indicated by kase.
switch (kase) {
// Deflate negligible s(p).
case 1: {
Real f(e(p-2));
e(p-2) = 0.0;
for (j = p-2; j >= k; j--) {
Real t( hypot(s(j), f));
Real cs(s(j)/t);
Real sn(f/t);
s(j) = t;
if (j != k) {
f = -sn*e(j-1);
e(j-1) = cs*e(j-1);
}
if (wantv) {
for (i = 0; i < n; i++) {
t = cs*V(i, j) + sn*V(i, p-1);
V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
V(i, j) = t;
}
}
}
}
break;
// Split at negligible s(k).
case 2: {
Real f(e(k-1));
e(k-1) = 0.0;
for (j = k; j < p; j++) {
Real t(hypot(s(j), f));
Real cs( s(j)/t);
Real sn(f/t);
s(j) = t;
f = -sn*e(j);
e(j) = cs*e(j);
if (wantu) {
for (i = 0; i < m; i++) {
t = cs*U(i, j) + sn*U(i, k-1);
U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
U(i, j) = t;
}
}
}
}
break;
// Perform one qr step.
case 3: {
// Calculate the shift.
Real scale = std::max(std::max(std::max(std::max(
std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
std::abs(s(k))), std::abs(e(k)));
Real sp = s(p-1)/scale;
Real spm1 = s(p-2)/scale;
Real epm1 = e(p-2)/scale;
Real sk = s(k)/scale;
Real ek = e(k)/scale;
Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
Real c = (sp*epm1)*(sp*epm1);
Real shift = 0.0;
if ((b != 0.0) || (c != 0.0)) {
shift = std::sqrt(b*b + c);
if (b < 0.0) {
shift = -shift;
}
shift = c/(b + shift);
}
Real f = (sk + sp)*(sk - sp) + shift;
Real g = sk*ek;
// Chase zeros.
for (j = k; j < p-1; j++) {
Real t = hypot(f, g);
Real cs = f/t;
Real sn = g/t;
if (j != k) {
e(j-1) = t;
}
f = cs*s(j) + sn*e(j);
e(j) = cs*e(j) - sn*s(j);
g = sn*s(j+1);
s(j+1) = cs*s(j+1);
if (wantv) {
cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
/*for (i = 0; i < n; i++) {
t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1]; // t = cs*V(i, j) + sn*V(i, j+1); // 13
vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1]; // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
vdata[i*vstride + j] = t; // V(i, j) = t; // 4
}*/
}
t = hypot(f, g);
cs = f/t;
sn = g/t;
s(j) = t;
f = cs*e(j) + sn*s(j+1);
s(j+1) = -sn*e(j) + cs*s(j+1);
g = sn*e(j+1);
e(j+1) = cs*e(j+1);
if (wantu && (j < m-1)) {
cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
/*for (i = 0; i < m; i++) {
t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1]; // t = cs*U(i, j) + sn*U(i, j+1); // 7
udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1]; // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
udata[i*ustride + j] = t; // U(i, j) = t; // 1
}*/
}
}
e(p-2) = f;
iter = iter + 1;
}
break;
// Convergence.
case 4: {
// Make the singular values positive.
if (s(k) <= 0.0) {
s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
if (wantv) {
for (i = 0; i <= pp; i++) {
V(i, k) = -V(i, k);
}
}
}
// Order the singular values.
while (k < pp) {
if (s(k) >= s(k+1)) {
break;
}
Real t = s(k);
s(k) = s(k+1);
s(k+1) = t;
if (wantv && (k < n-1)) {
for (i = 0; i < n; i++) {
t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
}
}
if (wantu && (k < m-1)) {
for (i = 0; i < m; i++) {
t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
}
}
k++;
}
iter = 0;
p--;
}
break;
}
}
return true;
}
#endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
} // namespace kaldi
#endif // KALDI_MATRIX_JAMA_SVD_H_

View file

@ -1,129 +0,0 @@
// matrix/kaldi-blas.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_BLAS_H_
#define KALDI_MATRIX_KALDI_BLAS_H_
// This file handles the #includes for BLAS, LAPACK and so on.
// It manipulates the declarations into a common format that kaldi can handle.
// However, the kaldi code will check whether HAVE_ATLAS is defined as that
// code is called a bit differently from CLAPACK that comes from other sources.
// There are four alternatives:
// (i) you have ATLAS, which includes the ATLAS implementation of CBLAS
// plus a subset of CLAPACK (but with clapack_ in the function declarations).
// In this case, define HAVE_ATLAS and make sure the relevant directories are
// in the include path.
// (ii) you have CBLAS (some implementation thereof) plus CLAPACK.
// In this case, define HAVE_CLAPACK.
// [Since CLAPACK depends on BLAS, the presence of BLAS is implicit].
// (iii) you have the MKL library, which includes CLAPACK and CBLAS.
// (iv) you have OpenBLAS, which provides CBLAS together with LAPACKE
// (see the HAVE_OPENBLAS branch below). In this case, define HAVE_OPENBLAS.
// Note that if we are using ATLAS, no Svd implementation is supplied,
// so we define HAVE_Svd to be zero and this directs our implementation to
// supply its own "by hand" implementation which is based on TNT code.
#if (defined(HAVE_CLAPACK) && (defined(HAVE_ATLAS) || defined(HAVE_MKL))) \
|| (defined(HAVE_ATLAS) && defined(HAVE_MKL))
#error "Do not define more than one of HAVE_CLAPACK, HAVE_ATLAS and HAVE_MKL"
#endif
#ifdef HAVE_ATLAS
extern "C" {
#include <cblas.h>
#include <clapack.h>
}
#elif defined(HAVE_CLAPACK)
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
typedef __CLPK_integer integer;
typedef __CLPK_logical logical;
typedef __CLPK_real real;
typedef __CLPK_doublereal doublereal;
typedef __CLPK_complex complex;
typedef __CLPK_doublecomplex doublecomplex;
typedef __CLPK_ftnlen ftnlen;
#else
extern "C" {
// May be in /usr/[local]/include if installed; else this uses the one
// from the tools/CLAPACK_include directory.
#include <cblas.h>
#include <f2c.h>
#include <clapack.h>
// get rid of macros from f2c.h -- these are dangerous.
#undef abs
#undef dabs
#undef min
#undef max
#undef dmin
#undef dmax
#undef bit_test
#undef bit_clear
#undef bit_set
}
#endif
#elif defined(HAVE_MKL)
extern "C" {
#include <mkl.h>
}
#elif defined(HAVE_OPENBLAS)
extern "C" {
// getting cblas.h and lapacke.h from <openblas-install-dir>/.
// putting in "" not <> to search -I before system libraries.
#include "cblas.h"
#include "lapacke.h"
#undef I
#undef complex
// get rid of macros from f2c.h -- these are dangerous.
#undef abs
#undef dabs
#undef min
#undef max
#undef dmin
#undef dmax
#undef bit_test
#undef bit_clear
#undef bit_set
}
#else
#error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)"
#endif
#ifdef HAVE_OPENBLAS
typedef int KaldiBlasInt; // try int.
#endif
#ifdef HAVE_CLAPACK
typedef integer KaldiBlasInt;
#endif
#ifdef HAVE_MKL
typedef MKL_INT KaldiBlasInt;
#endif
#ifdef HAVE_ATLAS
// in this case there is no need for KaldiBlasInt-- this typedef is only needed
// for Svd code which is not included in ATLAS (we re-implement it).
#endif
#endif // KALDI_MATRIX_KALDI_BLAS_H_
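A minimal consumer sketch (not from the removed file; the file name and function are hypothetical): whichever backend macro is defined, the header above exposes the same CBLAS C interface, so the call below compiles unchanged against ATLAS, CLAPACK, MKL or OpenBLAS.
// sketch.cc -- compile with exactly one of -DHAVE_ATLAS, -DHAVE_CLAPACK,
// -DHAVE_MKL or -DHAVE_OPENBLAS, and link the matching library.
#include "matrix/kaldi-blas.h"
// cblas_ddot has this same C signature in all four supported backends.
double DotProduct(const double *x, const double *y, int n) {
return cblas_ddot(n, x, 1, y, 1);
}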

View file

@@ -1,103 +0,0 @@
// matrix/kaldi-gpsr-test.cc
// Copyright 2012 Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "gmm/model-test-common.h"
#include "matrix/kaldi-gpsr.h"
#include "util/kaldi-io.h"
using kaldi::int32;
using kaldi::BaseFloat;
namespace ut = kaldi::unittest;
namespace kaldi {
template<typename Real> static void InitRand(VectorBase<Real> *v) {
for (MatrixIndexT i = 0; i < v->Dim(); i++)
(*v)(i) = RandGauss();
}
template<typename Real> static void InitRand(MatrixBase<Real> *M) {
start:
for (MatrixIndexT i = 0; i < M->NumRows(); i++)
for (MatrixIndexT j = 0; j < M->NumCols(); j++)
(*M)(i, j) = RandGauss();
if (M->NumRows() != 0 && M->Cond() > 100) {
KALDI_WARN << "Condition number of random matrix large" << M->Cond()
<< ": trying again (this is normal)";
goto start;
}
}
template<typename Real> static void InitRand(SpMatrix<Real> *M) {
start_sp:
for (MatrixIndexT i = 0; i < M->NumRows(); i++)
for (MatrixIndexT j = 0; j <= i; j++)
(*M)(i, j) = RandGauss();
if (M->NumRows() != 0 && M->Cond() > 100) {
KALDI_WARN << "Condition number of random matrix large" << M->Cond()
<< ": trying again (this is normal)";
goto start_sp;
}
}
template<typename Real> static void UnitTestGpsr() {
for (int32 i = 0; i < 5; i++) {
MatrixIndexT dim1 = (rand() % 10) + 10;
MatrixIndexT dim2 = (rand() % 10) + 10;
Matrix<Real> M(dim1, dim2);
InitRand(&M);
SpMatrix<Real> H(dim2);
H.AddMat2(1.0, M, kTrans, 0.0); // H = M^T M
// InitRand(&H);
// KALDI_LOG << "dim 1 " << dim1 << "; dim 2 " << dim2 << " LD " << H.LogDet()
// << " Cond " << H.Cond() << "\nH " << H;
// KALDI_ASSERT(H.IsPosDef());
Vector<Real> x(dim2);
InitRand(&x);
Vector<Real> g(dim2);
InitRand(&g);
GpsrConfig opts;
opts.debias = (rand()%2 == 0);
Real objf_old = 0.5* VecSpVec(x, H, x) - VecVec(x, g) +
opts.gpsr_tau * x.Norm(1.0);
GpsrBasic(opts, H, g, &x);
Real objf_new = 0.5* VecSpVec(x, H, x) - VecVec(x, g) +
opts.gpsr_tau * x.Norm(1.0);
KALDI_ASSERT(objf_old >= objf_new); // since we are minimizing
KALDI_LOG << "GPSR-basic: objf old = " << objf_old << "; new = " << objf_new;
Vector<Real> x2(x);
GpsrBB(opts, H, g, &x);
Real objf_new_bb = 0.5* VecSpVec(x, H, x) - VecVec(x, g) +
opts.gpsr_tau * x.Norm(1.0);
KALDI_ASSERT(objf_old >= objf_new_bb); // since we are minimizing
KALDI_LOG << "GPSR-BB: objf old = " << objf_old << "; new = " << objf_new_bb;
}
}
}
int main() {
kaldi::g_kaldi_verbose_level = 1;
kaldi::UnitTestGpsr<float>();
kaldi::UnitTestGpsr<double>();
std::cout << "Test OK.\n";
return 0;
}

Просмотреть файл

@ -1,496 +0,0 @@
// matrix/kaldi-gpsr.cc
// Copyright 2010-2012 Liang Lu, Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This is an implementation of the GPSR algorithm. See, Figueiredo, Nowak and
// Wright, "Gradient Projection for Sparse Reconstruction: Application to
// Compressed Sensing and Other Inverse Problems," IEEE Journal of Selected
// Topics in Signal Processing, vol. 1, no. 4, pp. 586-597, 2007.
// http://dx.doi.org/10.1109/JSTSP.2007.910281
#include <algorithm>
#include <string>
#include <vector>
using std::vector;
#include "matrix/kaldi-gpsr.h"
namespace kaldi {
/// This calculates the objective function: \f$ c^T z + 0.5 * z^T B z, \f$
/// where z is formed by stacking u and v, and B = [H -H; -H H].
double GpsrObjective(const SpMatrix<double> &H, const Vector<double> &c,
const Vector<double> &u, const Vector<double> &v) {
KALDI_ASSERT(u.Dim() == v.Dim() && u.Dim() > 0);
KALDI_ASSERT(c.Dim() == 2 * u.Dim());
KALDI_VLOG(2) << "u dim = " << u.Dim() << ", v dim = " << v.Dim()
<< ", c dim = " << c.Dim();
MatrixIndexT dim = u.Dim();
Vector<double> H_x(dim), x(dim);
// x = u - v, where u_i = (x_i)_+; v_i = (-x_i)_+; and (x)_+ = max{0,x}
x.CopyFromVec(u);
x.AddVec(-1.0, v);
// Calculate c^T z = c^T [u^T v^T]^T
double objf = VecVec(c.Range(0, dim), u);
objf += VecVec(c.Range(dim, dim), v);
// Now, calculate the quadratic term: z^T B z = (u-v)^T H (u-v) = x^T H x
H_x.AddSpVec(1.0, H, x, 0.0);
objf += 0.5 * VecVec(x, H_x);
return objf;
}
/// This calculates the gradient: \f$ c + B z, \f$
/// where z is formed by stacking u and v, and B = [H -H; -H H].
void GpsrGradient(const SpMatrix<double> &H, const Vector<double> &c,
const Vector<double> &u, const Vector<double> &v,
Vector<double> *grad_u, Vector<double> *grad_v) {
KALDI_ASSERT(u.Dim() == v.Dim() && u.Dim() > 0);
KALDI_ASSERT(u.Dim() == grad_u->Dim() && v.Dim() == grad_v->Dim());
KALDI_ASSERT(c.Dim() == 2 * u.Dim());
KALDI_VLOG(2) << "u dim = " << u.Dim() << ", v dim = " << v.Dim()
<< ", c dim = " << c.Dim();
MatrixIndexT dim = u.Dim();
Vector<double> H_x(dim), x(dim);
// x = u - v, where u_i = (x_i)_+; v_i = (-x_i)_+; and (x)_+ = max{0,x}
x.CopyFromVec(u);
x.AddVec(-1.0, v);
// To calculate B z = [ H (u-v); -H (u-v) ] = [ H x; -H x ], we only need H x
H_x.AddSpVec(1.0, H, x, 0.0);
grad_u->CopyFromVec(c.Range(0, dim));
grad_u->AddVec(1.0, H_x);
grad_v->CopyFromVec(c.Range(dim, dim));
grad_v->AddVec(-1.0, H_x);
}
/// Returns the initial guess of step size in the feasible direction.
/// This is the exact minimizer of the objective function along the feasible
/// direction, which is the negative gradient projected on to the constraint
/// set, or the non-negative orthant, in this case:
/// \f[ \alpha = \frac{g^T g}{g^T B g}, \f]
/// where g is the projected gradient, formed by stacking the projected
/// gradients for the positive & negative parts (u & v); and B = [H -H; -H H].
double GpsrBasicAlpha(const SpMatrix<double> &H, const Vector<double> &u,
const Vector<double> &v, const Vector<double> &grad_u,
const Vector<double> &grad_v) {
KALDI_ASSERT(H.NumRows() == grad_u.Dim() && grad_u.Dim() == grad_v.Dim() &&
grad_u.Dim() > 0);
KALDI_VLOG(2) << "grad_u dim = " << grad_u.Dim() << ", grad_v dim = "
<< grad_v.Dim() << ", H rows = " << H.NumRows();
MatrixIndexT dim = grad_u.Dim();
// Find the projection of the gradient on the nonnegative orthant, or, more
// precisely, the projection s.t. the next iterate will be in the orthant.
Vector<double> proj_grad_u(dim);
Vector<double> proj_grad_v(dim);
for (MatrixIndexT i = 0; i < dim; i++) {
proj_grad_u(i) = (u(i) > 0 || grad_u(i) < 0)? grad_u(i) : 0;
proj_grad_v(i) = (v(i) > 0 || grad_v(i) < 0)? grad_v(i) : 0;
}
// The numerator: g^T g = g_u^T g_u + g_v^T g_v
double alpha = VecVec(proj_grad_u, proj_grad_u);
alpha += VecVec(proj_grad_v, proj_grad_v);
// The denominator: g^T B g = (g_u - g_v)^T H (g_u - g_v)
Vector<double> diff_g(proj_grad_u);
diff_g.AddVec(-1.0, proj_grad_v);
Vector<double> H_diff_g(dim);
H_diff_g.AddSpVec(1.0, H, diff_g, 0.0);
alpha /= (VecVec(diff_g, H_diff_g) + DBL_EPSILON);
return alpha;
}
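/// A sketch of where the step-size formula above comes from (not in the
/// original source): for the quadratic \f$ F(z) = c^T z + 0.5 z^T B z \f$,
/// moving along the projected gradient g gives \f$ F(z - \alpha g) \f$ with
/// derivative \f$ -g^T (c + B z) + \alpha g^T B g = -g^T g + \alpha g^T B g \f$
/// (treating g as the exact gradient \f$ c + B z \f$), which vanishes at
/// \f$ \alpha = g^T g / (g^T B g) \f$, the value returned above.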
/// This calculates the coefficient for the linear term used in the
/// bound-constrained quadratic program: c = \tau 1_{2n} + [-g; g]
void GpsrCalcLinearCoeff(double tau, const Vector<double> &g,
Vector<double> *c) {
KALDI_ASSERT(c->Dim() == 2 * g.Dim() && g.Dim() != 0);
MatrixIndexT dim = g.Dim();
c->Set(tau);
c->Range(0, dim).AddVec(-1.0, g);
c->Range(dim, dim).AddVec(1.0, g);
}
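/// Why this choice of c recovers the L1-regularized problem (a sketch, not in
/// the original source): with \f$ z = [u; v] \f$, \f$ u, v \ge 0 \f$,
/// \f$ x = u - v \f$ and \f$ B = [H, -H; -H, H] \f$,
/// \f[ c^T z + 0.5 z^T B z = \tau 1^T (u + v) - g^T (u - v)
///     + 0.5 (u - v)^T H (u - v), \f]
/// and at a minimizer u and v have disjoint support, so
/// \f$ 1^T (u + v) = \|x\|_1 \f$ and the whole expression equals
/// \f$ 0.5 x^T H x - g^T x + \tau \|x\|_1 \f$, the objective quoted in
/// kaldi-gpsr.h.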
// This removes the L1 penalty term, and uses conjugate gradient to solve the
// resulting quadratic problem while keeping the zero elements fixed at 0.
double Debias(const GpsrConfig &opts, const SpMatrix<double> &H,
const Vector<double> &g, Vector<double> *x) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
// KALDI_ASSERT(H.IsPosDef() &&
// "Must have positive definite matrix for conjugate gradient.");
MatrixIndexT dim = x->Dim();
Vector<double> x_bias(*x);
Vector<double> nonzero_indices(dim);
// Initialize the index of non-zero elements in x
for (MatrixIndexT i = 0; i < dim; i++)
nonzero_indices(i) = (x_bias(i) == 0)? 0.0 : 1.0;
Vector<double> residual(dim);
Vector<double> conj_direction(dim);
Vector<double> resid_change(dim);
double alpha_cg; // CG step size for iterate: x <- x + \alpha p
double beta_cg; // CG step size for conj. direction: p <- \beta p - r
double resid_prod, resid_prod_new; // inner product of residual vectors
// Calculate the initial residual: r = H x_0 - g
residual.AddSpVec(1.0, H, x_bias, 0.0);
residual.AddVec(-1.0, g);
residual.MulElements(nonzero_indices); // only change non-zero elements of x
conj_direction.CopyFromVec(residual);
conj_direction.Scale(-1.0); // Initial conjugate direction p = -r
resid_prod = VecVec(residual, residual);
// set the convergence threshold for residual
double tol_debias = opts.stop_thresh_debias * VecVec(residual, residual);
for (int32 iter = 0; iter < opts.max_iters_debias; iter++) {
resid_change.AddSpVec(1.0, H, conj_direction, 0.0);
resid_change.MulElements(nonzero_indices); // only change non-zero elements
alpha_cg = resid_prod / VecVec(conj_direction, resid_change);
x_bias.AddVec(alpha_cg, conj_direction);
residual.AddVec(alpha_cg, resid_change);
resid_prod_new = VecVec(residual, residual);
beta_cg = resid_prod_new / resid_prod;
conj_direction.Scale(beta_cg);
conj_direction.AddVec(-1.0, residual);
resid_prod = resid_prod_new;
if (resid_prod < tol_debias) {
KALDI_VLOG(1) << "iter=" << iter << "\t residual =" << resid_prod
<< "\t tol_debias=" << tol_debias;
break;
}
} // end CG iters
x->CopyFromVec(x_bias);
return resid_prod;
}
template<>
double GpsrBasic(const GpsrConfig &opts, const SpMatrix<double> &H,
const Vector<double> &g, Vector<double> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
MatrixIndexT dim = x->Dim();
if (H.IsZero(0.0)) {
KALDI_WARN << "Zero quadratic term in GPSR for " << debug_str
<< ": leaving it unchanged.";
return 0.0;
}
// initialize the positive (u) and negative (v) parts of x, s.t. x = u - v
Vector<double> u(dim, kSetZero);
Vector<double> v(dim, kSetZero);
for (MatrixIndexT i = 0; i < dim; i++) {
if ((*x)(i) > 0) {
u(i) = (*x)(i);
} else {
v(i) = -(*x)(i);
}
}
double tau = opts.gpsr_tau; // May be modified later.
Vector<double> c(2*dim);
GpsrCalcLinearCoeff(tau, g, &c);
double objf_ori = GpsrObjective(H, c, u, v); // the obj. function at start
KALDI_VLOG(2) << "GPSR for " << debug_str << ": tau = " << tau
<< ";\t objf = " << objf_ori;
Vector<double> grad_u(dim);
Vector<double> grad_v(dim);
Vector<double> delta_u(dim);
Vector<double> delta_v(dim);
Vector<double> u_new(dim);
Vector<double> v_new(dim);
double objf_old, objf_new, num_zeros;
bool keep_going = true;
for (int32 iter = 0; keep_going; iter++) {
objf_old = GpsrObjective(H, c, u, v);
GpsrGradient(H, c, u, v, &grad_u, &grad_v);
double alpha = GpsrBasicAlpha(H, u, v, grad_u, grad_v);
if (alpha < opts.alpha_min) alpha = opts.alpha_min;
if (alpha > opts.alpha_max) alpha = opts.alpha_max;
// This is the backtracking line search part:
for (int32 k = 0; k < opts.max_iters_backtrak; k++) {
// Calculate the potential new iterate: [z_k - \alpha_k \grad F(z_k)]_+
u_new.CopyFromVec(u);
u_new.AddVec(-alpha, grad_u);
u_new.ApplyFloor(0.0);
v_new.CopyFromVec(v);
v_new.AddVec(-alpha, grad_v);
v_new.ApplyFloor(0.0);
delta_u.CopyFromVec(u_new);
delta_v.CopyFromVec(v_new);
delta_u.AddVec(-1.0, u);
delta_v.AddVec(-1.0, v);
double delta_objf_apx = opts.gpsr_mu * (VecVec(grad_u, delta_u) +
VecVec(grad_v, delta_v));
objf_new = GpsrObjective(H, c, u_new, v_new);
double delta_objf_real = objf_new - objf_old;
KALDI_VLOG(2) << "GPSR for " << debug_str << ": iter " << iter
<< "; tau = " << tau << ";\t objf = " << objf_new
<< ";\t alpha = " << alpha << ";\t delta_apx = "
<< delta_objf_apx << ";\t delta_real = " << delta_objf_real;
if (delta_objf_real < delta_objf_apx + DBL_EPSILON)
break;
else
alpha *= opts.gpsr_beta;
if (k == opts.max_iters_backtrak - 1) { // Stop further optimization
KALDI_WARN << "Backtracking line search did not decrease objective.";
u_new.CopyFromVec(u);
u_new.ApplyFloor(0.0);
v_new.CopyFromVec(v);
v_new.ApplyFloor(0.0);
delta_u.SetZero();
delta_v.SetZero();
}
} // end of backtracking line search
x->CopyFromVec(u_new);
x->AddVec(-1.0, v_new);
num_zeros = 0;
for (MatrixIndexT i = 0; i < dim; i++)
if ((*x)(i) == 0)
num_zeros++;
// ad hoc way to modify tau, if the solution is too sparse
if ((num_zeros / static_cast<double>(dim)) > opts.max_sparsity) {
std::ostringstream msg;
msg << num_zeros << " out of " << dim << " dimensions set to 0. "
<< "Changing tau from " << tau;
tau *= opts.tau_reduction;
GpsrCalcLinearCoeff(tau, g, &c); // Recalculate c with new tau
double tmp_objf = GpsrObjective(H, c, u, v);
msg << " to " << tau << ".\n\tStarting objective function changed from "
<< objf_ori << " to " << tmp_objf << ".";
KALDI_LOG << "GPSR for " << debug_str << ": " << msg.str();
iter = 0;
keep_going = true;
continue;
}
u.CopyFromVec(u_new);
v.CopyFromVec(v_new);
double delta = (delta_u.Norm(2.0) + delta_v.Norm(2.0)) / x->Norm(2.0);
KALDI_VLOG(1) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", delta = " << delta;
keep_going = (iter < opts.max_iters) && (delta > opts.stop_thresh);
KALDI_VLOG(3) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", value = " << x;
}
if (num_zeros != 0) {
KALDI_LOG << "GPSR for " << debug_str << ": number of 0's = " << num_zeros
<< " out of " << dim << " dimensions.";
}
if (opts.debias && num_zeros != 0) {
double residual = Debias(opts, H, g, x);
KALDI_LOG << "Debiasing: new residual = " << residual;
}
return objf_new - objf_ori;
}
template<>
float GpsrBasic(const GpsrConfig &opts, const SpMatrix<float> &H,
const Vector<float> &g, Vector<float> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
SpMatrix<double> Hd(H);
Vector<double> gd(g);
Vector<double> xd(*x);
float ans = GpsrBasic(opts, Hd, gd, &xd, debug_str);
x->CopyFromVec(xd);
return ans;
}
template<>
double GpsrBB(const GpsrConfig &opts, const SpMatrix<double> &H,
const Vector<double> &g, Vector<double> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
MatrixIndexT dim = x->Dim();
if (H.IsZero(0.0)) {
KALDI_WARN << "Zero quadratic term in GPSR for " << debug_str
<< ": leaving it unchanged.";
return 0.0;
}
// initialize the positive (u) and negative (v) parts of x, s.t. x = u - v
Vector<double> u(dim, kSetZero);
Vector<double> v(dim, kSetZero);
for (MatrixIndexT i = 0; i < dim; i++) {
if ((*x)(i) > 0) {
u(i) = (*x)(i);
} else {
v(i) = -(*x)(i);
}
}
double tau = opts.gpsr_tau; // May be modified later.
Vector<double> c(2*dim);
GpsrCalcLinearCoeff(tau, g, &c);
double objf_ori = GpsrObjective(H, c, u, v); // the obj. function at start
KALDI_VLOG(2) << "GPSR for " << debug_str << ": tau = " << tau
<< ";\t objf = " << objf_ori;
Vector<double> grad_u(dim);
Vector<double> grad_v(dim);
Vector<double> delta_u(dim);
Vector<double> delta_v(dim);
Vector<double> delta_x(dim);
Vector<double> H_delta_x(dim);
Vector<double> u_new(dim);
Vector<double> v_new(dim);
double objf_old, objf_new, num_zeros;
bool keep_going = true;
double alpha = 1.0;
for (int32 iter = 0; keep_going; iter++) {
objf_old = GpsrObjective(H, c, u, v);
GpsrGradient(H, c, u, v, &grad_u, &grad_v);
// Calculate the new step: [z_k - \alpha_k \grad F(z_k)]_+ - z_k
delta_u.CopyFromVec(u);
delta_u.AddVec(-alpha, grad_u);
delta_u.ApplyFloor(0.0);
delta_u.AddVec(-1.0, u);
delta_v.CopyFromVec(v);
delta_v.AddVec(-alpha, grad_v);
delta_v.ApplyFloor(0.0);
delta_v.AddVec(-1.0, v);
delta_x.CopyFromVec(delta_u);
delta_x.AddVec(-1.0, delta_v);
H_delta_x.AddSpVec(1.0, H, delta_x, 0.0);
double dx_H_dx = VecVec(delta_x, H_delta_x);
double lambda = -(VecVec(delta_u, grad_u) + VecVec(delta_v, grad_v))
/ (dx_H_dx + DBL_EPSILON); // step length
if (lambda < 0)
KALDI_WARN << "lambda is less than zero";
if (lambda > 1.0) lambda = 1.0;
// update alpha
alpha = (VecVec(delta_u, delta_u) + VecVec(delta_v, delta_v))
/ (dx_H_dx + DBL_EPSILON);
if (dx_H_dx <= 0) {
KALDI_WARN << "nonpositive curvature detected";
alpha = opts.alpha_max;
}
else if (alpha < opts.alpha_min)
alpha = opts.alpha_min;
else if (alpha > opts.alpha_max) alpha = opts.alpha_max;
u_new.CopyFromVec(delta_u);
u_new.Scale(lambda);
v_new.CopyFromVec(delta_v);
v_new.Scale(lambda);
u_new.AddVec(1.0, u);
v_new.AddVec(1.0, v);
objf_new = GpsrObjective(H, c, u_new, v_new);
double delta_objf = objf_old - objf_new;
KALDI_VLOG(2) << "GPSR for " << debug_str << ": iter " << iter
<< "; tau = " << tau << ";\t objf = " << objf_new
<< ";\t alpha = " << alpha << ";\t delta_real = "
<< delta_objf;
u.CopyFromVec(u_new);
v.CopyFromVec(v_new);
x->CopyFromVec(u);
x->AddVec(-1.0, v);
num_zeros = 0;
for (MatrixIndexT i = 0; i < dim; i++)
if ((*x)(i) == 0)
num_zeros++;
// ad hoc way to modify tau, if the solution is too sparse
if ((num_zeros / static_cast<double>(dim)) > opts.max_sparsity) {
std::ostringstream msg;
msg << num_zeros << " out of " << dim << " dimensions set to 0. "
<< "Changing tau from " << tau;
tau *= opts.tau_reduction;
GpsrCalcLinearCoeff(tau, g, &c); // Recalculate c with new tau
double tmp_objf = GpsrObjective(H, c, u, v);
msg << " to " << tau << ".\n\tStarting objective function changed from "
<< objf_ori << " to " << tmp_objf << ".";
KALDI_LOG << "GPSR for " << debug_str << ": " << msg.str();
iter = 0;
keep_going = true;
continue;
}
double delta = (delta_u.Norm(2.0) + delta_v.Norm(2.0)) / x->Norm(2.0);
KALDI_VLOG(1) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", delta = " << delta;
keep_going = (iter < opts.max_iters) && (delta > opts.stop_thresh);
KALDI_VLOG(3) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", value = " << x;
}
if (num_zeros != 0) {
KALDI_LOG << "GPSR for " << debug_str << ": number of 0's = " << num_zeros
<< " out of " << dim << " dimensions.";
}
if (opts.debias && num_zeros != 0) {
double residual = Debias(opts, H, g, x);
KALDI_LOG << "Debiasing: new residual = " << residual;
}
return objf_new - objf_ori;
}
template<>
float GpsrBB(const GpsrConfig &opts, const SpMatrix<float> &H,
const Vector<float> &g, Vector<float> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
SpMatrix<double> Hd(H);
Vector<double> gd(g);
Vector<double> xd(*x);
float ans = GpsrBB(opts, Hd, gd, &xd, debug_str);
x->CopyFromVec(xd);
return ans;
}
} // namespace kaldi

Просмотреть файл

@ -1,166 +0,0 @@
// matrix/kaldi-gpsr.h
// Copyright 2012 Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_GPSR_H_
#define KALDI_MATRIX_KALDI_GPSR_H_
#include <string>
#include <vector>
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
#include "itf/options-itf.h"
namespace kaldi {
/// This is an implementation of the GPSR algorithm. See, Figueiredo, Nowak and
/// Wright, "Gradient Projection for Sparse Reconstruction: Application to
/// Compressed Sensing and Other Inverse Problems," IEEE Journal of Selected
/// Topics in Signal Processing, vol. 1, no. 4, pp. 586-597, 2007.
/// http://dx.doi.org/10.1109/JSTSP.2007.910281
/// The GPSR algorithm, described in Figueiredo, et al., 2007, solves:
/// \f[ \min_x 0.5 * ||y - Ax||_2^2 + \tau ||x||_1, \f]
/// where \f$ x \in R^n, y \in R^k \f$, and \f$ A \in R^{n \times k} \f$.
/// In this implementation, we solve:
/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1, \f]
/// which is the more natural form in which such problems arise in our case.
/// Here, \f$ H = A^T A \in R^{n \times n} \f$ and \f$ g = A^T y \in R^n \f$.
/** \struct GpsrConfig
* Configuration variables needed in the GPSR algorithm.
*/
struct GpsrConfig {
bool use_gpsr_bb; ///< Use the Barzilai-Borwein gradient projection method
/// The following options are common to both the basic & Barzilai-Borwein
/// versions of GPSR
double stop_thresh; ///< Stopping threshold
int32 max_iters; ///< Maximum number of iterations
double gpsr_tau; ///< Regularization scale
double alpha_min; ///< Minimum step size in the feasible direction
double alpha_max; ///< Maximum step size in the feasible direction
double max_sparsity; ///< Maximum percentage of dimensions set to 0
double tau_reduction; ///< Multiply tau by this if max_sparsity reached
/// The following options are for the backtracking line search in basic GPSR.
/// Step size reduction factor in backtracking line search. 0 < beta < 1
double gpsr_beta;
/// Improvement factor in backtracking line search, i.e. the new objective
/// function must be less than the old one by mu times the gradient in the
/// direction of the change in x. 0 < mu < 1
double gpsr_mu;
int32 max_iters_backtrak; ///< Max iterations for backtracking line search
bool debias; ///< Do debiasing, i.e. unconstrained optimization at the end
double stop_thresh_debias; ///< Stopping threshold for debiasing stage
int32 max_iters_debias; ///< Maximum number of iterations for debiasing stage
GpsrConfig() {
use_gpsr_bb = true;
stop_thresh = 0.005;
max_iters = 100;
gpsr_tau = 10;
alpha_min = 1.0e-10;
alpha_max = 1.0e+20;
max_sparsity = 0.9;
tau_reduction = 0.8;
gpsr_beta = 0.5;
gpsr_mu = 0.1;
max_iters_backtrak = 50;
debias = false;
stop_thresh_debias = 0.001;
max_iters_debias = 50;
}
void Register(OptionsItf *po);
};
inline void GpsrConfig::Register(OptionsItf *po) {
std::string module = "GpsrConfig: ";
po->Register("use-gpsr-bb", &use_gpsr_bb, module+
"Use the Barzilai-Borwein gradient projection method.");
po->Register("stop-thresh", &stop_thresh, module+
"Stopping threshold for GPSR.");
po->Register("max-iters", &max_iters, module+
"Maximum number of iterations of GPSR.");
po->Register("gpsr-tau", &gpsr_tau, module+
"Regularization scale for GPSR.");
po->Register("alpha-min", &alpha_min, module+
"Minimum step size in feasible direction.");
po->Register("alpha-max", &alpha_max, module+
"Maximum step size in feasible direction.");
po->Register("max-sparsity", &max_sparsity, module+
"Maximum percentage of dimensions set to 0.");
po->Register("tau-reduction", &tau_reduction, module+
"Multiply tau by this if maximum sparsity is reached.");
po->Register("gpsr-beta", &gpsr_beta, module+
"Step size reduction factor in backtracking line search (0<beta<1).");
po->Register("gpsr-mu", &gpsr_mu, module+
"Improvement factor in backtracking line search (0<mu<1).");
po->Register("max-iters-backtrack", &max_iters_backtrak, module+
"Maximum number of iterations of backtracking line search.");
po->Register("debias", &debias, module+
"Do final debiasing step.");
po->Register("stop-thresh-debias", &stop_thresh_debias, module+
"Stopping threshold for debiaisng step.");
po->Register("max-iters-debias", &max_iters_debias, module+
"Maximum number of iterations of debiasing.");
}
/// Solves a quadratic program in \f$ x \f$, with L_1 regularization:
/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1. \f]
/// This is similar to SolveQuadraticProblem() in sp-matrix.h with an added
/// L_1 term.
template<typename Real>
Real Gpsr(const GpsrConfig &opts, const SpMatrix<Real> &H,
const Vector<Real> &g, Vector<Real> *x,
const char *debug_str = "[unknown]") {
if (opts.use_gpsr_bb)
return GpsrBB(opts, H, g, x, debug_str);
else
return GpsrBasic(opts, H, g, x, debug_str);
}
/// This is the basic GPSR algorithm, where the step size is determined by a
/// backtracking line search. The line search is called "Armijo rule along the
/// projection arc" in Bertsekas, Nonlinear Programming, 2nd ed. page 230.
template<typename Real>
Real GpsrBasic(const GpsrConfig &opts, const SpMatrix<Real> &H,
const Vector<Real> &g, Vector<Real> *x,
const char *debug_str = "[unknown]");
/// This is what the paper calls the Barzilai-Borwein variant. It is a
/// constrained Newton's method where the Hessian is approximated by a scaled
/// identity matrix.
template<typename Real>
Real GpsrBB(const GpsrConfig &opts, const SpMatrix<Real> &H,
const Vector<Real> &g, Vector<Real> *x,
const char *debug_str = "[unknown]");
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_GPSR_H_
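A usage sketch for the interface above (the helper name and the numeric values are illustrative, not from the original sources):
// Solve min_x 0.5 x^T H x - g^T x + tau ||x||_1 for a small random problem.
#include "matrix/kaldi-gpsr.h"
void GpsrExample() {
kaldi::GpsrConfig opts;
opts.gpsr_tau = 5.0;       // regularization scale
opts.use_gpsr_bb = false;  // use the basic variant with line search
kaldi::Matrix<double> A(20, 10);
A.SetRandn();
kaldi::SpMatrix<double> H(10);
H.AddMat2(1.0, A, kaldi::kTrans, 0.0);  // H = A^T A, positive semi-definite
kaldi::Vector<double> g(10), x(10);     // x = 0 is the initial guess
g.SetRandn();
kaldi::Gpsr(opts, H, g, &x, "example"); // x now holds the sparse minimizer
}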

View file

@@ -1,62 +0,0 @@
// matrix/kaldi-matrix-inl.h
// Copyright 2009-2011 Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_MATRIX_INL_H_
#define KALDI_MATRIX_KALDI_MATRIX_INL_H_ 1
#include "matrix/kaldi-vector.h"
namespace kaldi {
/// Empty constructor
template<typename Real>
Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
template<>
template<>
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
template<>
template<>
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
template<typename Real>
inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
M.Write(os, false);
return os;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
M.Read(is, false);
return is;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
M.Read(is, false);
return is;
}
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_

Diff not shown because of its large size.

View file

@@ -1,960 +0,0 @@
// matrix/kaldi-matrix.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Lukas Burget;
// Saarland University; Petr Schwarz; Yanmin Qian;
// Karel Vesely; Go Vivace Inc.; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_MATRIX_H_
#define KALDI_MATRIX_KALDI_MATRIX_H_ 1
#include "matrix-common.h"
namespace kaldi {
/// @{ \addtogroup matrix_funcs_scalar
/// We need to declare this here as it will be a friend function.
/// tr(A B), or tr(A B^T).
template<typename Real>
Real TraceMatMat(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
MatrixTransposeType trans = kNoTrans);
/// @}
/// \addtogroup matrix_group
/// @{
/// Base class which provides matrix operations not involving resizing
/// or allocation. Classes Matrix and SubMatrix inherit from it and take care
/// of allocation and resizing.
template<typename Real>
class MatrixBase {
public:
// so this child can access protected members of other instances.
friend class Matrix<Real>;
// friend declarations for CUDA matrices (see ../cudamatrix/)
friend class CuMatrixBase<Real>;
friend class CuMatrix<Real>;
friend class CuSubMatrix<Real>;
friend class CuPackedMatrix<Real>;
friend class PackedMatrix<Real>;
/// Returns number of rows (or zero for empty matrix).
inline MatrixIndexT NumRows() const { return num_rows_; }
/// Returns number of columns (or zero for empty matrix).
inline MatrixIndexT NumCols() const { return num_cols_; }
/// Stride (distance in memory between each row). Will be >= NumCols.
inline MatrixIndexT Stride() const { return stride_; }
/// Returns size in bytes of the data held by the matrix.
size_t SizeInBytes() const {
return static_cast<size_t>(num_rows_) * static_cast<size_t>(stride_) *
sizeof(Real);
}
/// Gives pointer to raw data (const).
inline const Real* Data() const {
return data_;
}
/// Gives pointer to raw data (non-const).
inline Real* Data() { return data_; }
/// Returns pointer to data for one row (non-const)
inline Real* RowData(MatrixIndexT i) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_;
}
/// Returns pointer to data for one row (const)
inline const Real* RowData(MatrixIndexT i) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_;
}
/// Indexing operator, non-const
/// (only checks sizes if compiled with -DKALDI_PARANOID)
inline Real& operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_));
return *(data_ + r * stride_ + c);
}
/// Indexing operator, provided for ease of debugging (gdb doesn't work
/// with parenthesis operator).
Real &Index (MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); }
/// Indexing operator, const
/// (only checks sizes if compiled with -DKALDI_PARANOID)
inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_));
return *(data_ + r * stride_ + c);
}
/* Basic setting-to-special values functions. */
/// Sets matrix to zero.
void SetZero();
/// Sets all elements to a specific value.
void Set(Real);
/// Sets to zero, except ones along diagonal [for non-square matrices too]
void SetUnit();
/// Sets to random values of a normal distribution
void SetRandn();
/// Sets to numbers uniformly distributed on (0, 1)
void SetRandUniform();
/* Copying functions. These do not resize the matrix! */
/// Copy given matrix. (no resize is done).
template<typename OtherReal>
void CopyFromMat(const MatrixBase<OtherReal> & M,
MatrixTransposeType trans = kNoTrans);
/// Copy from compressed matrix.
void CopyFromMat(const CompressedMatrix &M);
/// Copy given spmatrix. (no resize is done).
template<typename OtherReal>
void CopyFromSp(const SpMatrix<OtherReal> &M);
/// Copy given tpmatrix. (no resize is done).
template<typename OtherReal>
void CopyFromTp(const TpMatrix<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);
/// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h
template<typename OtherReal>
void CopyFromMat(const CuMatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);
/// Inverse of vec() operator. Copies vector into matrix, row-by-row.
/// Note that v.Dim() must either equal NumRows()*NumCols() or
/// NumCols()-- this has two modes of operation.
void CopyRowsFromVec(const VectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc
void CopyRowsFromVec(const CuVectorBase<Real> &v);
template<typename OtherReal>
void CopyRowsFromVec(const VectorBase<OtherReal> &v);
/// Copies vector into matrix, column-by-column.
/// Note that v.Dim() must either equal NumRows()*NumCols() or NumRows();
/// this has two modes of operation.
void CopyColsFromVec(const VectorBase<Real> &v);
/// Copy vector into specific column of matrix.
void CopyColFromVec(const VectorBase<Real> &v, const MatrixIndexT col);
/// Copy vector into specific row of matrix.
void CopyRowFromVec(const VectorBase<Real> &v, const MatrixIndexT row);
/// Copy vector into diagonal of matrix.
void CopyDiagFromVec(const VectorBase<Real> &v);
/* Accessing of sub-parts of the matrix. */
/// Return specific row of matrix [const].
inline const SubVector<Real> Row(MatrixIndexT i) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return SubVector<Real>(data_ + (i * stride_), NumCols());
}
/// Return specific row of matrix.
inline SubVector<Real> Row(MatrixIndexT i) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return SubVector<Real>(data_ + (i * stride_), NumCols());
}
/// Return a sub-part of matrix.
inline SubMatrix<Real> Range(const MatrixIndexT row_offset,
const MatrixIndexT num_rows,
const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, row_offset, num_rows,
col_offset, num_cols);
}
inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
const MatrixIndexT num_rows) const {
return SubMatrix<Real>(*this, row_offset, num_rows, 0, num_cols_);
}
inline SubMatrix<Real> ColRange(const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
}
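/// A usage sketch for the three accessors above (values are illustrative):
/// for Matrix<float> M(6, 8), M.RowRange(0, 2) views the first two rows,
/// M.ColRange(4, 4) the last four columns, and M.Range(1, 3, 2, 5) a 3x5
/// block whose top-left corner is M(1, 2). All of these share storage with
/// M, so writing through the SubMatrix modifies M itself.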
/* Various special functions. */
/// Returns sum of all elements in matrix.
Real Sum() const;
/// Returns trace of matrix.
Real Trace(bool check_square = true) const;
// If check_square = true, will crash if matrix is not square.
/// Returns maximum element of matrix.
Real Max() const;
/// Returns minimum element of matrix.
Real Min() const;
/// Element by element multiplication with a given matrix.
void MulElements(const MatrixBase<Real> &A);
/// Divide each element by the corresponding element of a given matrix.
void DivElements(const MatrixBase<Real> &A);
/// Multiply each element with a scalar value.
void Scale(Real alpha);
/// Set, element-by-element, *this = max(*this, A)
void Max(const MatrixBase<Real> &A);
/// Equivalent to (*this) = (*this) * diag(scale). Scaling
/// each column by a scalar taken from that dimension of the vector.
void MulColsVec(const VectorBase<Real> &scale);
/// Equivalent to (*this) = diag(scale) * (*this). Scaling
/// each row by a scalar taken from that dimension of the vector.
void MulRowsVec(const VectorBase<Real> &scale);
/// Divide each row into src.NumCols() groups, and then scale the i'th row's
/// j'th group of elements by src(i, j).
void MulRowsGroupMat(const MatrixBase<Real> &src);
/// Returns logdet of matrix.
Real LogDet(Real *det_sign = NULL) const;
/// matrix inverse.
/// if inverse_needed = false, will fill matrix with garbage.
/// (only useful if logdet wanted).
void Invert(Real *log_det = NULL, Real *det_sign = NULL,
bool inverse_needed = true);
/// matrix inverse [double].
/// if inverse_needed = false, will fill matrix with garbage
/// (only useful if logdet wanted).
/// Does inversion in double precision even if matrix was not double.
void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
bool inverse_needed = true);
/// Inverts all the elements of the matrix
void InvertElements();
/// Transpose the matrix. This one is only
/// applicable to square matrices (the one in the
/// Matrix child class works also for non-square).
void Transpose();
/// Copies column r from column indices[r] of src.
/// As a special case, if indices[r] == -1, sets column r to zero.
/// indices.size() must equal this->NumCols(),
/// all elements of indices must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this->NumRows().
void CopyCols(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Copies row r from row indices[r] of src.
/// As a special case, if indices[r] == -1, sets row r to zero.
/// indices.size() must equal this->NumRows(),
/// all elements of indices must be in [-1, src.NumRows()-1],
/// and src.NumCols() must equal this->NumCols().
void CopyRows(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Applies floor to all matrix elements
void ApplyFloor(Real floor_val);
/// Applies ceiling to all matrix elements
void ApplyCeiling(Real ceiling_val);
/// Calculates log of all the matrix elements
void ApplyLog();
/// Exponentiate each of the elements.
void ApplyExp();
/// Applies power to all matrix elements
void ApplyPow(Real power);
/// Applies the Heaviside step function (x > 0 ? 1 : 0) to all matrix elements
/// Note: in general you can make different choices for x = 0, but for now
/// please leave it as is (i.e. returning zero) because it affects the
/// RectifiedLinearComponent in the neural net code.
void ApplyHeaviside();
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is
/// slightly complicated, due to the need for P to be real. In the symmetric
/// case D is diagonal and real, but in
/// the non-symmetric case there may be complex-conjugate pairs of eigenvalues.
/// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a
/// pair is lambda +- i*mu, D will have a corresponding 2x2 block
/// [lambda, mu; -mu, lambda].
/// Note that if the input matrix (*this) is non-invertible, P may not be invertible
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we have
/// instead (*this) P = P D.
///
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag.
void Eig(MatrixBase<Real> *P,
VectorBase<Real> *eigs_real,
VectorBase<Real> *eigs_imag) const;
/// The Power method attempts to take the matrix to a power using a method that
/// works in general for fractional and negative powers. The input matrix must
/// be invertible and have reasonable condition (or we don't guarantee the
/// results). The method is based on the eigenvalue decomposition. It will
/// return false and leave the matrix unchanged, if at entry the matrix had
/// real negative eigenvalues (or if it had zero eigenvalues and the power was
/// negative).
bool Power(Real pow);
/** Singular value decomposition
Major limitations:
For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return
the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
one on the left is rectangular.
In Svd, *this = U*diag(S)*Vt.
Null pointers for U and/or Vt at input mean we do not want that output. We
expect that S.Dim() == n, U is either NULL or m by n,
and v is either NULL or n by n.
The singular values are not sorted (use SortSvd for that). */
void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt); // Destroys calling matrix.
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already
/// transposed; the normal formulation is U diag(s) V^T.
/// Null pointers for U or V mean we don't want that output (this saves
/// compute). The singular values are not sorted (use SortSvd for that).
void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt) const;
/// Compute SVD but only retain the singular values.
void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
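/// A usage sketch (values are illustrative): for Matrix<double> M(20, 10),
/// declare Vector<double> s(10) and Matrix<double> U(20, 10), Vt(10, 10);
/// M.Svd(&s, &U, &Vt) then satisfies M = U * diag(s) * Vt, and
/// SortSvd(&s, &U, &Vt), declared later in this header, orders the
/// singular values from greatest to least.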
/// Returns smallest singular value.
Real MinSingularValue() const {
Vector<Real> tmp(std::min(NumRows(), NumCols()));
Svd(&tmp);
return tmp.Min();
}
void TestUninitialized() const; // This function is designed so that if any
// element of the matrix is uninitialized memory, valgrind will complain.
/// returns condition number by computing Svd. Works even if cols > rows.
Real Cond() const;
/// Returns true if matrix is Symmetric.
bool IsSymmetric(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is Diagonal.
bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number
/// returns true if matrix is all zeros, but ones on diagonal
/// (not necessarily square).
bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number
/// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm,
/// or just "2-norm".
Real FrobeniusNorm() const;
/// Returns true if ((*this)-other).FrobeniusNorm()
/// <= tol * (*this).FrobeniusNorm().
bool ApproxEqual(const MatrixBase<Real> &other, float tol = 0.01) const;
/// Tests for exact equality. It's usually preferable to use ApproxEqual.
bool Equal(const MatrixBase<Real> &other) const;
/// largest absolute value.
Real LargestAbsElem() const;
/// Returns log(sum(exp())) without exp overflow
/// If prune > 0.0, it uses a pruning beam, discarding
/// terms less than (max - prune). Note: in future
/// we may change this so that if prune = 0.0, it takes
/// the max, so use -1 if you don't want to prune.
Real LogSumExp(Real prune = -1.0) const;
/// Apply soft-max to the collection of all elements of the
/// matrix and return normalizer (log sum of exponentials).
Real ApplySoftMax();
/// Set each element to the sigmoid of the corresponding element of "src".
void Sigmoid(const MatrixBase<Real> &src);
/// Set each element to y = log(1 + exp(x))
void SoftHinge(const MatrixBase<Real> &src);
/// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1/power)
/// where G = x.NumCols() / y.NumCols() must be an integer.
void GroupPnorm(const MatrixBase<Real> &src, Real power);
/// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this has the same dimension as "input", then it sets each element
/// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where
/// "output-elem" is whichever element of output depends on that input element.
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output,
Real power);
/// Set each element to the tanh of the corresponding element of "src".
void Tanh(const MatrixBase<Real> &src);
// Function used in backpropagating derivatives of the sigmoid function:
// element-by-element, set *this = diff * value * (1.0 - value).
void DiffSigmoid(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff);
// Function used in backpropagating derivatives of the tanh function:
// element-by-element, set *this = diff * (1.0 - value^2).
void DiffTanh(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff);
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
* positive semi-definite (check_thresh controls how stringent the check is;
* set it to 2 to ensure it won't ever complain, but it will zero out negative
* dimensions in your matrix).
*/
void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
Real check_thresh = 0.001);
friend Real kaldi::TraceMatMat<Real>(const MatrixBase<Real> &A,
const MatrixBase<Real> &B, MatrixTransposeType trans); // tr (A B)
// so it can get around const restrictions on the pointer to data_.
friend class SubMatrix<Real>;
/// Add a scalar to each element
void Add(const Real alpha);
/// Add a scalar to each diagonal element.
void AddToDiag(const Real alpha);
/// *this += alpha * a * b^T
template<typename OtherReal>
void AddVecVec(const Real alpha, const VectorBase<OtherReal> &a,
const VectorBase<OtherReal> &b);
/// [each row of *this] += alpha * v
template<typename OtherReal>
void AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v);
/// [each col of *this] += alpha * v
template<typename OtherReal>
void AddVecToCols(const Real alpha, const VectorBase<OtherReal> &v);
/// *this += alpha * M [or M^T]
void AddMat(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transA = kNoTrans);
/// *this = beta * *this + alpha * M M^T, for symmetric matrices. It only
/// updates the lower triangle of *this. It will leave the matrix asymmetric;
/// if you need it symmetric as a regular matrix, do CopyLowerToUpper().
void SymAddMat2(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transA, Real beta);
/// *this = beta * *this + alpha * diag(v) * M [or M^T].
/// The same as adding M but scaling each row M_i by v(i).
void AddDiagVecMat(const Real alpha, const VectorBase<Real> &v,
const MatrixBase<Real> &M, MatrixTransposeType transM,
Real beta = 1.0);
/// *this += alpha * S
template<typename OtherReal>
void AddSp(const Real alpha, const SpMatrix<OtherReal> &S);
void AddMatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// *this = a * b / c (by element; when c = 0, *this = a)
void AddMatMatDivMat(const MatrixBase<Real>& A,
const MatrixBase<Real>& B,
const MatrixBase<Real>& C);
/// A version of AddMatMat specialized for when the second argument
/// contains a lot of zeroes.
void AddMatSmat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// A version of AddMatMat specialized for when the first argument
/// contains a lot of zeroes.
void AddSmatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// this <-- beta*this + alpha*A*B*C.
void AddMatMatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const MatrixBase<Real>& C, MatrixTransposeType transC,
const Real beta);
/// this <-- beta*this + alpha*SpA*B.
// This and the routines below are really
// stubs that need to be made more efficient.
void AddSpMat(const Real alpha,
const SpMatrix<Real>& A,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A);
return AddMatMat(alpha, M, kNoTrans, B, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddTpMat(const Real alpha,
const TpMatrix<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A);
return AddMatMat(alpha, M, transA, B, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddMatSp(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const SpMatrix<Real>& B,
const Real beta) {
Matrix<Real> M(B);
return AddMatMat(alpha, A, transA, M, kNoTrans, beta);
}
/// this <-- beta*this + alpha*A*B*C.
void AddSpMatSp(const Real alpha,
const SpMatrix<Real> &A,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const SpMatrix<Real>& C,
const Real beta) {
Matrix<Real> M(A), N(C);
return AddMatMatMat(alpha, M, kNoTrans, B, transB, N, kNoTrans, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddMatTp(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const TpMatrix<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(B);
return AddMatMat(alpha, A, transA, M, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddTpTp(const Real alpha,
const TpMatrix<Real>& A, MatrixTransposeType transA,
const TpMatrix<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A), N(B);
return AddMatMat(alpha, M, transA, N, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
// Unlike the stub routines above, this one is implemented efficiently.
void AddSpSp(const Real alpha,
const SpMatrix<Real>& A, const SpMatrix<Real>& B,
const Real beta);
/// Copy lower triangle to upper triangle (symmetrize)
void CopyLowerToUpper();
/// Copy upper triangle to lower triangle (symmetrize)
void CopyUpperToLower();
/// This function orthogonalizes the rows of a matrix using the Gram-Schmidt
/// process. It is only applicable if NumRows() <= NumCols(). It will use
/// random number generation to fill in rows with something nonzero, in cases
/// where the original matrix was of deficient row rank.
void OrthogonalizeRows();
/// stream read.
/// Use instead of stream>>*this, if you want to add to existing contents.
// Will throw exception on failure.
void Read(std::istream & in, bool binary, bool add = false);
/// write to stream.
void Write(std::ostream & out, bool binary) const;
// Below are internal methods for Svd; the user does not need to know about them.
#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
// protected:
// Should be protected but used directly in testing routine.
// destroys *this!
void LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt);
#else
protected:
// destroys *this!
bool JamaSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *V);
#endif
protected:
/// Initializer, callable only from child.
explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) :
data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
/// Initializer, callable only from child.
/// Empty initializer, for un-initialized matrix.
explicit MatrixBase(): data_(NULL) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Make sure pointers to MatrixBase cannot be deleted.
~MatrixBase() { }
/// A workaround that allows SubMatrix to get a pointer to non-const data
/// for const Matrix. Unfortunately C++ does not allow us to declare a
/// "public const" inheritance or anything like that, so it would require
/// a lot of work to make the SubMatrix class totally const-correct--
/// we would have to override many of the Matrix functions.
inline Real* Data_workaround() const {
return data_;
}
/// data memory area
Real* data_;
/// These attributes store the real matrix size as it is stored in memory,
/// including memory alignment.
MatrixIndexT num_cols_; ///< Number of columns
MatrixIndexT num_rows_; ///< Number of rows
/** True number of columns for the internal matrix. This number may differ
* from num_cols_ as memory alignment might be used. */
MatrixIndexT stride_;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
};
/// A class for storing matrices.
template<typename Real>
class Matrix : public MatrixBase<Real> {
public:
/// Empty constructor.
Matrix();
/// Basic constructor. Sets to zero by default.
/// if set_zero == false, memory contents are undefined.
Matrix(const MatrixIndexT r, const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero):
MatrixBase<Real>() { Resize(r, c, resize_type); }
/// Copy constructor from CUDA matrix
/// This is defined in ../cudamatrix/cu-matrix.h
template<typename OtherReal>
explicit Matrix(const CuMatrixBase<OtherReal> &cu,
MatrixTransposeType trans = kNoTrans);
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(Matrix<Real> *other);
/// Defined in ../cudamatrix/cu-matrix.cc
void Swap(CuMatrix<Real> *mat);
/// Constructor from any MatrixBase. Can also copy with transpose.
/// Allocates new memory.
explicit Matrix(const MatrixBase<Real> & M,
MatrixTransposeType trans = kNoTrans);
/// Same as above, but need to avoid default copy constructor.
Matrix(const Matrix<Real> & M); // (cannot make explicit)
/// Copy constructor: as above, but from another type.
template<typename OtherReal>
explicit Matrix(const MatrixBase<OtherReal> & M,
MatrixTransposeType trans = kNoTrans);
/// Copy constructor taking SpMatrix...
/// It is symmetric, so no option for transpose, and NumRows == NumCols
template<typename OtherReal>
explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
Resize(M.NumRows(), M.NumRows(), kUndefined);
this->CopyFromSp(M);
}
/// Constructor from CompressedMatrix
explicit Matrix(const CompressedMatrix &C);
/// Copy constructor taking TpMatrix...
template <typename OtherReal>
explicit Matrix(const TpMatrix<OtherReal> & M,
MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
if (trans == kNoTrans) {
Resize(M.NumRows(), M.NumCols(), kUndefined);
this->CopyFromTp(M);
} else {
Resize(M.NumCols(), M.NumRows(), kUndefined);
this->CopyFromTp(M, kTrans);
}
}
/// read from stream.
// Unlike the one in the base class, this allows resizing.
void Read(std::istream & in, bool binary, bool add = false);
/// Remove a specified row.
void RemoveRow(MatrixIndexT i);
/// Transpose the matrix. Works for non-square
/// matrices as well as square ones.
void Transpose();
/// Destructor to free matrices.
~Matrix() { Destroy(); }
/// Sets matrix to a specified size (zero is OK as long as both r and c are
/// zero). The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(const MatrixIndexT r,
const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero);
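/// A usage sketch for Resize (values are illustrative): Matrix<float> M;
/// M.Resize(3, 4); gives a 3x4 zero matrix; a later
/// M.Resize(6, 4, kCopyData); keeps the old 3x4 contents in the top rows
/// and zeroes the three new ones.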
/// Assignment operator that takes MatrixBase.
Matrix<Real> &operator = (const MatrixBase<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
MatrixBase<Real>::CopyFromMat(other);
return *this;
}
/// Assignment operator. Needed for inclusion in std::vector.
Matrix<Real> &operator = (const Matrix<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
MatrixBase<Real>::CopyFromMat(other);
return *this;
}
private:
/// Deallocates memory and sets to empty matrix (dimension 0, 0).
void Destroy();
/// Init assumes the current class contents are invalid (i.e. junk or have
/// already been freed), and it sets the matrix to newly allocated memory with
/// the specified number of rows and columns. r == c == 0 is acceptable. The data
/// memory contents will be undefined.
void Init(const MatrixIndexT r,
const MatrixIndexT c);
};
/// @} end "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
/// A structure containing the HTK header.
/// [TODO: change the style of the variables to Kaldi-compliant]
struct HtkHeader {
/// Number of samples.
int32 mNSamples;
/// Sample period.
int32 mSamplePeriod;
/// Sample size
int16 mSampleSize;
/// Sample kind.
uint16 mSampleKind;
};
// Read HTK formatted features from file into matrix.
template<typename Real>
bool ReadHtk(std::istream &is, Matrix<Real> *M, HtkHeader *header_ptr);
// Write (HTK format) features to file from matrix.
template<typename Real>
bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr);
// Write (CMUSphinx format) features to file from matrix.
template<typename Real>
bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M);
/// @} end of "addtogroup matrix_funcs_io"
/**
Sub-matrix representation.
Can work with sub-parts of a matrix using this class.
Note that SubMatrix is not very const-correct-- it allows you to
change the contents of a const Matrix. Be careful!
*/
template<typename Real>
class SubMatrix : public MatrixBase<Real> {
public:
// Initialize a SubMatrix from part of a matrix; this is
// a bit like A(b:c, d:e) in Matlab.
// This initializer is against the proper semantics of "const", since
// SubMatrix can change its contents. It would be hard to implement
// a "const-safe" version of this class.
SubMatrix(const MatrixBase<Real>& T,
const MatrixIndexT ro, // row offset, 0 <= ro < NumRows()
const MatrixIndexT r, // number of rows, r > 0
const MatrixIndexT co, // column offset, 0 <= co < NumCols()
const MatrixIndexT c); // number of columns, c > 0
// This initializer is mostly intended for use in CuMatrix and related
// classes. Be careful!
SubMatrix(Real *data,
MatrixIndexT num_rows,
MatrixIndexT num_cols,
MatrixIndexT stride);
~SubMatrix<Real>() {}
/// This type of constructor is needed for Range() to work [in Matrix base
/// class]. Cannot make it explicit.
SubMatrix<Real> (const SubMatrix &other):
MatrixBase<Real> (other.data_, other.num_cols_, other.num_rows_,
other.stride_) {}
private:
/// Disallow assignment.
SubMatrix<Real> &operator = (const SubMatrix<Real> &other);
};
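// A hedged usage sketch (not in the original header): taking a view of rows
// 1..2 and columns 0..3 of a matrix, like A(2:3, 1:4) in Matlab. Assumes the
// usual Kaldi headers.
//
//   Matrix<float> a(5, 6);
//   a.SetRandn();
//   SubMatrix<float> block(a, 1, 2, 0, 4);  // row offset 1, 2 rows,
//                                           // col offset 0, 4 cols
//   block.Scale(2.0);  // writes through to the underlying data of 'a'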
/// @} End of "addtogroup matrix_funcs_io".
/// \addtogroup matrix_funcs_scalar
/// @{
// Some scalar-valued functions; several of these are traces of matrix products.
template<typename Real>
bool ApproxEqual(const MatrixBase<Real> &A,
const MatrixBase<Real> &B, Real tol = 0.01) {
return A.ApproxEqual(B, tol);
}
template<typename Real>
inline void AssertEqual(MatrixBase<Real> &A, MatrixBase<Real> &B,
float tol = 0.01) {
KALDI_ASSERT(A.ApproxEqual(B, tol));
}
/// Returns trace of matrix.
template <typename Real>
double TraceMat(const MatrixBase<Real> &A) { return A.Trace(); }
/// Returns tr(A B C)
template <typename Real>
Real TraceMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
const MatrixBase<Real> &B, MatrixTransposeType transB,
const MatrixBase<Real> &C, MatrixTransposeType transC);
/// Returns tr(A B C D)
template <typename Real>
Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
const MatrixBase<Real> &B, MatrixTransposeType transB,
const MatrixBase<Real> &C, MatrixTransposeType transC,
const MatrixBase<Real> &D, MatrixTransposeType transD);
/// @} end "addtogroup matrix_funcs_scalar"
/// \addtogroup matrix_funcs_misc
/// @{
/// Function to ensure that SVD is sorted. This function is made as generic as
/// possible, to be applicable to other types of problems. s->Dim() should be
/// the same as U->NumCols(), and we sort s from greatest to least absolute
/// value (if sort_on_absolute_value == true) or greatest to least value
/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
/// exists, around in the same way. Note: the "absolute value" part won't matter
/// if this is an actual SVD, since singular values are non-negative.
template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real>* Vt = NULL,
bool sort_on_absolute_value = true);
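// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: sort the singular values of an SVD in decreasing order,
// permuting U and Vt consistently.
//
//   Matrix<float> m(4, 4), u(4, 4), vt(4, 4);
//   Vector<float> s(4);
//   m.SetRandn();
//   m.Svd(&s, &u, &vt);    // m = u * diag(s) * vt
//   SortSvd(&s, &u, &vt);  // now s(0) >= s(1) >= ...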
/// Creates the eigenvalue matrix D that is part of the decomposition used in Matrix::Eig.
/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding
/// 2x2 block [lambda, mu; -mu, lambda].
/// This function will throw if any complex eigenvalues are not in complex conjugate
/// pairs (or the members of such pairs are not consecutively numbered).
template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag,
MatrixBase<Real> *D);
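// A hedged illustration (not in the original header): for eigenvalues
// 3 and 1 +- 2i (lambda = 1, mu = 2), D would be the 3x3 block-diagonal matrix
//   [ 3  0  0 ]
//   [ 0  1  2 ]
//   [ 0 -2  1 ]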
/// The following function is used in Matrix::Power, and separately tested, so we
/// declare it here mainly for the testing code to see. It takes a complex value to
/// a power using a method that will work for noninteger powers (but will fail if the
/// complex value is real and negative).
template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
/// @} end of addtogroup matrix_funcs_misc
/// \addtogroup matrix_funcs_io
/// @{
template<typename Real>
std::ostream & operator << (std::ostream & Out, const MatrixBase<Real> & M);
template<typename Real>
std::istream & operator >> (std::istream & In, MatrixBase<Real> & M);
// The Matrix read allows resizing, so we override the MatrixBase one.
template<typename Real>
std::istream & operator >> (std::istream & In, Matrix<Real> & M);
template<typename Real>
bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
}
/// @} end of \addtogroup matrix_funcs_io
} // namespace kaldi
// we need to include the implementation and some
// template specializations.
#include "matrix/kaldi-matrix-inl.h"
#endif // KALDI_MATRIX_KALDI_MATRIX_H_


@ -1,58 +0,0 @@
// matrix/kaldi-vector-inl.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This is an internal header file, included by other library headers.
// You should not attempt to use it directly.
#ifndef KALDI_MATRIX_KALDI_VECTOR_INL_H_
#define KALDI_MATRIX_KALDI_VECTOR_INL_H_ 1
namespace kaldi {
template<typename Real>
std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
}
template<typename Real>
std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
}
template<typename Real>
std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
}
template<>
template<>
void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
template<>
template<>
void VectorBase<double>::AddVec<double>(const double alpha,
const VectorBase<double> &rv);
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_VECTOR_INL_H_

Diff not shown because of its large size.

Просмотреть файл

@ -1,570 +0,0 @@
// matrix/kaldi-vector.h
// Copyright 2009-2012 Ondrej Glembek; Microsoft Corporation; Lukas Burget;
// Saarland University (Author: Arnab Ghoshal);
// Ariya Rastrow; Petr Schwarz; Yanmin Qian;
// Karel Vesely; Go Vivace Inc.; Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_VECTOR_H_
#define KALDI_MATRIX_KALDI_VECTOR_H_ 1
#include "matrix/matrix-common.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
/// Provides a vector abstraction class.
/// This class provides a way to work with vectors in kaldi.
/// It encapsulates basic operations and memory optimizations.
template<typename Real>
class VectorBase {
public:
/// Set vector to all zeros.
void SetZero();
/// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-06) const; // replace magic number
/// Set all members of a vector to a specified value.
void Set(Real f);
/// Set vector to random normally-distributed noise.
void SetRandn();
/// This function returns a random index into this vector,
/// chosen with probability proportional to the corresponding
/// element. Requires that this->Min() >= 0 and this->Sum() > 0.
MatrixIndexT RandCategorical() const;
/// Returns the dimension of the vector.
inline MatrixIndexT Dim() const { return dim_; }
/// Returns the size in memory of the vector, in bytes.
inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
/// Returns a pointer to the start of the vector's data.
inline Real* Data() { return data_; }
/// Returns a pointer to the start of the vector's data (const).
inline const Real* Data() const { return data_; }
/// Indexing operator (const).
inline Real operator() (MatrixIndexT i) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/// Indexing operator (non-const).
inline Real & operator() (MatrixIndexT i) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/** @brief Returns a sub-vector of a vector (a range of elements).
* @param o [in] Origin, 0 <= o < Dim()
* @param l [in] Length, 0 <= l <= Dim() - o
* @return A SubVector object that aliases the data of the Vector object.
* See @c SubVector class for details */
SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
return SubVector<Real>(*this, o, l);
}
/** @brief Returns a const sub-vector of a vector (a range of elements).
* @param o [in] Origin, 0 <= o < Dim()
* @param l [in] Length, 0 <= l <= Dim() - o
* @return A SubVector object that aliases the data of the Vector object.
* See @c SubVector class for details */
const SubVector<Real> Range(const MatrixIndexT o,
const MatrixIndexT l) const {
return SubVector<Real>(*this, o, l);
}
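// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers:
//
//   Vector<float> v(10);
//   v.SetRandn();
//   SubVector<float> mid = v.Range(2, 5);  // elements v(2) .. v(6)
//   mid.SetZero();                         // zeroes that slice of 'v'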
/// Copy data from another vector (must match own size).
void CopyFromVec(const VectorBase<Real> &v);
/// Copy data from a SpMatrix or TpMatrix (must match own size).
template<typename OtherReal>
void CopyFromPacked(const PackedMatrix<OtherReal> &M);
/// Copy data from another vector of different type (double vs. float)
template<typename OtherReal>
void CopyFromVec(const VectorBase<OtherReal> &v);
/// Copy from CuVector. This is defined in ../cudamatrix/cu-vector.h
template<typename OtherReal>
void CopyFromVec(const CuVectorBase<OtherReal> &v);
/// Apply natural log to all elements. Throws if any element of
/// the vector is negative (but doesn't complain about zero; the
/// log will be -infinity).
void ApplyLog();
/// Apply natural log to another vector and put result in *this.
void ApplyLogAndCopy(const VectorBase<Real> &v);
/// Apply exponential to each value in vector.
void ApplyExp();
/// Take absolute value of each of the elements
void Abs();
/// Applies floor to all elements. Returns number of elements floored.
MatrixIndexT ApplyFloor(Real floor_val);
/// Applies ceiling to all elements. Returns number of elements changed.
MatrixIndexT ApplyCeiling(Real ceil_val);
/// Applies floor to all elements. Returns number of elements floored.
MatrixIndexT ApplyFloor(const VectorBase<Real> &floor_vec);
/// Apply soft-max to vector and return normalizer (log sum of exponentials).
/// This is the same as: \f$ x(i) = exp(x(i)) / \sum_j exp(x(j)) \f$
Real ApplySoftMax();
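// A hedged numeric sketch (not in the original header): for x = [0, ln 3],
// ApplySoftMax() leaves x = [0.25, 0.75] and returns log(1 + 3) = log 4,
// the log of the sum of exponentials of the original elements.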
/// Sets each element of *this to the tanh of the corresponding element of "src".
void Tanh(const VectorBase<Real> &src);
/// Sets each element of *this to the sigmoid function of the corresponding
/// element of "src".
void Sigmoid(const VectorBase<Real> &src);
/// Take all elements of vector to a power.
void ApplyPow(Real power);
/// Compute the p-th norm of the vector.
Real Norm(Real p) const;
/// Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
bool ApproxEqual(const VectorBase<Real> &other, float tol = 0.01) const;
/// Invert all elements.
void InvertElements();
/// Add vector : *this = *this + alpha * v (with casting between floats and
/// doubles).
template<typename OtherReal>
void AddVec(const Real alpha, const VectorBase<OtherReal> &v);
/// Add vector : *this = *this + alpha * v^2 [element-wise squaring].
void AddVec2(const Real alpha, const VectorBase<Real> &v);
/// Add vector : *this = *this + alpha * v^2 [element-wise squaring],
/// with casting between floats and doubles.
template<typename OtherReal>
void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
/// Add matrix times vector : this <-- beta*this + alpha*M*v.
/// Calls BLAS GEMV.
void AddMatVec(const Real alpha, const MatrixBase<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
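// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: y = 1.0 * M * x + 0.0 * y, i.e. a plain matrix-vector
// product via BLAS GEMV.
//
//   Matrix<float> m(3, 4);
//   Vector<float> x(4), y(3);
//   m.SetRandn(); x.SetRandn();
//   y.AddMatVec(1.0, m, kNoTrans, x, 0.0);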
/// This is as AddMatVec, except optimized for where v contains a lot
/// of zeros.
void AddMatSvec(const Real alpha, const MatrixBase<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// Add symmetric positive definite matrix times vector:
/// this <-- beta*this + alpha*M*v. Calls BLAS SPMV.
void AddSpVec(const Real alpha, const SpMatrix<Real> &M,
const VectorBase<Real> &v, const Real beta); // **beta previously defaulted to 0.0**
/// Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
/// Works even if v == *this.
void AddTpVec(const Real alpha, const TpMatrix<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// Set each element to y = (x == orig ? changed : x).
void ReplaceValue(Real orig, Real changed);
/// Multiply element-by-element by another vector.
void MulElements(const VectorBase<Real> &v);
/// Multiply element-by-element by another vector of different type.
template<typename OtherReal>
void MulElements(const VectorBase<OtherReal> &v);
/// Divide element-by-element by a vector.
void DivElements(const VectorBase<Real> &v);
/// Divide element-by-element by a vector of different type.
template<typename OtherReal>
void DivElements(const VectorBase<OtherReal> &v);
/// Add a constant to each element of a vector.
void Add(Real c);
/// Add element-by-element product of vectors:
/// this <-- alpha * v .* r + beta * *this.
void AddVecVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta);
/// Add element-by-element quotient of two vectors.
/// this <---- alpha*v/r + beta*this
void AddVecDivVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta);
/// Multiplies all elements by this constant.
void Scale(Real alpha);
/// Multiplies this vector by lower-triangular matrix: *this <-- *this * M.
void MulTp(const TpMatrix<Real> &M, const MatrixTransposeType trans);
/// Performs a row stack of the matrix M
void CopyRowsFromMat(const MatrixBase<Real> &M);
template<typename OtherReal>
void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
/// The following is implemented in ../cudamatrix/cu-matrix.cc
void CopyRowsFromMat(const CuMatrixBase<Real> &M);
/// Performs a column stack of the matrix M
void CopyColsFromMat(const MatrixBase<Real> &M);
/// Extracts a row of the matrix M. Could also do this with
/// this->Copy(M[row]).
void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
/// Extracts a row of the matrix M with type conversion.
template<typename OtherReal>
void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
/// Extracts a row of the symmetric matrix S.
template<typename OtherReal>
void CopyRowFromSp(const SpMatrix<OtherReal> &S, MatrixIndexT row);
/// Extracts a column of the matrix M.
template<typename OtherReal>
void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
/// Extracts the diagonal of the matrix M.
void CopyDiagFromMat(const MatrixBase<Real> &M);
/// Extracts the diagonal of a packed matrix M; works for Sp or Tp.
void CopyDiagFromPacked(const PackedMatrix<Real> &M);
/// Extracts the diagonal of a symmetric matrix.
inline void CopyDiagFromSp(const SpMatrix<Real> &M) { CopyDiagFromPacked(M); }
/// Extracts the diagonal of a triangular matrix.
inline void CopyDiagFromTp(const TpMatrix<Real> &M) { CopyDiagFromPacked(M); }
/// Returns the maximum value of any element.
Real Max() const;
/// Returns the maximum value of any element, and the associated index.
Real Max(MatrixIndexT *index) const;
/// Returns the minimum value of any element.
Real Min() const;
/// Returns the minimum value of any element, and the associated index.
Real Min(MatrixIndexT *index) const;
/// Returns sum of the elements
Real Sum() const;
/// Returns sum of the logs of the elements. More efficient than
/// just taking log of each. Will return NaN if any elements are
/// negative.
Real SumLog() const;
/// Does *this = alpha * (sum of rows of M) + beta * *this.
void AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
/// Does *this = alpha * (sum of columns of M) + beta * *this.
void AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
/// Add the diagonal of a matrix times itself:
/// *this = diag(M M^T) + beta * *this (if trans == kNoTrans), or
/// *this = diag(M^T M) + beta * *this (if trans == kTrans).
void AddDiagMat2(Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType trans = kNoTrans, Real beta = 1.0);
/// Add the diagonal of a matrix product: *this = diag(M N), assuming the
/// "trans" arguments are both kNoTrans; for transpose arguments, it behaves
/// as you would expect.
void AddDiagMatMat(Real alpha, const MatrixBase<Real> &M, MatrixTransposeType transM,
const MatrixBase<Real> &N, MatrixTransposeType transN,
Real beta = 1.0);
/// Returns log(sum(exp())) without exp overflow
/// If prune > 0.0, ignores terms less than the max - prune.
/// [Note: in future, if prune = 0.0, it will take the max.
/// For now, use -1 if you don't want it to prune.]
Real LogSumExp(Real prune = -1.0) const;
/// Reads from C++ stream (option to add to existing contents).
/// Throws exception on failure
void Read(std::istream & in, bool binary, bool add = false);
/// Writes to C++ stream (option to write in binary).
void Write(std::ostream &Out, bool binary) const;
friend class VectorBase<double>;
friend class VectorBase<float>;
friend class CuVectorBase<Real>;
friend class CuVector<Real>;
protected:
/// Destructor; does not deallocate memory, this is handled by child classes.
/// This destructor is protected so this object can only be
/// deleted via a child.
~VectorBase() {}
/// Empty initializer, corresponds to vector of zero size.
explicit VectorBase(): data_(NULL), dim_(0) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Took this out since it is not currently used, and it is possible to create
// objects where the allocated memory is not the same size as dim_ : Arnab
// /// Initializer from a pointer and a size; keeps the pointer internally
// /// (ownership or non-ownership depends on the child class).
// explicit VectorBase(Real* data, MatrixIndexT dim)
// : data_(data), dim_(dim) {}
// Arnab : made this protected since it is unsafe too.
/// Load data into the vector: sz must match own size.
void CopyFromPtr(const Real* Data, MatrixIndexT sz);
/// data memory area
Real* data_;
/// dimension of vector
MatrixIndexT dim_;
KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
}; // class VectorBase
/** @brief A class representing a vector.
*
* This class provides a way to work with vectors in kaldi.
* It encapsulates basic operations and memory optimizations. */
template<typename Real>
class Vector: public VectorBase<Real> {
public:
/// Constructor that takes no arguments. Initializes to empty.
Vector(): VectorBase<Real>() {}
/// Constructor with specific size. Sets to all-zero by default;
/// if resize_type == kUndefined, memory contents are undefined.
explicit Vector(const MatrixIndexT s,
MatrixResizeType resize_type = kSetZero)
: VectorBase<Real>() { Resize(s, resize_type); }
/// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h
template<typename OtherReal>
explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
/// Copy-constructor from base-class, needed to copy from SubVector.
explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
/// Type conversion constructor.
template<typename OtherReal>
explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
// Took this out since it is unsafe : Arnab
// /// Constructor from a pointer and a size; copies the data to a location
// /// it owns.
// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
// Resize(s);
// CopyFromPtr(Data, s);
// }
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(Vector<Real> *other);
/// Destructor. Deallocates memory.
~Vector() { Destroy(); }
/// Read function using C++ streams. Can also add to existing contents
/// of vector.
void Read(std::istream & in, bool binary, bool add = false);
/// Set vector to a specified size (can be zero).
/// The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
/// Remove one element and shifts later elements down.
void RemoveElement(MatrixIndexT i);
/// Assignment operator. Needed for inclusion in std::vector.
Vector<Real> &operator = (const Vector<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
/// Assignment operator that takes VectorBase.
Vector<Real> &operator = (const VectorBase<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
/// has already been freed), and it sets the vector to newly allocated memory
/// with the specified dimension. dim == 0 is acceptable. The memory contents
/// pointed to by data_ will be undefined.
void Init(const MatrixIndexT dim);
/// Destroy function, called internally.
void Destroy();
};
/// Represents a non-allocating general vector which can be defined
/// as a sub-vector of higher-level vector [or as the row of a matrix].
template<typename Real>
class SubVector : public VectorBase<Real> {
public:
/// Constructor from a Vector or SubVector.
/// SubVectors are not const-safe and it's very hard to make them
/// so for now we just give up. This function contains const_cast.
SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
const MatrixIndexT length) : VectorBase<Real>() {
// following assert equiv to origin>=0 && length>=0 &&
// origin+length <= t.Dim()
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+
static_cast<UnsignedMatrixIndexT>(length) <=
static_cast<UnsignedMatrixIndexT>(t.Dim()));
VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
VectorBase<Real>::dim_ = length;
}
/// This constructor initializes the vector to point at the contents
/// of this packed matrix (SpMatrix or TpMatrix).
SubVector(const PackedMatrix<Real> &M) {
VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
}
/// Copy constructor
SubVector(const SubVector &other) : VectorBase<Real> () {
// this copy constructor needed for Range() to work in base class.
VectorBase<Real>::data_ = other.data_;
VectorBase<Real>::dim_ = other.dim_;
}
/// Constructor from a pointer to memory and a length. Keeps a pointer
/// to the data but does not take ownership (will never delete).
SubVector(Real *data, MatrixIndexT length) : VectorBase<Real> () {
VectorBase<Real>::data_ = data;
VectorBase<Real>::dim_ = length;
}
/// This operation does not preserve const-ness, so be careful.
SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
VectorBase<Real>::dim_ = matrix.NumCols();
}
~SubVector() {} ///< Destructor (does nothing; no pointers are owned here).
private:
/// Disallow assignment operator.
SubVector & operator = (const SubVector &other);
};
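// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: wrapping raw memory and a matrix row without copying.
//
//   float raw[4] = {1, 2, 3, 4};
//   SubVector<float> view(raw, 4);  // non-owning view of 'raw'
//   Matrix<float> m(3, 4);
//   SubVector<float> row1(m, 1);    // aliases row 1 of 'm'
//   row1.Set(0.5);                  // writes into 'm'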
/// @} end of "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
/// Output to a C++ stream. Non-binary by default (use Write for
/// binary output).
template<typename Real>
std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template<typename Real>
std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template<typename Real>
std::istream & operator >> (std::istream & in, Vector<Real> & v);
/// @} end of \addtogroup matrix_funcs_io
/// \addtogroup matrix_funcs_scalar
/// @{
template<typename Real>
bool ApproxEqual(const VectorBase<Real> &a,
const VectorBase<Real> &b, Real tol = 0.01) {
return a.ApproxEqual(b, tol);
}
template<typename Real>
inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
float tol = 0.01) {
KALDI_ASSERT(a.ApproxEqual(b, tol));
}
/// Returns dot product between v1 and v2.
template<typename Real>
Real VecVec(const VectorBase<Real> &v1, const VectorBase<Real> &v2);
template<typename Real, typename OtherReal>
Real VecVec(const VectorBase<Real> &v1, const VectorBase<OtherReal> &v2);
/// Returns \f$ v_1^T M v_2 \f$ .
/// Not as efficient as it could be where v1 == v2.
template<typename Real>
Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
const VectorBase<Real> &v2);
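// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: the scalar v1^T M v2.
//
//   Vector<float> v1(3), v2(4);
//   Matrix<float> m(3, 4);
//   v1.SetRandn(); v2.SetRandn(); m.SetRandn();
//   float s = VecMatVec(v1, m, v2);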
/// @} End of "addtogroup matrix_funcs_scalar"
} // namespace kaldi
// we need to include the implementation
#include "matrix/kaldi-vector-inl.h"
#endif // KALDI_MATRIX_KALDI_VECTOR_H_


@ -1,100 +0,0 @@
// matrix/matrix-common.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_MATRIX_COMMON_H_
#define KALDI_MATRIX_MATRIX_COMMON_H_
// This file contains some #includes, forward declarations
// and typedefs that are needed by all the main header
// files in this directory.
#include "base/kaldi-common.h"
#include "matrix/kaldi-blas.h"
namespace kaldi {
typedef enum {
kTrans = CblasTrans,
kNoTrans = CblasNoTrans
} MatrixTransposeType;
typedef enum {
kSetZero,
kUndefined,
kCopyData
} MatrixResizeType;
typedef enum {
kTakeLower,
kTakeUpper,
kTakeMean,
kTakeMeanAndCheck
} SpCopyType;
template<typename Real> class VectorBase;
template<typename Real> class Vector;
template<typename Real> class SubVector;
template<typename Real> class MatrixBase;
template<typename Real> class SubMatrix;
template<typename Real> class Matrix;
template<typename Real> class SpMatrix;
template<typename Real> class TpMatrix;
template<typename Real> class PackedMatrix;
// these are classes that won't be defined in this
// directory; they're mostly needed for friend declarations.
template<typename Real> class CuMatrixBase;
template<typename Real> class CuSubMatrix;
template<typename Real> class CuMatrix;
template<typename Real> class CuVectorBase;
template<typename Real> class CuSubVector;
template<typename Real> class CuVector;
template<typename Real> class CuPackedMatrix;
template<typename Real> class CuSpMatrix;
template<typename Real> class CuTpMatrix;
class CompressedMatrix;
/// This class provides a way for switching between double and float types.
template<typename T> class OtherReal { }; // useful in reading+writing routines
// to switch double and float.
/// A specialized class for switching from float to double.
template<> class OtherReal<float> {
public:
typedef double Real;
};
/// A specialized class for switching from double to float.
template<> class OtherReal<double> {
public:
typedef float Real;
};
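// A hedged usage sketch (not in the original header): OtherReal lets a
// template obtain "the other" floating-point type, e.g. to declare a
// double-precision copy inside a float routine.
//
//   template<typename Real>
//   void Example(const VectorBase<Real> &v) {
//     typedef typename OtherReal<Real>::Real OtherType;  // float <-> double
//     Vector<OtherType> copy(v.Dim());
//     copy.CopyFromVec(v);  // CopyFromVec supports cross-type copies
//   }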
typedef int32 MatrixIndexT;
typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;
// If you want to use size_t for the index type, do as follows instead:
//typedef size_t MatrixIndexT;
//typedef ssize_t SignedMatrixIndexT;
//typedef size_t UnsignedMatrixIndexT;
}
#endif // KALDI_MATRIX_MATRIX_COMMON_H_


@ -1,56 +0,0 @@
// matrix/matrix-functions-inl.h
// Copyright 2009-2011 Microsoft Corporation
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from Henrique (Rico) Malvar's book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
#define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
namespace kaldi {
//! ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
Real *b_re, Real *b_im) {
Real tmp_re = (*b_re * a_re) - (*b_im * a_im);
*b_im = *b_re * a_im + *b_im * a_re;
*b_re = tmp_re;
}
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
const Real &b_re, const Real &b_im,
Real *c_re, Real *c_im) {
*c_re += b_re*a_re - b_im*a_im;
*c_im += b_re*a_im + b_im*a_re;
}
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) {
*a_re = std::cos(x);
*a_im = std::sin(x);
}
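// A hedged numeric sketch (not in the original source): rotating the complex
// number 1 + 0i by 90 degrees using ComplexImExp and ComplexMul.
//
//   float a_re, a_im, b_re = 1.0f, b_im = 0.0f;
//   ComplexImExp(static_cast<float>(M_PI / 2), &a_re, &a_im);  // cos + i sin
//   ComplexMul(a_re, a_im, &b_re, &b_im);  // now b is approx 0 + 1i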
} // end namespace kaldi
#endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_


@ -1,982 +0,0 @@
// matrix/matrix-functions.cc
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky
// Yanmin Qian; Saarland University; Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from Henrique (Rico) Malvar's book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#include "matrix/matrix-functions.h"
#include "matrix/sp-matrix.h"
namespace kaldi {
template<typename Real> void ComplexFt (const VectorBase<Real> &in,
VectorBase<Real> *out, bool forward) {
int exp_sign = (forward ? -1 : 1);
KALDI_ASSERT(out != NULL);
KALDI_ASSERT(in.Dim() == out->Dim());
KALDI_ASSERT(in.Dim() % 2 == 0);
int twoN = in.Dim(), N = twoN / 2;
const Real *data_in = in.Data();
Real *data_out = out->Data();
Real exp1N_re, exp1N_im; // forward -> exp(-2pi / N), backward -> exp(2pi / N).
Real fraction = exp_sign * M_2PI / static_cast<Real>(N); // forward -> -2pi/N, backward -> 2pi/N
ComplexImExp(fraction, &exp1N_re, &exp1N_im);
Real expm_re = 1.0, expm_im = 0.0; // forward -> exp(-2pi m / N).
for (int two_m = 0; two_m < twoN; two_m+=2) { // For each output component.
Real expmn_re = 1.0, expmn_im = 0.0; // forward -> exp(-2pi m n / N).
Real sum_re = 0.0, sum_im = 0.0; // complex output for index m (the sum expression)
for (int two_n = 0; two_n < twoN; two_n+=2) {
ComplexAddProduct(data_in[two_n], data_in[two_n+1],
expmn_re, expmn_im,
&sum_re, &sum_im);
ComplexMul(expm_re, expm_im, &expmn_re, &expmn_im);
}
data_out[two_m] = sum_re;
data_out[two_m + 1] = sum_im;
if (two_m % 10 == 0) { // occasionally renew "expm" from scratch to avoid
// loss of precision.
int nextm = 1 + two_m/2;
Real fraction_mult = fraction * nextm;
ComplexImExp(fraction_mult, &expm_re, &expm_im);
} else {
ComplexMul(exp1N_re, exp1N_im, &expm_re, &expm_im);
}
}
}
template
void ComplexFt (const VectorBase<float> &in,
VectorBase<float> *out, bool forward);
template
void ComplexFt (const VectorBase<double> &in,
VectorBase<double> *out, bool forward);
#define KALDI_COMPLEXFFT_BLOCKSIZE 8192
// This #define affects how we recurse in ComplexFftRecursive.
// We assume that memory-caching happens on a scale at
// least as small as this.
//! ComplexFftRecursive is a recursive function that computes the
//! complex FFT of size N. The "nffts" arguments specifies how many
//! separate FFTs to compute in parallel (we assume the data for
//! each one is consecutive in memory). The "forward" argument
//! specifies whether to do the FFT (true) or IFFT (false), although
//! note that we do not include the factor of 1/N (the user should
//! do this if required). The iterators factor_begin and factor_end
//! point to the beginning and end (i.e. one past the last element)
//! of an array of small factors of N (typically prime factors).
//! See the comments below this code for the detailed equations
//! of the recursion.
template<typename Real>
void ComplexFftRecursive (Real *data, int nffts, int N,
const int *factor_begin,
const int *factor_end, bool forward,
Vector<Real> *tmp_vec) {
if (factor_begin == factor_end) {
KALDI_ASSERT(N == 1);
return;
}
{ // an optimization: compute in smaller blocks.
// this block of code could be removed and it would still work.
MatrixIndexT size_perblock = N * 2 * sizeof(Real);
if (nffts > 1 && size_perblock*nffts > KALDI_COMPLEXFFT_BLOCKSIZE) { // can break it up...
// Break up into multiple blocks. This is an optimization. We make
// no progress on the FFT when we do this.
int block_skip = KALDI_COMPLEXFFT_BLOCKSIZE / size_perblock; // n blocks per call
if (block_skip == 0) block_skip = 1;
if (block_skip < nffts) {
int blocks_left = nffts;
while (blocks_left > 0) {
int skip_now = std::min(blocks_left, block_skip);
ComplexFftRecursive(data, skip_now, N, factor_begin, factor_end, forward, tmp_vec);
blocks_left -= skip_now;
data += skip_now * N*2;
}
return;
} // else do the actual algorithm.
} // else do the actual algorithm.
}
int P = *factor_begin;
KALDI_ASSERT(P > 1);
int Q = N / P;
if (P > 1 && Q > 1) { // Do the rearrangement. C.f. eq. (8) below. Transform
// (a) to (b).
Real *data_thisblock = data;
if (tmp_vec->Dim() < (MatrixIndexT)N) tmp_vec->Resize(N);
Real *data_tmp = tmp_vec->Data();
for (int thisfft = 0; thisfft < nffts; thisfft++, data_thisblock+=N*2) {
for (int offset = 0; offset < 2; offset++) { // 0 == real, 1 == im.
for (int p = 0; p < P; p++) {
for (int q = 0; q < Q; q++) {
int aidx = q*P + p, bidx = p*Q + q;
data_tmp[bidx] = data_thisblock[2*aidx+offset];
}
}
for (int n = 0;n < P*Q;n++) data_thisblock[2*n+offset] = data_tmp[n];
}
}
}
{ // Recurse.
ComplexFftRecursive(data, nffts*P, Q, factor_begin+1, factor_end, forward, tmp_vec);
}
int exp_sign = (forward ? -1 : 1);
Real rootN_re, rootN_im; // Nth root of unity.
ComplexImExp(static_cast<Real>(exp_sign * M_2PI / N), &rootN_re, &rootN_im);
Real rootP_re, rootP_im; // Pth root of unity.
ComplexImExp(static_cast<Real>(exp_sign * M_2PI / P), &rootP_re, &rootP_im);
{ // Do the multiplication
// could avoid a bunch of complex multiplies by moving the loop over data_thisblock
// inside.
if (tmp_vec->Dim() < (MatrixIndexT)(P*2)) tmp_vec->Resize(P*2);
Real *temp_a = tmp_vec->Data();
Real *data_thisblock = data, *data_end = data+(N*2*nffts);
for (; data_thisblock != data_end; data_thisblock += N*2) { // for each separate fft.
Real qd_re = 1.0, qd_im = 0.0; // 1^(q'/N)
for (int qd = 0; qd < Q; qd++) {
Real pdQ_qd_re = qd_re, pdQ_qd_im = qd_im; // 1^((p'Q+q') / N) == 1^((p'/P) + (q'/N))
// Initialize to q'/N, corresponding to p' == 0.
for (int pd = 0; pd < P; pd++) { // pd == p'
{ // This is the p = 0 case of the loop below [an optimization].
temp_a[pd*2] = data_thisblock[qd*2];
temp_a[pd*2 + 1] = data_thisblock[qd*2 + 1];
}
{ // This is the p = 1 case of the loop below [an optimization]
// **** MOST OF THE TIME (>60% I think) gets spent here. ***
ComplexAddProduct(pdQ_qd_re, pdQ_qd_im,
data_thisblock[(qd+Q)*2], data_thisblock[(qd+Q)*2 + 1],
&(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
}
if (P > 2) {
Real p_pdQ_qd_re = pdQ_qd_re, p_pdQ_qd_im = pdQ_qd_im; // 1^(p(p'Q+q')/N)
for (int p = 2; p < P; p++) {
ComplexMul(pdQ_qd_re, pdQ_qd_im, &p_pdQ_qd_re, &p_pdQ_qd_im); // p_pdQ_qd *= pdQ_qd.
int data_idx = p*Q + qd;
ComplexAddProduct(p_pdQ_qd_re, p_pdQ_qd_im,
data_thisblock[data_idx*2], data_thisblock[data_idx*2 + 1],
&(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
}
}
if (pd != P-1)
ComplexMul(rootP_re, rootP_im, &pdQ_qd_re, &pdQ_qd_im); // pdQ_qd *= (rootP == 1^{1/P})
// (using 1/P == Q/N)
}
for (int pd = 0; pd < P; pd++) {
data_thisblock[(pd*Q + qd)*2] = temp_a[pd*2];
data_thisblock[(pd*Q + qd)*2 + 1] = temp_a[pd*2 + 1];
}
ComplexMul(rootN_re, rootN_im, &qd_re, &qd_im); // qd *= rootN.
}
}
}
}
/* Equations for ComplexFftRecursive.
We consider here one of the "nffts" separate ffts; it's just a question of
doing them all in parallel. We also write all equations in terms of
complex math (the conversion to real arithmetic is not hard, and anyway
takes place inside function calls).
Let the input (i.e. "data" at start) be a_n, n = 0..N-1, and
the output (Fourier transform) be d_k, k = 0..N-1. We use these letters because
there will be two intermediate variables b and c.
We want to compute:
d_k = \sum_n a_n 1^(kn/N) (1)
where we use 1^x as shorthand for exp(-2pi x) for the forward algorithm
and exp(2pi x) for the backward one.
We factorize N = P Q (P small, Q usually large).
With p = 0..P-1 and q = 0..Q-1, and also p'=0..P-1 and q'=0..Q-1, we let:
k == p'Q + q' (2)
n == qP + p (3)
That is, we let p, q, p', q' range over these indices and observe that this way we
can cover all n, k. Expanding (1) using (2) and (3), we can write:
d_k = \sum_{p, q} a_n 1^((p'Q+q')(qP+p)/N)
= \sum_{p, q} a_n 1^(p'pQ/N) 1^(q'qP/N) 1^(q'p/N) (4)
using 1^(PQ/N) = 1 to get rid of the terms with PQ in them. Rearranging (4),
d_k = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_n (5)
The point here is to separate the index q. Now we can expand out the remaining
instances of k and n using (2) and (3):
d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_(qP+p) (6)
The expression \sum_q varies with the indices p and q'. Let us define
C_{p, q'} = \sum_q 1^(q'qP/N) a_(qP+p) (7)
Here, C_{p, q'}, viewed as a sequence in q', is just the DFT of the points
a_(qP+p) for q = 0..Q-1. These points are not consecutive in memory though,
they jump by P each time. Let us define b as a rearranged version of a,
so that
b_(pQ+q) = a_(qP+p) (8)
How to do this rearrangement? For simplicity we do it with a temporary array
rather than in place (see the pseudocode below).
We can rearrange (7) to be written in terms of the b's, using (8), so that
C_{p, q'} = \sum_q 1^(q'q (P/N)) b_(pQ+q) (9)
Here, the sequence of C_{p, q'} over q'=0..Q-1, is just the DFT of the sequence
of b_(pQ) .. b_((p+1)Q - 1). Let's arrange the C_{p, q'} in a single array in
memory in the same way as the b's, i.e. we define
c_(pQ+q') == C_{p, q'}. (10)
Note that we could have written (10) with q in place of q', as there is only
one index of type q present, but q' is just a more natural variable name to use
since we use q' elsewhere to subscript c and C.
Rewriting (9), we have:
c_(pQ+q') = \sum_q 1^(q'q (P/N)) b_(pQ+q) (11)
which is the DFT computed by the recursive call to this function [after computing
the b's by rearranging the a's]. From the c's we want to compute the d's.
Taking (6), substituting in the sum (7), and using (10) to write it as an array,
we have:
d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q') (12)
This sum is independent for different values of q'. Note that d overwrites c
in memory. We compute this in a direct way, using a little array of size P to
store the computed d values for one value of q' (we reuse the array for each value
of q').
So the overall picture is this:
We get a call to compute DFT on size N.
- If N == 1 we return (nothing to do).
- We factor N = P Q (typically, P is small).
- Using (8), we rearrange the data in memory so that we have b not a in memory
(this is the block "do the rearrangement").
The pseudocode for this is as follows. For simplicity we use a temporary array.
for p = 0..P-1
for q = 0..Q-1
bidx = pQ + q
aidx = qP + p
tmp[bidx] = data[aidx].
end
end
data <-- tmp
The reason this accomplishes (8) is that we want pQ+q and qP+p to be swapped
over for each p, q. (An in-place variant would guard each swap with a test
such as "if m > n" to ensure the swapping happens only once; otherwise it
would happen twice, since pQ+q and qP+p both range over the entire set of
numbers 0..N-1.)
- We do the DFT on the smaller block size to compute c from b (this is eq. (11)).
Note that this is actually multiple DFTs, one for each value of p, but this
goes to the "nffts" argument of the function call, which we have ignored up to now.
- We compute eq. (12) via a loop, as follows
allocate temporary array e of size P.
For q' = 0..Q-1:
for p' = 0..P-1:
set sum to zero [this will go in e[p']]
for p = 0..P-1:
sum += 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q')
end
e[p'] = sum
end
for p' = 0..P-1:
d_(p'Q+q') = e[p']
end
end
delete temporary array e
*/
// This is the outer-layer calling code for ComplexFftRecursive.
// It factorizes the dimension and then calls the FFT routine.
template<typename Real> void ComplexFft(VectorBase<Real> *v, bool forward, Vector<Real> *tmp_in) {
KALDI_ASSERT(v != NULL);
if (v->Dim()<=1) return;
KALDI_ASSERT(v->Dim() % 2 == 0); // complex input.
int N = v->Dim() / 2;
std::vector<int> factors;
Factorize(N, &factors);
int *factor_beg = NULL;
if (factors.size() > 0)
factor_beg = &(factors[0]);
Vector<Real> tmp; // allocated in ComplexFftRecursive.
ComplexFftRecursive(v->Data(), 1, N, factor_beg, factor_beg+factors.size(), forward, (tmp_in?tmp_in:&tmp));
}
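// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers: a forward complex FFT of 4 complex points, stored as
// 8 interleaved reals (re, im, re, im, ...).
//
//   Vector<float> v(8);
//   v.SetRandn();
//   ComplexFft(&v, true);   // forward transform, in place
//   ComplexFft(&v, false);  // inverse, but scaled by N = 4 ...
//   v.Scale(1.0 / 4);       // ... so divide by N to recover the input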
//! Inefficient version of Fourier transform, for testing purposes.
template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward) {
KALDI_ASSERT(v != NULL);
MatrixIndexT N = v->Dim();
KALDI_ASSERT(N%2 == 0);
if (N == 0) return;
Vector<Real> vtmp(N*2); // store as complex.
if (forward) {
for (MatrixIndexT i = 0; i < N; i++) vtmp(i*2) = (*v)(i);
ComplexFft(&vtmp, forward); // this is already tested so we can use this.
v->CopyFromVec( vtmp.Range(0, N) );
(*v)(1) = vtmp(N); // Copy the N/2'th fourier component, which is real,
// to the imaginary part of the 1st complex output.
} else {
// reverse the transformation above to get the complex spectrum.
vtmp(0) = (*v)(0); // copy F_0 which is real
vtmp(N) = (*v)(1); // copy F_{N/2} which is real
for (MatrixIndexT i = 1; i < N/2; i++) {
// Copy i'th fourier component to the i'th position.
vtmp(2*i) = (*v)(2*i);
vtmp(2*i+1) = (*v)(2*i+1);
// Copy i'th to N-i'th, conjugated.
vtmp(2*(N-i)) = (*v)(2*i);
vtmp(2*(N-i)+1) = -(*v)(2*i+1);
}
ComplexFft(&vtmp, forward); // actually backward since forward == false
// Copy back real part. Complex part should be zero.
for (MatrixIndexT i = 0; i < N; i++)
(*v)(i) = vtmp(i*2);
}
}
template void RealFftInefficient (VectorBase<float> *v, bool forward);
template void RealFftInefficient (VectorBase<double> *v, bool forward);
template
void ComplexFft(VectorBase<float> *v, bool forward, Vector<float> *tmp_in);
template
void ComplexFft(VectorBase<double> *v, bool forward, Vector<double> *tmp_in);
// See the long comment below for the math behind this.
template<typename Real> void RealFft (VectorBase<Real> *v, bool forward) {
KALDI_ASSERT(v != NULL);
MatrixIndexT N = v->Dim(), N2 = N/2;
KALDI_ASSERT(N%2 == 0);
if (N == 0) return;
if (forward) ComplexFft(v, true);
Real *data = v->Data();
Real rootN_re, rootN_im; // exp(-2pi/N), forward; exp(2pi/N), backward
int forward_sign = forward ? -1 : 1;
ComplexImExp(static_cast<Real>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
Real kN_re = -forward_sign, kN_im = 0.0; // exp(-2pik/N), forward; exp(2pik/N), backward
// kN starts out as 1.0 for forward algorithm but -1.0 for backward.
for (MatrixIndexT k = 1; 2*k <= N2; k++) {
ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);
Real Ck_re, Ck_im, Dk_re, Dk_im;
// C_k = 1/2 (B_k + B_{N/2 - k}^*) :
Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
// re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
// im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
// A_k = C_k + 1^(k/N) D_k:
data[2*k] = Ck_re; // A_k <-- C_k
data[2*k+1] = Ck_im;
// now A_k += D_k 1^(k/N)
ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));
MatrixIndexT kdash = N2 - k;
if (kdash != k) {
// Next we handle the index k' = N/2 - k. This is necessary
// to do now, to avoid invalidating data that we will later need.
// The quantities C_{k'} and D_{k'} are just the conjugates of C_k
// and D_k, so the equations are simple modifications of the above,
// replacing Ck_im and Dk_im with their negatives.
data[2*kdash] = Ck_re; // A_k' <-- C_k'
data[2*kdash+1] = -Ck_im;
// now A_k' += D_k' 1^(k'/N)
// We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
// so it's the same as 1^(k/N) but with the real part negated.
ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
}
}
{ // Now handle k = 0.
// In simple terms: after the complex fft, data[0] becomes the sum of real
// parts input[0], input[2]... and data[1] becomes the sum of imaginary
// pats input[1], input[3]...
// "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
// and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
Real zeroth = data[0] + data[1],
n2th = data[0] - data[1];
data[0] = zeroth;
data[1] = n2th;
if (!forward) {
data[0] /= 2;
data[1] /= 2;
}
}
if (!forward) {
ComplexFft(v, false);
v->Scale(2.0); // This is so we get a factor of N increase, rather than N/2 which we would
// otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
// It's for consistency with our normal FFT conventions.
}
}
template void RealFft (VectorBase<float> *v, bool forward);
template void RealFft (VectorBase<double> *v, bool forward);
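// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers. After a forward RealFft of N real points, data[0] holds the
// real coefficient A_0, data[1] holds the (real) coefficient A_{N/2}, and
// the remaining pairs hold A_1 .. A_{N/2-1} as (re, im).
//
//   Vector<float> v(8);
//   v.SetRandn();
//   RealFft(&v, true);   // forward, in place
//   RealFft(&v, false);  // inverse; scaled by N relative to the input
//   v.Scale(1.0 / 8);    // recover the original signal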
/* Notes for real FFTs.
We are using the same convention as above, 1^x to mean exp(-2\pi x) for the forward transform.
Actually, in a slight abuse of notation, we use this meaning for 1^x in both the forward and
backward cases because it's more convenient in this section.
Suppose we have real data a[0...N-1], with N even, and want to compute its Fourier transform.
We can make do with the first N/2 points of the transform, since the remaining ones are complex
conjugates of the first. We want to compute:
for k = 0...N/2-1,
A_k = \sum_{n = 0}^{N-1} a_n 1^(kn/N) (1)
We treat a[0..N-1] as a complex sequence of length N/2, i.e. a sequence b[0..N/2 - 1].
Viewed as sequences of length N/2, we have:
b = c + i d,
where c = a_0, a_2 ... and d = a_1, a_3 ...
We can recover the length-N/2 Fourier transforms of c and d by doing FT on b and
then doing the equations below. Derivation is marked by (*) in a comment below (search
for it). Let B, C, D be the FTs.
We have
C_k = 1/2 (B_k + B_{N/2 - k}^*) (z0)
D_k =-1/2i (B_k - B_{N/2 - k}^*) (z1)
so: re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})) (z2)
im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k})) (z3)
To recover the FT A from C and D, we write, rearranging (1):
A_k = \sum_{n = 0, 2, ..., N-2} a_n 1^(kn/N)
+\sum_{n = 1, 3, ..., N-1} a_n 1^(kn/N)
= \sum_{n = 0, 1, ..., N/2-1} a_{2n} 1^(2kn/N) + a_{2n+1} 1^(2kn/N) 1^(k/N)
= \sum_{n = 0, 1, ..., N/2-1} c_n 1^(2kn/N) + d_n 1^(2kn/N) 1^(k/N)
A_k = C_k + 1^(k/N) D_k (a0)
This equation is valid for k = 0...N/2-1, which is the range of the sequences B_k and
C_k. We don't use it for k = 0, which is a special case considered below. For
1 < k < N/2, it's convenient to consider the pair k, k', where k' = N/2 - k.
Remember that C_k' = C_k^* and D_k' = D_k^* [where * is conjugation]. Also,
1^(N/2 / N) = -1. So we have:
A_k' = C_k^* - 1^(k/N) D_k^* (a0b)
We do (a0) and (a0b) together.
By symmetry this gives us the Fourier components for N/2+1, ... N, if we want
them. However, it doesn't give us the value for exactly k = N/2. For k = 0 and k = N/2, it
is easiest to argue directly about the meaning of the A_k, B_k and C_k in terms of
sums of points.
A_0 and A_{N/2} are both real, with A_0=\sum_n a_n, and A_{N/2} an alternating sum
A_{N/2} = a_0 - a_1 + a_2 ...
It's easy to show that
A_0 = C_0 + D_0 (a1)
A_{N/2} = C_0 - D_0. (a2)
Since C_0 and D_0 are both real, C_0 is the real coefficient of B_0 and D_0 is the
imaginary coefficient.
*REVERSING THE PROCESS*
Next we want to reverse this process. We just need to work out C_k and D_k from the
sequence A_k. Then we do the inverse complex fft and we get back where we started.
For 0 and N/2, working from (a1) and (a2) above, we can see that:
C_0 = 1/2 (A_0 + A_{N/2}) (y0)
D_0 = 1/2 (A_0 - A_{N/2}) (y1)
and we use
B_0 = C_0 + i D_0
to get the 0th complex coefficient of B. This is exactly the same as the forward process
except with an extra factor of 1/2.
Consider equations (a0) and (a0b). We want to work out C_k and D_k from A_k and A_k'. Remember
k' = N/2 - k.
Write down
A_k = C_k + 1^(k/N) D_k (copying a0)
A_k'^* = C_k - 1^(k/N) D_k (conjugate of a0b)
So
C_k = 0.5 (A_k + A_k'^*) (p0)
D_k = 1^(-k/N) . 0.5 (A_k - A_k'^*) (p1)
Next, we want to compute B_k and B_k' from C_k and D_k. C.f. (z0)..(z3), and remember
that k' = N/2-k. We can see
that
B_k = C_k + i D_k (p2)
B_k' = C_k - i D_k (p3)
We would like to make the equations (p0) ... (p3) look like the forward equations (z0), (z1),
(a0) and (a0b) so we can reuse the code. Define E_k = -i 1^(k/N) D_k. Then write down (p0)..(p3).
We have
C_k = 0.5 (A_k + A_k'^*) (p0')
E_k = -0.5 i (A_k - A_k'^*) (p1')
B_k = C_k - 1^(-k/N) E_k (p2')
B_k' = C_k + 1^(-k/N) E_k (p3')
So these are exactly the same as (z0), (z1), (a0), (a0b) except replacing 1^(k/N) with
-1^(-k/N) . Remember that we defined 1^x above to be exp(-2pi x), so the signs here
might be opposite to what you see in the code.
MODIFICATION: we need to take care of a factor of two. The complex FFT we implemented
does not divide by N in the reverse case. So upon inversion we get larger by N/2.
However, this is not consistent with normal FFT conventions where you get a factor of N.
For this reason we multiply by two after the process described above.
*/
/*
(*) [this token is referred to in a comment above].
Notes for separating 2 real transforms from one complex one. Note that the
letters here (A, B, C and N) are all distinct from the same letters used in the
place where this comment is used.
Suppose we
have two sequences a_n and b_n, n = 0..N-1. We combine them into a complex
number,
c_n = a_n + i b_n.
Then we take the fourier transform to get
C_k = \sum_{n = 0}^{N-1} c_n 1^(kn/N) .
Then we use symmetry. Define A_k and B_k as the DFTs of a and b.
We use A_k = A_{N-k}^*, and B_k = B_{N-k}^*, since a and b are real. Using
C_k = A_k + i B_k,
C_{N-k} = A_k^* + i B_k^*
= A_k^* - (i B_k)^*
So:
A_k = 1/2 (C_k + C_{N-k}^*)
i B_k = 1/2 (C_k - C_{N-k}^*)
-> B_k =-1/2i (C_k - C_{N-k}^*)
-> re(B_k) = 1/2 (im(C_k) + im(C_{N-k}))
im(B_k) =-1/2 (re(C_k) - re(C_{N-k}))
*/
template<typename Real> void ComputeDctMatrix(Matrix<Real> *M) {
//KALDI_ASSERT(M->NumRows() == M->NumCols());
MatrixIndexT K = M->NumRows();
MatrixIndexT N = M->NumCols();
KALDI_ASSERT(K > 0);
KALDI_ASSERT(N > 0);
Real normalizer = std::sqrt(1.0 / static_cast<Real>(N)); // normalizer for
// X_0.
for (MatrixIndexT j = 0; j < N; j++) (*M)(0, j) = normalizer;
normalizer = std::sqrt(2.0 / static_cast<Real>(N)); // normalizer for other
// elements.
for (MatrixIndexT k = 1; k < K; k++)
for (MatrixIndexT n = 0; n < N; n++)
(*M)(k, n) = normalizer
* std::cos( static_cast<double>(M_PI)/N * (n + 0.5) * k );
}
template void ComputeDctMatrix(Matrix<float> *M);
template void ComputeDctMatrix(Matrix<double> *M);
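// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers: build an orthonormal DCT-II matrix and apply it to a
// feature vector, as done when computing cepstra from filterbank energies.
//
//   Matrix<float> dct(13, 23);  // keep 13 coefficients of a 23-dim input
//   ComputeDctMatrix(&dct);
//   Vector<float> fbank(23), ceps(13);
//   fbank.SetRandn();
//   ceps.AddMatVec(1.0, dct, kNoTrans, fbank, 0.0);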
template<typename Real>
void MatrixExponential<Real>::Clear() {
N_ = 0;
P_.Resize(0, 0);
B_.clear();
powers_.clear();
}
template<typename Real>
void MatrixExponential<Real>::Compute(const MatrixBase<Real> &M,
MatrixBase<Real> *X) {
// does *X = exp(M)
KALDI_ASSERT(M.NumRows() == M.NumCols());
Clear();
N_ = ComputeN(M);
MatrixIndexT dim = M.NumRows();
P_.Resize(dim, dim);
P_.CopyFromMat(M);
P_.Scale(std::pow(static_cast<Real>(0.5),
static_cast<Real>(N_)));
// would need to keep this code in sync with ComputeN().
B_.resize(N_+1);
B_[0].Resize(dim, dim);
ComputeTaylor(P_, &(B_[0])); // set B_[0] = exp(P_) - I (series starts at n = 1)
for (MatrixIndexT i = 1; i <= N_; i++) {
// implement the recursion B_[k] = 2 B_[k-1] + B_[k-1]^2.
B_[i].Resize(dim, dim); // zeros it.
B_[i].AddMat(2.0, B_[i-1], kNoTrans);
B_[i].AddMatMat(1.0, B_[i-1], kNoTrans, B_[i-1], kNoTrans, 1.0);
}
KALDI_ASSERT(X->NumRows() == dim && X->NumCols() == dim);
(*X).CopyFromMat(B_[N_]); // last one plus the unit matrix is the answer.
// add in the unit matrix.
for (MatrixIndexT i = 0; i < dim; i++)
(*X)(i, i) += 1.0;
}
template<typename Real>
void MatrixExponential<Real>::Compute(const SpMatrix<Real> &M,
SpMatrix<Real> *X) {
Matrix<Real> Mfull(M), Xfull(M.NumRows(), M.NumCols());
Compute(Mfull, &Xfull);
X->CopyFromMat(Xfull);
}
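// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers: compute X = exp(M) by the scaling-and-squaring Taylor
// method implemented above.
//
//   Matrix<double> m(3, 3), x(3, 3);
//   m.SetRandn();
//   MatrixExponential<double> mexp;
//   mexp.Compute(m, &x);  // x = exp(m)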
template<typename Real>
MatrixIndexT MatrixExponential<Real>::ComputeN(const MatrixBase<Real> &M) {
// Computes the power of two we want to use. Aim to get
// AScaled.FrobeniusNorm() < 1/10.
Real norm = M.FrobeniusNorm();
Real max_norm = 0.1;
if (norm > 1000) {
KALDI_WARN << "Trying to compute exponent of very high-norm matrix: norm = "
<< norm;
}
MatrixIndexT N = 0;
while (norm > max_norm) { norm *= 0.5; N++; }
return N;
}
template<typename Real>
void MatrixExponential<Real>::ComputeTaylor(const MatrixBase<Real> &P, MatrixBase<Real> *B0) {
KALDI_ASSERT(P.FrobeniusNorm() < 1.001); // should actually be << 1
// for this to work fast enough.
KALDI_ASSERT(P.NumRows() == P.NumCols());
MatrixIndexT dim = P.NumRows();
KALDI_ASSERT(B0->NumRows() == dim && B0->NumCols() == dim);
B0->SetZero();
MatrixIndexT n = 1, n_factorial = 1, max_iter = 10000;
Matrix<Real> Pn(P), // Pn = P^n
B0cur(dim, dim),
tmp(dim, dim); // use B0cur to test whether B0 changed.
std::vector<Matrix<Real>* > powers_tmp; // list of stored powers of P, starting
// from 2 and up to 1 before the last one we used.
while (n < max_iter) { // have an arbitrary very large limit on #iters
B0cur.AddMat(1.0 / n_factorial, Pn);
if (B0cur.Equal(*B0)) // was no change [already very small]
break;
B0->CopyFromMat(B0cur); // Keep B0 in sync with B0cur.
tmp.AddMatMat(1.0, P, kNoTrans, Pn, kNoTrans, 0.0); // tmp = P * P^n
n++;
n_factorial *= n;
Pn.CopyFromMat(tmp); // copy back to P^n
powers_tmp.push_back(new Matrix<Real>(tmp));
}
if (n == max_iter)
KALDI_WARN << "Reached maximum iteration computing Taylor expansion of matrix [serious problem]";
powers_.resize(powers_tmp.size());
for (MatrixIndexT i = 0;
i < static_cast<MatrixIndexT>(powers_tmp.size()); i++) {
powers_[i].Swap(powers_tmp[i]);
delete powers_tmp[i];
}
}
template<typename Real>
void MatrixExponential<Real>::Backprop(const MatrixBase<Real> &hX,
MatrixBase<Real> *hM) const {
MatrixIndexT dim = P_.NumRows();
KALDI_ASSERT(hX.NumRows() == dim && hX.NumCols() == dim
&& hM->NumRows() == dim && hM->NumCols() == dim);
Matrix<Real> dB(hX);
// dB represents the gradient df/dB_[i] for the current
// value of i, which decreases from N_ to zero (currently it's N_)
for (MatrixIndexT i = N_-1;
i != (static_cast<MatrixIndexT>(0)-static_cast<MatrixIndexT>(1)); // i >= 0
i--) {
// Propagate back from df/dB_[i+1] to df/dB_[i].
// Using B_[i+1] = 2 B_[i] + B_[i]*B_[i],
// df/dB_[i] = 2*(df/dB_[i+1]) + B_[i]^T (df/dB_[i+1])
// + (df/dB_[i+1])B_[i]^T
// note, here we use the perhaps slightly wrong convention that
// df/dB_[i] is not transposed, i.e. its i, j'th element is the derivative
// of f w.r.t. the i, j'th element of B_[i].
Matrix<Real> prev_dB(dB);
prev_dB.Scale(2.0); // the term 2*(df/dB_[i+1])
// add in the term B_[i]^T (df/dB_[i+1])
prev_dB.AddMatMat(1.0, B_[i], kTrans, dB, kNoTrans, 1.0);
// add in the term (df/dB_[i+1]) B_[i]^T
prev_dB.AddMatMat(1.0, dB, kNoTrans, B_[i], kTrans, 1.0);
dB.CopyFromMat(prev_dB);
}
// currently dB is the gradient df/dB_[0] (where B_[0] = exp(P_) - I).
// We backprop this through the Taylor series and get df/dP_.
BackpropTaylor(dB, hM); // at this point, hM is temporarily used to store
// df/dP_.
hM->Scale(std::pow(static_cast<Real>(0.5),
static_cast<Real>(N_))); // Since P_ = M * std::pow(0.5, N_).
}
template<typename Real>
void MatrixExponential<Real>::Backprop(const SpMatrix<Real> &hX,
SpMatrix<Real> *hM) const {
Matrix<Real> hXfull(hX), hMfull(hX.NumRows(), hX.NumCols());
Backprop(hXfull, &hMfull);
hM->CopyFromMat(hMfull);
}
template<typename Real>
void MatrixExponential<Real>::BackpropTaylor(const MatrixBase<Real> &hB0,
MatrixBase<Real> *hP) const {
// Backprop through the Taylor-series computation.
// the original computation was:
// X = \sum_{i = 1}^n (1/i!) P^i
// Also you can see that X is actually the exponential minus I, since we start
// the series from 1; this doesn't affect the derivatives.
// The variable \hat{B}_0 (hB0) represents df/dX, where f is a scalar function.
// Note that there is no transpose in our notation for derivatives: hB0(i, j) is
// df/dB0(i, j).
// Imagine that f is \tr(hB0^T B0) (since this varies linearly with B0 in the same
// way that the real f does). We want d/dP \tr(hB0^T B0); call this hP (for \hat{P}).
// hP = d/dP \tr( \sum_{i = 1}^n (1/i!) hB0^T P^i )
// Taking each individual term P in this expression and treating the others as constants,
// and noting that whenever we have something like tr(A B), then B^T is the derivative
// of this expression w.r.t. A (in our notation),
// hP = hB0 + (1/2!) (hB0 P^T + P^T hB0) + (1/3!) (hB0 P^T P^T + P^T hB0 P^T + P^T P^T hB0)
// + (1/4!) .... (1)
// We can compute this with the following recursion by which we get each of the terms
// in this series in turn:
// hP_1 <-- hB0
// hP <-- hP_1
// for n = 2 .. infinity: # we only carry this on for as many terms as we used
// # in the original expansion
// hP_n <-- (1/n) hP_{n-1} P^T + (1/n!) (P^T)^(n-1) hP_{n-1}
// hP <-- hP + hP_n
const Matrix<Real> &P(P_);
MatrixIndexT dim = P.NumRows();
KALDI_ASSERT(P.NumCols() == dim && hB0.NumRows() == dim && hB0.NumCols() == dim
&& hP->NumRows() == dim && hP->NumCols() == dim);
hP->SetZero();
Matrix<Real> hPn1(hB0),
hPn(dim, dim);
hP->AddMat(1.0, hPn1); // first term in (1): df/dP += hB0
MatrixIndexT n = 2, nfact = 2;
// Now do n = 2 in comment above (this is special case,
// since we did not store the 1st power in powers_).
hPn.AddMatMat(1.0/n, hPn1, kNoTrans, P, kTrans, 0.0); // hP_n <-- (1/n) hP_{n-1} P^T
hPn.AddMatMat(1.0/nfact, P, kTrans, hB0, kNoTrans, 1.0); // hP_n += (1/n!) P^T^(n-1) hB0
hP->AddMat(1.0, hPn); // add in second term in (1)
hPn.Swap(&hPn1);
for (MatrixIndexT i = 0;
i < static_cast<MatrixIndexT>(powers_.size()); i++) {
n++;
nfact *= n;
// i corresponds to (n-1)-2, and powers_[i] contains the n-1'th power of P.
// next line: hP_n <-- (1/n) hP_{n-1} P^T
hPn.AddMatMat(1.0/n, hPn1, kNoTrans, P, kTrans, 0.0);
// next line: hP_n += (1/n!) P^T^(n-1) hB0
hPn.AddMatMat(1.0/nfact, powers_[i], kTrans, hB0, kNoTrans, 1.0);
hP->AddMat(1.0, hPn); // add in n'th term in (1)
hPn.Swap(&hPn1);
}
}
template class MatrixExponential<float>;
template class MatrixExponential<double>;
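// Editorial sketch (not part of the original file): a finite-difference check
// of Backprop() for f(M) = tr(hX^T exp(M)). The function name is illustrative,
// but only calls defined in this library are used.
void CheckMatrixExponentialBackprop() {
MatrixIndexT dim = 3;
Matrix<double> M(dim, dim), X(dim, dim), hX(dim, dim), hM(dim, dim);
M.SetRandn();
M.Scale(0.1); // keep the norm small.
hX.SetRandn(); // an arbitrary output gradient df/dX.
MatrixExponential<double> mexp;
mexp.Compute(M, &X); // X = exp(M).
mexp.Backprop(hX, &hM); // hM = df/dM.
double delta = 1.0e-04;
Matrix<double> M2(M), X2(dim, dim);
M2(0, 1) += delta; // perturb one element of M.
mexp.Compute(M2, &X2);
double f2 = TraceMatMat(hX, X2, kTrans), // tr(hX X2^T) = tr(hX^T X2).
f1 = TraceMatMat(hX, X, kTrans);
// The finite difference (f2 - f1) / delta should be close to hM(0, 1).
KALDI_ASSERT(std::abs((f2 - f1) / delta - hM(0, 1)) < 1.0e-02);
}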
template<typename Real>
void ComputePca(const MatrixBase<Real> &X,
MatrixBase<Real> *U,
MatrixBase<Real> *A,
bool print_eigs,
bool exact) {
// Note that some of these matrices may be transposed w.r.t. the
// way it's most natural to describe them in math... it's the rows
// of X and U that correspond to the (data-points, basis elements).
MatrixIndexT N = X.NumRows(), D = X.NumCols();
// N = #points, D = feature dim.
KALDI_ASSERT(U != NULL && U->NumCols() == D);
MatrixIndexT G = U->NumRows(); // # of retained basis elements.
KALDI_ASSERT(A == NULL || (A->NumRows() == N && A->NumCols() == G));
KALDI_ASSERT(G <= N && G <= D);
if (D < N) { // Do conventional PCA.
SpMatrix<Real> Msp(D); // Matrix of outer products.
Msp.AddMat2(1.0, X, kTrans, 0.0); // M <-- X^T X
Matrix<Real> Utmp;
Vector<Real> l;
if (exact) {
Utmp.Resize(D, D);
l.Resize(D);
//Matrix<Real> M(Msp);
//M.DestructiveSvd(&l, &Utmp, NULL);
Msp.Eig(&l, &Utmp);
} else {
Utmp.Resize(D, G);
l.Resize(G);
Msp.TopEigs(&l, &Utmp);
}
SortSvd(&l, &Utmp);
for (MatrixIndexT g = 0; g < G; g++)
U->Row(g).CopyColFromMat(Utmp, g);
if (print_eigs)
KALDI_LOG << (exact ? "" : "Retained ")
<< "PCA eigenvalues are " << l;
if (A != NULL)
A->AddMatMat(1.0, X, kNoTrans, *U, kTrans, 0.0);
} else { // Do inner-product PCA.
SpMatrix<Real> Nsp(N); // Matrix of inner products.
Nsp.AddMat2(1.0, X, kNoTrans, 0.0); // M <-- X X^T
Matrix<Real> Vtmp;
Vector<Real> l;
if (exact) {
Vtmp.Resize(N, N);
l.Resize(N);
Matrix<Real> Nmat(Nsp);
Nmat.DestructiveSvd(&l, &Vtmp, NULL);
} else {
Vtmp.Resize(N, G);
l.Resize(G);
Nsp.TopEigs(&l, &Vtmp);
}
MatrixIndexT num_zeroed = 0;
for (MatrixIndexT g = 0; g < G; g++) {
if (l(g) < 0.0) {
KALDI_WARN << "In PCA, setting element " << l(g) << " to zero.";
l(g) = 0.0;
num_zeroed++;
}
}
SortSvd(&l, &Vtmp); // Make sure zero elements are last; this
// is necessary for OrthogonalizeRows() to work properly later.
Vtmp.Transpose(); // So the eigenvectors are the rows.
for (MatrixIndexT g = 0; g < G; g++) {
Real sqrtlg = sqrt(l(g));
if (l(g) != 0.0) {
U->Row(g).AddMatVec(1.0 / sqrtlg, X, kTrans, Vtmp.Row(g), 0.0);
} else {
U->Row(g).SetZero();
(*U)(g, g) = 1.0; // arbitrary direction. Will later orthogonalize.
}
if (A != NULL)
for (MatrixIndexT n = 0; n < N; n++)
(*A)(n, g) = sqrtlg * Vtmp(g, n);
}
// Now orthogonalize. This is mainly useful in
// case there were zero eigenvalues, but we do it
// for all of them.
U->OrthogonalizeRows();
if (print_eigs)
KALDI_LOG << "(inner-product) PCA eigenvalues are " << l;
}
}
template
void ComputePca(const MatrixBase<float> &X,
MatrixBase<float> *U,
MatrixBase<float> *A,
bool print_eigs,
bool exact);
template
void ComputePca(const MatrixBase<double> &X,
MatrixBase<double> *U,
MatrixBase<double> *A,
bool print_eigs,
bool exact);
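// Editorial usage sketch for ComputePca() (the function name below is
// illustrative): keep the top 5 directions of 100 20-dimensional points via
// the inexact (Lanczos) path, and get the per-point coefficients.
void ExampleComputePca() {
Matrix<double> X(100, 20);
X.SetRandn();
Matrix<double> U(5, 20), // one retained basis element per row.
A(100, 5); // coefficients of each point in that basis.
ComputePca(X, &U, &A, true, false); // print_eigs = true, exact = false.
// Rows of U are orthonormal, and A * U approximately reconstructs X.
}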
// Added by Dan, Feb. 13 2012.
// This function does: *plus += max(0, alpha * a b^T),
// *minus += max(0, -(alpha * a b^T)), elementwise.
template<typename Real>
void AddOuterProductPlusMinus(Real alpha,
const VectorBase<Real> &a,
const VectorBase<Real> &b,
MatrixBase<Real> *plus,
MatrixBase<Real> *minus) {
KALDI_ASSERT(a.Dim() == plus->NumRows() && b.Dim() == plus->NumCols()
&& a.Dim() == minus->NumRows() && b.Dim() == minus->NumCols());
int32 nrows = a.Dim(), ncols = b.Dim(), pskip = plus->Stride() - ncols,
mskip = minus->Stride() - ncols;
const Real *adata = a.Data(), *bdata = b.Data();
Real *plusdata = plus->Data(), *minusdata = minus->Data();
for (int32 i = 0; i < nrows; i++) {
const Real *btmp = bdata;
Real multiple = alpha * *adata;
if (multiple > 0.0) {
for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
if (*btmp > 0.0) *plusdata += multiple * *btmp;
else *minusdata -= multiple * *btmp;
}
} else {
for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
if (*btmp < 0.0) *plusdata += multiple * *btmp;
else *minusdata -= multiple * *btmp;
}
}
plusdata += pskip;
minusdata += mskip;
adata++;
}
}
// Instantiate template
template
void AddOuterProductPlusMinus<float>(float alpha,
const VectorBase<float> &a,
const VectorBase<float> &b,
MatrixBase<float> *plus,
MatrixBase<float> *minus);
template
void AddOuterProductPlusMinus<double>(double alpha,
const VectorBase<double> &a,
const VectorBase<double> &b,
MatrixBase<double> *plus,
MatrixBase<double> *minus);
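// Editorial sketch checking the contract above (function name illustrative):
// after the call, plus and minus are elementwise non-negative and
// plus - minus == alpha * a b^T.
void ExampleAddOuterProductPlusMinus() {
Vector<double> a(4), b(5);
a.SetRandn();
b.SetRandn();
Matrix<double> plus(4, 5), minus(4, 5), check(4, 5);
AddOuterProductPlusMinus(2.0, a, b, &plus, &minus);
check.AddVecVec(2.0, a, b); // check = 2.0 * a b^T.
check.AddMat(-1.0, plus); // subtract the positive part...
check.AddMat(1.0, minus); // ...add back the negative part: now ~0.
KALDI_ASSERT(check.FrobeniusNorm() < 1.0e-10);
}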
} // end namespace kaldi


@@ -1,230 +0,0 @@
// matrix/matrix-functions.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky;
// Yanmin Qian; 1991 Henrique (Rico) Malvar (*)
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from his book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_H_
#define KALDI_MATRIX_MATRIX_FUNCTIONS_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_funcs_misc
/// @{
/** The function ComplexFft does an Fft on the vector argument v.
v is a vector of even dimension, interpreted for both input
and output as a vector of complex numbers i.e.
\f[ v = ( re_0, im_0, re_1, im_1, ... ) \f]
The dimension of v must be a power of 2.
If "forward == true" this routine does the Discrete Fourier Transform
(DFT), i.e.:
\f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] \exp( -2 \pi i m n / N ) \f]
If "backward" it does the Inverse Discrete Fourier Transform (IDFT)
*WITHOUT THE FACTOR 1/N*,
i.e.:
\f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] \exp( 2 \pi i m n / N ) \f]
[note the sign difference on the 2 pi for the backward one.]
Note that this is the definition of the FT given in most texts, but
it differs from the Numerical Recipes version in which the forward
and backward algorithms are flipped.
Note that you would have to multiply by 1/N after the IDFT to get
back to where you started from. We don't do this because
in some contexts, the transform is made symmetric by multiplying
by sqrt(N) in both passes. The user can do this by themselves.
*/
template<typename Real> void ComplexFft (VectorBase<Real> *v, bool forward, Vector<Real> *tmp_work = NULL);
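/// Editorial usage sketch (not part of the original header): round-trip
/// four complex points through the transform.
///   Vector<double> v(8);      // (re, im) interleaved; N = 4 is a power of 2.
///   v.SetRandn();
///   ComplexFft(&v, true);     // forward DFT, in place.
///   ComplexFft(&v, false);    // inverse DFT, without the 1/N factor.
///   v.Scale(1.0 / 4);         // divide by N to recover the original data.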
/// ComplexFt is the same as ComplexFft but it implements the Fourier
/// transform in an inefficient way. It is mainly included for testing purposes.
/// See comment for ComplexFft to describe the input and outputs and what it does.
template<typename Real> void ComplexFt (const VectorBase<Real> &in,
VectorBase<Real> *out, bool forward);
/// RealFft is a fourier transform of real inputs. Internally it uses
/// ComplexFft. The input dimension N must be even. If forward == true,
/// it transforms from a sequence of N real points to its complex fourier
/// transform; otherwise it goes in the reverse direction. If you call it
/// in the forward and then reverse direction and multiply by 1.0/N, you
/// will get back the original data.
/// The interpretation of the complex-FFT data is as follows: the array
/// is a sequence of complex numbers C_n of length N/2 with (real, im) format,
/// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...].
template<typename Real> void RealFft (VectorBase<Real> *v, bool forward);
/// RealFftInefficient has the same input and output format as RealFft above,
/// but it is an inefficient implementation included for testing purposes.
template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward);
/// ComputeDctMatrix computes a matrix corresponding to the DCT, such that
/// M * v equals the DCT of vector v. M must be square at input.
/// This is the type = III DCT with normalization, corresponding to the
/// following equations, where x is the signal and X is the DCT:
/// X_0 = sqrt(1/N) \sum_{n = 0}^{N-1} x_n
/// X_k = sqrt(2/N) \sum_{n = 0}^{N-1} x_n cos( \pi/N (n + 1/2) k ), for k > 0
/// This matrix's transpose is its own inverse, so transposing this
/// matrix will give the inverse DCT.
/// Caution: the type III DCT is generally known as the "inverse DCT" (with the
/// type II being the actual DCT), so this function is somewhat mis-named. It
/// was probably done this way for HTK compatibility. We don't change it
/// because it was this way from the start and changing it would affect the
/// feature generation.
template<typename Real> void ComputeDctMatrix(Matrix<Real> *M);
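/// Editorial usage sketch: apply the DCT matrix to a signal and invert with
/// the transpose, relying on the orthonormality noted above.
///   MatrixIndexT N = 13;
///   Matrix<double> M(N, N);   // must be square at input.
///   ComputeDctMatrix(&M);
///   Vector<double> x(N), y(N), x2(N);
///   x.SetRandn();
///   y.AddMatVec(1.0, M, kNoTrans, x, 0.0);  // y = DCT of x.
///   x2.AddMatVec(1.0, M, kTrans, y, 0.0);   // x2 == x, up to rounding.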
/// ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
Real *b_re, Real *b_im);
/// ComplexAddProduct implements, inline, the complex operation c += (a * b).
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
const Real &b_re, const Real &b_im,
Real *c_re, Real *c_im);
/// ComplexImExp implements a <-- exp(i x), inline.
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im);
// This class allows you to compute the matrix exponential function
// B = I + A + 1/2! A^2 + 1/3! A^3 + ...
// This method is most accurate where the result is of the same order of
// magnitude as the unit matrix (it will typically not work well when
// the answer has almost-zero eigenvalues or is close to zero).
// It also provides a function that allows you to back-propagate the
// derivative of a scalar function through this calculation.
template<typename Real>
class MatrixExponential {
public:
MatrixExponential() { }
void Compute(const MatrixBase<Real> &M, MatrixBase<Real> *X); // does *X = exp(M)
// Version for symmetric matrices (it just copies to full matrix).
void Compute(const SpMatrix<Real> &M, SpMatrix<Real> *X); // does *X = exp(M)
void Backprop(const MatrixBase<Real> &hX, MatrixBase<Real> *hM) const; // Propagates
// the gradient of a scalar function f backwards through this operation, i.e.:
// if the parameter hX represents df/dX (with no transpose, so element i, j of hX
// is the derivative of f w.r.t. X(i, j)), it sets hM to df/dM, again with no
// transpose (of course, only the part thereof that comes through the effect of
// M on X). This applies to the values of M and X used in the most recent
// call to Compute().
// Version for symmetric matrices (it just copies to full matrix).
void Backprop(const SpMatrix<Real> &hX, SpMatrix<Real> *hM) const;
private:
void Clear();
static MatrixIndexT ComputeN(const MatrixBase<Real> &M);
// This is intended for matrices P with small norms: compute B_0 = exp(P) - I.
// Keeps adding terms in the Taylor series till there is no further
// change in the result. Stores some of the powers of P in powers_.
void ComputeTaylor(const MatrixBase<Real> &P, MatrixBase<Real> *B0);
// Backprop through the Taylor-series computation above.
// note: hX is \hat{X} in the math; hM is \hat{M} in the math.
void BackpropTaylor(const MatrixBase<Real> &hX,
MatrixBase<Real> *hM) const;
Matrix<Real> P_; // Equals M * 2^(-N_)
std::vector<Matrix<Real> > B_; // B_[0] = exp(P_) - I,
// B_[k] = 2 B_[k-1] + B_[k-1]^2 [k > 0],
// ( = exp(P_)^(2^k) - I )
// goes from 0..N_ [size N_+1].
std::vector<Matrix<Real> > powers_; // powers (>1) of P_ stored here,
// up to all but the last one used in the Taylor expansion (this is the
// last one we need in the backprop). The index is the power minus 2.
MatrixIndexT N_; // Power N_ >= 0 such that P_ = M * 2^(-N_);
// we choose it so that P_ has a sufficiently small norm
// that the Taylor series will converge fast.
};
/**
ComputePCA does a PCA computation, using either outer products
or inner products, whichever is more efficient. Let D be
the dimension of the data points, N be the number of data
points, and G be the PCA dimension we want to retain. We assume
G <= N and G <= D.
@param X [in] An N x D matrix. Each row of X is a point x_i.
@param U [out] A G x D matrix. Each row of U is a basis element u_i.
@param A [out] An N x G matrix, or NULL. Each row of A is a set of coefficients
in the basis for a point x_i, so A(i, g) is the coefficient of u_g
in x_i.
@param print_eigs [in] If true, prints out diagnostic information about the
eigenvalues.
@param exact [in] If true, does the exact computation; if false, does
a much faster (but almost exact) computation based on the Lanczos
method.
*/
template<typename Real>
void ComputePca(const MatrixBase<Real> &X,
MatrixBase<Real> *U,
MatrixBase<Real> *A,
bool print_eigs = false,
bool exact = true);
// This function does: *plus += max(0, alpha * a b^T),
// *minus += max(0, -(alpha * a b^T)), elementwise.
template<typename Real>
void AddOuterProductPlusMinus(Real alpha,
const VectorBase<Real> &a,
const VectorBase<Real> &b,
MatrixBase<Real> *plus,
MatrixBase<Real> *minus);
template<typename Real1, typename Real2>
inline void AssertSameDim(const MatrixBase<Real1> &mat1, const MatrixBase<Real2> &mat2) {
KALDI_ASSERT(mat1.NumRows() == mat2.NumRows()
&& mat1.NumCols() == mat2.NumCols());
}
/// @} end of "addtogroup matrix_funcs_misc"
} // end namespace kaldi
#include "matrix/matrix-functions-inl.h"
#endif

The diff for this file is not shown because of its large size.


@@ -1,37 +0,0 @@
// matrix/matrix-lib.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// Include everything from this directory.
// These files include other stuff that we need.
#ifndef KALDI_MATRIX_MATRIX_LIB_H_
#define KALDI_MATRIX_MATRIX_LIB_H_
#include "matrix/cblas-wrappers.h"
#include "base/kaldi-common.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/tp-matrix.h"
#include "matrix/matrix-functions.h"
#include "matrix/srfft.h"
#include "matrix/compressed-matrix.h"
#include "matrix/optimization.h"
#endif


@@ -1,425 +0,0 @@
// matrix/optimization.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "matrix/optimization.h"
namespace kaldi {
// Below, N&W refers to Nocedal and Wright, "Numerical Optimization", 2nd Ed.
template<typename Real>
OptimizeLbfgs<Real>::OptimizeLbfgs(const VectorBase<Real> &x,
const LbfgsOptions &opts):
opts_(opts), k_(0), computation_state_(kBeforeStep), H_was_set_(false) {
KALDI_ASSERT(opts.m > 0); // number of vectors to store.
MatrixIndexT dim = x.Dim();
KALDI_ASSERT(dim > 0);
x_ = x; // this is the value of x_k
new_x_ = x; // this is where we'll evaluate the function next.
deriv_.Resize(dim);
temp_.Resize(dim);
data_.Resize(2 * opts.m, dim);
rho_.Resize(opts.m);
// Just set f_ to some invalid value, as we haven't yet set it.
f_ = (opts.minimize ? 1 : -1 ) * std::numeric_limits<Real>::infinity();
best_f_ = f_;
best_x_ = x_;
}
template<typename Real>
Real OptimizeLbfgs<Real>::RecentStepLength() const {
size_t n = step_lengths_.size();
if (n == 0) return std::numeric_limits<Real>::infinity();
else {
if (n >= 2 && step_lengths_[n-1] == 0.0 && step_lengths_[n-2] == 0.0)
return 0.0; // two zeros in a row means repeated restarts, which is
// a loop. Short-circuit this by returning zero.
Real avg = 0.0;
for (size_t i = 0; i < n; i++)
avg += step_lengths_[i] / n;
return avg;
}
}
template<typename Real>
void OptimizeLbfgs<Real>::ComputeHifNeeded(const VectorBase<Real> &gradient) {
if (k_ == 0) {
if (H_.Dim() == 0) {
// H was never set up. Set it up for the first time.
Real learning_rate;
if (opts_.first_step_length > 0.0) { // this takes
// precedence over first_step_learning_rate, if set.
// We are setting up H for the first time.
Real gradient_length = gradient.Norm(2.0);
learning_rate = (gradient_length > 0.0 ?
opts_.first_step_length / gradient_length :
1.0);
} else if (opts_.first_step_impr > 0.0) {
Real gradient_length = gradient.Norm(2.0);
learning_rate = (gradient_length > 0.0 ?
opts_.first_step_impr / (gradient_length * gradient_length) :
1.0);
} else {
learning_rate = opts_.first_step_learning_rate;
}
H_.Resize(x_.Dim());
KALDI_ASSERT(learning_rate > 0.0);
H_.Set(opts_.minimize ? learning_rate : -learning_rate);
}
} else { // k_ > 0
if (!H_was_set_) { // The user never specified an approximate
// diagonal inverse Hessian.
// Set it using formula 7.20: H_k^{(0)} = \gamma_k I, where
// \gamma_k = s_{k-1}^T y_{k-1} / y_{k-1}^T y_{k-1}
SubVector<Real> y_km1 = Y(k_-1);
double gamma_k = VecVec(S(k_-1), y_km1) / VecVec(y_km1, y_km1);
if (KALDI_ISNAN(gamma_k) || KALDI_ISINF(gamma_k)) {
KALDI_WARN << "NaN encountered in L-BFGS (already converged?)";
gamma_k = (opts_.minimize ? 1.0 : -1.0);
}
H_.Set(gamma_k);
}
}
}
// This represents the first 2 lines of Algorithm 7.5 (N&W), which
// in fact is mostly a call to Algorithm 7.4.
// Note: this is valid whether we are minimizing or maximizing.
template<typename Real>
void OptimizeLbfgs<Real>::ComputeNewDirection(Real function_value,
const VectorBase<Real> &gradient) {
KALDI_ASSERT(computation_state_ == kBeforeStep);
SignedMatrixIndexT m = M(), k = k_;
ComputeHifNeeded(gradient);
// The rest of this is computing p_k <-- - H_k \nabla f_k using Algorithm
// 7.4 of N&W.
Vector<Real> &q(deriv_), &r(new_x_); // Use deriv_ as a temporary place to put
// q, and new_x_ as a temporary place to put r.
// The if-statement below is just to get rid of spurious warnings from
// valgrind about memcpy source and destination overlap, since sometimes q and
// gradient are the same variable.
if (&q != &gradient)
q.CopyFromVec(gradient); // q <-- \nabla f_k.
Vector<Real> alpha(m);
// for i = k - 1, k - 2, ... k - m
for (SignedMatrixIndexT i = k - 1;
i >= std::max(k - m, static_cast<SignedMatrixIndexT>(0));
i--) {
alpha(i % m) = rho_(i % m) * VecVec(S(i), q); // \alpha_i <-- \rho_i s_i^T q.
q.AddVec(-alpha(i % m), Y(i)); // q <-- q - \alpha_i y_i
}
r.SetZero();
r.AddVecVec(1.0, H_, q, 0.0); // r <-- H_k^{(0)} q.
// for k = k - m, k - m + 1, ... , k - 1
for (SignedMatrixIndexT i = std::max(k - m, static_cast<SignedMatrixIndexT>(0));
i < k;
i++) {
Real beta = rho_(i % m) * VecVec(Y(i), r); // \beta <-- \rho_i y_i^T r
r.AddVec(alpha(i % m) - beta, S(i)); // r <-- r + s_i (\alpha_i - \beta)
}
{ // TEST. Note, -r will be the direction.
Real dot = VecVec(gradient, r);
if ((opts_.minimize && dot < 0) || (!opts_.minimize && dot > 0))
KALDI_WARN << "Step direction has the wrong sign! Routine will fail.";
}
// Now we're out of Alg. 7.4 and back into Alg. 7.5.
// Alg. 7.4 returned r (using new_x_ as the location), and with \alpha_k = 1
// as the initial guess, we're setting x_{k+1} = x_k + \alpha_k p_k, with
// p_k = -r [hence the statement new_x_.Scale(-1.0)].
// This is the first place we'll get the user to evaluate the function;
// any backtracking (or acceptance of that step) occurs inside StepSizeIteration.
// We're still within iteration k; we haven't yet finalized the step size.
new_x_.Scale(-1.0);
new_x_.AddVec(1.0, x_);
if (&deriv_ != &gradient)
deriv_.CopyFromVec(gradient);
f_ = function_value;
d_ = opts_.d;
num_wolfe_i_failures_ = 0;
num_wolfe_ii_failures_ = 0;
last_failure_type_ = kNone;
computation_state_ = kWithinStep;
}
template<typename Real>
bool OptimizeLbfgs<Real>::AcceptStep(Real function_value,
const VectorBase<Real> &gradient) {
// Save s_k = x_{k+1} - x_{k}, and y_k = \nabla f_{k+1} - \nabla f_k.
SubVector<Real> s = S(k_), y = Y(k_);
s.CopyFromVec(new_x_);
s.AddVec(-1.0, x_); // s = new_x_ - x_.
y.CopyFromVec(gradient);
y.AddVec(-1.0, deriv_); // y = gradient - deriv_.
// Warning: there is a division in the next line. This could
// generate inf or nan, but this wouldn't necessarily be an error
// at this point because for zero step size or derivative we should
// terminate the iterations. But this is up to the calling code.
Real prod = VecVec(y, s);
rho_(k_ % opts_.m) = 1.0 / prod;
Real len = s.Norm(2.0);
if ((opts_.minimize && prod <= 1.0e-20) || (!opts_.minimize && prod >= -1.0e-20)
|| len == 0.0)
return false; // This will force restart.
KALDI_VLOG(3) << "Accepted step; length was " << len
<< ", prod was " << prod;
RecordStepLength(len);
// store x_{k+1} and the function value f_{k+1}.
x_.CopyFromVec(new_x_);
f_ = function_value;
k_++;
return true; // We successfully accepted the step.
}
template<typename Real>
void OptimizeLbfgs<Real>::RecordStepLength(Real s) {
step_lengths_.push_back(s);
if (step_lengths_.size() > static_cast<size_t>(opts_.avg_step_length))
step_lengths_.erase(step_lengths_.begin(), step_lengths_.begin() + 1);
}
template<typename Real>
void OptimizeLbfgs<Real>::Restart(const VectorBase<Real> &x,
Real f,
const VectorBase<Real> &gradient) {
// Note: we will consider restarting (the transition of x_ -> x)
// as a step, even if it has zero step size. This is necessary in
// order for convergence to be detected.
{
Vector<Real> &diff(temp_);
diff.CopyFromVec(x);
diff.AddVec(-1.0, x_);
RecordStepLength(diff.Norm(2.0));
}
k_ = 0; // Restart the iterations! [But note that the Hessian,
// whatever it was, stays as before.]
if (&x_ != &x)
x_.CopyFromVec(x);
new_x_.CopyFromVec(x);
f_ = f;
computation_state_ = kBeforeStep;
ComputeNewDirection(f, gradient);
}
template<typename Real>
void OptimizeLbfgs<Real>::StepSizeIteration(Real function_value,
const VectorBase<Real> &gradient) {
KALDI_VLOG(3) << "In step size iteration, function value changed "
<< f_ << " to " << function_value;
// We're in some part of the backtracking, and the user is providing
// the objective function value and gradient.
// We're checking two conditions: Wolfe i) [the Armijo rule] and
// Wolfe ii).
// The Armijo rule (when minimizing) is:
// f(x_k + \alpha_k p_k) <= f(x_k) + c_1 \alpha_k p_k^T \nabla f(x_k), where
// \nabla means the derivative.
// Below, "temp" is the RHS of this equation, where (\alpha_k p_k) equals
// (new_x_ - x_); we don't store \alpha or p_k separately, they are implicit
// as the difference new_x_ - x_.
// Below, pf is \alpha_k p_k^T \nabla f(x_k).
Real pf = VecVec(new_x_, deriv_) - VecVec(x_, deriv_);
Real temp = f_ + opts_.c1 * pf;
bool wolfe_i_ok;
if (opts_.minimize) wolfe_i_ok = (function_value <= temp);
else wolfe_i_ok = (function_value >= temp);
// Wolfe condition ii) can be written as:
// p_k^T \nabla f(x_k + \alpha_k p_k) >= c_2 p_k^T \nabla f(x_k)
// p2f equals \alpha_k p_k^T \nabla f(x_k + \alpha_k p_k), where
// (\alpha_k p_k^T) is (new_x_ - x_).
// Note that in our version of Wolfe condition (ii) we have an extra
// factor alpha, which doesn't affect anything.
Real p2f = VecVec(new_x_, gradient) - VecVec(x_, gradient);
//eps = (sizeof(Real) == 4 ? 1.0e-05 : 1.0e-10) *
//(std::abs(p2f) + std::abs(pf));
bool wolfe_ii_ok;
if (opts_.minimize) wolfe_ii_ok = (p2f >= opts_.c2 * pf);
else wolfe_ii_ok = (p2f <= opts_.c2 * pf);
enum { kDecrease, kNoChange } d_action; // What to do with d_: leave it alone,
// or take the square root.
enum { kAccept, kDecreaseStep, kIncreaseStep, kRestart } iteration_action;
// What we'll do in the overall iteration: accept this value, DecreaseStep
// (reduce the step size), IncreaseStep (increase the step size), or kRestart
// (set k back to zero). Generally when we can't get both conditions to be
// true with a reasonable period of time, it makes sense to restart, because
// probably we've almost converged and got into numerical issues; from here
// we'll just produce NaNs. Restarting is a safe thing to do and the outer
// code will quickly detect convergence.
d_action = kNoChange; // the default.
if (wolfe_i_ok && wolfe_ii_ok) {
iteration_action = kAccept;
d_action = kNoChange; // actually doesn't matter, it'll get reset.
} else if (!wolfe_i_ok) {
// If wolfe i) [the Armijo rule] failed then we went too far (or are
// meeting numerical problems).
if (last_failure_type_ == kWolfeII) { // Last time we failed it was Wolfe ii).
// When we switch between them we decrease d.
d_action = kDecrease;
}
iteration_action = kDecreaseStep;
last_failure_type_ = kWolfeI;
num_wolfe_i_failures_++;
} else if (!wolfe_ii_ok) {
// Curvature condition failed -> we did not go far enough.
if (last_failure_type_ == kWolfeI) // switching between wolfe i and ii failures->
d_action = kDecrease; // decrease value of d.
iteration_action = kIncreaseStep;
last_failure_type_ = kWolfeII;
num_wolfe_ii_failures_++;
}
// Test whether we've been switching too many times between Wolfe i) and ii)
// failures, or overall have an excessive number of failures. We just give up
// and restart L-BFGS. Probably we've almost converged.
if (num_wolfe_i_failures_ + num_wolfe_ii_failures_ >
opts_.max_line_search_iters) {
KALDI_VLOG(2) << "Too many steps in line search -> restarting.";
iteration_action = kRestart;
}
if (d_action == kDecrease)
d_ = std::sqrt(d_);
KALDI_VLOG(3) << "d = " << d_ << ", iter = " << k_ << ", action = "
<< (iteration_action == kAccept ? "accept" :
(iteration_action == kDecreaseStep ? "decrease" :
(iteration_action == kIncreaseStep ? "increase" :
"reject")));
// Note: even if iteration_action != Restart at this point,
// some code below may set it to Restart.
if (iteration_action == kAccept) {
if (AcceptStep(function_value, gradient)) { // If we did
// not detect a problem while accepting the step..
computation_state_ = kBeforeStep;
ComputeNewDirection(function_value, gradient);
} else {
KALDI_VLOG(2) << "Restarting L-BFGS computation; problem found while "
<< "accepting step.";
iteration_action = kRestart; // We'll have to restart now.
}
}
if (iteration_action == kDecreaseStep || iteration_action == kIncreaseStep) {
Real scale = (iteration_action == kDecreaseStep ? 1.0 / d_ : d_);
temp_.CopyFromVec(new_x_);
new_x_.Scale(scale);
new_x_.AddVec(1.0 - scale, x_);
if (new_x_.ApproxEqual(temp_, 0.0)) {
// Value of new_x_ did not change at all --> we must restart.
KALDI_VLOG(3) << "Value of x did not change, when taking step; "
<< "will restart computation.";
iteration_action = kRestart;
}
if (new_x_.ApproxEqual(temp_, 1.0e-08) &&
std::abs(f_ - function_value) < 1.0e-08 *
std::abs(f_) && iteration_action == kDecreaseStep) {
// This is common and due to roundoff.
KALDI_VLOG(3) << "We appear to be backtracking while we are extremely "
<< "close to the old value; restarting.";
iteration_action = kRestart;
}
if (iteration_action == kDecreaseStep) {
num_wolfe_i_failures_++;
last_failure_type_ = kWolfeI;
} else {
num_wolfe_ii_failures_++;
last_failure_type_ = kWolfeII;
}
}
if (iteration_action == kRestart) {
// We want to restart the computation. If the objf at new_x_ is
// better than it was at x_, we'll start at new_x_, else at x_.
bool use_newx;
if (opts_.minimize) use_newx = (function_value < f_);
else use_newx = (function_value > f_);
KALDI_VLOG(3) << "Restarting computation.";
if (use_newx) Restart(new_x_, function_value, gradient);
else Restart(x_, f_, deriv_);
}
}
template<typename Real>
void OptimizeLbfgs<Real>::DoStep(Real function_value,
const VectorBase<Real> &gradient) {
if (opts_.minimize ? function_value < best_f_ : function_value > best_f_) {
best_f_ = function_value;
best_x_.CopyFromVec(new_x_);
}
if (computation_state_ == kBeforeStep)
ComputeNewDirection(function_value, gradient);
else // kWithinStep{1,2,3}
StepSizeIteration(function_value, gradient);
}
template<typename Real>
void OptimizeLbfgs<Real>::DoStep(Real function_value,
const VectorBase<Real> &gradient,
const VectorBase<Real> &diag_approx_2nd_deriv) {
if (opts_.minimize ? function_value < best_f_ : function_value > best_f_) {
best_f_ = function_value;
best_x_.CopyFromVec(new_x_);
}
if (opts_.minimize) {
KALDI_ASSERT(diag_approx_2nd_deriv.Min() > 0.0);
} else {
KALDI_ASSERT(diag_approx_2nd_deriv.Max() < 0.0);
}
H_was_set_ = true;
H_.CopyFromVec(diag_approx_2nd_deriv);
H_.InvertElements();
DoStep(function_value, gradient);
}
template<typename Real>
const VectorBase<Real>&
OptimizeLbfgs<Real>::GetValue(Real *objf_value) const {
if (objf_value != NULL) *objf_value = best_f_;
return best_x_;
}
// Instantiate the class for float and double.
template
class OptimizeLbfgs<float>;
template
class OptimizeLbfgs<double>;
} // end namespace kaldi


@@ -1,219 +0,0 @@
// matrix/optimization.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_OPTIMIZATION_H_
#define KALDI_MATRIX_OPTIMIZATION_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_optimization
/// @{
/**
This is an implementation of L-BFGS. It pushes responsibility for
determining when to stop onto the user. There is no call-back here:
everything is done via calls to the class itself (see the example in
matrix-lib-test.cc, and the brief sketch after the LbfgsOptions struct
below). This does not implement constrained L-BFGS, but it will
handle constrained problems correctly as long as the function approaches
+infinity (or -infinity for maximization problems) when it gets close to the
bound of the constraint. In these types of problems, you just let the
function value be +infinity for minimization problems, or -infinity for
maximization problems, outside these bounds.
*/
struct LbfgsOptions {
bool minimize; // if true, we're minimizing, else maximizing.
int m; // m is the number of stored vectors L-BFGS keeps.
float first_step_learning_rate; // The very first step of L-BFGS is
// like gradient descent. If you want to configure the size of that step,
// you can do it using this variable.
float first_step_length; // If this variable is >0.0, it overrides
// first_step_learning_rate; on the first step we choose an approximate
// Hessian that is the multiple of the identity that would generate this
// step-length, or 1.0 if the gradient is zero.
float first_step_impr; // If this variable is >0.0, it overrides
// first_step_learning_rate; on the first step we choose an approximate
// Hessian that is the multiple of the identity that would generate this
// amount of objective function improvement (assuming the "real" objf
// was linear).
float c1; // A constant in Armijo rule = Wolfe condition i)
float c2; // A constant in Wolfe condition ii)
float d; // An amount > 1.0 (default 2.0) that we initially multiply or
// divide the step length by, in the line search.
int max_line_search_iters; // after this many iters we restart L-BFGS.
int avg_step_length; // number of iters to avg step length over, in
// RecentStepLength().
LbfgsOptions (bool minimize = true):
minimize(minimize),
m(10),
first_step_learning_rate(1.0),
first_step_length(0.0),
first_step_impr(0.0),
c1(1.0e-04),
c2(0.9),
d(2.0),
max_line_search_iters(50),
avg_step_length(4) { }
};
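// Editorial sketch of the calling protocol (the full example lives in
// matrix-lib-test.cc; the code below is illustrative), minimizing
// f(x) = 0.5 x^T x, whose gradient at x is x itself:
//   Vector<double> x(10);
//   x.SetRandn();
//   LbfgsOptions opts(true);  // minimize.
//   OptimizeLbfgs<double> lbfgs(x, opts);
//   for (int32 iter = 0; iter < 100; iter++) {
//     const VectorBase<double> &cur = lbfgs.GetProposedValue();
//     double f = 0.5 * VecVec(cur, cur);
//     Vector<double> grad(cur);  // gradient of f at cur.
//     lbfgs.DoStep(f, grad);
//     if (lbfgs.RecentStepLength() < 1.0e-06) break;  // converged.
//   }
//   double best_f;
//   Vector<double> best_x(lbfgs.GetValue(&best_f));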
template<typename Real>
class OptimizeLbfgs {
public:
/// Initializer takes the starting value of x.
OptimizeLbfgs(const VectorBase<Real> &x,
const LbfgsOptions &opts);
/// This returns the value of the variable x that has the best objective
/// function so far, and the corresponding objective function value if
/// requested. This would typically be called only at the end.
const VectorBase<Real>& GetValue(Real *objf_value = NULL) const;
/// This returns the value at which the function wants us
/// to compute the objective function and gradient.
const VectorBase<Real>& GetProposedValue() const { return new_x_; }
/// Returns the average magnitude of the last n steps (but not
/// more than the number we have stored). Before we have taken
/// any steps, returns +infinity. Note: if the most recent
/// step length was 0, it returns 0, regardless of the other
/// step lengths. This makes it suitable as a convergence test
/// (else we'd generate NaN's).
Real RecentStepLength() const;
/// The user calls this function to provide the class with the
/// function and gradient info at the point GetProposedValue().
/// If this point is outside the constraints you can set function_value
/// to {+infinity,-infinity} for {minimization,maximization} problems.
/// In this case the gradient, and also the second derivative (if you call
/// the second overloaded version of this function) will be ignored.
void DoStep(Real function_value,
const VectorBase<Real> &gradient);
/// The user can call this version of DoStep() if it is desired to set some
/// kind of approximate Hessian on this iteration. Note: it is a prerequisite
/// that diag_approx_2nd_deriv must be strictly positive (minimizing), or
/// negative (maximizing).
void DoStep(Real function_value,
const VectorBase<Real> &gradient,
const VectorBase<Real> &diag_approx_2nd_deriv);
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(OptimizeLbfgs);
// The following variable says what stage of the computation we're at.
// Refer to Algorithm 7.5 (L-BFGS) of Nocedal & Wright, "Numerical
// Optimization", 2nd edition.
// kBeforeStep means we're about to do
// "compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4).
// kWithinStep means we're at some point within line search; note
// that line search is iterative so we can stay in this state more
// than one time on each iteration.
enum ComputationState {
kBeforeStep,
kWithinStep, // This means we're within the step-size computation, and
// have not yet done the 1st function evaluation.
};
inline MatrixIndexT Dim() { return x_.Dim(); }
inline MatrixIndexT M() { return opts_.m; }
SubVector<Real> Y(MatrixIndexT i) {
return SubVector<Real>(data_, (i % M()) * 2); // vector y_i
}
SubVector<Real> S(MatrixIndexT i) {
return SubVector<Real>(data_, (i % M()) * 2 + 1); // vector s_i
}
// The following are subroutines within DoStep():
bool AcceptStep(Real function_value,
const VectorBase<Real> &gradient);
void Restart(const VectorBase<Real> &x,
Real function_value,
const VectorBase<Real> &gradient);
void ComputeNewDirection(Real function_value,
const VectorBase<Real> &gradient);
void ComputeHifNeeded(const VectorBase<Real> &gradient);
void StepSizeIteration(Real function_value,
const VectorBase<Real> &gradient);
void RecordStepLength(Real s);
LbfgsOptions opts_;
SignedMatrixIndexT k_; // Iteration number, starts from zero. Gets set back to zero
// when we restart.
ComputationState computation_state_;
bool H_was_set_; // True if the user specified H_; if false,
// we'll use a heuristic to estimate it.
Vector<Real> x_; // current x.
Vector<Real> new_x_; // the x proposed in the line search.
Vector<Real> best_x_; // the x with the best objective function so far
// (either the same as x_ or something in the current line search.)
Vector<Real> deriv_; // The most recently evaluated derivative-- at x_k.
Vector<Real> temp_;
Real f_; // The function evaluated at x_k.
Real best_f_; // the best objective function so far.
Real d_; // a number d > 1.0; during an iteration we may decrease this, when
// we switch between Wolfe i) and Wolfe ii) failures.
int num_wolfe_i_failures_; // the num times we decreased step size.
int num_wolfe_ii_failures_; // the num times we increased step size.
enum { kWolfeI, kWolfeII, kNone } last_failure_type_; // last type of step-search
// failure on this iter.
Vector<Real> H_; // Current inverse-Hessian estimate. May be computed by this class itself,
// or provided by the user via the second form of DoStep().
Matrix<Real> data_; // dimension (m*2) x dim. Even rows store
// gradients y_i, odd rows store steps s_i.
Vector<Real> rho_; // dimension m; rho_(i % m) = 1/(y_i^T s_i), Eq. 7.17 of N&W.
std::vector<Real> step_lengths_; // The step sizes we took on the last
// (up to m) iterations; these are not stored in a rotating buffer but
// are shifted by one each time (this is more convenient when we
// restart, as we keep this info past restarting).
};
/// @}
} // end namespace kaldi
#endif


@@ -1,438 +0,0 @@
// matrix/packed-matrix.cc
// Copyright 2009-2012 Microsoft Corporation Saarland University
// Johns Hopkins University (Author: Daniel Povey);
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
/**
* @file packed-matrix.cc
*
* Implementation of specialized PackedMatrix template methods
*/
#include "matrix/cblas-wrappers.h"
#include "matrix/packed-matrix.h"
#include "matrix/kaldi-vector.h"
namespace kaldi {
template<typename Real>
void PackedMatrix<Real>::Scale(Real alpha) {
size_t nr = num_rows_,
sz = (nr * (nr + 1)) / 2;
cblas_Xscal(sz, alpha, data_, 1);
}
template<typename Real>
void PackedMatrix<Real>::AddPacked(const Real alpha, const PackedMatrix<Real> &rMa) {
KALDI_ASSERT(num_rows_ == rMa.NumRows());
size_t nr = num_rows_,
sz = (nr * (nr + 1)) / 2;
cblas_Xaxpy(sz, alpha, rMa.Data(), 1, data_, 1);
}
template<typename Real>
void PackedMatrix<Real>::SetRandn() {
Real *data = data_;
size_t dim = num_rows_, size = ((dim*(dim+1))/2);
for (size_t i = 0; i < size; i++)
data[i] = RandGauss();
}
template<typename Real>
inline void PackedMatrix<Real>::Init(MatrixIndexT r) {
if (r == 0) {
num_rows_ = 0;
data_ = 0;
return;
}
size_t size = ((static_cast<size_t>(r) * static_cast<size_t>(r + 1)) / 2);
if (static_cast<size_t>(static_cast<MatrixIndexT>(size)) != size) {
KALDI_WARN << "Allocating packed matrix whose full dimension does not fit "
<< "in MatrixIndexT: not all code is tested for this case.";
}
void *data; // aligned memory block
void *temp;
if ((data = KALDI_MEMALIGN(16, size * sizeof(Real), &temp)) != NULL) {
this->data_ = static_cast<Real *> (data);
this->num_rows_ = r;
} else {
throw std::bad_alloc();
}
}
template<typename Real>
void PackedMatrix<Real>::Swap(PackedMatrix<Real> *other) {
std::swap(data_, other->data_);
std::swap(num_rows_, other->num_rows_);
}
template<typename Real>
void PackedMatrix<Real>::Swap(Matrix<Real> *other) {
std::swap(data_, other->data_);
std::swap(num_rows_, other->num_rows_);
}
template<typename Real>
void PackedMatrix<Real>::Resize(MatrixIndexT r, MatrixResizeType resize_type) {
// the next block uses recursion to handle what we have to do if
// resize_type == kCopyData.
if (resize_type == kCopyData) {
if (this->data_ == NULL || r == 0) resize_type = kSetZero; // nothing to copy.
else if (this->num_rows_ == r) { return; } // nothing to do.
else {
// set tmp to a packed matrix of the desired size.
PackedMatrix<Real> tmp(r, kUndefined);
size_t r_min = std::min(r, num_rows_);
size_t mem_size_min = sizeof(Real) * (r_min*(r_min+1))/2,
mem_size_full = sizeof(Real) * (r*(r+1))/2;
// Copy the contents to tmp.
memcpy(tmp.data_, data_, mem_size_min);
char *ptr = static_cast<char*>(static_cast<void*>(tmp.data_));
// Set the rest of the contents of tmp to zero.
memset(static_cast<void*>(ptr + mem_size_min), 0, mem_size_full-mem_size_min);
tmp.Swap(this);
return;
}
}
if (data_ != NULL) Destroy();
Init(r);
if (resize_type == kSetZero) SetZero();
}
template<typename Real>
void PackedMatrix<Real>::AddToDiag(Real r) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr += r;
ptr += i;
}
}
template<typename Real>
void PackedMatrix<Real>::ScaleDiag(Real alpha) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr *= alpha;
ptr += i;
}
}
template<typename Real>
void PackedMatrix<Real>::SetDiag(Real alpha) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr = alpha;
ptr += i;
}
}
template<typename Real>
template<typename OtherReal>
void PackedMatrix<Real>::CopyFromPacked(const PackedMatrix<OtherReal> &orig) {
KALDI_ASSERT(NumRows() == orig.NumRows());
if (sizeof(Real) == sizeof(OtherReal)) {
memcpy(data_, orig.Data(), SizeInBytes());
} else {
Real *dst = data_;
const OtherReal *src = orig.Data();
size_t nr = NumRows(),
size = (nr * (nr + 1)) / 2;
for (size_t i = 0; i < size; i++, dst++, src++)
*dst = *src;
}
}
// template instantiations.
template
void PackedMatrix<float>::CopyFromPacked(const PackedMatrix<double> &orig);
template
void PackedMatrix<double>::CopyFromPacked(const PackedMatrix<float> &orig);
template
void PackedMatrix<double>::CopyFromPacked(const PackedMatrix<double> &orig);
template
void PackedMatrix<float>::CopyFromPacked(const PackedMatrix<float> &orig);
template<typename Real>
template<typename OtherReal>
void PackedMatrix<Real>::CopyFromVec(const SubVector<OtherReal> &vec) {
MatrixIndexT size = (NumRows()*(NumRows()+1)) / 2;
KALDI_ASSERT(vec.Dim() == size);
if (sizeof(Real) == sizeof(OtherReal)) {
memcpy(data_, vec.Data(), size * sizeof(Real));
} else {
Real *dst = data_;
const OtherReal *src = vec.Data();
for (MatrixIndexT i = 0; i < size; i++, dst++, src++)
*dst = *src;
}
}
// template instantiations.
template
void PackedMatrix<float>::CopyFromVec(const SubVector<double> &orig);
template
void PackedMatrix<double>::CopyFromVec(const SubVector<float> &orig);
template
void PackedMatrix<double>::CopyFromVec(const SubVector<double> &orig);
template
void PackedMatrix<float>::CopyFromVec(const SubVector<float> &orig);
template<typename Real>
void PackedMatrix<Real>::SetZero() {
memset(data_, 0, SizeInBytes());
}
template<typename Real>
void PackedMatrix<Real>::SetUnit() {
memset(data_, 0, SizeInBytes());
for (MatrixIndexT row = 0;row < num_rows_;row++)
(*this)(row, row) = 1.0;
}
template<typename Real>
Real PackedMatrix<Real>::Trace() const {
Real ans = 0.0;
for (MatrixIndexT row = 0;row < num_rows_;row++)
ans += (*this)(row, row);
return ans;
}
template<typename Real>
void PackedMatrix<Real>::Destroy() {
// we need to free the data block if it was defined
if (data_ != NULL) KALDI_MEMALIGN_FREE(data_);
data_ = NULL;
num_rows_ = 0;
}
template<typename Real>
void PackedMatrix<Real>::Write(std::ostream &os, bool binary) const {
if (!os.good()) {
KALDI_ERR << "Failed to write vector to stream: stream not good";
}
int32 size = this->NumRows(); // make the size 32-bit on disk.
KALDI_ASSERT(this->NumRows() == (MatrixIndexT) size);
MatrixIndexT num_elems = ((size+1)*(MatrixIndexT)size)/2;
if(binary) {
std::string my_token = (sizeof(Real) == 4 ? "FP" : "DP");
WriteToken(os, binary, my_token);
WriteBasicType(os, binary, size);
// We don't use the built-in Kaldi write routines for the floats, as they are
// not efficient enough.
os.write((const char*) data_, sizeof(Real) * num_elems);
}
else {
if(size == 0)
os<<"[ ]\n";
else {
os<<"[\n";
MatrixIndexT i = 0;
for (int32 j = 0; j < size; j++) {
for (int32 k = 0; k < j + 1; k++) {
WriteBasicType(os, binary, data_[i++]);
}
os << ( (j==size-1)? "]\n" : "\n");
}
KALDI_ASSERT(i == num_elems);
}
}
if (os.fail()) {
KALDI_ERR << "Failed to write packed matrix to stream";
}
}
template<typename Real>
void PackedMatrix<Real>::Read(std::istream& is, bool binary, bool add) {
if (add) {
PackedMatrix<Real> tmp;
tmp.Read(is, binary, false); // read without adding.
if (this->NumRows() == 0) this->Resize(tmp.NumRows());
else {
if (this->NumRows() != tmp.NumRows()) {
if (tmp.NumRows() == 0) return; // do nothing in this case.
else KALDI_ERR << "PackedMatrix::Read, size mismatch " << this->NumRows()
<< " vs. " << tmp.NumRows();
}
}
this->AddPacked(1.0, tmp);
return;
} // now assume add == false.
std::ostringstream specific_error;
MatrixIndexT pos_at_start = is.tellg();
int peekval = Peek(is, binary);
const char *my_token = (sizeof(Real) == 4 ? "FP" : "DP");
const char *new_format_token = "[";
bool is_new_format = false;//added by hxu
char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
int32 size;
MatrixIndexT num_elems;
if (peekval == other_token_start) { // need to instantiate the other type to read it.
typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa.
PackedMatrix<OtherType> other(this->NumRows());
other.Read(is, binary, false); // add is false at this point.
this->Resize(other.NumRows());
this->CopyFromPacked(other);
return;
}
std::string token;
ReadToken(is, binary, &token);
if (token != my_token) {
if(token != new_format_token) {
specific_error << ": Expected token " << my_token << ", got " << token;
goto bad;
}
//new format it is
is_new_format = true;
}
if(!is_new_format) {
ReadBasicType(is, binary, &size); // throws on error.
if ((MatrixIndexT)size != this->NumRows()) {
KALDI_ASSERT(size>=0);
this->Resize(size);
}
num_elems = ((size+1)*(MatrixIndexT)size)/2;
if (!binary) {
for (MatrixIndexT i = 0; i < num_elems; i++) {
ReadBasicType(is, false, data_+i); // will throw on error.
}
} else {
if (num_elems)
is.read(reinterpret_cast<char*>(data_), sizeof(Real)*num_elems);
}
if (is.fail()) goto bad;
return;
}
else {
std::vector<Real> data;
while(1) {
int32 num_lines = 0;
int i = is.peek();
if (i == -1) { specific_error << "Got EOF while reading matrix data"; goto bad; }
else if (static_cast<char>(i) == ']') { // Finished reading matrix.
is.get(); // eat the "]".
i = is.peek();
if (static_cast<char>(i) == '\r') {
is.get();
is.get(); // get \r\n (must eat what we wrote)
} // we just consumed the "\r\n" pair that was written after the "]".
else if (static_cast<char>(i) == '\n') { is.get(); } // get \n (must eat what we wrote)
if (is.fail()) {
KALDI_WARN << "After end of matrix data, read error.";
// we got the data we needed, so just warn for this error.
}
//now process the data:
num_lines = int32(sqrt(data.size()*2));
KALDI_ASSERT(data.size() == num_lines*(num_lines+1)/2);
this->Resize(num_lines);
for(int32 i = 0; i < data.size(); i++) {
data_[i] = data[i];
}
return;
//std::cout<<"here!!!!!hxu!!!!!"<<std::endl;
}
else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
Real r;
is >> r;
if (is.fail()) {
specific_error << "Stream failure/EOF while reading matrix data.";
goto bad;
}
data.push_back(r);
}
else if (isspace(i)) {
is.get(); // eat the space and do nothing.
} else { // NaN or inf or error.
std::string str;
is >> str;
if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
!KALDI_STRCASECMP(str.c_str(), "infinity")) {
data.push_back(std::numeric_limits<Real>::infinity());
KALDI_WARN << "Reading infinite value into matrix.";
} else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
data.push_back(std::numeric_limits<Real>::quiet_NaN());
KALDI_WARN << "Reading NaN value into matrix.";
} else {
specific_error << "Expecting numeric matrix data, got " << str;
goto bad;
}
}
}
}
bad:
KALDI_ERR << "Failed to read packed matrix from stream. " << specific_error
<< " File position at start is "
<< pos_at_start << ", currently " << is.tellg();
}
// Instantiate PackedMatrix for float and double.
template
class PackedMatrix<float>;
template
class PackedMatrix<double>;
} // namespace kaldi


@@ -1,197 +0,0 @@
// matrix/packed-matrix.h
// Copyright 2009-2013 Ondrej Glembek; Lukas Burget; Microsoft Corporation;
// Saarland University; Yanmin Qian;
// Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_PACKED_MATRIX_H_
#define KALDI_MATRIX_PACKED_MATRIX_H_
#include "matrix/matrix-common.h"
#include <algorithm>
namespace kaldi {
/// \addtogroup matrix_funcs_io
// we need to declare the friend << operator here
template<typename Real>
std::ostream & operator <<(std::ostream & out, const PackedMatrix<Real>& M);
/// \addtogroup matrix_group
/// @{
/// @brief Packed matrix: base class for triangular and symmetric matrices.
template<typename Real> class PackedMatrix {
friend class CuPackedMatrix<Real>;
public:
PackedMatrix() : data_(NULL), num_rows_(0) {}
explicit PackedMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero):
data_(NULL) { Resize(r, resize_type); }
explicit PackedMatrix(const PackedMatrix<Real> &orig) : data_(NULL) {
Resize(orig.num_rows_, kUndefined);
CopyFromPacked(orig);
}
template<typename OtherReal>
explicit PackedMatrix(const PackedMatrix<OtherReal> &orig) : data_(NULL) {
Resize(orig.NumRows(), kUndefined);
CopyFromPacked(orig);
}
void SetZero(); ///< Set to zero.
void SetUnit(); ///< Set to the unit matrix.
void SetRandn(); ///< Set to random values drawn from a normal distribution.
Real Trace() const;
// Needed for inclusion in std::vector
PackedMatrix<Real> & operator =(const PackedMatrix<Real> &other) {
Resize(other.NumRows());
CopyFromPacked(other);
return *this;
}
~PackedMatrix() {
Destroy();
}
/// Set packed matrix to a specified size (can be zero).
/// The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero);
void AddToDiag(const Real r); // Adds r to the diagonal.
void ScaleDiag(const Real alpha); // Scales diagonal by alpha.
void SetDiag(const Real alpha); // Sets diagonal to this value.
template<typename OtherReal>
void CopyFromPacked(const PackedMatrix<OtherReal> &orig);
/// CopyFromVec just interprets the vector as having the same layout
/// as the packed matrix. Must have the same dimension, i.e.
/// orig.Dim() == (NumRows()*(NumRows()+1)) / 2;
template<typename OtherReal>
void CopyFromVec(const SubVector<OtherReal> &orig);
Real* Data() { return data_; }
const Real* Data() const { return data_; }
inline MatrixIndexT NumRows() const { return num_rows_; }
inline MatrixIndexT NumCols() const { return num_rows_; }
size_t SizeInBytes() const {
size_t nr = static_cast<size_t>(num_rows_);
return ((nr * (nr+1)) / 2) * sizeof(Real);
}
//MatrixIndexT Stride() const { return stride_; }
// This code is duplicated in child classes to avoid extra levels of calls.
Real operator() (MatrixIndexT r, MatrixIndexT c) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_rows_)
&& c <= r);
return *(data_ + (r * (r + 1)) / 2 + c);
}
// This code is duplicated in child classes to avoid extra levels of calls.
Real &operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_rows_)
&& c <= r);
return *(data_ + (r * (r + 1)) / 2 + c);
}
Real Max() const {
KALDI_ASSERT(num_rows_ > 0);
return * (std::max_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) ));
}
Real Min() const {
KALDI_ASSERT(num_rows_ > 0);
return * (std::min_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) ));
}
void Scale(Real c);
friend std::ostream & operator << <> (std::ostream & out,
const PackedMatrix<Real> &m);
// Use Read() with add == true instead of "stream >> *this" if you want to add
// to the existing contents. Will throw an exception on failure.
void Read(std::istream &in, bool binary, bool add = false);
void Write(std::ostream &out, bool binary) const;
void Destroy();
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(PackedMatrix<Real> *other);
void Swap(Matrix<Real> *other);
protected:
// Will only be called from this class or derived classes.
void AddPacked(const Real alpha, const PackedMatrix<Real>& M);
Real *data_;
MatrixIndexT num_rows_;
//MatrixIndexT stride_;
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
/// have already been freed), and it sets the matrix to newly allocated memory
/// with the specified dimension. dim == 0 is acceptable. The memory contents
/// pointed to by data_ will be undefined.
void Init(MatrixIndexT dim);
};
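// Editorial note on the layout assumed by operator() above: the lower
// triangle is stored row by row, so for a 3 x 3 matrix data_ holds
// [(0,0), (1,0), (1,1), (2,0), (2,1), (2,2)] and element (r, c) with
// c <= r lives at index r*(r+1)/2 + c. For instance, using the SpMatrix
// subclass (declared in sp-matrix.h):
//   SpMatrix<double> S(3);
//   S(2, 1) = 5.0;  // stored at index 2*3/2 + 1 = 4.
//   KALDI_ASSERT(S.Data()[4] == 5.0);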
/// @} end "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
template<typename Real>
std::ostream & operator << (std::ostream & os, const PackedMatrix<Real>& M) {
M.Write(os, false);
return os;
}
template<typename Real>
std::istream & operator >> (std::istream &is, PackedMatrix<Real> &M) {
M.Read(is, false);
return is;
}
/// @}
} // namespace kaldi
#endif

Some files were not shown because too many files changed in this diff.