Removing files and folders as announced (examples and readers)

This commit is contained in:
Philipp Kranen 2016-01-11 13:52:31 +01:00
Parent 3ff2a677a0
Commit 6fdf48d58f
180 changed files: 0 additions and 61704 deletions

View file

@@ -1,40 +0,0 @@
This directory contains the grapheme-to-phoneme experiments reported in the following paper:
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme to phoneme conversion"
submitted to Interspeech 2015
encoder-decoder LSTM: scripts/s36.noreg.log
the best performing system is
s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
unidirectional LSTM: scripts/s23.unidirectional.log
bidirectional LSTM: scripts/s30.bidirectional.log
-----------------------
to score:
suppose the G2P results are in xxx.output
in Python, use the following commands (the outputfn lines below are alternative result files; pick one)
import os; os.chdir('d:/dev/kaisheny/dev/exp/lts/lts/scripts/')
import const as cn
import score
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/reps30bilstm/test_bw1_iter35/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/reps23mb100fw6/test_bw1_iter34/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2/test_bw1_iter43/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/s30rndjointconditionalbilstmn300n300n300/test_bw1_iter35/output.rec.txt'
outputfn='//speechstore5/transient/kaishengy/exp/lts/result/s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2/test_bw1_iter43/output.rec.txt'
lexicon = {}
score.ReadPronunciations(cn._TEST_PRON, lexicon)
score.BleuScore(lexicon, cn._TEST_FN, outputfn, False)
score.CORRECT_RATE(lexicon, cn._TEST_FN, outputfn, False)
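A convenience wrapper for the calls above (a sketch assuming the score and const modules in this scripts directory; score_g2p_output is a hypothetical helper, not part of the setup):

import os
os.chdir('d:/dev/kaisheny/dev/exp/lts/lts/scripts/')  # the score/const modules live here
import const as cn
import score

def score_g2p_output(outputfn):
    # hypothetical helper wrapping the scoring calls shown above
    lexicon = {}
    score.ReadPronunciations(cn._TEST_PRON, lexicon)        # reference pronunciations
    score.BleuScore(lexicon, cn._TEST_FN, outputfn, False)  # BLEU over phoneme sequences
    score.CORRECT_RATE(lexicon, cn._TEST_FN, outputfn, False)

score_g2p_output('//speechstore5/transient/kaishengy/exp/lts/result/reps30bilstm/test_bw1_iter35/output.rec.txt')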

View file

@@ -1,10 +0,0 @@
REM test.s26conditionalhashingbilstmn300n300
set TESTITER=%1
set MdlDir=%2
set BW=%3
set Q_ROOT=rpc://speech-data:6673
set QEXE=\\speechstore5\q
set PATH=\\speechstore5\q;%PATH%
set PATH=\\speechstore5\userdata\kaishengy\bin\DLLS;%PATH%
\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=-1 command=LSTMTest Iter=%1 MdlDir=%2 bw=%3 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
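For reference: the `+` in configFile concatenates the two config files (later values override earlier ones), top-level assignments such as Iter=%1 define variables, and bracketed arguments like LSTMTest=[beamWidth=$bw$] override single keys inside a named section, with $name$ substituted from those variables. A sketch of driving the same command from Python (build_cmd is a hypothetical wrapper, not part of this setup):

import subprocess

SETUPS = r"\\speechstore5\userdata\kaishengy\exp\lts\setups"
CN_EXE = r"\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe"

def build_cmd(iter_no, mdl_dir, bw):
    # mirrors the batch file: variables first, then per-section overrides
    return [CN_EXE,
            f"configFile={SETUPS}\\global.lstm.config+{SETUPS}\\lstm.2streams.lw7.conditional.mb100.fw6.config",
            "DeviceNumber=-1", "command=LSTMTest",
            f"Iter={iter_no}", f"MdlDir={mdl_dir}", f"bw={bw}",
            "LSTMTest=[beamWidth=$bw$]",
            r"LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$]",
            r"ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$"]

subprocess.run(build_cmd(34, r"\\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6", 1))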

View file

@@ -1,11 +0,0 @@
REM test.s26conditionalhashingbilstmn300n300
set TESTITER=%1
set MdlDir=%2
set BW=%3
set Q_ROOT=rpc://speech-data:6673
set QEXE=\\speechstore5\q
set PATH=\\speechstore5\q;%PATH%
set PATH=\\speechstore5\userdata\kaishengy\bin\DLLS;%PATH%
\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=-1 command=LSTMTest Iter=%1 MdlDir=%2 bw=%3 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$

View file

@@ -1,11 +0,0 @@
REM test.s26conditionalhashingbilstmn300n300
set TESTITER=%1
set MdlDir=%2
set BW=%3
set Q_ROOT=rpc://speech-data:6673
set QEXE=\\speechstore5\q
set PATH=\\speechstore5\q;%PATH%
set PATH=\\speechstore5\userdata\kaishengy\bin\DLLS;%PATH%
\\speechstore5\transient\kaishengy\bin\binluapr13\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=%1 MdlDir=%2 bw=%3 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$

View file

@@ -1,15 +0,0 @@
# this reproduces the uni-directional model performance
# starting from the best system so far: increase the projection layer dimension and use more forward observations
# reps23mb100fw6
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.05]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6
for %i in (0,1) do q sub -J testbm%is23mb100fw6conditionallstmp400n400lr005 -x \\speechstore5\userdata\kaishengy\exp\lts\exes\job.s23mb100.fw6.bat 34 \\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6 %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=-1 command=LSTMTest Iter=8 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6 bw=1 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
for %i in (0,1) do q sub -J s23bw%i -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s23.bat 35 \\speechstore5\transient\kaishengy\exp\lts\result\reps23mb100fw6 %i
# small scale
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\lstm.2streams.lw7.conditional.mb100.fw6.config DeviceNumber=-1 LSTM=[SGD=[learningRatesPerSample=0.05]] ExpDir=d:\exp\lts\result\reps23mb100fw6conditionallstmp400n400lr005 LSTM=[SGD=[gradientcheck=true]] LSTM=[SGD=[unittest=true]]

View file

@@ -1,31 +0,0 @@
# the following uses one more layer, i.e., two LSTM layers in total, to model directional predictions
# --------- training -------------
# s30jointconditionalbilstmn300n300n300
#local
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reps30bilstm LSTM=[SGD=[gradientcheck=true]]
C:\dev\cntk5\x64\Release\CNTK.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\temp\reps30bilstm
C:\dev\cntk5\x64\Release\CNTK.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.config deviceNumber=-1 command=LSTMTest Iter=39 MdlDir=d:\temp\reps30bilstm bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$ command=LSTMTest
# reps30bilstm
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps30bilstm
# local with NDL
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\bilstm.ndl.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reps30bilstmndl LSTM=[SGD=[gradientcheck=true]]
# ---------- test ----------------
for %i in (0,1) do q sub -J testbm%ibilstm -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s30.bat 35 \\speechstore5\transient\kaishengy\exp\lts\result\reps30bilstm %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=-1 command=LSTMTest Iter=35 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\reps30bilstm bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
#************************************************
# test on the previously trained model
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\bilstm.config DeviceNumber=-1 command=LSTMTest Iter=34 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\s30jointconditionalbilstmn300n300n300 bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[modelPath=$MdlDir$\cntkdebug.dnn.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
for %i in (0,1) do q sub -J oldmodeltestbm%ibilstm -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s30.bat 35 \\speechstore5\transient\kaishengy\exp\lts\result\s30rndjointconditionalbilstmn300n300n300 %i

View file

@@ -1,49 +0,0 @@
# no regularization but a small learning rate
# run multiple passes over the data
# even larger learning rate
# s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=1 LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
for %i in (0, 1, 5) do q sub -J bm%is36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 -x \\speechstore5\userdata\kaishengy\exp\lts\exes\job.s36.bat 46 \\speechstore5\transient\kaishengy\exp\lts\result\s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 %i
# in the local directory
# runs the unit test
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=0 LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=-1 LSTM=[SGD=[gradientcheck=true]] LSTM=[SGD=[unittest=true]]
# in the local directory
# runs gradient checking on CPU
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=c:\dev\cntk5\examplesetups\g2p\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=Auto LSTM=[SGD=[gradientcheck=true]]
# in the local directory
# runs gradient checking on GPU
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 LSTM=[SGD=[gradientcheck=true]]
# the decoder and encoder networks can be on different devices; for example, the encoder network on GPU and the decoder network on CPU
# interesting, didn't notice this before.
# use more data
# gradient check passed
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 LSTM=[SGD=[gradientcheck=true]] LSTM=[epochSize=70] LSTM=[minibatchSize=30] DataDir=d:/data/lts
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=Auto LSTM=[SGD=[learningRatesPerSample=0.01]] LSTM=[SGD=[L2RegWeight=0.0]] LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 LSTM=[epochSize=70] LSTM=[minibatchSize=30] DeviceNumber=0
#test
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=C:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+C:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=0 MdlDir=d:\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$ DataDir=d:/data/lts
--------------------------- full train/test -----------------------------------
# train on full data
# reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2
\\speechstore5\transient\kaishengy\bin\binmay29\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=0
# run locally
C:\dev\cntk5\x64\Release\CNTK.exe configFile=c:\dev\cntk5\ExampleSetups\G2P\setups\global.lstm.config+c:\dev\cntk5\ExampleSetups\G2P\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config LSTM=[SGD=[numMBsToShowResult=100]] ExpDir=d:\temp\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 DeviceNumber=0
# test
for %i in (0,1) do q sub -J s36bw%i -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s36.bat 43 \\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=46 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\reprs36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 bw=1 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
-------------------- test on the old models to make sure that the decoder and forward pass are right --------------
for %i in (0,1) do q sub -J olds36bw%i -x \\speechstore5\userdata\kaishengy\exp\lts\exes\jobs.s36.bat 43 \\speechstore5\transient\kaishengy\exp\lts\result\s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 %i
\\speechstore5\transient\kaishengy\bin\binluapr20\cn.exe configFile=\\speechstore5\userdata\kaishengy\exp\lts\setups\global.lstm.config+\\speechstore5\userdata\kaishengy\exp\lts\setups\s2s.mpd.rnd.hiddenstate.2nets.500.100mb.2layers.config DeviceNumber=-1 command=LSTMTest Iter=43 MdlDir=\\speechstore5\transient\kaishengy\exp\lts\result\s36noregrnds2sencoderh500c500decoderh500c500mb100mpdlr01layers2 bw=0 LSTMTest=[beamWidth=$bw$] LSTMTest=[encoderModelPath=$MdlDir$\cntkdebug.dnn.encoder.$Iter$] LSTMTest=[decoderModelPath=$MdlDir$\cntkdebug.dnn.decoder.$Iter$] ExpDir=$MdlDir$\test_bw$bw$_iter$Iter$
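The gradientcheck=true / unittest=true runs above validate the analytic gradients numerically. The idea, as a generic finite-difference sketch (illustration only, not CNTK's implementation):

import numpy as np

def gradient_check(f, grad_f, x, eps=1e-5, tol=1e-4):
    # compare the analytic gradient against central finite differences
    g = grad_f(x)
    g_num = np.zeros_like(x)
    for i in range(x.size):
        d = np.zeros_like(x)
        d.flat[i] = eps
        g_num.flat[i] = (f(x + d) - f(x - d)) / (2 * eps)
    rel_err = np.abs(g - g_num) / np.maximum(np.abs(g) + np.abs(g_num), 1e-12)
    return rel_err.max() < tol

# e.g., the gradient of sum(x^2) is 2x
x = np.random.randn(5)
assert gradient_check(lambda v: (v ** 2).sum(), lambda v: 2 * v, x)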

View file

@@ -1,899 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# during learning, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
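# presumably, with nbruttsineachrecurrentiter=100 parallel sequences (set in the
# readers below), 1000 samples per minibatch corresponds to roughly 10 time steps
# per sequence per update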
# need to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=195:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:87
lookupTableOrderSizes=1:3
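# note: the first layer size 195 = 108 + 87, the sum of the two stream sizes;
# the 87 presumably comes from a 29-letter vocabulary times the lookup table order
# of 3 for the letter stream (lookupTableOrderSizes=1:3, wordContext=0:1:2)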
rnnType=JOINTCONDITIONALBILSTMSTREAMS
# rnnType=UNIDIRECTIONALLSTMWITHPASTPREDICTION
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has not improved, fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
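# net effect of the settings above: the learning rate is halved only when the
# validation criterion degrades, and it is never raised (the increase threshold is
# effectively unreachable, and a factor of 1.0 would leave the rate unchanged anyway)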
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=false
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
# change the ordering of test sentences
LSTMTest2=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s30.02.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s30.02.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
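The LSTMTest sections above decode with action=beamSearch, keeping the beamWidth best partial hypotheses at each step. A generic sketch of that idea over per-step token log-probabilities (illustration only, not the CNTK decoder; the toy step function below is hypothetical):

import math

def beam_search(step_logprobs, beam_width, eos, max_len=10):
    # step_logprobs(prefix) -> dict mapping next token to its log-probability
    beams = [((), 0.0)]            # (token prefix, cumulative log-prob)
    finished = []
    for _ in range(max_len):
        candidates = []
        for prefix, lp in beams:
            for tok, tok_lp in step_logprobs(prefix).items():
                hyp = (prefix + (tok,), lp + tok_lp)
                (finished if tok == eos else candidates).append(hyp)
        if not candidates:
            break
        # prune to the beam_width best partial hypotheses
        beams = sorted(candidates, key=lambda h: h[1], reverse=True)[:beam_width]
    return max(finished + beams, key=lambda h: h[1])

# toy usage: a model that always prefers 'a' but may stop
print(beam_search(lambda p: {'a': math.log(0.6), '</s>': math.log(0.4)}, 2, '</s>'))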

View file

@@ -1,899 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# during learning, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
# need to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=195:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:87
lookupTableOrderSizes=1:3
rnnType=JOINTCONDITIONALBILSTMSTREAMS
# rnnType=UNIDIRECTIONALLSTMWITHPASTPREDICTION
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has not improved, fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
# change the ordering of test sentences
LSTMTest2=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to train in a minibatch
minibatchSize=1
# need to be small since models are updated for each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s30.02.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s30.02.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]

View file

@@ -1,872 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size of a minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# during learning, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
# need to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# NDLNetworkBuilder is used in this setup (the SimpleNetworkBuilder section is omitted)
NDLNetworkBuilder=[
networkDescription=$NdlDir$\lstm.ndl
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has not improved, fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative number
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=29
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if having labelDim, this is for output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
# ioNodeNames=delayedTargetStream
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=29
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=29
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
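Both test commands run action=beamSearch with beamWidth=1, i.e. greedy decoding over candidate symbols proposed through the isproposal/proposalSymbolList section. A rough Python sketch of the generic beam loop this configures (step is a hypothetical scorer returning per-symbol log-probabilities; this illustrates the search, not the CNTK decoder):

import heapq

def beam_search(step, start, end, beam_width=1, max_len=100):
    # step(prefix) -> list of (symbol, logprob)
    beams = [(0.0, [start])]
    for _ in range(max_len):
        candidates = []
        for score, prefix in beams:
            if prefix[-1] == end:                 # finished hypotheses survive as-is
                candidates.append((score, prefix))
                continue
            for sym, lp in step(prefix):
                candidates.append((score + lp, prefix + [sym]))
        beams = heapq.nlargest(beam_width, candidates, key=lambda c: c[0])
        if all(p[-1] == end for _, p in beams):
            break
    return beams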
# change the ordering of test sentences
LSTMTest2=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s30.02.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=87
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s30.02.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]
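Throughout these readers, nbruttsineachrecurrentiter sets how many utterances are laid out in parallel in each recurrent minibatch (100 for training, 1 for beam-search testing), while minibatchSize bounds the number of samples. A toy sketch of that packing, padding each group to its longest sequence (illustrative only; the LUSequenceReader internals differ):

def pack_minibatch(utterances, nbrutts=100, pad="</s>"):
    # group utterances into parallel streams and pad to equal length
    batches = []
    for i in range(0, len(utterances), nbrutts):
        group = utterances[i:i + nbrutts]
        T = max(len(u) for u in group)
        batches.append([u + [pad] * (T - len(u)) for u in group])
    return batches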

View file

@@ -1,12 +0,0 @@
#WorkDir=//speechstore5/transient/kaishengy/data/lts/Data/CNTK
WorkDir=d:/exp/lts
DataDir=d:/data/lts
#DataDir=d:/data/ltsdbg
NdlDir=c:/dev/cntk5/ExampleSetups/G2P/setups
PredictionModelFeatureDir=\\speechstore5\transient\kaishengy\exp\lts\result\expbilstmce300n\s4
ExpDir=\\speechstore5\transient\kaishengy\exp\lts\result\explstm
OutDir=$ExpDir$
LogDir=$ExpDir$\log
DeviceNumber=0
MdlDir=$ExpDir$
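These definitions are spliced into the configs through $Name$ references, and a value can itself contain a reference (OutDir=$ExpDir$). A tiny Python resolver approximating that substitution (an illustration, not the actual CNTK config parser):

import re

def resolve(vars_):
    # repeatedly expand $Name$ references until every value is literal
    pattern = re.compile(r"\$(\w+)\$")
    changed = True
    while changed:
        changed = False
        for key, val in vars_.items():
            new = pattern.sub(lambda m: vars_[m.group(1)], val)
            if new != val:
                vars_[key], changed = new, True
    return vars_

# e.g. resolve({"ExpDir": r"d:\exp\lts", "OutDir": "$ExpDir$", "LogDir": r"$ExpDir$\log"})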

View file

@@ -1,680 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# In the learning process, we split an input sequence into a vector of subsequences of size T_bptt.
minibatchSize=1000
# needs to be small since models are updated after each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=282:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:174
lookupTableOrderSizes=1:6
rnnType=TRANSDUCER
lookupTableOrder=6
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
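With wordContext=0:1:2:3:4:5 and lookupTableOrder=6, every position in the letter stream sees a 6-symbol window, so a 29-entry letter vocabulary yields an input width of 29 * 6 = 174, which matches streamSizes=108:174 above and dim=174 in the reader sections below. A sketch of building such a stacked one-hot window (illustrative, not the reader implementation):

def context_window(ids, vocab_size=29, offsets=(0, 1, 2, 3, 4, 5)):
    # stack one-hot vectors of ids[t+k] for each context offset k
    T, width = len(ids), vocab_size * len(offsets)
    frames = []
    for t in range(T):
        vec = [0.0] * width
        for j, k in enumerate(offsets):
            if 0 <= t + k < T:                    # out-of-range positions stay zero
                vec[j * vocab_size + ids[t + k]] = 1.0
        frames.append(vec)
    return frames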
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has no improvement, then fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]

View file

@@ -1,680 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM:LSTMTest
type=double
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=train
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# In the learning process, we split an input sequence into a vector of subsequences of size T_bptt.
minibatchSize=1000
# needs to be small since models are updated after each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
epochSize=486085
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
# NDLNetworkBuilder=[
# networkDescription=$NdlDir$\lstmNDL.txt
# ]
SimpleNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=282:50:300:300:108
# the letter stream doesn't support context-dependent inputs
streamSizes=108:174
lookupTableOrderSizes=1:6
rnnType=TRANSDUCER
lookupTableOrder=6
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.007
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
# gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has no improvement, then fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.train_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.train.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
cvReader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s26.01.dev_without_oovs
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.validate.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set output files path
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=beamSearch
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated after each minibatch
traceLevel=1
deviceId=-1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=outputs
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
reader=[
# reader to use
readerType=LUSequenceReader
ioNodeNames=delayedTargetStream:letterInForward
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\ltr.map
file=$DataDir$\s01.01.test_letters
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2:3:4:5
randomize=None
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
ltrForward=[
dim=174
]
#labels sections
labelInForward=[
dim=1
usewordmap=true
# if labelDim is given, it specifies the output label
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\ltr.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
delayedTargetStream=[
# this stream is used for training phone LM
unk="<unk>"
wordmap=$DataDir$\phn.map
file=$DataDir$\s6.test.phone
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=None
TestEncodingForDecoding=false
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
featureDelayedTarget=[
dim=108
]
labelIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsbwd.txt
labelType=Category
beginSequence="OBOS"
#wildcard match
endSequence="OEOS"
usewordmap=true
# is a node for proposal generation
isproposal=true
proposalSymbolList=$DataDir$\phn.list
# input word list
token=$DataDir$\phn.list
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="OBOS"
endSequence="OEOS"
# output token list
token=$DataDir$\phn.list
labelMappingFile=$ExpDir$\sentenceLabelsbwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\phn.list
]
]
]

View file

@@ -1,186 +0,0 @@
load=ndlMacroDefine
run=ndlBiLSTMCreateNetwork
ndlMacroDefine=[
# Macro definitions
DelayNode(x)
{
D=Delay(x, delayInput=Dout, delayTime=1)
}
Lookup(x, indim, outdim)
[
E = Parameter(outdim, indim)
Lookup=Times(E, x)
]
MeanVarNorm(x)
{
xMean = Mean(x)
xStdDev = InvStdDev(x)
xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
}
LogPrior(labels)
{
Prior=Mean(labels)
LogPrior=Log(Prior)
}
LSTMComponent(inputDim, outputDim, inputVal)
{
Wxo = Parameter(outputDim, inputDim)
Wxi = Parameter(outputDim, inputDim)
Wxf = Parameter(outputDim, inputDim)
Wxc = Parameter(outputDim, inputDim)
bo = Parameter(outputDim, init=fixedvalue, value=-1.0)
bc = Parameter(outputDim, init=fixedvalue, value=0.0)
bi = Parameter(outputDim, init=fixedvalue, value=-1.0)
bf = Parameter(outputDim, init=fixedvalue, value=-1.0)
Whi = Parameter(outputDim, outputDim)
Wci = Parameter(outputDim)
Whf = Parameter(outputDim, outputDim)
Wcf = Parameter(outputDim)
Who = Parameter(outputDim, outputDim)
Wco = Parameter(outputDim)
Whc = Parameter(outputDim, outputDim)
delayHI = Delay(outputDim, output, delayTime=1)
delayHF = Delay(outputDim, output, delayTime=1)
delayHO = Delay(outputDim, output, delayTime=1)
delayHC = Delay(outputDim, output, delayTime=1)
delayCI = Delay(outputDim, ct, delayTime=1)
delayCF = Delay(outputDim, ct, delayTime=1)
delayCC = Delay(outputDim, ct, delayTime=1)
WxiInput = Times(Wxi, inputVal)
WhidelayHI = Times(Whi, delayHI)
WcidelayCI = DiagTimes(Wci, delayCI)
it = Sigmoid (Plus ( Plus (Plus (WxiInput, bi), WhidelayHI), WcidelayCI))
WxcInput = Times(Wxc, inputVal)
WhcdelayHC = Times(Whc, delayHC)
bit = ElementTimes(it, Tanh( Plus(WxcInput, Plus(WhcdelayHC, bc))))
Wxfinput = Times(Wxf, inputVal)
WhfdelayHF = Times(Whf, delayHF)
WcfdelayCF = DiagTimes(Wcf, delayCF)
ft = Sigmoid( Plus (Plus (Plus(Wxfinput, bf), WhfdelayHF), WcfdelayCF))
bft = ElementTimes(ft, delayCC)
ct = Plus(bft, bit)
Wxoinput = Times(Wxo, inputVal)
WhodelayHO = Times(Who, delayHO)
Wcoct = DiagTimes(Wco, ct)
ot = Sigmoid( Plus( Plus( Plus(Wxoinput, bo), WhodelayHO), Wcoct))
output = ElementTimes(ot, Tanh(ct))
}
LSTMNodeComponent(outputDim, colDim1, colDim2, inputVal)
{
inputGate = Parameter(outputDim, colDim1)
forgetGate = Parameter(outputDim, colDim1)
outputGate = Parameter(outputDim, colDim1)
memoryCell = Parameter(outputDim, colDim2)
LSTMNodeComponent = LSTM(inputVal, inputGate, forgetGate, outputGate, memoryCell)
}
]
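LSTMComponent above writes out a peephole LSTM cell: the DiagTimes terms are elementwise peepholes from the previous (for the input and forget gates) or current (for the output gate) cell state, and the Delay nodes feed back the output and cell state from the prior step. The same step in NumPy, mirroring the macro's equations (a sketch; parameter creation and initialization omitted):

import numpy as np

def lstm_step(x, h_prev, c_prev, p):
    # p maps names to arrays: Wx*/Wh* matrices, b* biases, Wc* peephole vectors
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    i = sigmoid(p["Wxi"] @ x + p["bi"] + p["Whi"] @ h_prev + p["Wci"] * c_prev)
    f = sigmoid(p["Wxf"] @ x + p["bf"] + p["Whf"] @ h_prev + p["Wcf"] * c_prev)
    c = f * c_prev + i * np.tanh(p["Wxc"] @ x + p["Whc"] @ h_prev + p["bc"])
    o = sigmoid(p["Wxo"] @ x + p["bo"] + p["Who"] @ h_prev + p["Wco"] * c)
    return o * np.tanh(c), c                      # (h_t, c_t)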
ndlCreateNetwork=[
#define basic i/o
featDim=72
labelDim=183
hiddenDim=1024
features=Input(featDim, tag=feature)
labels=Input(labelDim, tag=label)
# define network
featNorm = MeanVarNorm(features)
LSTMoutput = LSTMComponent(featDim, hiddenDim, featNorm)
W1 = Parameter(labelDim, hiddenDim)
LSTMoutputW1 = Times(W1, LSTMoutput)
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Criteria)
Err = ErrorPrediction(labels,LSTMoutputW1,tag=Eval)
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW1,logPrior,tag=Output)
]
ndlBiLSTMCreateNetwork=[
#define basic i/o
ltrDim=29
labelDim=108
embDim=50
hiddenDim=300
ltrForward=Input(ltrDim, tag=feature)
featureDelayedTarget=Input(labelDim, tag=feature)
labels=Input(labelDim, tag=label)
# projection
Wxo = Parameter(embDim, ltrDim)
Wxi = Parameter(embDim, labelDim)
ltrEmb = Times(Wxo, ltrForward)
prnEmb = Times(Wxi, featureDelayedTarget)
# first layer of LSTM
# 50 + 300 + 2
lstmCol1L1=352
# 50 + 300 + 1
lstmCol2L1=351
ltrLSTM = LSTMNodeComponent(hiddenDim, lstmCol1L1, lstmCol2L1, ltrEmb)
prnLSTM = LSTMNodeComponent(hiddenDim, lstmCol1L1, lstmCol2L1, prnEmb)
#backward direction
ltrBackward = TimeReverse(ltrLSTM)
# depth 2
forwardParallelLayer2 = Parallel(ltrLSTM, prnLSTM)
# 600 + 300 + 2
lstmCol3L2=902
# 600 + 300 + 1
lstmCol4L2=901
forwardLayer2 = LSTMNodeComponent(hiddenDim, lstmCol3L2, lstmCol4L2, forwardParallelLayer2)
# 300 + 300 + 2
lstmCol1L2=602
# 300 + 300 + 1
lstmCol2L2=601
backwardLayer2 = LSTMNodeComponent(hiddenDim, lstmCol1L2, lstmCol2L2, ltrBackward)
# depth 3
backwardLayer3 = TimeReverse(backwardLayer2)
depth3activity = Parallel(forwardLayer2, backwardLayer3)
# 600 + 300 + 2
lstmCol3L3=902
# 600 + 300 + 1
lstmCol4L3=901
LSTMoutput = LSTMNodeComponent(hiddenDim, lstmCol3L3, lstmCol4L3, depth3activity)
W1 = Parameter(labelDim, hiddenDim)
LSTMoutputW1 = Times(W1, LSTMoutput)
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Criteria)
Err = CrossEntropyWithSoftmax(labels, LSTMoutputW1,tag=Eval)
outputs = Softmax(LSTMoutputW1, tag=Output)
]
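ndlBiLSTMCreateNetwork composes directions by running an LSTM over a TimeReverse of the sequence, stacking streams with Parallel, and reversing back at the next depth. The pattern, schematically (run_fwd and run_bwd are hypothetical per-direction recurrences over a list of per-step feature vectors):

def bidirectional_layer(run_fwd, run_bwd, xs):
    fwd = run_fwd(xs)                        # forward pass over t = 0..T-1
    bwd = run_bwd(xs[::-1])[::-1]            # TimeReverse -> LSTM -> TimeReverse
    return [f + b for f, b in zip(fwd, bwd)] # Parallel: concatenate the streams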

View file

@@ -1,800 +0,0 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$LogDir$\ATIS\log
command=LSTM
type=float
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=trainEncoderDecoder
makeMode=true
# recurrent networks are trained with minibatches
# the minibatch size, for example in a language model, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# In the learning process, we split an input sequence into a vector of subsequences of size T_bptt.
minibatchSize=1000
# needs to be small since models are updated after each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
# for each epoch, maximum number of input words is set below
epochSize=486085
EncoderNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=84:500:500:500
# the letter stream doesn't support context-dependent inputs
streamSizes=84
lookupTableOrderSizes=3
rnnType=LSTMENCODER
lookupTableOrder=3
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
DecoderNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
# the second layer must have the same dimension as the first layer
# because 40 is matched to the output-layer dimension of the encoder network
layerSizes=40:500:500:500:40
recurrentLayer=2:3
# the letter stream doesn't support context-dependent inputs
streamSizes=40
lookupTableOrderSizes=1
rnnType=LSTM
lookupTableOrder=1
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.01
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# use hidden states for encoder-decoder training (a sketch of the handoff follows the SGD block)
useHiddenStates=true
encoderNodes="LSTM0:LSTM2"
decoderNodes="LSTM0:LSTM2"
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for informational purposes: number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
#gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
# if validation shows that the model has no improvement, then fall back to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
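With useHiddenStates=true, trainEncoderDecoder carries the hidden and cell states of the listed encoderNodes ("LSTM0:LSTM2") into the matching decoderNodes at the sequence boundary; this is also why the decoder readers below set ignoresentencebegintag=true. A schematic sketch of the handoff (enc_step and dec_step are hypothetical stand-ins):

def encode_decode(enc_step, dec_step, src, tgt_in, state0):
    state = state0
    for x in src:                         # encoder consumes the letter sequence
        state = enc_step(x, state)
    outputs = []
    for y in tgt_in:                      # decoder starts from the encoder state,
        out, state = dec_step(y, state)   # not from a fresh sentence-begin state
        outputs.append(out)
    return outputs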
encoderReader=[
# reader to use for the encoder
# this is letter-only observations
readerType=LUSequenceReader
ioNodeNames=letterInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
dataMultiPass=true
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.encoder.input.map
file=$DataDir$\s31.s2s.encoder.train.txt
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2
randomize=Auto
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=84
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.input.txt
labelType=Category
beginSequence="BOS"
usewordmap=true
# input word list
token=$DataDir$\s31.encoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="<EOS>"
# output token list
token=$DataDir$\s31.decoder.input.lst
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
decoderReader=[
# reader to use for the decoder
# this is phone-only observations
readerType=LUSequenceReader
ioNodeNames=phnInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
dataMultiPass=true
### set to true so that it can use state activities from an encoder network
ignoresentencebegintag=true
phnInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.decoder.input.map
file=$DataDir$\s31.s2s.decoder.train.txt
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0
randomize=Auto
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=40
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.output.txt
labelType=Category
beginSequence="<EOS>"
usewordmap=true
# input word list
token=$DataDir$\s31.decoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
# output token list
token=$DataDir$\s31.decoder.input.lst
endSequence="<EOS>"
labelMappingFile=$ExpDir$\sentencePhnfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
encoderCVReader=[
# reader to use for the encoder
# this is letter-only observations
readerType=LUSequenceReader
ioNodeNames=letterInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.encoder.input.map
file=$DataDir$\s31.s2s.encoder.validation.txt
#typedef argvector<size_t> intargvector which is not compatible with negative numbers
wordContext=0:1:2
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=84
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.input.txt
labelType=Category
beginSequence="BOS"
usewordmap=true
# input word list
token=$DataDir$\s31.encoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="<EOS>"
# output token list
token=$DataDir$\s31.decoder.input.lst
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
decoderCVReader=[
# reader to use for the decoder
# these are phoneme-only observations
readerType=LUSequenceReader
ioNodeNames=phnInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
### set to true so that it can use state activities from an encoder network
ignoresentencebegintag=true
phnInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.decoder.input.map
file=$DataDir$\s31.s2s.decoder.validation.txt
#typedef argvector<size_t> intargvector, which is not compatible with negative numbers
wordContext=0
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=100
# this node must exist in the network description
features=[
dim=40
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.output.txt
labelType=Category
beginSequence="<EOS>"
usewordmap=true
# input word list
token=$DataDir$\s31.decoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
# output token list
token=$DataDir$\s31.decoder.input.lst
endSequence="<EOS>"
labelMappingFile=$ExpDir$\sentencePhnfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
]
]
# set the output file paths
# set the nodes for outputs
# for LSTM
# accuracy: 98.16%; precision: 94.37%; recall: 94.57%; FB1: 94.47
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=testEncoderDecoder
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=1
deviceId=$DeviceNumber$
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$MdlDir$\cntkdebug.dnn
# this is the node to evaluate scores
evalNodeNames=PosteriorProb
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
minibatchSize=1000
encoderNodes="LSTM0:LSTM2"
decoderNodes="LSTM0:LSTM2"
encoderReader=[
# reader to use for the encoder
# these are letter-only observations
readerType=LUSequenceReader
ioNodeNames=letterInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
letterInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.encoder.input.map
file=$DataDir$\s31.s2s.encoder.test.txt
#typedef argvector<size_t> intargvector, which is not compatible with negative numbers
wordContext=0:1:2
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
features=[
dim=84
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.input.txt
labelType=Category
beginSequence="BOS"
usewordmap=true
# input word list
token=$DataDir$\s31.encoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
beginSequence="<EOS>"
# output token list
token=$DataDir$\s31.decoder.input.lst
labelMappingFile=$ExpDir$\sentenceLabelsfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
decoderReader=[
# reader to use for the decoder
# these are phoneme-only observations
readerType=LUSequenceReader
ioNodeNames=phnInForward
#### write definition
wfile=$ExpDir$\ltrsequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=10000
equalLength=false
### set to true so that it can use state activities from an encoder network
ignoresentencebegintag=true
phnInForward=[
unk="<unk>"
wordmap=$DataDir$\s31.decoder.input.map
file=$DataDir$\s31.s2s.decoder.test.txt
TestEncodingForDecoding=false
#typedef argvector<size_t> intargvector, which is not compatible with negative numbers
wordContext=0
randomize=None
inputLabel=labelsIn
outputLabel=labels
# number of utterances to be allocated for each minibatch
nbruttsineachrecurrentiter=1
# this node must exist in the network description
features=[
dim=40
]
#labels sections
# this name must exist in the network description
labelsIn=[
dim=1
usewordmap=true
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabelsfwd.output.txt
labelType=Category
beginSequence="<EOS>"
usewordmap=true
isproposal=true
proposalSymbolList=$DataDir$\s31.decoder.input.proposal.lst
# input word list
token=$DataDir$\s31.decoder.input.lst
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
# this name must exist in the network description
labels=[
dim=1
labelType=Category
# output token list
token=$DataDir$\s31.decoder.input.lst
endSequence="<EOS>"
labelMappingFile=$ExpDir$\sentencePhnfwd.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
mapping=[
sectionType=labelMapping
]
category=[
sectionType=categoryLabels
]
]
]
]
writer=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\s31.decoder.input.lst
]
]
]
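With beamWidth=1 the LSTMTest section above performs greedy decoding: the encoder consumes the letter sequence, and the decoder, seeded with the <EOS> symbol, emits at most maxNbrTokens phonemes. A minimal Python sketch of that loop, where encode and decoder_step are hypothetical stand-ins for the CNTK evaluation engine, not real APIs:

# Greedy (beam width 1) decoding loop, as configured by beamWidth=1 and
# maxNbrTokens=10 above. encode and decoder_step are illustrative
# placeholders for the encoder/decoder network evaluations.
def greedy_decode(encode, decoder_step, letters, eos_id, max_tokens=10):
    state = encode(letters)            # encoder state initializes the decoder
    token, output = eos_id, []         # decoding starts from the <EOS> symbol
    for _ in range(max_tokens):        # maxNbrTokens caps the hypothesis length
        scores, state = decoder_step(token, state)
        token = max(range(len(scores)), key=scores.__getitem__)  # best next token
        if token == eos_id:            # stop once <EOS> is emitted again
            break
        output.append(token)
    return output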

View file

@ -1,3 +0,0 @@
# News comments setup
# steps are under the steps directory

View file

@ -1,92 +0,0 @@
# python scripts
'''
add silence symbols to the beginning and end of a sentence;
the silence begin/end symbol is </s>
example:
add_silence_ending('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt')
'''
def add_silence_ending(fn, fnout):
    outfile = open(fnout, 'wt')
    with open(fn) as infile:
        for line in infile:
            line = line.strip()
            newline = '</s> ' + line + ' </s>'
            outfile.write(newline + '\n')
    outfile.close()

'''
create validation (first 10% of lines), test (last 10%) and training (remaining) splits
example:
split_data_into_train_valid_test('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.train.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.valid.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.test.txt')
'''
def split_data_into_train_valid_test(fn, fntrain, fnvalid, fntest):
    outfile_train = open(fntrain, 'wt')
    outfile_valid = open(fnvalid, 'wt')
    outfile_test = open(fntest, 'wt')
    # first count the total number of lines
    totalln = 0
    with open(fn) as infile:
        for ln in infile:
            totalln += 1
    linenbr = 0
    with open(fn) as infile:
        for line in infile:
            if linenbr < 0.1 * totalln:
                outfile_valid.write(line)
            elif linenbr > 0.9 * totalln:
                outfile_test.write(line)
            else:
                outfile_train.write(line)
            linenbr += 1
    outfile_train.close()
    outfile_test.close()
    outfile_valid.close()

'''
convert to an ASCII file (Python 2: decode each line from UTF-8, then drop non-ASCII characters)
example:
util.convert2ascii('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.ascii.txt')
'''
def convert2ascii(fn, fnout):
    of = open(fnout, 'wt')
    with open(fn) as infile:
        for line in infile:
            line = line.strip()
            if len(line) > 0:
                lineu = line.decode('utf8')
                of.write(lineu.encode("ASCII", 'ignore'))
                of.write('\n')
    of.close()

'''
remove the leading news-agency tag (everything up to ') -' or ') ') from each line
example:
util.removeagency('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.ascii.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.ascii.noagency.txt')
'''
def removeagency(fn, fnout):
    of = open(fnout, 'wt')
    with open(fn) as infile:
        for line in infile:
            line = line.strip()
            agency_index = 0
            if ') -' in line:
                agency_index = line.find(') -') + 4
            if agency_index == 0 and ') ' in line:
                agency_index = line.find(') ') + 3
            nline = line[agency_index:]
            of.write(nline)
            of.write('\n')
    of.close()
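The helpers above are meant to be chained when preparing the news-comments corpus. A minimal usage sketch, assuming the module is saved as util.py (the docstrings invoke it as util.*); the short file names below are placeholders for the full share paths shown in the examples:

import util

raw_fn = 'comments.txt'            # placeholder paths; the docstring examples
cntk_fn = 'comments.cntk.txt'      # use the full \\speechstore5 share paths

util.add_silence_ending(raw_fn, cntk_fn)   # wrap every line in </s> ... </s>
util.split_data_into_train_valid_test(
    cntk_fn,
    'comments.cntk.train.txt',             # remaining ~80% of the lines
    'comments.cntk.valid.txt',             # first 10% of the lines
    'comments.cntk.test.txt')              # last 10% of the lines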

View file

@ -1,5 +0,0 @@
ExpDir=\\speechstore5\transient\kaishengy\exp\News\steps1\
ConfigDir=D:\dev\cntkcodeplex\ExampleSetups\News\setups\
#DataDir=\\speechstore5\transient\kaishengy\data\newscomments\2015\03-23
DataDir=d:\data\newscomments\2015\03-23
DEVICE=-1

View file

@ -1,28 +0,0 @@
#WorkDir=//speechstore5/transient/kaishengy/data/lts/Data/CNTK
WorkDir=d:/exp/smt
DataDir=d:\data\newscomments\2015\03-23
#DataDir=//speechstore5/transient/kaishengy/data/newscomments/2015/03-23
ExpDir=d:\exp\smt\result\expcsen
OutDir=$ExpDir$
LogDir=$ExpDir$\log
DeviceNumber=Auto
MdlDir=$ExpDir$
# source side info
SRCFEATDIM=160001
SRCBEGINSYMBOL=BOS
# target side info
TGTBEGINSYMBOL=<EOS>
TGTENDSYMBOL=<EOS>
#TGTFEATDIM=82922
TGTFEATDIM=80002
# dimensionality
VLSTMDIM=100
VEMBDIM=50
TRAINSRCFILE=news.cntk.train.txt
TRAINTGTFILE=comments.cntk.train.txt
VALIDATESRCFILE=news.cntk.valid.txt
VALIDATETGTFILE=comments.cntk.valid.txt
TESTSRCFILE=news.cntk.test.txt
TESTTGTFILE=comments.cntk.test.txt
VOCABSIZE=2264
CLASSSIZE=100

View file

@ -1,457 +0,0 @@
# configuration file for class-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
command=writeWordAndClassInfo:train
#command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NBR=2
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$ExpFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLSTM
SimpleNetworkBuilder=[
trainingCriterion=classcrossentropywithsoftmax
evalCriterion=classcrossentropywithsoftmax
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readers=textIn:binaryIn
# readers=textIn
randomize=None
nbruttsineachrecurrentiter=$NBR$
textIn=[
readerType=LMSequenceReader
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# feature dimension (zero when the sentence stream carries no features)
dim=$VOCABSIZE$
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
binaryIn=[
readerType=UCIFastReader
onelineperfile=true
binaryFeature=[
onelineperfile=true
# onelineperfile: each line has all data for a file
dim=20
start=0
file=$DataFolder$\train.lda.dat
]
]
]
cvReader=[
readers=textIn:binaryIn
# readers=textIn
randomize=None
nbruttsineachrecurrentiter=6
textIn=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
binaryIn=[
readerType=UCIFastReader
onelineperfile=true
binaryFeature=[
onelineperfile=true
# onelineperfile: each line has all data for a file
dim=20
start=0
file=$DataFolder$\validate.lda.dat
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeClassBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
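The config above trains with trainingCriterion=classcrossentropywithsoftmax: writeWordAndClass first assigns each of the $VOCABSIZE$ words to one of $CLASSSIZE$ classes, and the output softmax is then factored as P(w|h) = P(class(w)|h) * P(w|class(w),h), so normalization runs over one class plus its members rather than the whole vocabulary. A rough numpy sketch of that factorization; all names and shapes are illustrative, not CNTK internals:

import numpy as np

def class_softmax_logprob(h, W_cls, W_word, word2cls, cls_members, w):
    # class part: softmax over the nbrClass classes
    cls_scores = W_cls @ h
    cls_logp = cls_scores - np.logaddexp.reduce(cls_scores)
    # word part: softmax only over the words sharing w's class
    c = word2cls[w]
    members = cls_members[c]                  # list of word ids in class c
    word_scores = W_word[members] @ h
    word_logp = word_scores - np.logaddexp.reduce(word_scores)
    return cls_logp[c] + word_logp[members.index(w)]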

View file

@ -1,424 +0,0 @@
# configuration file for class-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
#command=writeWordAndClassInfo
command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$ExpFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
SimpleNetworkBuilder=[
trainingCriterion=classcrossentropywithsoftmax
evalCriterion=classcrossentropywithsoftmax
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=10
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
cvReader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeClassBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]

View file

@ -1,424 +0,0 @@
# configuration file for class-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
#command=writeWordAndClassInfo
command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$ExpFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
SimpleNetworkBuilder=[
trainingCriterion=classcrossentropywithsoftmax
evalCriterion=classcrossentropywithsoftmax
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=10
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
cvReader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=0
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeClassBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
# word class info
wordclass=$ExpFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]

View file

@ -1,421 +0,0 @@
# configuration file for NCE-based RNN training
ExpFolder=$ExpDir$
ConfigFolder=$ConfigDir$
DataFolder=$DataDir$
stderr=$ExpFolder$
numCPUThreads=4
# command=dumpNodeInfo
#command=train
#command=test
command=writeWordAndClassInfo
#command=writeWordAndClassInfo:train:test
#command=train:test
type=double
DEVICEID=$DEVICE$
NOISE=100
RATE=0.1
VOCABSIZE=2263
CLASSSIZE=50
makeMode=true
TRAINFILE=comments.cntk.train.txt
VALIDFILE=comments.cntk.valid.txt
TESTFILE=comments.cntk.test.txt
#number of threads
nthreads=4
writeWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataFolder$\$TRAINFILE$
outputVocabFile=$DataFolder$\vocab.txt
outputWord2Cls=$ExpFolder$\word2cls.txt
outputCls2Index=$ExpFolder$\cls2idx.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
dumpNodeInfo=[
action=dumpnode
modelPath=$ExpFolder$\modelRnnCNTK
#nodeName=W0
printValues=true
]
devtest=[action=devtest]
train=[
action=train
minibatchSize=10
traceLevel=1
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=NCELSTM
SimpleNetworkBuilder=[
trainingCriterion=NoiseContrastiveEstimationNode
evalCriterion=NoiseContrastiveEstimationNode
nodeType=Sigmoid
initValueScale=6.0
layerSizes=$VOCABSIZE$:200:$VOCABSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true;
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
#nbrClass=$CLASSSIZE$
noise_number=$NOISE$
]
# configuration file, base parameters
SGD=[
makeMode=true
learningRatesPerSample=$RATE$
momentumPerMB=0
gradientClippingWithTruncation=true
clippingThresholdPerSample=15.0
maxEpochs=40
unroll=false
numMBsToShowResult=2000
# gradUpdateType=AdaGrad
gradUpdateType=None
modelPath=$ExpFolder$\modelRnnCNTK
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0.001
continueReduce=false
increaseLearnRateIfImproveMoreThan=1000000000
learnRateDecreaseFactor=0.5
learnRateIncreaseFactor=1.382
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0.0
]
reader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=10
# word class info
wordclass=$DataFolder$\vocab.txt
noise_number=$NOISE$
mode=nce
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TRAINFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
cvReader=[
# reader to use
readerType=LMSequenceReader
randomize=None
mode=softmax
# word class info
wordclass=$DataFolder$\vocab.txt
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpFolder$\sequenceSentence.valid.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$VALIDFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
# it should be the same as that in the training set
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
test=[
action=eval
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=1
deviceId=$DEVICEID$
epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=NCELSTM
modelPath=$ExpFolder$\modelRnnCNTK
evalNodeNames=EvalNodeNCEBasedCrossEntrpy
reader=[
# reader to use
readerType=LMSequenceReader
randomize=None
mode=softmax
# word class info
wordclass=$DataFolder$\vocab.txt
#### write definition
wfile=$ExpFolder$\sequenceSentence.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
# wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
# windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataFolder$\$TESTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="</s>"
endSequence="</s>"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpFolder$\sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
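This variant swaps the class-based criterion for NoiseContrastiveEstimationNode (rnnType=NCELSTM): during training (mode=nce) each observed word is contrasted against noise_number sampled words, so the full softmax is never normalized, while evaluation falls back to mode=softmax. A rough sketch of the per-word NCE loss; score_fn and noise_logprob are illustrative stand-ins for the model score and the noise distribution:

import numpy as np

def nce_loss(score_fn, noise_logprob, target, noise_samples, k):
    # posterior that w came from the data rather than the noise distribution:
    # log P(data|w) = s(w) - logaddexp(s(w), log(k) + log q(w))
    def log_posteriors(w):
        s = score_fn(w)
        log_kq = np.log(k) + noise_logprob(w)
        norm = np.logaddexp(s, log_kq)
        return s - norm, log_kq - norm
    loss = -log_posteriors(target)[0]         # observed word should look like data
    for w in noise_samples:                   # k sampled words should look like noise
        loss -= log_posteriors(w)[1]
    return loss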

View file

@ -1,792 +0,0 @@
# configuration file for CNTK encoder-decoder (sequence-to-sequence) training on the news-comments data
stderr=$LogDir$\SMT\log
command=writeEncoderWordAndClassInfo:writeDecoderWordAndClassInfo:LSTM
LSTMDIM=$VLSTMDIM$
EMBDIM=$VEMBDIM$
NBRUTT=1
type=double
writeEncoderWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataDir$\$TRAINSRCFILE$
outputVocabFile=$ExpDir$\vocab.src.txt
outputWord2Cls=$ExpDir$\word2cls.src.txt
outputCls2Index=$ExpDir$\cls2idx.src.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
writeDecoderWordAndClassInfo=[
action=writeWordAndClass
inputFile=$DataDir$\$TRAINTGTFILE$
outputVocabFile=$ExpDir$\vocab.tgt.txt
outputWord2Cls=$ExpDir$\word2cls.tgt.txt
outputCls2Index=$ExpDir$\cls2idx.tgt.txt
vocabSize=$VOCABSIZE$
cutoff=2
nbrClass=$CLASSSIZE$
printValues=true
]
LSTM=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=trainEncoderDecoder
makeMode=true
# recurrent networks are trained with minibatches;
# the minibatch size, for example in language modeling, is the number of input words
# e.g., 6 corresponds to having 6 input words from one sentence
# in the learning process, we split an input sequence into a vector of subsequences of size T_bptt
minibatchSize=1000
# needs to be small since models are updated for each minibatch
traceLevel=1
# CPU is -1
deviceId=$DeviceNumber$
# for each epoch, maximum number of input words is set below
# epochSize=12075604 sentences
# number of words
epochSize=315956058
# half pass of data
# epochSize=6075604
# 1% of training data
# epochSize=3159560
EncoderNetworkBuilder=[
trainingCriterion=crossentropywithsoftmax
evalCriterion=crossentropywithsoftmax
sparseinput=true
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
layerSizes=$VOCABSIZE$:$EMBDIM$:$LSTMDIM$
# this input stream doesn't support context-dependent inputs
streamSizes=$SRCFEATDIM$
lookupTableOrderSizes=1
rnnType=LSTMENCODER
lookupTableOrder=1
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
DecoderNetworkBuilder=[
trainingCriterion=ClassCrossEntropyWithSoftmax
evalCriterion=ClassCrossEntropyWithSoftmax
sparseinput=true
nbrClass=79
# default hidden layer activity
defaultHiddenActivity=0.1
# randomization range
initValueScale=1.6
# first layer, second layer, and output layer size
# the second layer must have the same dimension as the first layer
# because 40 must match the output-layer dimension of the encoder network
layerSizes=$VOCABSIZE$:$EMBDIM$:$LSTMDIM$:$VOCABSIZE$
recurrentLayer=2
# this input stream doesn't support context-dependent inputs
streamSizes=40
lookupTableOrderSizes=1
rnnType=ALIGNMENTSIMILARITYGENERATOR
lookupTableOrder=1
# these are for the class information for class-based language modeling
vocabSize=$VOCABSIZE$
nbrClass=$CLASSSIZE$
addPrior=false
addDropoutNodes=false
applyMeanVarNorm=false
uniformInit=true
]
# configuration file, base parameters
SGD=[
learningRatesPerSample=0.0001
momentumPerMB=0.0
gradientClippingWithTruncation=true
clippingThresholdPerSample=5.0
# use hidden states for encoder-decoder training
useHiddenStates=true
encoderNodes="LSTM0"
decoderNodes="LSTM0"
# maximum number of epochs
maxEpochs=100
# gradientcheck=true
sigFigs=4
# for information purposes: the number of minibatches between progress reports
numMBsToShowResult=1000
# whether to use AdaGrad
#gradUpdateType=AdaGrad
# output model path
modelPath=$ExpDir$\smt.lstm
# if validation shows no improvement, back up to the previously
# estimated model and reduce the learning rate
loadBestModel=true
# settings for Auto Adjust Learning Rate
AutoAdjust=[
# auto learning rate adjustment
autoAdjustLR=adjustafterepoch
reduceLearnRateIfImproveLessThan=0
increaseLearnRateIfImproveMoreThan=1000000000
# how much learning rate is reduced
learnRateDecreaseFactor=0.5
# if continuously improving, the learning rate can be increased by the following factor
learnRateIncreaseFactor=1.0
numMiniBatch4LRSearch=100
numPrevLearnRates=5
numBestSearchEpoch=1
]
dropoutRate=0
]
encoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.src.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.src.bin
#wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough. If known modify this element in config before creating file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$TRAINSRCFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
decoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.tgt.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.tgt.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$TRAINTGTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
encoderCVReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.src.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.src.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$VALIDATESRCFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
decoderCVReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.tgt.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.tgt.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$VALIDATETGTFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
]
LSTMTest=[
# this is the maximum size for the minibatch; since sequence minibatches are really just a single sequence,
# it can be considered the maximum length of a sentence
action=testEncoderDecoder
# corresponds to the number of words/characters to process in a minibatch
minibatchSize=1
# needs to be small since models are updated for each minibatch
traceLevel=1
deviceId=$DeviceNumber$
epochSize=5000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
encoderModelPath=$MdlDir$\smt.lstm.encoder
decoderModelPath=$MdlDir$\smt.lstm.decoder
# this is the node to evaluate scores
evalNodeNames=PosteriorProb
# this is the node to output results
outputNodeNames=outputs
beamWidth=1
maxNbrTokens=10
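# beamWidth=1 amounts to greedy decoding; maxNbrTokens presumably caps the number of tokens
# generated per output sequence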
minibatchSize=1000
encoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.src.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.src.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
file=$DataDir$\$TESTSRCFILE$
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.src.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
decoderReader=[
readerType=LMSequenceReader
randomize=None
nbruttsineachrecurrentiter=$NBRUTT$
# word class info
wordclass=$ExpDir$\vocab.tgt.txt
#### write definition
wfile=$ExpDir$\sequenceSentence.tgt.bin
#wsize - initial size of the file in MB
# if the calculated size would be bigger, that is used instead
wsize=256
unk="<unk>"
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough; if known, modify this element in the config before creating the file
wrecords=1000
#windowSize - number of records we should include in BinaryWriter window
windowSize=$VOCABSIZE$
#file=$DataDir$\$TESTTGTFILE$
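# (no target-side input at test time; the decoder generates the output sequence)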
#additional features sections
#for now store as expanded category data (including label in)
features=[
# sentence has no features, so need to set dimension to zero
dim=0
### write definition
sectionType=data
]
# sequence break table, list indexes into sequence records, so we know when a sequence starts/stops
sequence=[
dim=1
wrecords=2
### write definition
sectionType=data
]
#labels sections
labelIn=[
dim=1
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.txt
labelType=Category
beginSequence="</s>"
endSequence="</s>"
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=11
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=11
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
#labels sections
labels=[
dim=1
labelType=NextWord
beginSequence="O"
endSequence="O"
# vocabulary size
labelDim=$VOCABSIZE$
labelMappingFile=$ExpDir$\sentenceLabels.tgt.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
sectionType=labels
mapping=[
#redefine number of records for this section, since we don't need to save it for each data record
wrecords=3
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=3
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
writer=[
writerType=LMSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$ExpDir$\vocab.tgt.txt
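# decoded hypotheses are written to output.rec.txt; the token file presumably supplies the
# index-to-word mapping for the target vocabulary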
]
]
]


@ -1,71 +0,0 @@
# step 1
# generate LSTM LM on news comments
# -----------------------------
# preparing data
# data is also saved at
# \\speechstore5\transient\kaishengy\data\newscomments\2015\03-23
# -----------------------------
# first add </s> at the beginning and end of each sentence
# go to ../scripts directory
python
>>> import util
>>> util.add_silence_ending('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt')
# the output sentences look like
# </s> god speed gentlemen . </s>
# convert data to ASCII format
>>> util.convert2ascii('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.ascii.txt')
# create test (last 10%), validation (first 10%) data, and train (remaining 80%) data
>>> util.split_data_into_train_valid_test('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.ascii.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.train.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.valid.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/comments.cntk.test.txt')
# prepare source side data
>>> import util
>>> util.add_silence_ending('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.txt')
>>> util.add_silence_ending('d:/data/newscomments/2015/03-23/news.txt', 'd:/data/newscomments/2015/03-23/news.cntk.txt')
# the output sentences look like
# </s> god speed gentlemen . </s>
# convert data to ASCII format
>>> util.convert2ascii('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.ascii.txt')
>>> util.convert2ascii('d:/data/newscomments/2015/03-23/news.cntk.txt', 'd:/data/newscomments/2015/03-23/news.cntk.ascii.txt')
# create test (last 10%), validation (first 10%) data, and train (remaining 80%) data
>>> util.split_data_into_train_valid_test('//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.ascii.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.train.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.valid.txt', '//speechstore5/transient/kaishengy/data/newscomments/2015/03-23/news.cntk.test.txt')
>>> util.split_data_into_train_valid_test('d:/data/newscomments/2015/03-23/news.cntk.ascii.txt', 'd:/data/newscomments/2015/03-23/news.cntk.train.txt', 'd:/data/newscomments/2015/03-23/news.cntk.valid.txt', 'd:/data/newscomments/2015/03-23/news.cntk.test.txt')
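# note: util.py itself is not included in this commit; a minimal sketch of what
# add_silence_ending presumably does, judging from the example output above:
def add_silence_ending(infn, outfn):
    # wrap each sentence in the </s> boundary token expected by the CNTK readers
    with open(infn) as fin, open(outfn, 'w') as fout:
        for line in fin:
            fout.write('</s> ' + line.strip() + ' </s>\n')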
# -----------------------------
# create word cluster
# -----------------------------
# go to the ../steps directory
# use cutoff = 2
mkdir \\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments
D:\dev\cntkcodeplex\x64\Debug\CNTK.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments command=writeWordAndClassInfo
# check $ExpDir$ and $DataFolder$ for outputs
# -----------------------------
# Train LSTM LM using NCE criterion
# -----------------------------
# the vocabulary size is 2263
# locally
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments command=train
# on server
\\speechstore5\userdata\kaishengy\bin\binmay26\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments command=train
# test PPL: 61.31 with 1 LSTM layer of hidden dimension 200
\\speechstore5\userdata\kaishengy\bin\binmay26\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments test=[modelPath=$ExpFolder$\modelRnnCNTK] command=test
# ---- 2 layers of LSTM --------
# locally, with 2 layers of LSTM
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.nce.config.txt ExpDir=d:\exp\news\s1.lstmlm.comments.2layers command=train train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:200:200:$VOCABSIZE$]] DataDir=d:\data\newscomments\2015\03-23
# on server
\\speechstore5\userdata\kaishengy\bin\binmay26\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments.2layers command=train train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:200:200:$VOCABSIZE$]]
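# layerSizes runs input:hidden(s):output, so each additional ":200" adds one more LSTM layer
# (compare the 3-layer variant below)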
# test
# PPL = 36.1999
\\speechstore5\userdata\kaishengy\bin\binmay28\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments.2layers test=[modelPath=$ExpFolder$\modelRnnCNTK] command=test
# with 3 layers of LSTM: PPL = 52.85
\\speechstore5\userdata\kaishengy\bin\binmay28\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.nce.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.lstmlm.comments.3layers train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:200:200:200:$VOCABSIZE$]] test=[modelPath=$ExpFolder$\modelRnnCNTK] DEVICE=0


@ -1,40 +0,0 @@
# this uses a Kneser-Ney (KN) n-gram model to construct a language model
set BINDIR=\\speechstore5\transient\kaishengy\tools\SRILM\SRILM\bin\msvc64
set DATADIR=\\speechstore5\transient\kaishengy\data\newscomments\2015\03-23
set TRAINFILE=%DATADIR%\comments.cntk.train.txt
set TESTFILE=%DATADIR%\comments.cntk.test.txt
set EXPDIR=\\speechstore5\transient\kaishengy\exp\news\s2.knlm.comments
set OUTLMFN=%EXPDIR%\kn3.lm
# ----------------------
# 3-gram
# ----------------------
%BINDIR%\ngram-count.exe -no-sos -no-eos -text %TRAINFILE% -lm %OUTLMFN% -unk
# test PPL
%BINDIR%\ngram.exe -lm %OUTLMFN% -ppl %TESTFILE% > %EXPDIR%\ppl_3gm.ppl
# results from KN 3-gram model
# D:\dev\cntkcodeplex\ExampleSetups\News\steps>more %EXPDIR%\ppl_3gm.ppl
# file \\speechstore5\transient\kaishengy\data\newscomments\2015\03-23\comments.cntk.test.txt: 153 sentences, 8848 words, 0 OOVs
# 0 zeroprobs, logprob= -17617.4 ppl= 90.6291 ppl1= 97.9744
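# for reference, SRILM computes ppl = 10^(-logprob/(words+sentences)) and ppl1 = 10^(-logprob/words):
# 10^(17617.4/9001) ≈ 90.63 and 10^(17617.4/8848) ≈ 97.97, matching the numbers above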
# ----------------------
# class-based LM
# ----------------------
set CLASSCNTFILE=%EXPDIR%\class.cnt
set CLASSFILE=%EXPDIR%\class.txt
%BINDIR%\ngram-class.exe -numclasses 50 -class-counts %CLASSCNTFILE% -classes %CLASSFILE% -text %TRAINFILE%
# test PPL
%BINDIR%\ngram.exe -lm %OUTLMFN% -classes %CLASSFILE% -ppl %TESTFILE% > %EXPDIR%\ppl_3gm.50classes.ppl
# results from the 50-class 3-gram LM
# D:\dev\cntkcodeplex\ExampleSetups\News\steps>more %EXPDIR%\ppl_3gm.50classes.ppl
# file \\speechstore5\transient\kaishengy\data\newscomments\2015\03-23\comments.cntk.test.txt: 153 sentences, 8848 words, 0 OOVs
# 0 zeroprobs, logprob= -17617.4 ppl= 90.629 ppl1= 97.9744


@ -1,14 +0,0 @@
# -----------------------------
# Train LSTM LM using class-based criterion
# -----------------------------
# locally
C:\dev\cntk5\x64\Release\CNTK.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.classlm.config ExpDir=d:\exp\news\s1.lstmclslm.comments
# ------------ use LSTM components --------------
# 1-layer Perplexity = 29.668369
\\speechstore5\userdata\kaishengy\bin\binmay28a\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.classlm.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.clstmcls100lm CLASSSIZE=100 train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:100:200:$VOCABSIZE$]] DEVICE=0
# 2-layers PPL
\\speechstore5\userdata\kaishengy\bin\binmay28a\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.classlm.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.clstmcls100lm.2layer CLASSSIZE=100 train=[SimpleNetworkBuilder=[layerSizes=$VOCABSIZE$:100:200:200:$VOCABSIZE$]] DEVICE=1
\\speechstore5\userdata\kaishengy\bin\binmay28a\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\news\setups\global.config+\\speechstore5\userdata\kaishengy\exp\news\setups\lstmlm.gpu.classlm.config.txt ExpDir=\\speechstore5\transient\kaishengy\exp\news\s1.clstmcls100lm.2layer test=[modelPath=$ExpFolder$\modelRnnCNTK] command=test


@ -1,8 +0,0 @@
# -----------------------------
# Train LSTM LM using class-based criterion
# -----------------------------
# locally
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=d:\dev\cntkcodeplex\examplesetups\news\setups\global.config+d:\dev\cntkcodeplex\examplesetups\news\setups\lstmlm.gpu.classlm.2stream.config ExpDir=d:\exp\news\s4.lstmclslm.dbg command=train train=[SGD=[gradientcheck=true]] DEVICE=-1
# debug without multi-streams to make sure it is backward compatible
D:\dev\cntkcodeplex\x64\Release\cntk.exe configFile=..\setups\global.config+..\setups\lstmlm.gpu.classlm.config ExpDir=d:\exp\news\s4.lstmclslm.dbg command=writeWordAndClassInfo:train


@ -1,10 +0,0 @@
# this uses a smaller learning rate
d:\exp\bin\cntk\cntk.exe configFile=..\setups\global.s2s.config+..\setups\s2s.class.alignment.config DeviceNumber=Auto LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\news\s2sryrun LSTM=[SGD=[learningRatesPerSample=0.00001]] LSTM=[SGD=[gradientcheck=TRUE]]
# with 1 layer of LSTM, embedding dimension 50, LSTM dimension 100
d:\bin\cntkjuly7\cntk.exe configFile=..\setups\global.s2s.config+..\setups\s2s.class.alignment.config DeviceNumber=1 LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=d:\exp\news\attention_dryrun LSTM=[SGD=[learningRatesPerSample=0.001]]
# test
\\speechstore5\transient\kaishengy\bin\binjuly2\cntk.exe configFile=\\speechstore5\userdata\kaishengy\exp\smt\setups\global.cs-en.config+\\speechstore5\userdata\kaishengy\exp\smt\setups\s2s.class.alignment.config LSTM=[SGD=[numMBsToShowResult=1000]] ExpDir=\\speechstore5\transient\kaishengy\exp\smt\c2elr1e3cs200emb50lstm100 MdlDir=//speechstore5/transient/kaishengy/exp/smt/c2elr1e3cs200emb50lstm100 LSTMTest=[encoderModelPath=$MdlDir$\smt.lstm.encoder.27] LSTMTest=[decoderModelPath=$MdlDir$\smt.lstm.decoder.27] NBRUTT=1 command=LSTMTest DeviceNumber=-1 LSTMTest=[beamWidth=0.05]
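# note the fractional beamWidth=0.05: beamWidth here presumably acts as a pruning threshold
# on probability mass rather than as an n-best count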


@ -1,33 +0,0 @@
//
// <copyright file="DataReaderClient.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// DataReaderClient.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "DataReader.h"
using namespace std;
using namespace Microsoft::MSR::CNTK;
int _tmain(int argc, _TCHAR* argv[])
{
size_t vdim = 429;
size_t udim = 1504;
vector<wstring> filepaths;
filepaths.push_back( wstring(L"C:\\speech\\swb300h\\data\\archive.swb_mini.52_39.notestspk.dev.small.scplocal"));
filepaths.push_back( wstring(L"C:\\speech\\swb300h\\data\\swb_mini.1504.align.small.statemlf"));
DataReader<float> dataReader(vdim, udim, filepaths, wstring(L""), 4096);
Matrix<float> features;
Matrix<float> labels;
dataReader.StartMinibatchLoop(256, 0);
int i = 0;
while (dataReader.GetMinibatch(features, labels))
{
fprintf(stderr,"%4d: features dim: %d x %d - [%.8g, %.8g, ...] label dim: %d x %d - [%d, %d, ...]\n", i++, features.GetNumRows(), features.GetNumCols(), features(0,0), features(0,1), labels.GetNumRows(), labels.GetNumCols(), (int)labels(0,0), (int)labels(1,0));
}
return 0;
}


@ -1,194 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{89A1BB74-345D-4CB4-BC56-2C05D8076183}</ProjectGuid>
<SccProjectName>SAK</SccProjectName>
<SccAuxPath>SAK</SccAuxPath>
<SccLocalPath>SAK</SccLocalPath>
<SccProvider>SAK</SccProvider>
<Keyword>Win32Proj</Keyword>
<RootNamespace>DataReaderClient</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v110</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSDK_LibraryPath_x86);</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IncludePath>..\..\common\include;..\..\Source\Math;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSDK_LibraryPath_x86);</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>..\..\common\include;..\..\Source\Math;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSDK_LibraryPath_x64);</LibraryPath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>..;..\..\Source\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)/lib/*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
<AdditionalDependencies>Math.lib;HTKMLFReader.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)\lib\*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>Use</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>..;..\..\Source\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)/lib/*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>Use</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
<AdditionalDependencies>Math.lib;HTKMLFReader.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>copy $(ACML_PATH)\lib\*.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
<Text Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="DataReaderClient.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>


@ -1,40 +0,0 @@
========================================================================
CONSOLE APPLICATION : DataReaderClient Project Overview
========================================================================
AppWizard has created this DataReaderClient application for you.
This file contains a summary of what you will find in each of the files that
make up your DataReaderClient application.
DataReaderClient.vcxproj
This is the main project file for VC++ projects generated using an Application Wizard.
It contains information about the version of Visual C++ that generated the file, and
information about the platforms, configurations, and project features selected with the
Application Wizard.
DataReaderClient.vcxproj.filters
This is the filters file for VC++ projects generated using an Application Wizard.
It contains information about the association between the files in your project
and the filters. This association is used in the IDE to show grouping of files with
similar extensions under a specific node (e.g. ".cpp" files are associated with the
"Source Files" filter).
DataReaderClient.cpp
This is the main application source file.
/////////////////////////////////////////////////////////////////////////////
Other standard files:
StdAfx.h, StdAfx.cpp
These files are used to build a precompiled header (PCH) file
named DataReaderClient.pch and a precompiled types file named StdAfx.obj.
/////////////////////////////////////////////////////////////////////////////
Other notes:
AppWizard uses "TODO:" comments to indicate parts of the source code you
should add to or customize.
/////////////////////////////////////////////////////////////////////////////


@ -1,13 +0,0 @@
//
// <copyright file="stdafx.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.cpp : source file that includes just the standard includes
// DataReaderClient.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file


@ -1,20 +0,0 @@
//
// <copyright file="stdafx.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "targetver.h"
#include <stdio.h>
#include <tchar.h>
// TODO: reference additional headers your program requires here


@ -1,13 +0,0 @@
//
// <copyright file="targetver.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>


@ -1,84 +0,0 @@
//
// <copyright file="DataReaderUnitTest.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#include "CppUnitTest.h"
#include "DataReader.h"
using namespace std;
using namespace Microsoft::MSR::CNTK;
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
namespace DataReaderTest
{
TEST_CLASS(UCIDataReaderUnitTest)
{
public:
// StandardLoopTest
// Test of the DataReader loop
TEST_METHOD(TestMode)
{
size_t vdim = 785;
size_t udim = 10;
size_t epochSize = 500;
size_t mbSize = 256;
size_t epochs = 2;
vector<wstring> filepaths;
filepaths.push_back( wstring(L"C:\\speech\\mnist\\mnist_test.txt"));
DataReader<float, int> dataReader(vdim, udim, filepaths, wstring(L"-label:none -minibatchmode:partial "), randomizeNone); //-labels:regression
Matrix<float> features;
Matrix<float> labels;
for (int epoch = 0; epoch < epochs; epoch++)
{
dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
for (int cnt = 0;dataReader.GetMinibatch(features, labels);cnt++)
{
Assert::IsTrue(labels.GetNumRows() == 0);
Assert::IsTrue(features.GetNumRows() == 785);
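// epochSize=500 with mbSize=256 leaves a partial second minibatch of 500-256=244 frames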
Assert::IsTrue(features.GetNumCols() == (cnt?244:mbSize));
for (int i = 1;i < features.GetNumCols();i++)
{
// really labels, these should be in order
Assert::IsTrue(features(0,i-1) <= features(0,i));
}
}
}
}
TEST_METHOD(Partial)
{
size_t vdim = 784;
size_t udim = 10;
size_t epochSize = 500;
size_t mbSize = 256;
size_t epochs = 2;
vector<wstring> filepaths;
filepaths.push_back( wstring(L"C:\\speech\\mnist\\mnist_test.txt"));
DataReader<float, int> dataReader(vdim, udim, filepaths, wstring(L"-label:first -labeltype:category -minibatchmode:partial "), randomizeNone); //-labels:regression
Matrix<float> features;
Matrix<float> labels;
for (int epoch = 0; epoch < epochs; epoch++)
{
dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
for (int cnt = 0;dataReader.GetMinibatch(features, labels);cnt++)
{
Assert::IsTrue(labels.GetNumRows() == udim);
Assert::IsTrue(features.GetNumRows() == 785);
Assert::IsTrue(features.GetNumCols() == (cnt?244:mbSize));
for (int i = 1;i < features.GetNumCols();i++)
{
// really labels, these should be in order
Assert::IsTrue(features(0,i-1) <= features(0,i));
}
}
}
}
};
}


@ -1,13 +0,0 @@
//
// <copyright file="stdafx.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.cpp : source file that includes just the standard includes
// DataReaderTest.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file


@ -1,18 +0,0 @@
//
// <copyright file="stdafx.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "targetver.h"
// Headers for CppUnitTest
#include "CppUnitTest.h"
// TODO: reference additional headers your program requires here


@ -1,13 +0,0 @@
//
// <copyright file="targetver.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>


@ -1,73 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HTKMLFReader", "HTKMLFReader\HTKMLFReader.vcxproj", "{33D2FD22-DEF2-4507-A58A-368F641AEBE5}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMath", "..\Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DataReaderClient", "DataReaderClient\DataReaderClient.vcxproj", "{89A1BB74-345D-4CB4-BC56-2C05D8076183}"
ProjectSection(ProjectDependencies) = postProject
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Global
GlobalSection(TeamFoundationVersionControl) = preSolution
SccNumberOfProjects = 3
SccEnterpriseProvider = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccTeamFoundationServer = http://vstfcodebox:8080/tfs/kappa
SccProjectUniqueName0 = ..\\Math\\Math\\Math.vcxproj
SccProjectName0 = ../Math/Math
SccAuxPath0 = http://vstfcodebox:8080/tfs/kappa
SccLocalPath0 = ..\\Math\\Math
SccProvider0 = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccProjectUniqueName1 = DataReaderClient\\DataReaderClient.vcxproj
SccProjectName1 = DataReaderClient
SccAuxPath1 = http://vstfcodebox:8080/tfs/kappa
SccLocalPath1 = DataReaderClient
SccProvider1 = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccProjectUniqueName2 = HTKMLFReader\\HTKMLFReader.vcxproj
SccProjectName2 = HTKMLFReader
SccAuxPath2 = http://vstfcodebox:8080/tfs/kappa
SccLocalPath2 = HTKMLFReader
SccProvider2 = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Win32.ActiveCfg = Debug|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Win32.Build.0 = Debug|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.ActiveCfg = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.Build.0 = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Win32.ActiveCfg = Release|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Win32.Build.0 = Release|Win32
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.ActiveCfg = Release|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.Build.0 = Release|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Win32.ActiveCfg = Debug|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Win32.Build.0 = Debug|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.ActiveCfg = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.Build.0 = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Win32.ActiveCfg = Release|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Win32.Build.0 = Release|Win32
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.ActiveCfg = Release|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.Build.0 = Release|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|Win32.ActiveCfg = Debug|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|Win32.Build.0 = Debug|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|x64.ActiveCfg = Debug|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Debug|x64.Build.0 = Debug|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|Win32.ActiveCfg = Release|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|Win32.Build.0 = Release|Win32
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|x64.ActiveCfg = Release|x64
{89A1BB74-345D-4CB4-BC56-2C05D8076183}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal


@ -1,66 +0,0 @@
//
// <copyright file="DataReader.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// DataReader.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#ifdef _WIN32
#include <objbase.h>
#endif
#include "basetypes.h"
#include "htkfeatio.h" // for reading HTK features
//#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
//#include "simplesenonehmm.h" // for MMI scoring
//#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
#include "rollingwindowsource.h" // minibatch sources
#include "utterancesource.h"
//#include "readaheadsource.h"
#include "chunkevalsource.h"
#define DATAREADER_EXPORTS
#include "DataReader.h"
#include "HTKMLFReader.h"
#include "Config.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void DATAREADER_API GetReader(IDataReader<ElemType>** preader)
{
*preader = new HTKMLFReader<ElemType>();
}
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
{
GetReader(preader);
}
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
{
GetReader(preader);
}
// Utility function, in ConfigFile.cpp, but HTKMLFReader doesn't need that code...
// Trim - trim white space off the start and end of the string
// str - string to trim
// NOTE: if the entire string is empty, then the string will be set to an empty string
/* void Trim(std::string& str)
{
auto found = str.find_first_not_of(" \t");
if (found == npos)
{
str.erase(0);
return;
}
str.erase(0, found);
found = str.find_last_not_of(" \t");
if (found != npos)
str.erase(found+1);
}*/
}}}


@ -1,104 +0,0 @@
//
// <copyright file="DataWriter.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// DataWriter.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#include "basetypes.h"
#include "htkfeatio.h" // for reading HTK features
#define DATAWRITER_EXPORTS
#include "DataWriter.h"
#include "HTKMLFWriter.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void DATAWRITER_API GetWriter(IDataWriter<ElemType>** pwriter)
{
*pwriter = new HTKMLFWriter<ElemType>();
}
extern "C" DATAWRITER_API void GetWriterF(IDataWriter<float>** pwriter)
{
GetWriter(pwriter);
}
extern "C" DATAWRITER_API void GetWriterD(IDataWriter<double>** pwriter)
{
GetWriter(pwriter);
}
template<class ElemType>
template<class ConfigRecordType> void DataWriter<ElemType>::InitFromConfig(const ConfigRecordType & writerConfig)
{
m_dataWriter = new HTKMLFWriter<ElemType>();
m_dataWriter->Init(writerConfig);
}
// Destroy - cleanup and remove this class
// NOTE: this destroys the object, and it can't be used past this point
template<class ElemType>
void DataWriter<ElemType>::Destroy()
{
delete m_dataWriter;
m_dataWriter = NULL;
}
// DataWriter Constructor
// config - [in] configuration data for the data writer
template<class ElemType>
template<class ConfigRecordType> DataWriter<ElemType>::DataWriter(const ConfigRecordType & config)
{
Init(config);
}
// destructor - cleanup temp files, etc.
template<class ElemType>
DataWriter<ElemType>::~DataWriter()
{
delete m_dataWriter;
m_dataWriter = NULL;
}
// GetSections - Get the sections of the file
// sections - a map of section name to section. Data specifications from the config file will be used to determine where and how to save data
template<class ElemType>
void DataWriter<ElemType>::GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections)
{
m_dataWriter->GetSections(sections);
}
// SaveData - save data in the file/files
// recordStart - Starting record number
// matrices - a map of section name (section:subsection) to data pointer. Data specifications from the config file will be used to determine where and how to save data
// numRecords - number of records we are saving, can be zero if not applicable
// datasetSize - Size of the dataset
// byteVariableSized - for variable sized data, size of current block to be written, zero when not used, or ignored if not variable sized data
template<class ElemType>
bool DataWriter<ElemType>::SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized)
{
return m_dataWriter->SaveData(recordStart, matrices, numRecords, datasetSize, byteVariableSized);
}
// SaveMapping - save a map into the file
// saveId - name of the section to save into (section:subsection format)
// labelMapping - map we are saving to the file
template<class ElemType>
void DataWriter<ElemType>::SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& labelMapping)
{
m_dataWriter->SaveMapping(saveId, labelMapping);
}
// explicit instantiations
template class DataWriter<double>;
template class DataWriter<float>;
}}}

(file diff not shown because it is too large)


@ -1,135 +0,0 @@
//
// <copyright file="HTKMLFReader.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFReader.h - Include file for the MTK and MLF format of features and samples
#pragma once
#include "DataReader.h"
#include "Config.h" // for intargvector
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class HTKMLFReader : public IDataReader<ElemType>
{
private:
msra::dbn::minibatchiterator* m_mbiter;
msra::dbn::minibatchsource* m_frameSource;
//msra::dbn::minibatchreadaheadsource* m_readAheadSource;
msra::dbn::FileEvalSource* m_fileEvalSource;
msra::dbn::latticesource* m_lattices;
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
vector<bool> m_sentenceEnd;
bool m_readAhead;
bool m_truncated;
bool m_framemode;
vector<size_t> m_processedFrame;
size_t m_numberOfuttsPerMinibatch;
size_t m_actualnumberOfuttsPerMinibatch;
size_t m_mbSize;
vector<size_t> m_toProcess;
vector<size_t> m_switchFrame;
bool m_noData;
bool m_trainOrTest; // if false, in file writing mode
using LabelType = typename IDataReader<ElemType>::LabelType;
using LabelIdType = typename IDataReader<ElemType>::LabelIdType;
std::map<LabelIdType, LabelType> m_idToLabelMap;
bool m_partialMinibatch; // allow partial minibatches?
std::vector<ElemType*> m_featuresBufferMultiUtt;
std::vector<size_t> m_featuresBufferAllocatedMultiUtt;
std::vector<ElemType*> m_labelsBufferMultiUtt;
std::vector<size_t> m_labelsBufferAllocatedMultiUtt;
std::vector<size_t> m_featuresStartIndexMultiUtt;
std::vector<size_t> m_labelsStartIndexMultiUtt;
std::vector<ElemType*> m_featuresBufferMultiIO;
std::vector<size_t> m_featuresBufferAllocatedMultiIO;
std::vector<ElemType*> m_labelsBufferMultiIO;
std::vector<size_t> m_labelsBufferAllocatedMultiIO;
std::map<std::wstring,size_t> m_featureNameToIdMap;
std::map<std::wstring,size_t> m_labelNameToIdMap;
std::map<std::wstring,size_t> m_nameToTypeMap;
std::map<std::wstring,size_t> m_featureNameToDimMap;
std::map<std::wstring,size_t> m_labelNameToDimMap;
// for writing outputs to files (standard single input/output network) - deprecate eventually
bool m_checkDictionaryKeys;
bool m_convertLabelsToTargets;
std::vector <bool> m_convertLabelsToTargetsMultiIO;
std::vector<std::vector<std::wstring>> m_inputFilesMultiIO;
size_t m_inputFileIndex;
std::vector<size_t> m_featDims;
std::vector<size_t> m_labelDims;
std::vector<std::vector<std::vector<ElemType>>>m_labelToTargetMapMultiIO;
void PrepareForTrainingOrTesting(const ConfigParameters& config);
void PrepareForWriting(const ConfigParameters& config);
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
void StartMinibatchLoopToWrite(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
bool ReNewBufferForMultiIO(size_t i);
size_t GetNumParallelSequences() { return m_numberOfuttsPerMinibatch; }
void SetNumParallelSequences(const size_t) { };
size_t NumberSlicesInEachRecurrentIter() { return m_numberOfuttsPerMinibatch ;}
void SetNbrSlicesEachRecurrentIter(const size_t) { };
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels);
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
enum InputOutputTypes
{
real,
category,
};
public:
MBLayoutPtr m_pMBLayout;
/// by default it is false
/// if true, the reader sets SEQUENCE_MIDDLE for time positions that originally correspond to SEQUENCE_START,
/// so that the current minibatch can use state activities from the previous minibatch.
/// the default gives truncated BPTT, which only does BPTT inside a minibatch
bool mIgnoreSentenceBeginTag;
HTKMLFReader() : m_pMBLayout(make_shared<MBLayout>())
{
}
template<class ConfigRecordType> void InitFromConfig(const ConfigRecordType &);
virtual void Init(const ConfigParameters & config) override { InitFromConfig(config); }
virtual void Init(const ScriptableObjects::IConfigRecord & config) override { InitFromConfig(config); }
virtual void Destroy() {delete this;}
virtual ~HTKMLFReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool DataEnd(EndDataType endDataType);
void CopyMBLayoutTo(MBLayoutPtr);
void SetSentenceEndInBatch(vector<size_t> &/*sentenceEnd*/);
void SetSentenceEnd(int /*actualMbSize*/){};
void SetRandomSeed(int) { NOT_IMPLEMENTED };
bool RequireSentenceSeg() { return !m_framemode; };
};
}}}


@ -1,280 +0,0 @@
//
// <copyright file="HTKMLFWriter.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFReader.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#include "basetypes.h"
#include "htkfeatio.h" // for reading HTK features
//#ifndef __unix__
#include "ssematrix.h"
//#endif
//#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
//#include "simplesenonehmm.h" // for MMI scoring
//#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
//#include "rollingwindowsource.h" // minibatch sources
//#include "utterancesource.h"
//#include "readaheadsource.h"
//#include "chunkevalsource.h"
//#include "minibatchiterator.h"
#define DATAWRITER_EXPORTS // creating the exports here
#include "DataWriter.h"
#include "Config.h"
#include "HTKMLFWriter.h"
#include "Config.h"
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif
namespace Microsoft { namespace MSR { namespace CNTK {
// Create a Data Writer
//DATAWRITER_API IDataWriter* DataWriterFactory(void)
template<class ElemType>
void HTKMLFWriter<ElemType>::Init(const ConfigParameters& writerConfig)
{
m_tempArray = nullptr;
m_tempArraySize = 0;
vector<wstring> scriptpaths;
vector<wstring> filelist;
size_t numFiles;
size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing
ConfigArray outputNames = writerConfig(L"outputNodeNames","");
if (outputNames.size()<1)
RuntimeError("writer needs at least one outputNodeName specified in config");
foreach_index(i, outputNames) // outputNames should map to node names
{
ConfigParameters thisOutput = writerConfig(outputNames[i]);
if (thisOutput.Exists("dim"))
udims.push_back(thisOutput(L"dim"));
else
RuntimeError("HTKMLFWriter::Init: writer need to specify dim of output");
if (thisOutput.Exists("file"))
scriptpaths.push_back(thisOutput(L"file"));
else if (thisOutput.Exists("scpFile"))
scriptpaths.push_back(thisOutput(L"scpFile"));
else
RuntimeError("HTKMLFWriter::Init: writer needs to specify scpFile for output");
if (thisOutput.Exists("Kaldicmd"))
{
kaldicmd.push_back(thisOutput(L"Kaldicmd"));
}
outputNameToIdMap[outputNames[i]]= i;
outputNameToDimMap[outputNames[i]]=udims[i];
wstring type = thisOutput(L"type","Real");
if (type == L"Real")
{
outputNameToTypeMap[outputNames[i]] = OutputTypes::outputReal;
}
else
{
throw std::runtime_error ("HTKMLFWriter::Init: output type for writer output expected to be Real");
}
}
numFiles=0;
foreach_index(i,scriptpaths)
{
filelist.clear();
std::wstring scriptPath = scriptpaths[i];
fprintf(stderr, "HTKMLFWriter::Init: reading output script file %S ...", scriptPath.c_str());
size_t n = 0;
for (msra::files::textreader reader(scriptPath); reader && filelist.size() <= firstfilesonly/*optimization*/; )
{
filelist.push_back (reader.wgetline());
n++;
}
fprintf (stderr, " %zd entries\n", n);
if (i==0)
numFiles=n;
else
if (n!=numFiles)
throw std::runtime_error (msra::strfun::strprintf ("HTKMLFWriter::Init: number of files in each scriptfile inconsistent (%d vs. %d)", (int)numFiles, (int)n));
outputFiles.push_back(filelist);
}
outputFileIndex=0;
sampPeriod=100000;
}
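// For reference, Init (above) expects the following from writerConfig:
// an "outputNodeNames" array naming at least one output; then, per named
// output, a sub-config with a required "dim", a required "file" or "scpFile"
// (a script file listing one output path per SaveData call), an optional
// "Kaldicmd" (a Kaldi write specifier that overrides the default ark path),
// and an optional "type", which must be "Real" (the only supported value).
// All script files must contain the same number of entries.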
template<class ElemType>
void HTKMLFWriter<ElemType>::Destroy()
{
delete [] m_tempArray;
m_tempArray = nullptr;
m_tempArraySize = 0;
}
template<class ElemType>
void HTKMLFWriter<ElemType>::GetSections(std::map<std::wstring, SectionType, nocase_compare>& /*sections*/)
{
}
template<class ElemType>
bool HTKMLFWriter<ElemType>::SaveData(size_t /*recordStart*/, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t /*numRecords*/, size_t /*datasetSize*/, size_t /*byteVariableSized*/)
{
if (kaldicmd.size() == 0)
{
//std::map<std::wstring, void*, nocase_compare>::iterator iter;
if (outputFileIndex>=outputFiles[0].size())
RuntimeError("index for output scp file out of range...");
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
size_t id = outputNameToIdMap[outputName];
size_t dim = outputNameToDimMap[outputName];
wstring outFile = outputFiles[id][outputFileIndex];
assert(outputData.GetNumRows()==dim); dim; // reference dim to silence the unused-variable warning in release builds
SaveToKaldiFile(outFile,outputData);
}
outputFileIndex++;
} else
{
if (outputFileIndex>=outputFiles[0].size())
RuntimeError("index for output scp file out of range...");
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
size_t id = outputNameToIdMap[outputName];
size_t dim = outputNameToDimMap[outputName];
wstring outFile = outputFiles[id][outputFileIndex];
string wfea = "ark:" + msra::strfun::utf8(outFile);
wfea = msra::strfun::utf8(kaldicmd[0]); // kaldicmd[0] supplies the full Kaldi write specifier, overriding the default "ark:" path above
kaldi::BaseFloatMatrixWriter feature_writer(wfea);
kaldi::Matrix<kaldi::BaseFloat> nnet_out_host;
assert(outputData.GetNumRows()==dim); dim; // reference dim to silence the unused-variable warning in release builds
const std::string outputPath = msra::strfun::utf8(outFile);
const std::string file_key = removeExtension(basename(outputPath));
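// CNTK matrices hold one frame per column, while a Kaldi feature matrix
// holds one frame per row, so the copy below transposes as it goes
// (Kaldi row j <- CNTK column j).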
nnet_out_host.Resize(outputData.GetNumCols(), outputData.GetNumRows());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
nnet_out_host(j,i) = (float)*pValue++;
if (nnet_out_host(j,i) > 50)
{
nnet_out_host(j,i) = -(float)log(1.0/outputData.GetNumCols());
}
}
}
fprintf (stderr, "evaluate: writing %zd frames of %s\n", outputData.GetNumCols(), wfea.c_str());
feature_writer.Write(file_key, nnet_out_host);
}
outputFileIndex++;
}
return true;
}
template<class ElemType>
void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
output(i,j) = (float)*pValue++;
}
}
const size_t nansinf = output.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outputFile.c_str(), (int) output.cols());
// save it
msra::files::make_intermediate_dirs (outputFile);
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outputFile, "USER", this->sampPeriod, output);
});
fprintf (stderr, "evaluate: writing %zd frames of %S\n", output.cols(), outputFile.c_str());
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
output(i,j) = (float)*pValue++;
}
}
const size_t nansinf = output.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outputFile.c_str(), (int) output.cols());
// save it
msra::files::make_intermediate_dirs (outputFile);
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::writeKaldi (outputFile, "USER", this->sampPeriod, output, sizeof(ElemType));
});
fprintf (stderr, "evaluate: writing %zd frames of %S\n", output.cols(), outputFile.c_str());
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& /*labelMapping*/)
{
}
template class HTKMLFWriter<float>;
template class HTKMLFWriter<double>;
}}}


@ -1,52 +0,0 @@
//
// <copyright file="HTKMLFWriter.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFWriter.h - Include file for the HTK and MLF format of features and samples
#pragma once
#include "DataWriter.h"
#include <map>
#include <vector>
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class HTKMLFWriter : public IDataWriter<ElemType>
{
private:
std::vector<size_t> outputDims;
std::vector<std::vector<std::wstring>> outputFiles;
std::vector<std::wstring> kaldicmd;
std::vector<size_t> udims;
std::map<std::wstring,size_t> outputNameToIdMap;
std::map<std::wstring,size_t> outputNameToDimMap;
std::map<std::wstring,size_t> outputNameToTypeMap;
unsigned int sampPeriod;
size_t outputFileIndex;
void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
ElemType * m_tempArray;
size_t m_tempArraySize;
enum OutputTypes
{
outputReal,
outputCategory,
};
public:
using LabelType = typename IDataWriter<ElemType>::LabelType;
using LabelIdType = typename IDataWriter<ElemType>::LabelIdType;
template<class ConfigRecordType>
void InitFromConfig(const ConfigRecordType & writerConfig);
virtual void Init(const ConfigParameters & config) { InitFromConfig(config); }
virtual void Init(const ScriptableObjects::IConfigRecord & config) { InitFromConfig(config); }
virtual void Destroy();
virtual void GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections);
virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized);
virtual void SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& labelMapping);
};
}}}
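
A matching sketch for the writer side (again illustrative: the node name, config object, and matrix contents are assumptions):

HTKMLFWriter<float> writer;
writer.Init(config);                                  // config: a pre-populated ConfigParameters (assumed)
Matrix<float> outputMatrix(-1);                       // evaluated network output, dim x frames (assumed)
std::map<std::wstring, void*, nocase_compare> matrices;
matrices[L"ScaledLogLikelihood"] = &outputMatrix;     // key must match an outputNodeNames entry
size_t numFrames = outputMatrix.GetNumCols();
// Each SaveData call writes one utterance and consumes the next scp entry.
writer.SaveData(0, matrices, numFrames, numFrames, 0);
writer.Destroy();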


@ -1,219 +0,0 @@
// base/io-funcs-inl.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian; Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_IO_FUNCS_INL_H_
#define KALDI_BASE_IO_FUNCS_INL_H_ 1
// Do not include this file directly. It is included by base/io-funcs.h
#include <limits>
#include <vector>
namespace kaldi {
// Template that covers integers.
template<class T> void WriteBasicType(std::ostream &os,
bool binary, T t) {
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1)
* static_cast<char>(sizeof(t));
os.put(len_c);
os.write(reinterpret_cast<const char *>(&t), sizeof(t));
} else {
if (sizeof(t) == 1)
os << static_cast<int16>(t) << " ";
else
os << t << " ";
}
if (os.fail()) {
throw std::runtime_error("Write failure in WriteBasicType.");
}
}
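// Worked example of the convention above, assuming a little-endian machine:
// writing int32 t = 5 in binary mode emits five bytes, 0x04 (signed, size 4)
// followed by 05 00 00 00; an unsigned 4-byte type would emit 0xFC (i.e. -4)
// as its length byte instead.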
// Template that covers integers.
template<class T> inline void ReadBasicType(std::istream &is,
bool binary, T *t) {
KALDI_PARANOID_ASSERT(t != NULL);
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
int len_c_in = is.get();
if (len_c_in == -1)
KALDI_ERR << "ReadBasicType: encountered end of stream.";
char len_c = static_cast<char>(len_c_in), len_c_expected
= (std::numeric_limits<T>::is_signed ? 1 : -1)
* static_cast<char>(sizeof(*t));
if (len_c != len_c_expected) {
KALDI_ERR << "ReadBasicType: did not get expected integer type, "
<< static_cast<int>(len_c)
<< " vs. " << static_cast<int>(len_c_expected)
<< ". You can change this code to successfully"
<< " read it later, if needed.";
// insert code here to read "wrong" type. Might have a switch statement.
}
is.read(reinterpret_cast<char *>(t), sizeof(*t));
} else {
if (sizeof(*t) == 1) {
int16 i;
is >> i;
*t = i;
} else {
is >> *t;
}
}
if (is.fail()) {
KALDI_ERR << "Read failure in ReadBasicType, file position is "
<< is.tellg() << ", next char is " << is.peek();
}
}
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
const std::vector<T> &v) {
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
char sz = sizeof(T); // this is currently just a check.
os.write(&sz, 1);
int32 vecsz = static_cast<int32>(v.size());
KALDI_ASSERT((size_t)vecsz == v.size());
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
if (vecsz != 0) {
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
}
} else {
// focus here is on prettiness of text form rather than
// efficiency of reading-in.
// reading-in is dominated by low-level operations anyway:
// for efficiency use binary.
os << "[ ";
typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
for (; iter != end; ++iter) {
if (sizeof(T) == 1)
os << static_cast<int16>(*iter) << " ";
else
os << *iter << " ";
}
os << "]\n";
}
if (os.fail()) {
throw std::runtime_error("Write failure in WriteIntegerType.");
}
}
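// Binary layout produced above: one byte holding sizeof(T), an int32 element
// count, then the raw elements. E.g. std::vector<int32>{7} becomes (on a
// little-endian machine): 04 | 01 00 00 00 | 07 00 00 00.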
template<class T> inline void ReadIntegerVector(std::istream &is,
bool binary,
std::vector<T> *v) {
KALDI_ASSERT_IS_INTEGER_TYPE(T);
KALDI_ASSERT(v != NULL);
if (binary) {
int sz = is.peek();
if (sz == sizeof(T)) {
is.get();
} else { // this is currently just a check.
KALDI_ERR << "ReadIntegerVector: expected to see type of size "
<< sizeof(T) << ", saw instead " << sz << ", at file position "
<< is.tellg();
}
int32 vecsz;
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
if (is.fail() || vecsz < 0) goto bad;
v->resize(vecsz);
if (vecsz > 0) {
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
}
} else {
std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
// due to resizing.
is >> std::ws;
if (is.peek() != static_cast<int>('[')) {
KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
<< is.peek() << ", at file position " << is.tellg();
}
is.get(); // consume the '['.
is >> std::ws; // consume whitespace.
while (is.peek() != static_cast<int>(']')) {
if (sizeof(T) == 1) { // read/write chars as numbers.
int16 next_t;
is >> next_t >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back((T)next_t);
} else {
T next_t;
is >> next_t >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back(next_t);
}
}
is.get(); // get the final ']'.
*v = tmp_v; // could use std::swap to use less temporary memory, but this
// uses less permanent memory.
}
if (!is.fail()) return;
bad:
KALDI_ERR << "ReadIntegerVector: read failure at file position "
<< is.tellg();
}
// Initialize an opened stream for writing by writing an optional binary
// header and modifying the floating-point precision.
inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
// This does not throw exceptions (does not check for errors).
if (binary) {
os.put('\0');
os.put('B');
}
// Note, in non-binary mode we may at some point want to mess with
// the precision a bit.
// 7 is a bit more than the precision of float..
if (os.precision() < 7)
os.precision(7);
}
/// Initialize an opened stream for reading by detecting the binary header and
/// setting the "binary" value appropriately.
inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
// Sets the 'binary' variable.
// Throws exception in the very unusual situation that stream
// starts with '\0' but not then 'B'.
if (is.peek() == '\0') { // seems to be binary
is.get();
if (is.peek() != 'B') {
return false;
}
is.get();
*binary = true;
return true;
} else {
*binary = false;
return true;
}
}
} // end namespace kaldi.
#endif // KALDI_BASE_IO_FUNCS_INL_H_


@ -1,137 +0,0 @@
// base/io-funcs-test.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
namespace kaldi {
void UnitTestIo(bool binary) {
{
const char *filename = "tmpf";
std::ofstream outfile(filename, std::ios_base::out | std::ios_base::binary);
InitKaldiOutputStream(outfile, binary);
if (!binary) outfile << "\t";
int64 i1 = rand() % 10000;
WriteBasicType(outfile, binary, i1);
uint16 i2 = rand() % 10000;
WriteBasicType(outfile, binary, i2);
if (!binary) outfile << "\t";
char c = rand();
WriteBasicType(outfile, binary, c);
if (!binary && rand()%2 == 0) outfile << " \n";
std::vector<int32> vec1;
WriteIntegerVector(outfile, binary, vec1);
if (!binary && rand()%2 == 0) outfile << " \n";
std::vector<uint16> vec2;
for (size_t i = 0; i < 10; i++) vec2.push_back(rand()%100 - 10);
WriteIntegerVector(outfile, binary, vec2);
if (!binary) outfile << " \n";
std::vector<char> vec3;
for (size_t i = 0; i < 10; i++) vec3.push_back(rand()%100);
WriteIntegerVector(outfile, binary, vec3);
if (!binary && rand()%2 == 0) outfile << " \n";
const char *token1 = "Hi";
WriteToken(outfile, binary, token1);
if (!binary) outfile << " \n";
std::string token2 = "There.";
WriteToken(outfile, binary, token2);
if (!binary && rand()%2 == 0) outfile << " \n";
std::string token3 = "You.";
WriteToken(outfile, binary, token3);
if (!binary && rand()%2 == 0) outfile << " ";
float f1 = RandUniform();
WriteBasicType(outfile, binary, f1);
if (!binary && rand()%2 == 0) outfile << "\t";
float f2 = RandUniform();
WriteBasicType(outfile, binary, f2);
double d1 = RandUniform();
WriteBasicType(outfile, binary, d1);
if (!binary && rand()%2 == 0) outfile << "\t";
double d2 = RandUniform();
WriteBasicType(outfile, binary, d2);
if (!binary && rand()%2 == 0) outfile << "\t";
outfile.close();
{
std::ifstream infile(filename, std::ios_base::in | std::ios_base::binary);
bool binary_in;
InitKaldiInputStream(infile, &binary_in);
int64 i1_in;
ReadBasicType(infile, binary_in, &i1_in);
KALDI_ASSERT(i1_in == i1);
uint16 i2_in;
ReadBasicType(infile, binary_in, &i2_in);
KALDI_ASSERT(i2_in == i2);
char c_in;
ReadBasicType(infile, binary_in, &c_in);
KALDI_ASSERT(c_in == c);
std::vector<int32> vec1_in;
ReadIntegerVector(infile, binary_in, &vec1_in);
KALDI_ASSERT(vec1_in == vec1);
std::vector<uint16> vec2_in;
ReadIntegerVector(infile, binary_in, &vec2_in);
KALDI_ASSERT(vec2_in == vec2);
std::vector<char> vec3_in;
ReadIntegerVector(infile, binary_in, &vec3_in);
KALDI_ASSERT(vec3_in == vec3);
std::string token1_in, token2_in;
KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
KALDI_ASSERT(PeekToken(infile, binary_in) == (int)*token1); // Note:
// the stuff with skipping over '<' is tested in ../util/kaldi-io-test.cc,
// since we need to make sure it works with pipes.
ReadToken(infile, binary_in, &token1_in);
KALDI_ASSERT(token1_in == std::string(token1));
ReadToken(infile, binary_in, &token2_in);
KALDI_ASSERT(token2_in == std::string(token2));
if (rand() % 2 == 0)
ExpectToken(infile, binary_in, token3.c_str());
else
ExpectToken(infile, binary_in, token3);
float f1_in; // same type.
ReadBasicType(infile, binary_in, &f1_in);
AssertEqual(f1_in, f1);
double f2_in; // wrong type.
ReadBasicType(infile, binary_in, &f2_in);
AssertEqual(f2_in, f2);
double d1_in; // same type.
ReadBasicType(infile, binary_in, &d1_in);
AssertEqual(d1_in, d1);
float d2_in; // wrong type.
ReadBasicType(infile, binary_in, &d2_in);
AssertEqual(d2_in, d2);
KALDI_ASSERT(Peek(infile, binary_in) == -1);
KALDI_ASSERT(PeekToken(infile, binary_in) == -1);
}
}
}
} // end namespace kaldi.
int main() {
using namespace kaldi;
for (size_t i = 0; i < 10; i++) {
UnitTestIo(false);
UnitTestIo(true);
}
KALDI_ASSERT(1); // just wanted to check that KALDI_ASSERT does not fail for 1.
return 0;
}


@ -1,206 +0,0 @@
// base/io-funcs.cc
// Copyright 2009-2011 Microsoft Corporation; Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
namespace kaldi {
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b) {
os << (b ? "T":"F");
if (!binary) os << " ";
if (os.fail())
KALDI_ERR << "Write failure in WriteBasicType<bool>";
}
template<>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b) {
KALDI_PARANOID_ASSERT(b != NULL);
if (!binary) is >> std::ws; // eat up whitespace.
char c = is.peek();
if (c == 'T') {
*b = true;
is.get();
} else if (c == 'F') {
*b = false;
is.get();
} else {
KALDI_ERR << "Read failure in ReadBasicType<bool>, file position is "
<< is.tellg() << ", next char is " << CharToString(c);
}
}
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f) {
if (binary) {
char c = sizeof(f);
os.put(c);
os.write(reinterpret_cast<const char *>(&f), sizeof(f));
} else {
os << f << " ";
}
}
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f) {
if (binary) {
char c = sizeof(f);
os.put(c);
os.write(reinterpret_cast<const char *>(&f), sizeof(f));
} else {
os << f << " ";
}
}
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f) {
KALDI_PARANOID_ASSERT(f != NULL);
if (binary) {
double d;
int c = is.peek();
if (c == sizeof(*f)) {
is.get();
is.read(reinterpret_cast<char*>(f), sizeof(*f));
} else if (c == sizeof(d)) {
ReadBasicType(is, binary, &d);
*f = d;
} else {
KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
<< ", at file position " << is.tellg();
}
} else {
is >> *f;
}
if (is.fail()) {
KALDI_ERR << "ReadBasicType: failed to read, at file position "
<< is.tellg();
}
}
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *d) {
KALDI_PARANOID_ASSERT(d != NULL);
if (binary) {
float f;
int c = is.peek();
if (c == sizeof(*d)) {
is.get();
is.read(reinterpret_cast<char*>(d), sizeof(*d));
} else if (c == sizeof(f)) {
ReadBasicType(is, binary, &f);
*d = f;
} else {
KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
<< ", at file position " << is.tellg();
}
} else {
is >> *d;
}
if (is.fail()) {
KALDI_ERR << "ReadBasicType: failed to read, at file position "
<< is.tellg();
}
}
void CheckToken(const char *token) {
KALDI_ASSERT(*token != '\0'); // check it's nonempty.
while (*token != '\0') {
KALDI_ASSERT(!::isspace(*token));
token++;
}
}
void WriteToken(std::ostream &os, bool binary, const char *token) {
// binary mode is ignored;
// we use space as termination character in either case.
KALDI_ASSERT(token != NULL);
CheckToken(token); // make sure it's valid (can be read back)
os << token << " ";
if (os.fail()) {
throw std::runtime_error("Write failure in WriteToken.");
}
}
int Peek(std::istream &is, bool binary) {
if (!binary) is >> std::ws; // eat up whitespace.
return is.peek();
}
void WriteToken(std::ostream &os, bool binary, const std::string & token) {
WriteToken(os, binary, token.c_str());
}
void ReadToken(std::istream &is, bool binary, std::string *str) {
KALDI_ASSERT(str != NULL);
if (!binary) is >> std::ws; // consume whitespace.
is >> *str;
if (is.fail()) {
KALDI_ERR << "ReadToken, failed to read token at file position "
<< is.tellg();
}
if (!isspace(is.peek())) {
KALDI_ERR << "ReadToken, expected space after token, saw instead "
<< static_cast<char>(is.peek())
<< ", at file position " << is.tellg();
}
is.get(); // consume the space.
}
int PeekToken(std::istream &is, bool binary) {
if (!binary) is >> std::ws; // consume whitespace.
bool read_bracket;
if (static_cast<char>(is.peek()) == '<') {
read_bracket = true;
is.get();
} else {
read_bracket = false;
}
int ans = is.peek();
if (read_bracket) {
if (!is.unget())
KALDI_WARN << "Error ungetting '<' in PeekToken";
}
return ans;
}
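// Example: with the stream positioned at "<Nnet> ...", Peek() returns '<'
// whereas PeekToken() skips the '<', peeks 'N', then ungets the '<', so the
// stream position is unchanged either way.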
void ExpectToken(std::istream &is, bool binary, const char *token) {
int pos_at_start = is.tellg();
KALDI_ASSERT(token != NULL);
CheckToken(token); // make sure it's valid (can be read back)
if (!binary) is >> std::ws; // consume whitespace.
std::string str;
is >> str;
is.get(); // consume the space.
if (is.fail()) {
KALDI_ERR << "Failed to read token [started at file position "
<< pos_at_start << "], expected " << token;
}
if (strcmp(str.c_str(), token) != 0) {
KALDI_ERR << "Expected token \"" << token << "\", got instead \""
<< str <<"\".";
}
}
void ExpectToken(std::istream &is, bool binary, const std::string &token) {
ExpectToken(is, binary, token.c_str());
}
} // end namespace kaldi


@ -1,231 +0,0 @@
// base/io-funcs.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_IO_FUNCS_H_
#define KALDI_BASE_IO_FUNCS_H_
// This header only contains some relatively low-level I/O functions.
// The full Kaldi I/O declarations are in ../util/kaldi-io.h
// and ../util/kaldi-table.h
// They were put in util/ in order to avoid making the Matrix library
// dependent on them.
#include <cctype>
#include <vector>
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
/*
This comment describes the Kaldi approach to I/O. All objects can be written
and read in two modes: binary and text. In addition we want to make the I/O
work if we redefine the typedef "BaseFloat" between floats and doubles.
We also want to have control over whitespace in text mode without affecting
the meaning of the file, for pretty-printing purposes.
Errors are handled by throwing an exception (std::runtime_error).
For integer and floating-point types (and boolean values):
WriteBasicType(std::ostream &, bool binary, const T&);
ReadBasicType(std::istream &, bool binary, T*);
and we expect these functions to be defined in such a way that they work when
the type T changes between float and double, so you can read float into double
and vice versa. Note that for efficiency and space-saving reasons, the Vector
and Matrix classes do not use these functions [but they preserve the type
interchangeability in their own way]
For a class (or struct) C:
class C {
..
Write(std::ostream &, bool binary, [possibly extra optional args for specific classes]) const;
Read(std::istream &, bool binary, [possibly extra optional args for specific classes]);
..
}
NOTE: The only actual optional args we used are the "add" arguments in
Vector/Matrix classes, which specify whether we should sum the data already
in the class with the data being read.
For types which are typedefs involving STL classes, I/O is as follows:
typedef std::vector<std::pair<A, B> > MyTypedefName;
The user should define something like:
WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t);
ReadMyTypedefName(std::istream &, bool binary, MyTypedefName *t);
The user would have to write these functions.
For a type std::vector<T>:
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector<T> &v);
void ReadIntegerVector(std::istream &is, bool binary, std::vector<T> *v);
For other types, e.g. vectors of pairs, the user should create a routine of the
type WriteMyTypedefName. This is to avoid introducing confusing templated functions;
we could easily create templated functions to handle most of these cases but they
would have to share the same name.
It also often happens that the user needs to write/read special tokens as part
of a file. These might be class headers, or separators/identifiers in the class.
We provide special functions for manipulating these. These special tokens must
be nonempty and must not contain any whitespace.
void WriteToken(std::ostream &os, bool binary, const char*);
void WriteToken(std::ostream &os, bool binary, const std::string & token);
int Peek(std::istream &is, bool binary);
void ReadToken(std::istream &is, bool binary, std::string *str);
int PeekToken(std::istream &is, bool binary);
WriteToken writes the token and one space (whether in binary or text mode).
Peek returns the first character of the next token, by consuming whitespace
(in text mode) and then returning the peek() character. It returns -1 at EOF;
it doesn't throw. It's useful if a class can have various forms based on
typedefs and virtual classes, and wants to know which version to read.
ReadToken allows the caller to obtain the next token. PeekToken returns the
first character of the next token (skipping over a leading '<') without
consuming anything, so a subsequent call to ReadToken still reads the whole
token. This is useful when different object types are written to the same
file; using PeekToken one can decide which of the objects to read.
There is currently no special functionality for writing/reading strings (where the strings
contain data rather than "special tokens" that are whitespace-free and nonempty). This is
because Kaldi is structured in such a way that strings don't appear, except as OpenFst symbol
table entries (and these have their own format).
NOTE: you should not call ReadBasicType and WriteBasicType with types,
such as int and size_t, that are machine-dependent -- at least not
if you want your file formats to port between machines. Use int32 and
int64 where necessary. There is no way to detect this using compile-time
assertions because C++ only keeps track of the internal representation of
the type.
*/
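// A minimal sketch (illustrative only, not part of the Kaldi API) of a class
// following the conventions described above:
//
//   class Token {
//    public:
//     int32 count_;
//     void Write(std::ostream &os, bool binary) const {
//       WriteToken(os, binary, "<Token>");
//       WriteBasicType(os, binary, count_);
//       WriteToken(os, binary, "</Token>");
//     }
//     void Read(std::istream &is, bool binary) {
//       ExpectToken(is, binary, "<Token>");
//       ReadBasicType(is, binary, &count_);
//       ExpectToken(is, binary, "</Token>");
//     }
//   };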
/// \addtogroup io_funcs_basic
/// @{
/// WriteBasicType is the name of the write function for bool, integer types,
/// and floating-point types. They all throw on error.
template<class T> void WriteBasicType(std::ostream &os, bool binary, T t);
/// ReadBasicType is the name of the read function for bool, integer types,
/// and floating-point types. They all throw on error.
template<class T> void ReadBasicType(std::istream &is, bool binary, T *t);
// Declare specialization for bool.
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b);
template <>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b);
// Declare specializations for float and double.
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f);
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f);
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f);
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *f);
// Define ReadBasicType that accepts an "add" parameter to add to
// the destination. Caution: if used in Read functions, be careful
// to initialize the parameters concerned to zero in the default
// constructor.
template<class T>
inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) {
if (!add) {
ReadBasicType(is, binary, t);
} else {
T tmp = T(0);
ReadBasicType(is, binary, &tmp);
*t += tmp;
}
}
/// Function for writing STL vectors of integer types.
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
const std::vector<T> &v);
/// Function for reading STL vector of integer types.
template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
std::vector<T> *v);
/// The WriteToken functions are for writing nonempty sequences of non-space
/// characters. They are not for general strings.
void WriteToken(std::ostream &os, bool binary, const char *token);
void WriteToken(std::ostream &os, bool binary, const std::string & token);
/// Peek consumes whitespace (if binary == false) and then returns the peek()
/// value of the stream.
int Peek(std::istream &is, bool binary);
/// ReadToken gets the next token and puts it in str (exception on failure).
void ReadToken(std::istream &is, bool binary, std::string *token);
/// PeekToken will return the first character of the next token, or -1 if end of
/// file. It's the same as Peek(), except if the first character is '<' it will
/// skip over it and will return the next character. It will unget the '<' so
/// the stream is where it was before you did PeekToken().
int PeekToken(std::istream &is, bool binary);
/// ExpectToken tries to read in the given token, and throws an exception
/// on failure.
void ExpectToken(std::istream &is, bool binary, const char *token);
void ExpectToken(std::istream &is, bool binary, const std::string & token);
/// ExpectPretty attempts to read the text in "token", but only in non-binary
/// mode. Throws exception on failure. It expects an exact match except that
/// arbitrary whitespace matches arbitrary whitespace.
void ExpectPretty(std::istream &is, bool binary, const char *token);
void ExpectPretty(std::istream &is, bool binary, const std::string & token);
/// @} end "addtogroup io_funcs_basic"
/// InitKaldiOutputStream initializes an opened stream for writing by writing an
/// optional binary header and modifying the floating-point precision; it will
/// typically not be called by users directly.
inline void InitKaldiOutputStream(std::ostream &os, bool binary);
/// InitKaldiInputStream initializes an opened stream for reading by detecting
/// the binary header and setting the "binary" value appropriately;
/// It will typically not be called by users directly.
inline bool InitKaldiInputStream(std::istream &is, bool *binary);
} // end namespace kaldi.
#include "base/io-funcs-inl.h"
#endif // KALDI_BASE_IO_FUNCS_H_


@ -1,41 +0,0 @@
// base/kaldi-common.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_COMMON_H_
#define KALDI_BASE_KALDI_COMMON_H_ 1
#include <cstddef>
#include <cstdlib>
#include <cstring> // C string stuff like strcpy
#include <string>
#include <sstream>
#include <stdexcept>
#include <cassert>
#include <vector>
#include <iostream>
#include <fstream>
#include "base/kaldi-utils.h"
#include "base/kaldi-error.h"
#include "base/kaldi-types.h"
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
#endif // KALDI_BASE_KALDI_COMMON_H_


@ -1,53 +0,0 @@
// base/kaldi-error-test.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
// testing that we get the stack trace.
namespace kaldi {
void MyFunction2() {
KALDI_ERR << "Ignore this error";
}
void MyFunction1() {
MyFunction2();
}
void UnitTestError() {
{
std::cerr << "Ignore next error:\n";
MyFunction1();
}
}
} // end namespace kaldi.
int main() {
kaldi::g_program_name = "/foo/bar/kaldi-error-test";
try {
kaldi::UnitTestError();
KALDI_ASSERT(0); // should not happen.
} catch (std::runtime_error &r) {
std::cout << "UnitTestError: the error we generated was: " << r.what();
}
}


@ -1,184 +0,0 @@
// base/kaldi-error.cc
// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifdef HAVE_EXECINFO_H
#include <execinfo.h> // To get stack trace in error messages.
// If this #include fails there is an error in the Makefile: it does not
// support your platform well. Make sure HAVE_EXECINFO_H is undefined, and the
// code will compile.
#ifdef HAVE_CXXABI_H
#include <cxxabi.h> // For name demangling.
// Useful to decode the stack trace, but only used if we have execinfo.h
#endif // HAVE_CXXABI_H
#endif // HAVE_EXECINFO_H
#include "base/kaldi-common.h"
#include "base/kaldi-error.h"
namespace kaldi {
int32 g_kaldi_verbose_level = 0; // Just initialize this global variable.
const char *g_program_name = NULL;
// If the program name was set (g_program_name != NULL), the function
// GetProgramName returns the program name (without the path) followed by a
// colon, e.g. "gmm-align:". Otherwise it returns the empty string "".
const char *GetProgramName() {
if (g_program_name == NULL) return "";
else return g_program_name;
}
// Given a filename like "/a/b/c/d/e/f.cc", GetShortFileName
// returns "e/f.cc". Does not currently work if backslash is
// the filename separator.
const char *GetShortFileName(const char *filename) {
const char *last_slash = strrchr(filename, '/');
if (!last_slash) { return filename; }
else {
while (last_slash > filename && last_slash[-1] != '/')
last_slash--;
return last_slash;
}
}
#if defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H)
// The function name looks like a macro: it's a macro if we don't have cxxabi.h
inline void KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(std::string &ans,
const char *to_append) {
// at input the string "to_append" looks like:
// ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
// We want to extract the name e.g. '_ZN5kaldi13UnitTestErrorEv",
// demangle it and return it.
int32 status;
const char *paren = strchr(to_append, '(');
const char *plus = (paren ? strchr(paren, '+') : NULL);
if (!plus) { // did not find the '(' or did not find the '+'
// This is a soft failure in case we did not get what we expected.
ans += to_append;
return;
}
std::string stripped(paren+1, plus-(paren+1)); // the bit between ( and +.
char *demangled_name = abi::__cxa_demangle(stripped.c_str(), 0, 0, &status);
// if status != 0 it is an error (demangling failure), but not all names seem
// to demangle, so we don't check it.
if (demangled_name != NULL) {
ans += demangled_name;
free(demangled_name);
} else {
ans += to_append; // add the original string.
}
}
#else // defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H)
#define KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, to_append) ans += to_append
#endif // defined(HAVE_CXXABI_H) && defined(HAVE_EXECINFO_H)
#ifdef HAVE_EXECINFO_H
std::string KaldiGetStackTrace() {
#define KALDI_MAX_TRACE_SIZE 50
#define KALDI_MAX_TRACE_PRINT 10 // must be even.
std::string ans;
void *array[KALDI_MAX_TRACE_SIZE];
size_t size = backtrace(array, KALDI_MAX_TRACE_SIZE);
char **strings = backtrace_symbols(array, size);
if (size <= KALDI_MAX_TRACE_PRINT) {
for (size_t i = 0; i < size; i++) {
KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, strings[i]);
ans += "\n";
}
} else { // print out first+last (e.g.) 5.
for (size_t i = 0; i < KALDI_MAX_TRACE_PRINT/2; i++) {
KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, strings[i]);
ans += "\n";
}
ans += ".\n.\n.\n";
for (size_t i = size - KALDI_MAX_TRACE_PRINT/2; i < size; i++) {
KALDI_APPEND_POSSIBLY_DEMANGLED_STRING(ans, strings[i]);
ans += "\n";
}
if (size == KALDI_MAX_TRACE_SIZE)
ans += ".\n.\n.\n"; // stack was too long, probably a bug.
}
free(strings); // it's all in one big malloc()ed block.
#ifdef HAVE_CXXABI_H // demangle the name, if possible.
#endif // HAVE_CXXABI_H
return ans;
}
#endif
void KaldiAssertFailure_(const char *func, const char *file,
int32 line, const char *cond_str) {
std::cerr << "KALDI_ASSERT: at " << GetProgramName() << func << ':'
<< GetShortFileName(file)
<< ':' << line << ", failed: " << cond_str << '\n';
#ifdef HAVE_EXECINFO_H
std::cerr << "Stack trace is:\n" << KaldiGetStackTrace();
#endif
std::cerr.flush();
abort(); // Will later throw instead if needed.
}
KaldiWarnMessage::KaldiWarnMessage(const char *func, const char *file,
int32 line) {
this->stream() << "WARNING (" << GetProgramName() << func << "():"
<< GetShortFileName(file) << ':' << line << ") ";
}
KaldiLogMessage::KaldiLogMessage(const char *func, const char *file,
int32 line) {
this->stream() << "LOG (" << GetProgramName() << func << "():"
<< GetShortFileName(file) << ':' << line << ") ";
}
KaldiVlogMessage::KaldiVlogMessage(const char *func, const char *file,
int32 line, int32 verbose) {
this->stream() << "VLOG[" << verbose << "] (" << GetProgramName() << func
<< "():" << GetShortFileName(file) << ':' << line << ") ";
}
KaldiErrorMessage::KaldiErrorMessage(const char *func, const char *file,
int32 line) {
this->stream() << "ERROR (" << GetProgramName() << func << "():"
<< GetShortFileName(file) << ':' << line << ") ";
}
KaldiErrorMessage::~KaldiErrorMessage() {
// (1) Print the message to stderr.
std::cerr << ss.str() << '\n';
// (2) Throw an exception with the message, plus traceback info if available.
if (!std::uncaught_exception()) {
#ifdef HAVE_EXECINFO_H
throw std::runtime_error(ss.str() + "\n\n[stack trace: ]\n" +
KaldiGetStackTrace() + "\n");
#else
throw std::runtime_error(ss.str());
#endif
} else {
abort(); // This may be temporary...
}
}
} // end namespace kaldi


@ -1,145 +0,0 @@
// base/kaldi-error.h
// Copyright 2009-2011 Microsoft Corporation; Ondrej Glembek; Lukas Burget;
// Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_ERROR_H_
#define KALDI_BASE_KALDI_ERROR_H_ 1
#include <stdexcept>
#include <string>
#include <cstring>
#include <sstream>
#include <cstdio>
#include "base/kaldi-types.h"
#include "base/kaldi-utils.h"
/* Important that this file does not depend on any other kaldi headers. */
namespace kaldi {
/// \addtogroup error_group
/// @{
/// This is set by util/parse-options.{h, cc} if you set --verbose = ? option
extern int32 g_kaldi_verbose_level;
/// This is set by util/parse-options.{h, cc} (from argv[0]) and used (if set)
/// in error reporting code to display the name of the program (this is because
/// in our scripts, we often mix together the stderr of many programs). It is
/// the base-name of the program (no directory), followed by ':'. We don't use
/// std::string, due to the static initialization order fiasco.
extern const char *g_program_name;
inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; }
/// This should be rarely used; command-line programs set the verbose level
/// automatically from ParseOptions.
inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; }
// Class KaldiLogMessage is invoked from the KALDI_WARN, KALDI_VLOG and
// KALDI_LOG macros. It prints the message to stderr. Note: we avoid
// using cerr, due to problems with thread safety. fprintf is guaranteed
// thread-safe.
// class KaldiWarnMessage is invoked from the KALDI_WARN macro.
class KaldiWarnMessage {
public:
inline std::ostream &stream() { return ss; }
KaldiWarnMessage(const char *func, const char *file, int32 line);
~KaldiWarnMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
private:
std::ostringstream ss;
};
// class KaldiLogMessage is invoked from the KALDI_LOG macro.
class KaldiLogMessage {
public:
inline std::ostream &stream() { return ss; }
KaldiLogMessage(const char *func, const char *file, int32 line);
~KaldiLogMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
private:
std::ostringstream ss;
};
// Class KaldiVlogMessage is invoked from the KALDI_VLOG macro.
class KaldiVlogMessage {
public:
KaldiVlogMessage(const char *func, const char *file, int32 line,
int32 verbose_level);
inline std::ostream &stream() { return ss; }
~KaldiVlogMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
private:
std::ostringstream ss;
};
// class KaldiErrorMessage is invoked from the KALDI_ERROR macro.
// The destructor throws an exception.
class KaldiErrorMessage {
public:
KaldiErrorMessage(const char *func, const char *file, int32 line);
inline std::ostream &stream() { return ss; }
~KaldiErrorMessage(); // defined in kaldi-error.cc
private:
std::ostringstream ss;
};
#ifdef _MSC_VER
#define __func__ __FUNCTION__
#endif
#ifndef NDEBUG
#define KALDI_ASSERT(cond) \
if (!(cond)) kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);
#else
#define KALDI_ASSERT(cond)
#endif
// also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h,
// and KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE,
// also defined there.
#ifdef KALDI_PARANOID // some more expensive asserts only checked if this defined
#define KALDI_PARANOID_ASSERT(cond) \
if (!(cond)) kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);
#else
#define KALDI_PARANOID_ASSERT(cond)
#endif
#define KALDI_ERR kaldi::KaldiErrorMessage(__func__, __FILE__, __LINE__).stream()
#define KALDI_WARN kaldi::KaldiWarnMessage(__func__, __FILE__, __LINE__).stream()
#define KALDI_LOG kaldi::KaldiLogMessage(__func__, __FILE__, __LINE__).stream()
#define KALDI_VLOG(v) if (v <= kaldi::g_kaldi_verbose_level) \
kaldi::KaldiVlogMessage(__func__, __FILE__, __LINE__, v).stream()
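// Typical usage:
//   KALDI_LOG << "processed " << n << " items.";  // prints to stderr
//   KALDI_WARN << "suspicious value " << x;       // prints to stderr
//   KALDI_ERR << "bad dimension " << d;           // prints, then throws std::runtime_error
//   KALDI_VLOG(2) << "detail";                    // printed only if verbose level >= 2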
inline bool IsKaldiError(const std::string &str) {
return(!strncmp(str.c_str(), "ERROR ", 6));
}
void KaldiAssertFailure_(const char *func, const char *file,
int32 line, const char *cond_str);
/// @} end "addtogroup error_group"
} // namespace kaldi
#endif // KALDI_BASE_KALDI_ERROR_H_


@ -1,258 +0,0 @@
// base/kaldi-math-test.cc
// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; Jan Silovsky
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-math.h"
namespace kaldi {
template<class I> void UnitTestGcdTpl() {
for (I a = 1; a < 15; a++) { // a is min gcd.
I b = (I)(rand() % 10);
I c = (I)(rand() % 10);
if (rand()%2 == 0 && std::numeric_limits<I>::is_signed) b = -b;
if (rand()%2 == 0 && std::numeric_limits<I>::is_signed) c = -c;
if (b == 0 && c == 0) continue; // gcd not defined for such numbers.
I g = Gcd(b*a, c*a);
KALDI_ASSERT(g >= a);
KALDI_ASSERT((b*a) % g == 0);
KALDI_ASSERT((c*a) % g == 0);
}
}
void UnitTestRoundUpToNearestPowerOfTwo() {
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(1) == 1);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(2) == 2);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(3) == 4);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(4) == 4);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(7) == 8);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(8) == 8);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(255) == 256);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(256) == 256);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(257) == 512);
KALDI_ASSERT(RoundUpToNearestPowerOfTwo(1073700000) == 1073741824 );
}
void UnitTestGcd() {
UnitTestGcdTpl<int>();
UnitTestGcdTpl<char>();
UnitTestGcdTpl<size_t>();
UnitTestGcdTpl<unsigned short>();
}
void UnitTestRand() {
// Testing random-number generation.
using namespace kaldi;
std::cout << "Testing random-number generation. "
<< "If there is an error this may not terminate.\n";
std::cout << "If this does not terminate, look more closely. "
<< "There might be a problem [but might not be]\n";
for (int i = 1; i < 10; i++) {
{ // test RandUniform.
std::cout << "Test RandUniform\n";
KALDI_ASSERT(RandUniform() >= 0 && RandUniform() <= 1);
float sum = RandUniform()-0.5;
for (int j = 0; ; j++) {
sum += RandUniform()-0.5;
if (std::abs(sum) < 0.5*sqrt((double)j)) break;
}
}
{ // test RandGauss.
float sum = RandGauss();
for (int j = 0; ; j++) {
sum += RandGauss();
if (std::abs(sum) < 0.5*sqrt((double)j)) break;
}
}
{ // test poisson_rand().
KALDI_ASSERT(RandPoisson(3.0) >= 0);
KALDI_ASSERT(RandPoisson(0.0) == 0);
std::cout << "Test RandPoisson\n";
float lambda = RandUniform() * 3.0; // between 0 and 3.
double sum = RandPoisson(lambda) - lambda; // expected value is zero.
for (int j = 0; ; j++) {
sum += RandPoisson(lambda) - lambda;
if (std::abs(sum) < 0.5*sqrt((double)j)) break;
}
}
{ // test WithProb().
for (int32 i = 0; i < 10; i++) {
KALDI_ASSERT((WithProb(0.0) == false) && (WithProb(1.0) == true));
}
{
int32 tot = 0, n = 10000;
BaseFloat p = 0.5;
for (int32 i = 0; i < n; i++)
tot += WithProb(p);
KALDI_ASSERT(tot > (n * p * 0.8) && tot < (n * p * 1.2));
}
{
int32 tot = 0, n = 10000;
BaseFloat p = 0.25;
for (int32 i = 0; i < n; i++)
tot += WithProb(p);
KALDI_ASSERT(tot > (n * p * 0.8) && tot < (n * p * 1.2));
}
}
{ // test RandInt().
KALDI_ASSERT(RandInt(0, 3) >= 0 && RandInt(0, 3) <= 3);
std::cout << "Test RandInt\n";
int minint = rand() % 200;
int maxint = minint + 1 + rand() % 20;
float sum = RandInt(minint, maxint) + 0.5*(minint+maxint);
for (int j = 0; ; j++) {
sum += RandInt(minint, maxint) - 0.5*(minint+maxint);
if (std::abs((float)sum) < 0.5*sqrt((double)j)*(maxint-minint)) break;
}
}
{ // test RandPrune in basic way.
KALDI_ASSERT(RandPrune(1.1, 1.0) == 1.1);
KALDI_ASSERT(RandPrune(0.0, 0.0) == 0.0);
KALDI_ASSERT(RandPrune(-1.1, 1.0) == -1.1);
KALDI_ASSERT(RandPrune(0.0, 1.0) == 0.0);
KALDI_ASSERT(RandPrune(0.5, 1.0) >= 0.0);
KALDI_ASSERT(RandPrune(-0.5, 1.0) <= 0.0);
BaseFloat f = RandPrune(-0.5, 1.0);
KALDI_ASSERT(f == 0.0 || f == -1.0);
f = RandPrune(0.5, 1.0);
KALDI_ASSERT(f == 0.0 || f == 1.0);
}
}
}
void UnitTestLogAddSub() {
using namespace kaldi;
for (int i = 0; i < 100; i++) {
double f1 = rand() % 10000, f2 = rand() % 20;
double add1 = exp(LogAdd(log(f1), log(f2)));
double add2 = exp(LogAdd(log(f2), log(f1)));
double add = f1 + f2, thresh = add*0.00001;
KALDI_ASSERT(std::abs(add-add1) < thresh && std::abs(add-add2) < thresh);
try {
double f2_check = exp(LogSub(log(add), log(f1))), thresh = (f2*0.01)+0.001;
KALDI_ASSERT(std::abs(f2_check-f2) < thresh);
} catch(...) {
KALDI_ASSERT(f2 == 0); // It will probably crash for f2=0.
}
}
}
void UnitTestDefines() { // Yes, we even unit-test the preprocessor statements.
KALDI_ASSERT(exp(kLogZeroFloat) == 0.0);
KALDI_ASSERT(exp(kLogZeroDouble) == 0.0);
BaseFloat den = 0.0;
KALDI_ASSERT(KALDI_ISNAN(0.0 / den));
KALDI_ASSERT(!KALDI_ISINF(0.0 / den));
KALDI_ASSERT(!KALDI_ISFINITE(0.0 / den));
KALDI_ASSERT(!KALDI_ISNAN(1.0 / den));
KALDI_ASSERT(KALDI_ISINF(1.0 / den));
KALDI_ASSERT(!KALDI_ISFINITE(1.0 / den));
KALDI_ASSERT(KALDI_ISFINITE(0.0));
KALDI_ASSERT(!KALDI_ISINF(0.0));
KALDI_ASSERT(!KALDI_ISNAN(0.0));
std::cout << 1.0+DBL_EPSILON;
std::cout << 1.0 + 0.5*DBL_EPSILON;
KALDI_ASSERT(1.0 + DBL_EPSILON != 1.0 && 1.0 + (0.5*DBL_EPSILON) == 1.0
&& "If this test fails, you can probably just comment it out-- may mean your CPU exceeds expected floating point precision");
KALDI_ASSERT(1.0f + FLT_EPSILON != 1.0f && 1.0f + (0.5f*FLT_EPSILON) == 1.0f
&& "If this test fails, you can probably just comment it out-- may mean your CPU exceeds expected floating point precision");
KALDI_ASSERT(std::abs(sin(M_PI)) < 1.0e-05 && std::abs(cos(M_PI)+1.0) < 1.0e-05);
KALDI_ASSERT(std::abs(sin(M_2PI)) < 1.0e-05 && std::abs(cos(M_2PI)-1.0) < 1.0e-05);
KALDI_ASSERT(std::abs(sin(exp(M_LOG_2PI))) < 1.0e-05);
KALDI_ASSERT(std::abs(cos(exp(M_LOG_2PI)) - 1.0) < 1.0e-05);
}
void UnitTestAssertFunc() { // Testing the Assert*** functions
using namespace kaldi;
for (int i = 1; i < 100; i++) {
float f1 = rand() % 10000 + 1, f2 = rand() % 20 + 1;
float tmp1 = f1 * f2;
float tmp2 = (1/f1 + 1/f2);
float tmp3 = (1/(f1 - 1.0) + 1/(f2 - 1.0));
float tmp4 = (1/(f1 + 1.0) + 1/(f2 + 1.0));
float add = f1 + f2;
float addeql = tmp1 * tmp2, addgeq = tmp1 * tmp3, addleq = tmp1 * tmp4;
float thresh = 0.00001;
AssertEqual(add, addeql, thresh); // test AssertEqual()
AssertGeq(addgeq, add, thresh); // test AssertGeq()
AssertLeq(addleq, add, thresh); // test AssertLeq()
}
}
template<class I> void UnitTestFactorizeTpl() {
for (int p= 0; p < 100; p++) {
I m = rand() % 100000;
if (m >= 1) {
std::vector<I> factors;
Factorize(m, &factors);
I m2 = 1;
for (size_t i = 0; i < factors.size(); i++) {
m2 *= factors[i];
if (i+1 < factors.size())
KALDI_ASSERT(factors[i+1] >= factors[i]); // check sorted.
}
KALDI_ASSERT(m2 == m); // check correctness.
}
}
}
void UnitTestFactorize() {
UnitTestFactorizeTpl<int>();
UnitTestFactorizeTpl<size_t>();
UnitTestFactorizeTpl<unsigned short>();
}
void UnitTestApproxEqual() {
KALDI_ASSERT(ApproxEqual(1.0, 1.00001));
KALDI_ASSERT(ApproxEqual(1.0, 1.00001, 0.001));
KALDI_ASSERT(!ApproxEqual(1.0, 1.1));
KALDI_ASSERT(!ApproxEqual(1.0, 1.01, 0.001));
KALDI_ASSERT(!ApproxEqual(1.0, 0.0));
KALDI_ASSERT(ApproxEqual(0.0, 0.0));
KALDI_ASSERT(!ApproxEqual(0.0, 0.00001));
KALDI_ASSERT(!ApproxEqual(std::numeric_limits<float>::infinity(),
-std::numeric_limits<float>::infinity()));
KALDI_ASSERT(ApproxEqual(std::numeric_limits<float>::infinity(),
std::numeric_limits<float>::infinity()));
KALDI_ASSERT(ApproxEqual(-std::numeric_limits<float>::infinity(),
-std::numeric_limits<float>::infinity()));
KALDI_ASSERT(!ApproxEqual(-std::numeric_limits<float>::infinity(),
0));
KALDI_ASSERT(!ApproxEqual(-std::numeric_limits<float>::infinity(),
1));
}
} // end namespace kaldi.
int main() {
using namespace kaldi;
UnitTestApproxEqual();
UnitTestGcd();
UnitTestFactorize();
UnitTestDefines();
UnitTestLogAddSub();
UnitTestRand();
UnitTestAssertFunc();
UnitTestRoundUpToNearestPowerOfTwo();
}


@ -1,107 +0,0 @@
// base/kaldi-math.cc
// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian;
// Saarland University; Jan Silovsky
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "base/kaldi-math.h"
namespace kaldi {
// These routines are tested in matrix/matrix-test.cc
int32 RoundUpToNearestPowerOfTwo(int32 n) {
KALDI_ASSERT(n > 0);
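// Decrement, then OR in right-shifted copies so that every bit below the
// highest set bit becomes 1; adding 1 afterwards yields the next power of
// two. E.g. n = 257: 256 = 0b100000000 smears to 511, and 511 + 1 = 512.
// Exact powers of two map to themselves (the initial n-- ensures this).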
n--;
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
return n+1;
}
bool WithProb(BaseFloat prob) {
KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0,
// but we allow slightly larger values that could arise from roundoff in
// previous calculations.
KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128);
if (prob == 0) return false;
else if (prob == 1.0) return true;
else if (prob * RAND_MAX < 128.0) {
// prob is very small but nonzero, and the "main algorithm"
// wouldn't work that well. So: with probability 1/128, we
// return WithProb (prob * 128), else return false.
if (rand() < RAND_MAX / 128) { // i.e. with probability 1/128...
// Note: we know that prob * 128.0 < 1.0, because
// we asserted RAND_MAX > 128 * 128.
return WithProb(prob * 128.0);
} else {
return false;
}
} else {
return (rand() < ((RAND_MAX + static_cast<BaseFloat>(1.0)) * prob));
}
}
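// Sanity check for the small-prob branch: by induction P(WithProb(q)) == q,
// so P(true) = (1/128) * P(WithProb(128 * prob)) = (1/128) * 128 * prob = prob.
// The recursion terminates because prob grows by a factor of 128 per level
// until it reaches the main branch.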
int32 RandInt(int32 min_val, int32 max_val) { // This is not exact.
KALDI_ASSERT(max_val >= min_val);
if (max_val == min_val) return min_val;
#ifdef _MSC_VER
// RAND_MAX is quite small on Windows -> may need to handle larger numbers.
if (RAND_MAX > (max_val-min_val)*8) {
// *8 to avoid large inaccuracies in probability, from the modulus...
return min_val + ((unsigned int)rand() % (unsigned int)(max_val+1-min_val));
} else {
if ((unsigned int)(RAND_MAX*RAND_MAX) > (unsigned int)((max_val+1-min_val)*8)) {
// *8 to avoid inaccuracies in probability, from the modulus...
return min_val + ( (unsigned int)( (rand()+RAND_MAX*rand()))
% (unsigned int)(max_val+1-min_val));
} else {
throw std::runtime_error(std::string()
+"rand_int failed because we do not support "
+"such large random numbers. "
+"(Extend this function).");
}
}
#else
return min_val +
(static_cast<int32>(rand()) % (int32)(max_val+1-min_val));
#endif
}
// Returns poisson-distributed random number.
// Take care: this takes time proportional
// to lambda. Faster algorithms exist but are more complex.
int32 RandPoisson(float lambda) {
// Knuth's algorithm.
KALDI_ASSERT(lambda >= 0);
float L = expf(-lambda), p = 1.0;
int32 k = 0;
do {
k++;
float u = RandUniform();
p *= u;
} while (p > L);
return k-1;
}
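// [Editor's note: explanation added for illustration; not part of the original file.]
// Knuth's algorithm multiplies independent Uniform(0,1) draws until the product
// drops below exp(-lambda); the number of draws minus one is then distributed
// as Poisson(lambda). The expected iteration count is lambda + 1, which is why
// the comment above warns that the running time is proportional to lambda.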
} // end namespace kaldi

View File

@ -1,309 +0,0 @@
// base/kaldi-math.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian;
// Jan Silovsky; Saarland University
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_MATH_H_
#define KALDI_BASE_KALDI_MATH_H_ 1
#ifdef _MSC_VER
#include <float.h>
#endif
#include <cmath>
#include <limits>
#include <vector>
#include "base/kaldi-types.h"
#include "base/kaldi-common.h"
#ifndef DBL_EPSILON
#define DBL_EPSILON 2.2204460492503131e-16
#endif
#ifndef FLT_EPSILON
#define FLT_EPSILON 1.19209290e-7f
#endif
#ifndef M_PI
# define M_PI 3.1415926535897932384626433832795
#endif
#ifndef M_SQRT2
# define M_SQRT2 1.4142135623730950488016887
#endif
#ifndef M_2PI
# define M_2PI 6.283185307179586476925286766559005
#endif
#ifndef M_SQRT1_2
# define M_SQRT1_2 0.7071067811865475244008443621048490
#endif
#ifndef M_LOG_2PI
#define M_LOG_2PI 1.8378770664093454835606594728112
#endif
#ifndef M_LN2
#define M_LN2 0.693147180559945309417232121458
#endif
#ifdef _MSC_VER
# define KALDI_ISNAN _isnan
# define KALDI_ISINF(x) (!_isnan(x) && _isnan(x-x))
# define KALDI_ISFINITE _finite
#else
# define KALDI_ISNAN std::isnan
# define KALDI_ISINF std::isinf
# define KALDI_ISFINITE(x) std::isfinite(x)
#endif
#if !defined(KALDI_SQR)
# define KALDI_SQR(x) ((x) * (x))
#endif
namespace kaldi {
// -infinity
const float kLogZeroFloat = -std::numeric_limits<float>::infinity();
const double kLogZeroDouble = -std::numeric_limits<double>::infinity();
const BaseFloat kBaseLogZero = -std::numeric_limits<BaseFloat>::infinity();
// Big numbers
const BaseFloat kBaseFloatMax = std::numeric_limits<BaseFloat>::max();
// Returns a random integer between min and max inclusive.
int32 RandInt(int32 min, int32 max);
bool WithProb(BaseFloat prob); // Returns true with probability "prob",
// with 0 <= prob <= 1 [we check this].
// Internally calls rand(). This function is carefully implemented so
// that it should work even if prob is very small.
inline float RandUniform() { // random intended to be strictly between 0 and 1.
return static_cast<float>((rand() + 1.0) / (RAND_MAX+2.0));
}
inline float RandGauss() {
return static_cast<float>(sqrt (-2 * std::log(RandUniform()))
* cos(2*M_PI*RandUniform()));
}
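// [Editor's note: added for clarity; not part of the original file.]
// RandGauss() is the Box-Muller transform: for independent u1, u2 ~ Uniform(0,1),
// sqrt(-2 ln u1) * cos(2 pi u2) is distributed as a standard normal N(0, 1).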
// Returns poisson-distributed random number. Uses Knuth's algorithm.
// Take care: this takes time proportional
// to lambda. Faster algorithms exist but are more complex.
int32 RandPoisson(float lambda);
// Also see Vector<float,double>::RandCategorical().
// This is a randomized pruning mechanism that preserves expectations,
// that we typically use to prune posteriors.
template<class Float>
inline Float RandPrune(Float post, BaseFloat prune_thresh) {
KALDI_ASSERT(prune_thresh >= 0.0);
if (post == 0.0 || std::abs(post) >= prune_thresh)
return post;
return (post >= 0 ? 1.0 : -1.0) *
(RandUniform() <= fabs(post)/prune_thresh ? prune_thresh : 0.0);
}
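// [Editor's note: derivation added for illustration; not part of the original file.]
// RandPrune preserves expectations: for 0 < |post| < prune_thresh the output is
// sign(post) * prune_thresh with probability |post| / prune_thresh and 0 otherwise, so
//   E[output] = sign(post) * prune_thresh * |post| / prune_thresh = post.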
static const double kMinLogDiffDouble = std::log(DBL_EPSILON); // negative!
static const float kMinLogDiffFloat = std::log(FLT_EPSILON); // negative!
inline double LogAdd(double x, double y) {
double diff;
if (x < y) {
diff = x - y;
x = y;
} else {
diff = y - x;
}
// diff is negative. x is now the larger one.
if (diff >= kMinLogDiffDouble) {
double res;
#ifdef _MSC_VER
res = x + log(1.0 + exp(diff));
#else
res = x + log1p(exp(diff));
#endif
return res;
} else {
return x; // return the larger one.
}
}
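// [Editor's note: added for clarity; not part of the original file.]
// LogAdd computes log(exp(x) + exp(y)) through the identity
//   log(exp(x) + exp(y)) = x + log1p(exp(y - x))   for x >= y,
// which never exponentiates a positive number and therefore cannot overflow.
// When y - x < log(epsilon), exp(y - x) cannot change the result at machine
// precision, so the larger argument is returned directly.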
inline float LogAdd(float x, float y) {
float diff;
if (x < y) {
diff = x - y;
x = y;
} else {
diff = y - x;
}
// diff is negative. x is now the larger one.
if (diff >= kMinLogDiffFloat) {
float res;
#ifdef _MSC_VER
res = x + logf(1.0 + expf(diff));
#else
res = x + log1pf(expf(diff));
#endif
return res;
} else {
return x; // return the larger one.
}
}
// returns log(exp(x) - exp(y)).
inline double LogSub(double x, double y) {
if (y >= x) { // y == x returns log(0) = -infinity; y > x throws.
if (y == x)
return kLogZeroDouble;
else
KALDI_ERR << "Cannot subtract a larger from a smaller number.";
}
double diff = y - x; // Will be negative.
double res = x + log(1.0 - exp(diff));
// res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
if (KALDI_ISNAN(res))
return kLogZeroDouble;
return res;
}
// returns log(exp(x) - exp(y)).
inline float LogSub(float x, float y) {
if (y >= x) { // y == x returns log(0) = -infinity; y > x throws.
if (y == x)
return kLogZeroFloat;
else
KALDI_ERR << "Cannot subtract a larger from a smaller number.";
}
float diff = y - x; // Will be negative.
float res = x + logf(1.0 - expf(diff));
// res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
if (KALDI_ISNAN(res))
return kLogZeroFloat;
return res;
}
// return (a == b)
static inline bool ApproxEqual(float a, float b,
float relative_tolerance = 0.001) {
// a==b handles infinities.
if (a==b) return true;
float diff = std::abs(a-b);
if (diff == std::numeric_limits<float>::infinity()
|| diff!=diff) return false; // diff is +inf or nan.
return (diff <= relative_tolerance*(std::abs(a)+std::abs(b)));
}
// assert (a == b)
static inline void AssertEqual(float a, float b,
float relative_tolerance = 0.001) {
// a==b handles infinities.
KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance));
}
// assert (a>=b)
static inline void AssertGeq(float a, float b,
float relative_tolerance = 0.001) {
KALDI_ASSERT(a-b >= -relative_tolerance * (std::abs(a)+std::abs(b)));
}
// assert (a<=b)
static inline void AssertLeq(float a, float b,
float relative_tolerance = 0.001) {
KALDI_ASSERT(a-b <= relative_tolerance * (std::abs(a)+std::abs(b)));
}
// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0.
int32 RoundUpToNearestPowerOfTwo(int32 n);
template<class I> I Gcd(I m, I n) {
if (m == 0 || n == 0) {
if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors.
KALDI_ERR << "Undefined GCD since m = 0, n = 0.";
}
return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m));
// return absolute value of whichever is nonzero
}
// could use compile-time assertion
// but involves messing with complex template stuff.
KALDI_ASSERT(std::numeric_limits<I>::is_integer);
while (1) {
m %= n;
if (m == 0) return (n > 0 ? n : -n);
n %= m;
if (n == 0) return (m > 0 ? m : -m);
}
}
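// [Editor's note: worked example added for illustration; not part of the original file.]
// This is Euclid's algorithm with alternating remainders, e.g. Gcd(24, 36):
//   m = 24 % 36 = 24;  n = 36 % 24 = 12;  m = 24 % 12 = 0  ->  returns n = 12.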
template<class I> void Factorize(I m, std::vector<I> *factors) {
// Splits a number into its prime factors, in sorted order from
// least to greatest, with duplication. A very inefficient
// algorithm, which is mainly intended for use in the
// mixed-radix FFT computation (where we assume most factors
// are small).
KALDI_ASSERT(factors != NULL);
KALDI_ASSERT(m >= 1); // Doesn't work for zero or negative numbers.
factors->clear();
I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 };
// First try small factors.
for (I i = 0; i < 10; i++) {
if (m == 1) return; // We're done.
while (m % small_factors[i] == 0) {
m /= small_factors[i];
factors->push_back(small_factors[i]);
}
}
// Next try all odd numbers starting from 31.
for (I j = 31;; j += 2) {
if (m == 1) return;
while (m % j == 0) {
m /= j;
factors->push_back(j);
}
}
}
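// [Editor's note: worked example added for illustration; not part of the original file.]
// Factorize(360, &factors) yields {2, 2, 2, 3, 3, 5}: the small-factor loop
// strips 2^3, 3^2 and 5, leaving m == 1, so the odd-number loop never runs.
// Trial division by odd j >= 31 is sufficient because every smaller prime has
// already been divided out, so any odd j that divides m must itself be prime.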
inline double Hypot(double x, double y) { return hypot(x, y); }
inline float Hypot(float x, float y) { return hypotf(x, y); }
inline double Log1p(double x) { return log1p(x); }
inline float Log1p(float x) { return log1pf(x); }
} // namespace kaldi
#endif // KALDI_BASE_KALDI_MATH_H_

View File

@ -1,61 +0,0 @@
// base/kaldi-types.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_TYPES_H_
#define KALDI_BASE_KALDI_TYPES_H_ 1
namespace kaldi {
// TYPEDEFS ..................................................................
#if (KALDI_DOUBLEPRECISION != 0)
typedef double BaseFloat;
#else
typedef float BaseFloat;
#endif
}
#ifdef _MSC_VER
namespace kaldi {
typedef unsigned __int16 uint16;
typedef unsigned __int32 uint32;
typedef __int16 int16;
typedef __int32 int32;
typedef __int64 int64;
typedef unsigned __int64 uint64;
typedef float float32;
typedef double double64;
}
#else
// we can do this a different way if some platform
// we find in the future lacks stdint.h
#include <stdint.h>
namespace kaldi {
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef float float32;
typedef double double64;
} // end namespace kaldi
#endif
#endif // KALDI_BASE_KALDI_TYPES_H_

View File

@ -1,33 +0,0 @@
// base/kaldi-utils.cc
// Copyright 2009-2011 Karel Vesely; Yanmin Qian; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
std::string CharToString(const char &c) {
char buf[20];
if (std::isprint(c))
sprintf(buf, "\'%c\'", c);
else
sprintf(buf, "[character %d]", (int) c);
return (std::string) buf;
}
} // end namespace kaldi

View File

@ -1,133 +0,0 @@
// base/kaldi-utils.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
// Saarland University; Karel Vesely; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_UTILS_H_
#define KALDI_BASE_KALDI_UTILS_H_ 1
#include <limits>
#include <string>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661)
#define __restrict__
#endif
#ifdef HAVE_POSIX_MEMALIGN
# define KALDI_MEMALIGN(align, size, pp_orig) \
(!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL)
# define KALDI_MEMALIGN_FREE(x) free(x)
#elif defined(HAVE_MEMALIGN)
/* Some systems have memalign() but no declaration for it */
void * memalign(size_t align, size_t size);
# define KALDI_MEMALIGN(align, size, pp_orig) \
(*(pp_orig) = memalign(align, size))
# define KALDI_MEMALIGN_FREE(x) free(x)
#elif defined(_MSC_VER)
# define KALDI_MEMALIGN(align, size, pp_orig) \
(*(pp_orig) = _aligned_malloc(size, align))
# define KALDI_MEMALIGN_FREE(x) _aligned_free(x)
#else
#error Manual memory alignment is no longer supported
#endif
#ifdef __ICC
#pragma warning(disable: 383) // ICPC remark we don't want.
#pragma warning(disable: 810) // ICPC remark we don't want.
#pragma warning(disable: 981) // ICPC remark we don't want.
#pragma warning(disable: 1418) // ICPC remark we don't want.
#pragma warning(disable: 444) // ICPC remark we don't want.
#pragma warning(disable: 869) // ICPC remark we don't want.
#pragma warning(disable: 1287) // ICPC remark we don't want.
#pragma warning(disable: 279) // ICPC remark we don't want.
#endif
namespace kaldi {
// CharToString prints the character in a human-readable form, for debugging.
std::string CharToString(const char &c);
inline int MachineIsLittleEndian() {
int check = 1;
return (*reinterpret_cast<char*>(&check) != 0);
}
}
#define KALDI_SWAP8(a) { \
int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[7]; ((char*)&a)[7]=t;\
t = ((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[6]; ((char*)&a)[6]=t;\
t = ((char*)&a)[2]; ((char*)&a)[2]=((char*)&a)[5]; ((char*)&a)[5]=t;\
t = ((char*)&a)[3]; ((char*)&a)[3]=((char*)&a)[4]; ((char*)&a)[4]=t;}
#define KALDI_SWAP4(a) { \
int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[3]; ((char*)&a)[3]=t;\
t = ((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[2]; ((char*)&a)[2]=t;}
#define KALDI_SWAP2(a) { \
int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[1]; ((char*)&a)[1]=t;}
// Makes copy constructor and operator= private. Same as in compat.h of OpenFst
// toolkit. If using VS, for which this results in compilation errors, we
// do it differently.
#if defined(_MSC_VER)
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \
void operator = (const type&)
#else
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \
type(const type&); \
void operator = (const type&)
#endif
template<bool B> class KaldiCompileTimeAssert { };
template<> class KaldiCompileTimeAssert<true> {
public:
static inline void Check() { }
};
#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check()
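// [Editor's note: usage example added for illustration; not part of the original file.]
// Only the specialization KaldiCompileTimeAssert<true> defines Check(), so
//   KALDI_COMPILE_TIME_ASSERT(sizeof(int32) == 4);  // compiles
//   KALDI_COMPILE_TIME_ASSERT(sizeof(int32) == 8);  // error: Check() is not a member
// a false condition is rejected at compile time rather than at run time.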
#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \
KaldiCompileTimeAssert<std::numeric_limits<I>::is_specialized \
&& std::numeric_limits<I>::is_integer>::Check()
#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \
KaldiCompileTimeAssert<std::numeric_limits<F>::is_specialized \
&& !std::numeric_limits<F>::is_integer>::Check()
#ifdef _MSC_VER
#define KALDI_STRCASECMP _stricmp
#else
#define KALDI_STRCASECMP strcasecmp
#endif
#ifdef _MSC_VER
# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10);
#else
# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10);
#endif
#define KALDI_STRTOD(cur_cstr, end_cstr) strtod(cur_cstr, end_cstr)
#endif // KALDI_BASE_KALDI_UTILS_H_

File diff suppressed because it is too large. Load Diff

View File

@ -1,885 +0,0 @@
// TODO: This is a dup, we should get back to the shared one. But this one has some stuff the other doesn't.
//
// <copyright file="basetypes.old.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#ifndef _BASETYPES_
#define _BASETYPES_
// [kit]: seems SECURE_SCL=0 doesn't work - causes crashes in release mode
// there are some complaints along this line on the web
// so disabled for now
//
//// we have agreed that _SECURE_SCL is disabled for release builds
//// it would be super dangerous to mix projects where this is inconsistent
//// this is one way to detect possible mismatches
//#ifdef NDEBUG
//#if !defined(_CHECKED) && _SECURE_SCL != 0
//#error "_SECURE_SCL should be disabled for release builds"
//#endif
//#endif
#ifndef UNDER_CE // fixed-buffer overloads not available for wince
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#endif
#pragma warning (push)
#pragma warning (disable: 4793) // caused by varargs
// disable certain parts of basetypes for wince compilation
#ifdef UNDER_CE
#define BASETYPES_NO_UNSAFECRTOVERLOAD // disable unsafe CRT overloads (safe functions don't exist in wince)
#define BASETYPES_NO_STRPRINTF // dependent functions here are not defined for wince
#endif
#ifndef OACR // dummies when we are not compiling under Office
#define OACR_WARNING_SUPPRESS(x, y)
#define OACR_WARNING_DISABLE(x, y)
#define OACR_WARNING_PUSH
#define OACR_WARNING_POP
#endif
#ifndef OACR_ASSUME // this seems to be a different one
#define OACR_ASSUME(x)
#endif
// following oacr warnings are not level1 or level2-security
// in the current stage we want to ignore those warnings
// if necessary this can be fixed at a later stage
// not a bug
OACR_WARNING_DISABLE(EXC_NOT_CAUGHT_BY_REFERENCE, "Not indicating a bug or security threat.");
OACR_WARNING_DISABLE(LOCALDECLHIDESLOCAL, "Not indicating a bug or security threat.");
// not reviewed
OACR_WARNING_DISABLE(MISSING_OVERRIDE, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(EMPTY_DTOR, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(DEREF_NULL_PTR, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(INVALID_PARAM_VALUE_1, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(VIRTUAL_CALL_IN_CTOR, "Not level1 or level2_security.");
OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_security.");
// determine WIN32 api calling convention
// it seems this is normally stdcall?? but when compiling as /clr:pure or /clr:Safe
// this is not supported, so in this case, we need to use the 'default' calling convention
// TODO: can we reuse the #define of WINAPI??
#ifdef _WIN32
#ifdef _M_CEE_SAFE
#define WINAPI_CC __clrcall
#elif _M_CEE
#define WINAPI_CC __clrcall
#else
#define WINAPI_CC __stdcall
#endif
#endif
// fix some warnings in STL
#if !defined(_DEBUG) || defined(_CHECKED) || defined(_MANAGED)
#pragma warning(disable : 4702) // unreachable code
#endif
#include <stdarg.h>
#include <stdio.h>
#include <string.h> // include here because we redefine some names later
#include <string>
#include <vector>
#include <cmath> // for HUGE_VAL
#include <assert.h>
#include <map>
#ifdef __windows__
#include <windows.h> // for CRITICAL_SECTION
#include <strsafe.h> // for strbcpy() etc templates
#endif
#if __unix__
#include <strings.h>
#include <chrono>
#include <thread>
#include <unistd.h>
#include <sys/stat.h>
#include <dlfcn.h>
typedef unsigned char byte;
#endif
#pragma push_macro("STRSAFE_NO_DEPRECATE")
#define STRSAFE_NO_DEPRECATE // deprecation managed elsewhere, not by strsafe
#pragma pop_macro("STRSAFE_NO_DEPRECATE")
// CRT error handling seems to not be included in wince headers
// so we define our own imports
#ifdef UNDER_CE
// TODO: is this true - is GetLastError == errno?? - also this adds a dependency on windows.h
#define errno GetLastError()
// strerror(x) - x here is normally errno - TODO: make this return errno as a string
#define strerror(x) "strerror error but can't report error number sorry!"
#endif
#ifndef __in // dummies for sal annotations if compiler does not support it
#define __in
#define __inout_z
#define __in_count(x)
#define __inout_cap(x)
#define __inout_cap_c(x)
#endif
#ifndef __out_z_cap // non-VS2005 annotations
#define __out_cap(x)
#define __out_z_cap(x)
#define __out_cap_c(x)
#endif
#ifndef __override // and some more non-std extensions required by Office
#define __override virtual
#endif
// disable warnings for which fixing would make code less readable
#pragma warning(disable : 4290) // throw() declaration ignored
#pragma warning(disable : 4244) // conversion from typeA to typeB, possible loss of data
// ----------------------------------------------------------------------------
// basic macros
// ----------------------------------------------------------------------------
#define SAFE_DELETE(p) { if(p) { delete (p); (p)=NULL; } }
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } // nasty! use CComPtr<>
#ifndef assert
#ifdef _CHECKED // basetypes.h expects this function to be defined (it is in message.h)
extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp);
#define assert(exp) ((exp)||(_CHECKED_ASSERT_error(__FILE__,__LINE__,#exp),0))
#else
#define assert assert
#endif
#endif
using namespace std;
// ----------------------------------------------------------------------------
// basic data types
// ----------------------------------------------------------------------------
namespace msra { namespace basetypes {
// class ARRAY -- std::vector with array-bounds checking
// VS 2008 and above do this, so there is no longer a need for this.
template<class _ElemType>
class ARRAY : public std::vector<_ElemType>
{
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
static void throwOutOfBounds()
{ // (moved to separate function hoping to keep inlined code smaller)
OACR_WARNING_PUSH;
OACR_WARNING_DISABLE(IGNOREDBYCOMMA, "Reviewed OK. Special trick below to show a message when assertion fails"
"[rogeryu 2006/03/24]");
OACR_WARNING_DISABLE(BOGUS_EXPRESSION_LIST, "This is intentional. [rogeryu 2006/03/24]");
assert (("ARRAY::operator[] out of bounds", false));
OACR_WARNING_POP;
}
#endif
public:
ARRAY() : std::vector<_ElemType> () { }
ARRAY (int size) : std::vector<_ElemType> (size) { }
#if defined (_DEBUG) || defined (_CHECKED) // debug version with range checking
// ------------------------------------------------------------------------
// operator[]: with array-bounds checking
// ------------------------------------------------------------------------
inline _ElemType & operator[] (int index) // writing
{
if (index < 0 || index >= size()) throwOutOfBounds();
return (*(std::vector<_ElemType>*) this)[index];
}
// ------------------------------------------------------------------------
inline const _ElemType & operator[] (int index) const // reading
{
if (index < 0 || index >= size()) throwOutOfBounds();
return (*(std::vector<_ElemType>*) this)[index];
}
#endif
// ------------------------------------------------------------------------
// size(): same as base class, but returning an 'int' instead of 'size_t'
// to allow for more readable code
// ------------------------------------------------------------------------
inline int size() const
{
size_t siz = ((std::vector<_ElemType>*) this)->size();
return (int) siz;
}
};
// overload swap(), otherwise we'd fall back to 3-way assignment & possibly throw
template<class _T> inline void swap (ARRAY<_T> & L, ARRAY<_T> & R) throw()
{ swap ((std::vector<_T> &) L, (std::vector<_T> &) R); }
// class fixed_vector - non-resizable vector
template<class _T> class fixed_vector
{
_T * p; // pointer array
size_t n; // number of elements
void check (int index) const { index; assert (index >= 0 && (size_t) index < n); }
void check (size_t index) const { index; assert (index < n); }
// ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from.
//fixed_vector (const fixed_vector & other) : n (0), p (NULL) { *this = other; }
public:
fixed_vector() : n (0), p (NULL) { }
void resize (int size) { clear(); if (size > 0) { p = new _T[size]; n = size; } }
void resize (size_t size) { clear(); if (size > 0) { p = new _T[size]; n = size; } }
fixed_vector (int size) : n (size), p (size > 0 ? new _T[size] : NULL) { }
fixed_vector (size_t size) : n ((int) size), p (size > 0 ? new _T[size] : NULL) { }
~fixed_vector() { delete[] p; }
inline int size() const { return (int) n; }
inline int capacity() const { return (int) n; }
inline bool empty() const { return n == 0; }
void clear() { delete[] p; p = NULL; n = 0; }
_T * begin() { return p; }
const _T * begin() const { return p; }
_T * end() { return p + n; } // note: when n == 0, p is NULL, so the result is NULL
inline _T & operator[] (int index) { check (index); return p[index]; } // writing
inline const _T & operator[] (int index) const { check (index); return p[index]; } // reading
inline _T & operator[] (size_t index) { check (index); return p[index]; } // writing
inline const _T & operator[] (size_t index) const { check (index); return p[index]; } // reading
inline int indexof (const _T & elem) const { assert (&elem >= p && &elem < p + n); return &elem - p; }
inline void swap (fixed_vector & other) throw() { std::swap (other.p, p); std::swap (other.n, n); }
template<class VECTOR> fixed_vector & operator= (const VECTOR & other)
{
int other_n = (int) other.size();
fixed_vector tmp (other_n);
for (int k = 0; k < other_n; k++) tmp[k] = other[k];
swap (tmp);
return *this;
}
fixed_vector & operator= (const fixed_vector & other)
{
int other_n = (int) other.size();
fixed_vector tmp (other_n);
for (int k = 0; k < other_n; k++) tmp[k] = other[k];
swap (tmp);
return *this;
}
template<class VECTOR> fixed_vector (const VECTOR & other) : n (0), p (NULL) { *this = other; }
};
template<class _T> inline void swap (fixed_vector<_T> & L, fixed_vector<_T> & R) throw() { L.swap (R); }
// class matrix - simple fixed-size 2-dimensional array, access elements as m(i,j)
// stored as concatenation of rows
template<class T> class matrix : fixed_vector<T>
{
size_t numcols;
size_t locate (size_t i, size_t j) const { assert (i < rows() && j < cols()); return i * cols() + j; }
public:
typedef T elemtype;
matrix() : numcols (0) {}
matrix (size_t n, size_t m) { resize (n, m); }
void resize (size_t n, size_t m) { numcols = m; fixed_vector<T>::resize (n * m); }
size_t cols() const { return numcols; }
size_t rows() const { return empty() ? 0 : size() / cols(); }
size_t size() const { return fixed_vector<T>::size(); } // use this for reading and writing... not nice!
bool empty() const { return fixed_vector<T>::empty(); }
T & operator() (size_t i, size_t j) { return (*this)[locate(i,j)]; }
const T & operator() (size_t i, size_t j) const { return (*this)[locate(i,j)]; }
void swap (matrix & other) throw() { std::swap (numcols, other.numcols); fixed_vector<T>::swap (other); }
};
template<class _T> inline void swap (matrix<_T> & L, matrix<_T> & R) throw() { L.swap (R); }
// TODO: get rid of these
typedef std::string STRING;
typedef std::wstring WSTRING;
#ifdef __unix__
typedef wchar_t TCHAR;
#endif
typedef std::basic_string<TCHAR> TSTRING; // wide/narrow character string
// derive from this for noncopyable classes (will get you private unimplemented copy constructors)
// ... TODO: change all of basetypes classes/structs to use this
class noncopyable
{
noncopyable & operator= (const noncopyable &);
noncopyable (const noncopyable &);
public:
noncopyable(){}
};
struct throw_hr
{
const char * msg;
inline throw_hr (const char * msg = NULL) : msg (msg) {}
};
// back-mapping of exceptions to HRESULT codes
// usage pattern: HRESULT COM_function (...) { try { exception-based function body } catch_hr_return; }
#define catch_hr_return \
catch (const bad_alloc &) { return E_OUTOFMEMORY; } \
catch (const bad_hr & e) { return e.hr; } \
catch (const invalid_argument &) { return E_INVALIDARG; } \
catch (const runtime_error &) { return E_FAIL; } \
catch (const logic_error &) { return E_UNEXPECTED; } \
catch (const exception &) { return E_FAIL; } \
return S_OK;
};}; // namespace
#ifndef BASETYPES_NO_UNSAFECRTOVERLOAD // if on, no unsafe CRT overload functions
// ----------------------------------------------------------------------------
// overloads for "unsafe" CRT functions used in our code base
// ----------------------------------------------------------------------------
// strlen/wcslen overloads for fixed-buffer size
// Note: Be careful when fixing bugs related to these templates.
// In all attempted experiments, it seems all 6 definitions
// below are required to get the correct behaviour. Be very careful
// not to delete anything without testing that cases 5 & 6 still have "size" deduced.
// 1. char *
// 2. char * const
// 3. const char *
// 4. const char * const
// 5. char (&) [size]
// 6. const char (&) [size]
// the following includes all headers that use strlen() and fail because of the mapping below
// to find those, change #define strlen strlen_ to something invalid e.g. strlen::strlen_
#if _MSC_VER >= 1600 // VS 2010 --TODO: fix this by correct include order instead
#include <intrin.h> // defines strlen() as an intrinsic in VS 2010
#include <typeinfo> // uses strlen()
#include <xlocale> // uses strlen()
#endif
#define strlen strlen_
template<typename _T>
size_t strlen_(_T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); } // should never be called, but needed to keep the compiler happy
template<typename _T> inline size_t strlen_(const _T &s) { return strnlen(static_cast<const char *>(s), SIZE_MAX); }
template<> inline size_t strlen_(char * &s) { return strnlen(s, SIZE_MAX); }
template<> inline size_t strlen_(const char * &s) { return strnlen(s, SIZE_MAX); }
template<size_t n> inline size_t strlen_(const char (&s)[n]) { return (strnlen(s, n)); }
template<size_t n> inline size_t strlen_(char (&s)[n]) { return (strnlen(s, n)); }
#define wcslen wcslen_
template<typename _T>
size_t wcslen_(_T &s) { return wcsnlen_s(static_cast<const wchar_t *>(s), SIZE_MAX); } // should never be called, but needed to keep the compiler happy
template<> inline size_t wcslen_(wchar_t * &s) { return wcsnlen(s, SIZE_MAX); }
template<> inline size_t wcslen_(const wchar_t * &s) { return wcsnlen(s, SIZE_MAX); }
template<size_t n> inline size_t wcslen_(const wchar_t (&s)[n]) { return (wcsnlen(s, n)); }
template<size_t n> inline size_t wcslen_(wchar_t (&s)[n]) { return (wcsnlen(s, n)); }
// xscanf wrappers -- one overload for each actual use case in our code base
// (forward to the _s variants; calling the plain names here would recurse into these same overloads)
static inline int sscanf (const char * buf, const char * format, int * i1) { return sscanf_s (buf, format, i1); }
static inline int sscanf (const char * buf, const char * format, int * i1, int * i2) { return sscanf_s (buf, format, i1, i2); }
static inline int sscanf (const char * buf, const char * format, int * i1, int * i2, int * i3) { return sscanf_s (buf, format, i1, i2, i3); }
static inline int sscanf (const char * buf, const char * format, double * f1) { return sscanf_s (buf, format, f1); }
static inline int swscanf (const wchar_t * buf, const wchar_t * format, int * i1) { return swscanf_s (buf, format, i1); }
static inline int fscanf (FILE * file, const char * format, float * f1) { return fscanf_s (file, format, f1); }
// cacpy -- fixed-size character array (same as original strncpy (dst, src, sizeof (dst)))
// NOTE: THIS FUNCTION HAS NEVER BEEN TESTED. REMOVE THIS COMMENT ONCE IT HAS.
template<class T, size_t n> static inline void cacpy (T (&dst)[n], const T * src)
{ for (size_t i = 0; i < n; i++) { dst[i] = *src; if (*src) src++; } }
// { return strncpy (dst, src, n); } // using original C std lib function
#endif
// ----------------------------------------------------------------------------
// frequently missing string functions
// ----------------------------------------------------------------------------
namespace msra { namespace strfun {
#ifndef BASETYPES_NO_STRPRINTF
template<typename C> struct basic_cstring : public std::basic_string<C>
{
template<typename S> basic_cstring (S p) : std::basic_string<C> (p) { }
operator const C * () const { return this->c_str(); }
};
typedef basic_cstring<char> cstring;
typedef basic_cstring<wchar_t> wcstring;
// [w]strprintf() -- like sprintf() but resulting in a C++ string
template<class _T> struct _strprintf : public std::basic_string<_T>
{ // works for both wchar_t* and char*
_strprintf (const _T * format, ...)
{
va_list args; va_start (args, format); // varargs stuff
size_t n = _cprintf (format, args); // num chars excl. '\0'
const int FIXBUF_SIZE = 128; // incl. '\0'
if (n < FIXBUF_SIZE)
{
_T fixbuf[FIXBUF_SIZE];
this->assign (_sprintf (&fixbuf[0], sizeof (fixbuf)/sizeof (*fixbuf), format, args), n);
}
else // too long: use dynamically allocated variable-size buffer
{
std::vector<_T> varbuf (n + 1); // incl. '\0'
this->assign (_sprintf (&varbuf[0], varbuf.size(), format, args), n);
}
}
private:
// helpers
inline size_t _cprintf (const wchar_t * format, va_list args) { return _vscwprintf (format, args); }
inline size_t _cprintf (const char * format, va_list args) { return _vscprintf (format, args); }
inline const wchar_t * _sprintf (wchar_t * buf, size_t bufsiz, const wchar_t * format, va_list args) { vswprintf_s (buf, bufsiz, format, args); return buf; }
inline const char * _sprintf ( char * buf, size_t bufsiz, const char * format, va_list args) { vsprintf_s (buf, bufsiz, format, args); return buf; }
};
typedef strfun::_strprintf<char> strprintf; // char version
typedef strfun::_strprintf<wchar_t> wstrprintf; // wchar_t version
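// [Editor's note: usage sketch added for illustration; not part of the original
// file; the format strings and variables are hypothetical.]
//   std::string  msg  = msra::strfun::strprintf ("processed %d of %d", i, n);
//   std::wstring wmsg = msra::strfun::wstrprintf (L"file %ls", path.c_str());
// The result derives from std::basic_string, so it can be passed anywhere a
// string is expected; results up to 127 characters use a stack buffer, longer
// ones fall back to a heap-allocated vector.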
#endif
//http://www.nanobit.net/putty/doxy/PUTTY_8H-source.html
#ifndef CP_UTF8
#define CP_UTF8 65001
#endif
// string-encoding conversion functions
#ifdef _WIN32
struct utf8 : std::string { utf8 (const std::wstring & p) // utf-16 to -8
{
size_t len = p.length();
if (len == 0) { return;} // empty string
msra::basetypes::fixed_vector<char> buf (3 * len + 1); // max: 1 wchar => up to 3 mb chars
// ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify
std::fill (buf.begin (), buf.end (), 0);
int rc = WideCharToMultiByte (CP_UTF8, 0, p.c_str(), (int) len,
&buf[0], (int) buf.size(), NULL, NULL);
if (rc == 0) throw std::runtime_error ("WideCharToMultiByte");
(*(std::string*)this) = &buf[0];
}};
struct utf16 : std::wstring { utf16 (const std::string & p) // utf-8 to -16
{
size_t len = p.length();
if (len == 0) { return;} // empty string
msra::basetypes::fixed_vector<wchar_t> buf (len + 1);
// ... TODO: this fill() should be unnecessary (a 0 is appended)--but verify
std::fill (buf.begin (), buf.end (), (wchar_t) 0);
int rc = MultiByteToWideChar (CP_UTF8, 0, p.c_str(), (int) len,
&buf[0], (int) buf.size());
if (rc == 0) throw std::runtime_error ("MultiByteToWideChar");
assert (rc < buf.size ());
(*(std::wstring*)this) = &buf[0];
}};
#endif
#pragma warning(push)
#pragma warning(disable : 4996) // Reviewed by Yusheng Li, March 14, 2006. depr. fn (wcstombs, mbstowcs)
static inline std::string wcstombs (const std::wstring & p) // output: MBCS
{
size_t len = p.length();
msra::basetypes::fixed_vector<char> buf (2 * len + 1); // max: 1 wchar => 2 mb chars
std::fill (buf.begin (), buf.end (), 0);
::wcstombs (&buf[0], p.c_str(), 2 * len + 1);
return std::string (&buf[0]);
}
static inline std::wstring mbstowcs (const std::string & p) // input: MBCS
{
size_t len = p.length();
msra::basetypes::fixed_vector<wchar_t> buf (len + 1); // max: >1 mb chars => 1 wchar
std::fill (buf.begin (), buf.end (), (wchar_t) 0);
OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
::mbstowcs (&buf[0], p.c_str(), len + 1);
return std::wstring (&buf[0]);
}
#pragma warning(pop)
static inline std::string utf8 (const std::wstring & p) { return msra::strfun::wcstombs (p.c_str()); } // output: UTF-8... not really
static inline std::wstring utf16 (const std::string & p) { return msra::strfun::mbstowcs(p.c_str()); } // input: UTF-8... not really
// split and join -- tokenize a string like strtok() would, join() strings together
template<class _T> static inline std::vector<std::basic_string<_T>> split (const std::basic_string<_T> & s, const _T * delim)
{
std::vector<std::basic_string<_T>> res;
for (size_t st = s.find_first_not_of (delim); st != std::basic_string<_T>::npos; )
{
size_t en = s.find_first_of (delim, st +1);
if (en == std::basic_string<_T>::npos) en = s.length();
res.push_back (s.substr (st, en-st));
st = s.find_first_not_of (delim, en +1); // may exceed
}
return res;
}
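// [Editor's note: behavior example added for illustration; not part of the original file.]
//   split (std::string ("a  b,c"), " ,")  ->  {"a", "b", "c"}
// Runs of delimiter characters are collapsed, so empty tokens are never produced;
// join() below is only an exact inverse when no token contains a delimiter.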
template<class _T> static inline std::basic_string<_T> join (const std::vector<std::basic_string<_T>> & a, const _T * delim)
{
std::basic_string<_T> res;
for (int i = 0; i < (int) a.size(); i++)
{
if (i > 0) res.append (delim);
res.append (a[i]);
}
return res;
}
#ifdef _WIN32
// parsing strings to numbers
static inline int toint (const wchar_t * s)
{
return _wtoi (s); // ... TODO: check it
}
#endif
static inline int toint (const char * s)
{
return atoi (s); // ... TODO: check it
}
static inline int toint (const std::wstring & s) { return toint (s.c_str()); }
static inline double todouble (const char * s)
{
char * ep; // will be set to point to first character that failed parsing
double value = strtod (s, &ep);
if (*s == 0 || *ep != 0)
throw std::runtime_error ("todouble: invalid input string");
return value;
}
// TODO: merge this with todouble(const char*) above
static inline double todouble (const std::string & s)
{
s.size(); // just used to remove the unreferenced warning
double value = 0.0;
// stod supposedly exists in VS2010, but some folks have compilation errors
// If this causes errors again, change the #if into the respective one for VS 2010.
#if _MSC_VER > 1400 // VS 2010+
size_t idx = 0;
value = std::stod (s, &idx);
if (idx != s.length()) throw std::runtime_error ("todouble: invalid input string");
#else
char *ep = 0; // will be updated by strtod to point to first character that failed parsing
value = strtod (s.c_str(), &ep);
// strtod documentation says ep points to first unconverted character OR
// return value will be +/- HUGE_VAL for overflow/underflow
if (ep != s.c_str() + s.length() || value == HUGE_VAL || value == -HUGE_VAL)
throw std::runtime_error ("todouble: invalid input string");
#endif
return value;
}
static inline double todouble (const std::wstring & s)
{
wchar_t * endptr;
double value = wcstod (s.c_str(), &endptr);
if (*endptr) throw std::runtime_error ("todouble: invalid input string");
return value;
}
// ----------------------------------------------------------------------------
// tokenizer -- utility for white-space tokenizing strings in a character buffer
// This simple class just breaks a string, but does not own the string buffer.
// ----------------------------------------------------------------------------
class tokenizer : public std::vector<char*>
{
const char * delim;
public:
tokenizer (const char * delim, size_t cap) : delim (delim) { reserve (cap); }
// Usage: tokenizer tokens (delim, capacity); tokens = buf; tokens.size(), tokens[i]
void operator= (char * buf)
{
resize (0);
// strtok_s not available on all platforms - so back off to strtok on those
#ifdef strtok_s
char * context; // for strtok_s()
for (char * p = strtok_s (buf, delim, &context); p; p = strtok_s (NULL, delim, &context))
push_back (p);
#else
for (char * p = strtok (buf, delim); p; p = strtok (NULL, delim))
push_back (p);
#endif
}
};
};}; // namespace
static inline msra::strfun::cstring charpath (const std::wstring & p)
{
#ifdef _WIN32
return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(p);
#else // old version, delete once we know it works
size_t len = p.length();
std::vector<char> buf(2 * len + 1, 0); // max: 1 wchar => 2 mb chars
::wcstombs(buf.data(), p.c_str(), 2 * len + 1);
return msra::strfun::cstring (&buf[0]);
#endif
}
static inline FILE* _wfopen (const wchar_t * path, const wchar_t * mode) { return fopen(charpath(path), charpath(mode)); }
static inline void Sleep (size_t ms) { std::this_thread::sleep_for (std::chrono::milliseconds (ms)); }
// ----------------------------------------------------------------------------
// wrappers for some basic types (files, handles, timer)
// ----------------------------------------------------------------------------
namespace msra { namespace basetypes {
// FILE* with auto-close; use auto_file_ptr instead of FILE*.
// Warning: do not pass an auto_file_ptr to a function that calls fclose(),
// except for fclose() itself.
class auto_file_ptr
{
FILE * f;
FILE * operator= (auto_file_ptr &); // can't ref-count: no assignment
auto_file_ptr (auto_file_ptr &);
// implicit close (destructor, assignment): we ignore error
void close() throw() { if (f) try { if (f != stdin && f != stdout && f != stderr) ::fclose (f); } catch (...) { } f = NULL; }
void openfailed (const std::string & path) { throw std::runtime_error ("auto_file_ptr: error opening file '" + path + "': " + strerror (errno)); }
protected:
friend int fclose (auto_file_ptr&); // explicit close (note: may fail)
int fclose() { int rc = ::fclose (f); if (rc == 0) f = NULL; return rc; }
public:
auto_file_ptr() : f (NULL) { }
~auto_file_ptr() { close(); }
auto_file_ptr (const char * path, const char * mode) { f = fopen (path, mode); if (f == NULL) openfailed (path); }
auto_file_ptr (const wchar_t * wpath, const char * mode) { f = _wfopen (wpath, msra::strfun::utf16 (mode).c_str()); if (f == NULL) openfailed (msra::strfun::utf8 (wpath)); }
FILE * operator= (FILE * other) { close(); f = other; return f; }
auto_file_ptr (FILE * other) : f (other) { }
operator FILE * () const { return f; }
FILE * operator->() const { return f; }
void swap (auto_file_ptr & other) throw() { std::swap (f, other.f); }
};
inline int fclose (auto_file_ptr & af) { return af.fclose(); }
};};
namespace msra { namespace files {
// ----------------------------------------------------------------------------
// textreader -- simple reader for text files --we need this all the time!
// Currently reads 8-bit files, but can return as wstring, in which case
// they are interpreted as UTF-8 (without BOM).
// Note: Not suitable for pipes or typed input due to readahead (fixable if needed).
// ----------------------------------------------------------------------------
class textreader
{
msra::basetypes::auto_file_ptr f;
std::vector<char> buf; // read buffer (will only grow, never shrink)
int ch; // next character (we need to read ahead by one...)
char getch() { char prevch = (char) ch; ch = fgetc (f); return prevch; }
public:
textreader (const std::wstring & path) : f (path.c_str(), "rb") { buf.reserve (10000); ch = fgetc (f); }
operator bool() const { return ch != EOF; } // true if still a line to read
std::string getline() // get and consume the next line
{
if (ch == EOF) throw std::logic_error ("textreader: attempted to read beyond EOF");
assert (buf.empty());
// get all line's characters --we recognize UNIX (LF), DOS (CRLF), and Mac (CR) convention
while (ch != EOF && ch != '\n' && ch != '\r') buf.push_back (getch());
if (ch != EOF && getch() == '\r' && ch == '\n') getch(); // consume EOLN char
std::string line (buf.begin(), buf.end());
buf.clear();
return line;
}
std::wstring wgetline() { return msra::strfun::utf16 (getline()); }
};
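// [Editor's note: usage sketch added for illustration; not part of the original
// file; the file name is hypothetical.]
//   msra::files::textreader reader (L"input.txt");
//   while (reader)                              // true while a line remains
//   {
//       std::string line = reader.getline();    // CR, LF and CRLF endings are all consumed
//       // ... process 'line' ...
//   }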
};};
// ----------------------------------------------------------------------------
// functional-programming style helper macros (...do this with templates?)
// ----------------------------------------------------------------------------
#define foreach_index(_i,_dat) for (int _i = 0; _i < (int) (_dat).size(); _i++)
#define map_array(_x,_expr,_y) { _y.resize (_x.size()); foreach_index(_i,_x) _y[_i]=_expr(_x[_i]); }
#define reduce_array(_x,_expr,_y) { foreach_index(_i,_x) _y = (_i==0) ? _x[_i] : _expr(_y,_x[_i]); }
// ----------------------------------------------------------------------------
// frequently missing utility functions
// ----------------------------------------------------------------------------
namespace msra { namespace util {
// to (slightly) simplify processing of command-line arguments.
// command_line args (argc, argv);
// while (args.has (1) && args[0][0] == '-') { option = args.shift(); process (option); }
// for (const wchar_t * arg = args.shift(); arg; arg = args.shift()) { process (arg); }
class command_line
{
int num;
const wchar_t * * args;
public:
command_line (int argc, wchar_t * argv[]) : num (argc), args ((const wchar_t **) argv) { shift(); }
inline int size() const { return num; }
inline bool has (int left) { return size() >= left; }
const wchar_t * shift() { if (size() == 0) return NULL; num--; return *args++; }
const wchar_t * operator[] (int i) const { return (i < 0 || i >= size()) ? NULL : args[i]; }
};
// byte-reverse a variable --reverse all bytes (intended for integral types and float)
template<typename T> static inline void bytereverse (T & v) throw()
{ // note: this is more efficient than it looks because sizeof (v[0]) is a constant
char * p = (char *) &v;
const size_t elemsize = sizeof (v);
for (size_t k = 0; k < elemsize / 2; k++) // swap individual bytes
swap (p[k], p[elemsize-1 - k]);
}
// byte-swap an entire array
template<class V> static inline void byteswap (V & v) throw()
{
foreach_index (i, v)
bytereverse (v[i]);
}
// execute a block with retry
// Block must be restartable.
// Use this when writing small files to those unreliable Windows servers.
// TODO: This will fail to compile under VS 2008--we need an #ifdef around this
template<typename FUNCTION> static void attempt (int retries, const FUNCTION & body)
{
for (int attempt = 1; ; attempt++)
{
try
{
body();
if (attempt > 1) fprintf (stderr, "attempt: success after %d retries\n", attempt);
break;
}
catch (const std::exception & e)
{
if (attempt >= retries)
throw; // failed N times --give up and rethrow the error
fprintf (stderr, "attempt: %s, retrying %d-th time out of %d...\n", e.what(), attempt+1, retries);
::Sleep (1000); // wait a little, then try again
}
}
}
};}; // namespace
#ifdef _WIN32
// ----------------------------------------------------------------------------
// frequently missing Win32 functions
// ----------------------------------------------------------------------------
// strerror() for Win32 error codes
static inline std::wstring FormatWin32Error (DWORD error)
{
wchar_t buf[1024] = { 0 };
::FormatMessageW (FORMAT_MESSAGE_FROM_SYSTEM, "", error, 0, buf, sizeof (buf)/sizeof (*buf) -1, NULL);
std::wstring res (buf);
// eliminate newlines (and spaces) from the end
size_t last = res.find_last_not_of (L" \t\r\n");
if (last != std::string::npos) res.erase (last +1, res.length());
return res;
}
// we always wanted this!
#pragma warning (push)
#pragma warning (disable: 6320) // Exception-filter expression is the constant EXCEPTION_EXECUTE_HANDLER
#pragma warning (disable: 6322) // Empty _except block
static inline void SetCurrentThreadName (const char* threadName)
{ // from http://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx
::Sleep(10);
#pragma pack(push,8)
struct { DWORD dwType; LPCSTR szName; DWORD dwThreadID; DWORD dwFlags; } info = { 0x1000, threadName, (DWORD) -1, 0 };
#pragma pack(pop)
__try { RaiseException (0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info); }
__except(EXCEPTION_EXECUTE_HANDLER) { }
}
#pragma warning (pop)
// return a string as a CoTaskMemAlloc'ed memory object
// Returns NULL if out of memory (we don't throw because we'd just catch it outside and convert to HRESULT anyway).
static inline LPWSTR CoTaskMemString (const wchar_t * s)
{
size_t n = wcslen (s) + 1; // number of chars to allocate and copy
LPWSTR p = (LPWSTR) ::CoTaskMemAlloc (sizeof (*p) * n);
if (p) for (size_t i = 0; i < n; i++) p[i] = s[i];
return p;
}
template<class S> static inline void ZeroStruct (S & s) { memset (&s, 0, sizeof (s)); }
#endif
// ----------------------------------------------------------------------------
// machine dependent
// ----------------------------------------------------------------------------
#define MACHINE_IS_BIG_ENDIAN (false)
using namespace msra::basetypes; // for compatibility
#pragma warning (pop)
// RuntimeError - throw a std::runtime_error with a formatted error string
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void RuntimeError(const char * format, ...)
{
va_list args;
char buffer[1024];
va_start(args, format);
vsnprintf(buffer, sizeof(buffer), format, args);
va_end(args);
throw std::runtime_error(buffer);
};
// LogicError - throw a std::logic_error with a formatted error string
#ifdef _MSC_VER
__declspec(noreturn)
#endif
static inline void LogicError(const char * format, ...)
{
va_list args;
char buffer[1024];
va_start(args, format);
vsnprintf(buffer, sizeof(buffer), format, args);
va_end(args);
throw std::logic_error(buffer);
};
// ----------------------------------------------------------------------------
// dynamic loading of modules
// ----------------------------------------------------------------------------
#ifdef _WIN32
class Plugin
{
HMODULE m_hModule; // module handle for the writer DLL
std::wstring m_dllName; // name of the writer DLL
public:
Plugin() { m_hModule = NULL; }
template<class STRING> // accepts char (UTF-8) and wide string
FARPROC Load(const STRING & plugin, const std::string & proc)
{
m_dllName = msra::strfun::utf16(plugin);
m_dllName += L".dll";
m_hModule = LoadLibrary(m_dllName.c_str());
if (m_hModule == NULL)
RuntimeError("Plugin not found: %s", msra::strfun::utf8(m_dllName));
// create a variable of each type just to call the proper templated version
return GetProcAddress(m_hModule, proc.c_str());
}
~Plugin() { if (m_hModule) FreeLibrary(m_hModule); }
};
#else
class Plugin
{
public:
template<class STRING> // accepts char (UTF-8) and wide string
void * Load(const STRING & plugin, const std::string & proc)
{
RuntimeError("Plugins not implemented on Linux yet");
return nullptr;
}
};
#endif
#endif // _BASETYPES_

View File

@ -1,123 +0,0 @@
//
// <copyright file="biggrowablevectors.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// biggrowablevectors.h -- big growable vector that uses two layers and optionally a disk backing store for paging
#pragma once
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// growablevectorbase -- helper for two-layer growable random-access array
// This allows both a fully allocated vector (with push_back()), e.g. for uids,
// as well as a partially allocated one (content managed by derived class), for features and lattice blocks.
// TODO:
// - test this (make copy of binary first before full compilation; or rebuild the previous version)
// - fully move in-mem range here, test again
// - then we can move towards paging from archive directly (biggrowablevectorarray gets tossed)
// ---------------------------------------------------------------------------
template<class BLOCKTYPE> class growablevectorbase
{
protected: // fix this later
const size_t elementsperblock;
size_t n; // number of elements
std::vector<std::unique_ptr<BLOCKTYPE>> blocks; // the data blocks
void operator= (const growablevectorbase &); // (non-assignable)
void check (size_t t) const { if (t >= n) throw std::logic_error ("growablevectorbase: out of bounds"); } // bounds check helper
// resize intermediate level, but do not allocate blocks
// (may deallocate if shrinking)
void resize_without_commit (size_t T)
{
blocks.resize ((T + elementsperblock-1) / elementsperblock);
n = T;
// TODO: update allocated range
}
// commit memory
// begin/end must be block boundaries
void commit (size_t begin, size_t end, BLOCKTYPE * blockdata)
{
auto & blockptr = getblockptr (begin, end); // memory leak: if this fails (logic error; should never happen)
blockptr.reset (blockdata); // take ownership of the block
// TODO: update allocated range --also enforce consecutiveness
}
// flush a block
// begin/end must be block boundaries
void flush (size_t begin, size_t end)
{
auto & blockptr = getblockptr (begin, end); // memory leak: if this fails (logic error; should never happen)
blockptr.reset(); // release it
// TODO: update allocated range --also enforce consecutiveness
}
// helper to get a block pointer, with block referenced as its entire range
std::unique_ptr<BLOCKTYPE> & getblockptr (size_t t) // const
{
check (t);
return blocks[t / elementsperblock];
}
// helper to get a block pointer, with block referenced as its entire range
std::unique_ptr<BLOCKTYPE> & getblockptr (size_t begin, size_t end)
{
// BUGBUG: last block may be shorter than elementsperblock
if (end - begin != elementsperblock || getblockt (begin) != 0)
throw std::logic_error ("growablevectorbase: non-block boundaries passed to block-level function");
return getblockptr (begin);
}
public:
growablevectorbase (size_t elementsperblock) : elementsperblock (elementsperblock), n (0) { blocks.reserve (1000); }
size_t size() const { return n; } // number of frames
bool empty() const { return size() == 0; }
// to access an element t -> getblock(t)[getblockt(t)]
BLOCKTYPE & getblock (size_t t) const
{
check (t);
const size_t blockid = t / elementsperblock;
return *blocks[blockid].get();
}
size_t getblockt (size_t t) const
{
check (t);
return t % elementsperblock;
}
};
// ---------------------------------------------------------------------------
// biggrowablevector -- big vector we can push_back to
// ---------------------------------------------------------------------------
template<typename ELEMTYPE> class biggrowablevector : public growablevectorbase<std::vector<ELEMTYPE>>
{
using base_t = growablevectorbase<std::vector<ELEMTYPE>>;
public:
biggrowablevector() : growablevectorbase<std::vector<ELEMTYPE>>::growablevectorbase (65536) { }
template<typename VALTYPE> void push_back (VALTYPE e) // VALTYPE could be an rvalue reference
{
size_t i = base_t::size();
base_t::resize_without_commit (i + 1);
auto & block = base_t::getblockptr (i);
if (block.get() == NULL)
block.reset (new std::vector<ELEMTYPE> (this->elementsperblock));
(*block)[base_t::getblockt (i)] = e;
}
ELEMTYPE & operator[] (size_t t) { return base_t::getblock(t)[base_t::getblockt (t)]; } // get an element
const ELEMTYPE & operator[] (size_t t) const { return base_t::getblock(t)[base_t::getblockt (t)]; } // get an element
void resize (const size_t n)
{
base_t::resize_without_commit (n);
foreach_index (i, this->blocks)
if (this->blocks[i].get() == NULL)
this->blocks[i].reset (new std::vector<ELEMTYPE> (this->elementsperblock));
}
};
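// [Editor's note: usage sketch added for illustration; not part of the original file.]
// Elements live in 65536-element blocks that are allocated on demand, so
// push_back never relocates existing data the way std::vector's reallocation does:
//   msra::dbn::biggrowablevector<float> v;
//   for (size_t t = 0; t < 1000000; t++)
//       v.push_back ((float) t);   // allocates a fresh block every 65536 elements
//   float x = v[123456];           // two-level lookup: block index, then offset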
};};

View File

@ -1,373 +0,0 @@
//
// <copyright file="chunkevalsource.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
//#include <objbase.h>
#include "basetypes.h" // for attempt()
#include "htkfeatio.h" // for reading HTK features
#include "minibatchsourcehelpers.h"
#ifndef __unix__
#include "ssematrix.h"
#endif
#ifdef LEAKDETECT
#include <vld.h> // for memory leak detection
#endif
namespace msra { namespace dbn {
class chunkevalsource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
msra::dbn::matrix feat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::wstring> outpaths; // [k] and their pathnames
std::vector<unsigned int> sampperiods; // [k] and sample periods (they should really all be the same...)
size_t vdim; // input dimension
size_t udim; // output dimension
bool minibatchready;
void operator=(const chunkevalsource &);
private:
void clear() // empty the FIFO
{
frames.clear();
boundaryflags.clear();
numframes.clear();
outpaths.clear();
sampperiods.clear();
minibatchready=false;
}
void saveandflush(msra::dbn::matrix &pred)
{
const size_t framesinblock = frames.size();
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[k];
unsigned int sampperiod = sampperiods[k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %zd frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;   // touch the variable to avoid an unused-variable warning in release builds
// and we are done --forget the FIFO content & get ready for next chunk
clear();
}
public:
chunkevalsource (size_t numinput, size_t numoutput, size_t chunksize)
:vdim(numinput),udim(numoutput),chunksize(chunksize)
{
frames.reserve (chunksize * 2);
feat.resize(vdim,chunksize); // initialize to size chunksize
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
frames.push_back (v);
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
numframes.push_back (feat.cols());
outpaths.push_back (outpath);
sampperiods.push_back (sampperiod);
}
void createevalminibatch()
{
const size_t framesinblock = frames.size();
feat.resize(vdim, framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (frames, boundaryflags, 0, framesinblock, feat);
minibatchready=true;
}
void writetofiles(msra::dbn::matrix &pred){ saveandflush(pred); }
msra::dbn::matrix chunkofframes() { assert(minibatchready); return feat; }
bool isminibatchready() { return minibatchready; }
size_t currentchunksize() { return frames.size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
};
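// Typical calling sequence, reconstructed from the public interface above.
// 'evaluate' stands in for whatever network produces the predictions; it and the
// parameters here are assumptions for illustration, not part of this header.
template<class EVALFN>
static void chunkevalsource_usage_sketch (chunkevalsource & source, EVALFN evaluate,
                                          const msra::dbn::matrix & feat, const std::string & featkind,
                                          unsigned int sampperiod, const std::wstring & outpath)
{
    source.addfile (feat, featkind, sampperiod, outpath); // queue one utterance (repeat per file)
    source.flushinput();                                  // build the neighbor-augmented minibatch
    if (source.isminibatchready())
    {
        msra::dbn::matrix pred = evaluate (source.chunkofframes());
        source.writetofiles (pred);                       // write HTK feature files and clear the FIFO
    }
}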
class chunkevalsourcemulti // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesmulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags;        // [t] -1 for the first frame, +1 for the last frame, 0 otherwise (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::vector<std::wstring>> outpaths; // [k] and their pathnames
std::vector<std::vector<unsigned int>> sampperiods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
std::vector<size_t> udims; // output dimension
bool minibatchready;
void operator=(const chunkevalsourcemulti &);
private:
void clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesmulti[i].clear();
outpaths[i].clear();
sampperiods[i].clear();
}
boundaryflags.clear();
numframes.clear();
minibatchready=false;
}
void saveandflush(msra::dbn::matrix &pred, size_t index)
{
const size_t framesinblock = framesmulti[index].size();
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[index][k];
unsigned int sampperiod = sampperiods[index][k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %zu frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;   // touch the variable to avoid an unused-variable warning in release builds
// note: unlike chunkevalsource, the FIFO is not cleared here; callers must call reset() before the next chunk
}
public:
chunkevalsourcemulti (std::vector<size_t> vdims, std::vector<size_t> udims, size_t chunksize)
:vdims(vdims),udims(udims),chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesmulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
outpaths.push_back(std::vector<std::wstring>());
sampperiods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesmulti[index].push_back (v);
if (index==0)
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numframes.push_back (feat.cols());
outpaths[index].push_back (outpath);
sampperiods[index].push_back (sampperiod);
}
void createevalminibatch()
{
foreach_index(i, framesmulti)
{
const size_t framesinblock = framesmulti[i].size();
feat[i].resize(vdims[i], framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (framesmulti[i], boundaryflags, 0, framesinblock, feat[i]);
}
minibatchready=true;
}
void writetofiles(msra::dbn::matrix &pred, size_t index){ saveandflush(pred, index); }
msra::dbn::matrix chunkofframes(size_t index) { assert(minibatchready); assert(index<=feat.size()); return feat[index]; }
bool isminibatchready() { return minibatchready; }
size_t currentchunksize() { return framesmulti[0].size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
};
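// The multi-stream variant above is driven per stream index; stream 0 also owns the
// shared boundaryflags/numframes bookkeeping, so every stream must append the same
// number of frames per utterance. A sketch of the call pattern -- the dimensions,
// feature kinds, and matrices are illustrative (100000 is HTK's 100 ns sample-period
// unit for a 10 ms frame shift):
template<class MATRIX>
static void chunkevalsourcemulti_usage_sketch (const MATRIX & mfcc, const MATRIX & plp,
                                               const std::wstring & outpath0, const std::wstring & outpath1)
{
    std::vector<size_t> vdims; vdims.push_back (39); vdims.push_back (52);
    std::vector<size_t> udims; udims.push_back (9304); udims.push_back (9304);
    chunkevalsourcemulti source (vdims, udims, /*chunksize=*/8192);
    source.addfile (mfcc, "MFCC_0_D_A", 100000, outpath0, /*index=*/0); // stream 0 records frame counts
    source.addfile (plp,  "PLP_0_D_A",  100000, outpath1, /*index=*/1);
    source.flushinput();   // augments every stream and marks the minibatch ready
}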
class FileEvalSource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesMulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryFlags;        // [t] -1 for the first frame, +1 for the last frame, 0 otherwise (for augmentneighbors())
std::vector<size_t> numFrames; // [k] number of frames for all appended files
std::vector<std::vector<unsigned int>> sampPeriods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
std::vector<size_t> leftcontext;
std::vector<size_t> rightcontext;
bool minibatchReady;
size_t minibatchSize;
size_t frameIndex;
void operator=(const FileEvalSource &);
private:
void Clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesMulti[i].clear();
sampPeriods[i].clear();
}
boundaryFlags.clear();
numFrames.clear();
minibatchReady=false;
frameIndex=0;
}
public:
FileEvalSource(std::vector<size_t> vdims, std::vector<size_t> leftcontext, std::vector<size_t> rightcontext, size_t chunksize) :vdims(vdims), leftcontext(leftcontext), rightcontext(rightcontext), chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesMulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
sampPeriods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
// append data to chunk
template<class MATRIX> void AddFile (const MATRIX & feat, const string & /*featkind*/, unsigned int sampPeriod, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesMulti[index].push_back (v);
if (index==0)
boundaryFlags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numFrames.push_back (feat.cols());
sampPeriods[index].push_back (sampPeriod);
}
void CreateEvalMinibatch()
{
foreach_index(i, framesMulti)
{
const size_t framesInBlock = framesMulti[i].size();
feat[i].resize(vdims[i], framesInBlock); // input features for whole utt (col vectors)
// augment the features
size_t leftextent, rightextent;
// page in the needed range of frames
if (leftcontext[i] == 0 && rightcontext[i] == 0)
{
leftextent = rightextent = augmentationextent(framesMulti[i][0].size(), vdims[i]);
}
else
{
leftextent = leftcontext[i];
rightextent = rightcontext[i];
}
//msra::dbn::augmentneighbors(framesMulti[i], boundaryFlags, 0, leftcontext[i], rightcontext[i],)
msra::dbn::augmentneighbors (framesMulti[i], boundaryFlags, leftextent, rightextent, 0, framesInBlock, feat[i]);
}
minibatchReady=true;
}
void SetMinibatchSize(size_t mbSize){ minibatchSize=mbSize;}
msra::dbn::matrix ChunkOfFrames(size_t index) { assert(minibatchReady); assert(index<=feat.size()); return feat[index]; }
bool IsMinibatchReady() { return minibatchReady; }
size_t CurrentFileSize() { return framesMulti[0].size(); }
void FlushInput(){CreateEvalMinibatch();}
void Reset() { Clear(); }
};
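// The context rule in CreateEvalMinibatch() above: zero left and right context means
// "infer a symmetric window from the model input dimension"; anything else is used as
// an explicit asymmetric window. A standalone sketch of the symmetric case, assuming
// augmentationextent() computes (modeldim / featdim - 1) / 2 -- an assumption here,
// inferred from its use with framesMulti[i][0].size() and vdims[i]:
static inline size_t inferextent_sketch (size_t featdim, size_t modeldim)
{
    if (featdim == 0 || modeldim % featdim != 0)
        throw std::runtime_error ("inferextent_sketch: model dim must be a multiple of the feature dim");
    return (modeldim / featdim - 1) / 2;    // e.g. 39-dim frames into a 429-dim input -> 5 frames each side
}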
};};

View file

@ -1,24 +0,0 @@
//
// <copyright file="dllmain.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// dllmain.cpp : Defines the entry point for the DLL application.
#include "stdafx.h"
BOOL APIENTRY DllMain( HMODULE /*hModule*/,
DWORD ul_reason_for_call,
LPVOID /*lpReserved*/
)
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}

The diff for this file is not shown because it is too large.

View file

@ -1,620 +0,0 @@
//
// fileutil.h - file I/O with error checking
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
#pragma once
#ifndef _FILEUTIL_
#define _FILEUTIL_
#include "Platform.h"
#include <cstdio>
#ifdef __unix__
#include <sys/types.h>
#include <sys/stat.h>
#endif
#include <algorithm> // for std::find
#include <vector>
#include <map>
#include <functional>
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cassert>
#include <cstring> // for strerror()
using namespace std;
#define SAFE_CLOSE(f) (((f) == NULL) || (fcloseOrDie ((f)), (f) = NULL))
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and the stream
// is switched to binary or text mode if 'b' or 't' is given. If you use this,
// make sure not to fclose() such a handle.
// ----------------------------------------------------------------------------
FILE * fopenOrDie (const string & pathname, const char * mode);
FILE * fopenOrDie (const wstring & pathname, const wchar_t * mode);
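// Usage sketch for the "-" convention documented above (filenames illustrative):
static inline void fopenOrDie_usage_sketch()
{
    FILE * fin  = fopenOrDie (std::string ("-"), "rb");          // aliases stdin, switched to binary mode
    FILE * fout = fopenOrDie (std::wstring (L"out.dat"), L"wb"); // dies with an error message on failure
    fcloseOrDie (fout);     // fine for a real file
    fin;                    // but never fclose() the "-" handle -- it aliases stdin
}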
#ifndef __unix__
// ----------------------------------------------------------------------------
// fsetmode(): set mode to binary or text
// ----------------------------------------------------------------------------
void fsetmode (FILE * f, char type);
#endif
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f);
template<class _T>
void freadOrDie (_T & data, int num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, size_t num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f);
template<class _T>
void fwriteOrDie (const _T & data, FILE * f) // template for vector<>
{ if (data.size() > 0) fwriteOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fprintfOrDie (FILE * f, const char *format, ...);
// ----------------------------------------------------------------------------
// fcloseOrDie(): like fclose() but terminate with err msg in case of error
// not yet implemented, but we should
// ----------------------------------------------------------------------------
#define fcloseOrDie fclose
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie (FILE * f);
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes
// ----------------------------------------------------------------------------
size_t filesize (const wchar_t * pathname);
size_t filesize (FILE * f);
int64_t filesize64 (const wchar_t * pathname);
// ----------------------------------------------------------------------------
// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
// 32-bit offsets only
long fseekOrDie (FILE * f, long offset, int mode = SEEK_SET);
#define ftellOrDie ftell
// ----------------------------------------------------------------------------
// fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
uint64_t fgetpos (FILE * f);
void fsetpos (FILE * f, uint64_t pos);
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie (const std::string & pathname);
void unlinkOrDie (const std::wstring & pathname);
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
void renameOrDie (const std::string & from, const std::string & to);
void renameOrDie (const std::wstring & from, const std::wstring & to);
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists (const char * pathname);
bool fexists (const wchar_t * pathname);
inline bool fexists (const std::string & pathname) { return fexists (pathname.c_str()); }
inline bool fexists (const std::wstring & pathname) { return fexists (pathname.c_str()); }
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode
// ----------------------------------------------------------------------------
bool funicode (FILE * f);
// ----------------------------------------------------------------------------
// fskipspace(): skip space characters
// ----------------------------------------------------------------------------
bool fskipspace (FILE * F);
bool fskipwspace (FILE * F);
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets()), returned buffer is
// always 0-terminated; has second version that returns an STL string instead
// fgetstring(): read a 0-terminated string (terminate if error)
// fgetword(): read a space-terminated token (terminate if error)
// fskipNewLine(): skip all white space until end of line incl. the newline
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string (terminate if error)
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char *);
void fputstring (const HANDLE f, const char * str);
void fputstring (FILE * f, const std::string &);
void fputstring (FILE * f, const wchar_t *);
void fputstring (FILE * f, const std::wstring &);
template<class CHAR> CHAR * fgetline (FILE * f, CHAR * buf, int size);
template<class CHAR, size_t n> CHAR * fgetline (FILE * f, CHAR (& buf)[n]) { return fgetline (f, buf, n); }
string fgetline (FILE * f);
wstring fgetlinew (FILE * f);
void fgetline (FILE * f, std::string & s, std::vector<char> & buf);
void fgetline (FILE * f, std::wstring & s, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<wchar_t> & buf);
const char * fgetstring (FILE * f, char * buf, int size);
template<size_t n> const char * fgetstring (FILE * f, char (& buf)[n]) { return fgetstring (f, buf, n); }
const char * fgetstring (const HANDLE f, char * buf, int size);
template<size_t n> const char * fgetstring (const HANDLE f, char (& buf)[n]) { return fgetstring (f, buf, n); }
const wchar_t * fgetstring (FILE * f, wchar_t * buf, int size);
wstring fgetwstring (FILE * f);
string fgetstring (FILE * f);
const char * fgettoken (FILE * f, char * buf, int size);
template<size_t n> const char * fgettoken (FILE * f, char (& buf)[n]) { return fgettoken (f, buf, n); }
string fgettoken (FILE * f);
const wchar_t * fgettoken (FILE * f, wchar_t * buf, int size);
wstring fgetwtoken (FILE * f);
int fskipNewline (FILE * f, bool skip = true);
int fskipwNewline (FILE * f, bool skip = true);
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string (terminate if error)
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char *);
void fputstring (FILE * f, const std::string &);
void fputstring (FILE * f, const wchar_t *);
void fputstring (FILE * f, const std::wstring &);
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
string fgetTag (FILE * f);
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag (FILE * f, const char * expectedTag);
void fcheckTag_ascii (FILE * f, const string & expectedTag);
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag (const string & readTag, const string & expectedTag);
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag (FILE * f, const char * tag);
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring (FILE * f);
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n);
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte (FILE * f);
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort (FILE * f);
short fgetshort_bigendian (FILE * f);
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24 (FILE * f);
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint (FILE * f);
int fgetint_bigendian (FILE * f);
int fgetint_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetlong(): read a long value
// ----------------------------------------------------------------------------
long fgetlong (FILE * f);
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat (FILE * f);
float fgetfloat_bigendian (FILE * f);
float fgetfloat_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble (FILE * f);
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte (FILE * f, char val);
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort (FILE * f, short val);
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24 (FILE * f, int v);
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint (FILE * f, int val);
// ----------------------------------------------------------------------------
// fputlong(): write a long value
// ----------------------------------------------------------------------------
void fputlong (FILE * f, long val);
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat (FILE * f, float val);
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble (FILE * f, double val);
// template versions of put/get functions for binary files
template <typename T>
void fput(FILE * f, T v)
{
fwriteOrDie (&v, sizeof (v), 1, f);
}
// template versions of put/get functions for binary files
template <typename T>
void fget(FILE * f, T& v)
{
freadOrDie ((void *)&v, sizeof (v), 1, f);
}
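// Round-trip sketch for the binary fput()/fget() templates above (path illustrative):
static inline void fput_fget_usage_sketch()
{
    FILE * f = fopenOrDie (std::string ("scratch.bin"), "wb");
    fput (f, 3.14f);                    // raw bytes via fwriteOrDie()
    fcloseOrDie (f);
    f = fopenOrDie (std::string ("scratch.bin"), "rb");
    float v;
    fget (f, v);                        // reads the bytes back; v == 3.14f
    fcloseOrDie (f);
}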
// GetFormatString - get the format string for a particular type
template <typename T>
const wchar_t* GetFormatString(T /*t*/)
{
// if this assert goes off it means that you are using a type that doesn't have
// a read and/or write routine.
// If the type is a user-defined class, you need to create some global functions that handle file in/out.
// for example:
//File& operator>>(File& stream, MyClass& test);
//File& operator<<(File& stream, MyClass& test);
//
// in your class you will probably want to add these functions as friends so you can access any private members
// friend File& operator>>(File& stream, MyClass& test);
// friend File& operator<<(File& stream, MyClass& test);
//
// if you are using wchar_t* or char* types, these use other methods because they require buffers to be passed
// either use std::string and std::wstring, or use the WriteString() and ReadString() methods
assert(false); // need a specialization
return NULL;
}
// GetFormatString - specializations to get the format string for a particular type
template <> const wchar_t* GetFormatString(char);
template <> const wchar_t* GetFormatString(wchar_t);
template <> const wchar_t* GetFormatString(short);
template <> const wchar_t* GetFormatString(int);
template <> const wchar_t* GetFormatString(long);
template <> const wchar_t* GetFormatString(unsigned short);
template <> const wchar_t* GetFormatString(unsigned int);
template <> const wchar_t* GetFormatString(unsigned long);
template <> const wchar_t* GetFormatString(float);
template <> const wchar_t* GetFormatString(double);
template <> const wchar_t* GetFormatString(size_t);
template <> const wchar_t* GetFormatString(long long);
template <> const wchar_t* GetFormatString(const char*);
template <> const wchar_t* GetFormatString(const wchar_t*);
// GetScanFormatString - get the format string for a particular type
template <typename T>
const wchar_t* GetScanFormatString(T t)
{
assert(false); // need a specialization
return NULL;
}
// GetScanFormatString - specializations to get the format string for a particular type
template <> const wchar_t* GetScanFormatString(char);
template <> const wchar_t* GetScanFormatString(wchar_t);
template <> const wchar_t* GetScanFormatString(short);
template <> const wchar_t* GetScanFormatString(int);
template <> const wchar_t* GetScanFormatString(long);
template <> const wchar_t* GetScanFormatString(unsigned short);
template <> const wchar_t* GetScanFormatString(unsigned int);
template <> const wchar_t* GetScanFormatString(unsigned long);
template <> const wchar_t* GetScanFormatString(float);
template <> const wchar_t* GetScanFormatString(double);
template <> const wchar_t* GetScanFormatString(size_t);
template <> const wchar_t* GetScanFormatString(long long);
// ----------------------------------------------------------------------------
// fgetText(): get a value from a text file
// ----------------------------------------------------------------------------
template <typename T>
void fgetText(FILE * f, T& v)
{
int rc = ftrygetText(f, v);
if (rc == 0)
throw std::runtime_error("error reading value from file (invalid format)");
else if (rc == EOF)
throw std::runtime_error(std::string("error reading from file: ") + strerror(errno));
assert(rc == 1);
}
// version that tries to read a value without throwing if the contents don't match the format
template <typename T>
int ftrygetText(FILE * f, T& v)
{
const wchar_t* formatString = GetScanFormatString<T>(v);
int rc = fwscanf (f, formatString, &v);
assert(rc == 1 || rc == 0);
return rc;
}
template <> int ftrygetText<bool>(FILE * f, bool& v);
// ----------------------------------------------------------------------------
// fgetText() specializations for fwscanf_s differences: get a value from a text file
// ----------------------------------------------------------------------------
void fgetText(FILE * f, char& v);
void fgetText(FILE * f, wchar_t& v);
// ----------------------------------------------------------------------------
// fputText(): write a value out as text
// ----------------------------------------------------------------------------
template <typename T>
void fputText(FILE * f, T v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwprintf(f, formatString, v);
if (rc == 0)
throw std::runtime_error("error writing value to file, no values written");
else if (rc < 0)
throw std::runtime_error(std::string("error writing to file: ") + strerror(errno));
}
// ----------------------------------------------------------------------------
// fputText(): write a bool out as character
// ----------------------------------------------------------------------------
template <> void fputText<bool>(FILE * f, bool v);
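// Text round-trip sketch for fputText()/fgetText(); the format strings come from the
// GetFormatString/GetScanFormatString specializations above (path illustrative):
static inline void fputText_fgetText_usage_sketch()
{
    FILE * f = fopenOrDie (std::string ("values.txt"), "wt");
    fputText (f, 42);                   // the int specialization picks the format string
    fputText (f, '\n');
    fcloseOrDie (f);
    f = fopenOrDie (std::string ("values.txt"), "rt");
    int n;
    fgetText (f, n);                    // throws std::runtime_error on malformed input or EOF
    fcloseOrDie (f);
}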
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile (const wstring & pathname, const std::vector<char> & buffer);
void fputfile (const wstring & pathname, const std::wstring & string);
void fputfile (const wstring & pathname, const std::string & string);
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile (const wstring & pathname, std::vector<char> & buffer);
void fgetfile (FILE * f, std::vector<char> & buffer);
namespace msra { namespace files {
void fgetfilelines (const std::wstring & pathname, vector<char> & readbuffer, std::vector<std::string> & lines);
static inline std::vector<std::string> fgetfilelines (const std::wstring & pathname) { vector<char> buffer; std::vector<std::string> lines; fgetfilelines (pathname, buffer, lines); return lines; }
vector<char*> fgetfilelines (const wstring & pathname, vector<char> & readbuffer);
};};
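// Usage sketch for the convenience overload above (path illustrative):
static inline void fgetfilelines_usage_sketch()
{
    std::vector<std::string> lines = msra::files::fgetfilelines (L"train.list");
    for (size_t i = 0; i < lines.size(); i++)
        fprintf (stderr, "%s\n", lines[i].c_str());     // one entry per input line
}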
// ----------------------------------------------------------------------------
// expand_wildcards() -- expand a path with wildcards (also intermediate ones)
// ----------------------------------------------------------------------------
void expand_wildcards (const wstring & path, vector<wstring> & paths);
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
namespace msra { namespace files {
void make_intermediate_dirs (const wstring & filepath);
};};
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
namespace msra { namespace files {
bool fuptodate (const wstring & target, const wstring & input, bool inputrequired = true);
};};
#if 0
// ----------------------------------------------------------------------------
// simple support for WAV file I/O
// ----------------------------------------------------------------------------
// define the header if we haven't seen it yet
#ifndef _WAVEFORMATEX_
#define _WAVEFORMATEX_
/*
* extended waveform format structure used for all non-PCM formats. this
* structure is common to all non-PCM formats.
*/
typedef unsigned short WORD; // in case not defined yet (i.e. linux)
typedef struct tWAVEFORMATEX
{
WORD wFormatTag; /* format type */
WORD nChannels; /* number of channels (i.e. mono, stereo...) */
DWORD nSamplesPerSec; /* sample rate */
DWORD nAvgBytesPerSec; /* for buffer estimation */
WORD nBlockAlign; /* block size of data */
WORD wBitsPerSample; /* number of bits per sample of mono data */
WORD cbSize; /* the count in bytes of the size of */
/* extra information (after cbSize) */
} WAVEFORMATEX, *PWAVEFORMATEX;
#endif /* _WAVEFORMATEX_ */
typedef struct wavehder{
char riffchar[4];
unsigned int RiffLength;
char wavechar[8];
unsigned int FmtLength;
signed short wFormatTag;
signed short nChannels;
unsigned int nSamplesPerSec;
unsigned int nAvgBytesPerSec;
signed short nBlockAlign;
signed short wBitsPerSample;
char datachar[4];
unsigned int DataLength;
private:
void prepareRest (int SampleCount);
public:
void prepare (unsigned int Fs, int Bits, int Channels, int SampleCount);
void prepare (const WAVEFORMATEX & wfx, int SampleCount);
unsigned int read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample);
void write (FILE * f);
static void update (FILE * f);
} WAVEHEADER;
// ----------------------------------------------------------------------------
// fgetwfx(), fputwfx(): I/O of wave file headers only
// ----------------------------------------------------------------------------
unsigned int fgetwfx (FILE *f, WAVEFORMATEX & wfx);
void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples);
// ----------------------------------------------------------------------------
// fgetraw(): read data of .wav file, and separate data of multiple channels.
// For example, data[i][j]: i is channel index, 0 means the first
// channel. j is sample index.
// ----------------------------------------------------------------------------
void fgetraw (FILE *f,std::vector< std::vector<short> > & data,const WAVEHEADER & wavhd);
#endif
// ----------------------------------------------------------------------------
// temp functions -- clean these up
// ----------------------------------------------------------------------------
// split a pathname into directory and filename
static inline void splitpath (const wstring & path, wstring & dir, wstring & file)
{
size_t pos = path.find_last_of (L"\\:/"); // DOS drives, UNIX, Windows
if (pos == path.npos) // no directory found
{
dir.clear();
file = path;
}
else
{
dir = path.substr (0, pos);
file = path.substr (pos +1);
}
}
// test if a pathname is a relative path
// A relative path is one that can be appended to a directory.
// Drive-relative paths, such as D:file, are considered non-relative.
static inline bool relpath (const wchar_t * path)
{ // this is a wild collection of pathname conventions in Windows
if (path[0] == '/' || path[0] == '\\') // e.g. \WINDOWS
return false;
if (path[0] && path[1] == ':') // drive syntax
return false;
// ... TODO: handle long NT paths
return true; // all others
}
template<class CHAR>
static inline bool relpath (const std::basic_string<CHAR> & s) { return relpath (s.c_str()); }
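// Usage sketch for the path helpers above (paths illustrative):
static inline void path_helpers_usage_sketch()
{
    std::wstring dir, file;
    splitpath (L"d:\\exp\\lts\\output.rec.txt", dir, file); // dir == L"d:\\exp\\lts", file == L"output.rec.txt"
    bool a = relpath (L"scripts\\run.bat");                 // true: can be appended to a directory
    bool b = relpath (L"d:file");                           // false: drive-relative counts as non-relative
    a; b;                                                   // touch to avoid unused-variable warnings
}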
// trim from start
static inline std::string &ltrim(std::string &s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); })); // lambda replaces deprecated std::ptr_fun and avoids UB for negative char values
return s;
}
// trim from end
static inline std::string &rtrim(std::string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
return s;
}
// trim from both ends
static inline std::string &trim(std::string &s) {
return ltrim(rtrim(s));
}
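// Quick check of the trimming helpers above (value illustrative):
static inline void trim_usage_sketch()
{
    std::string s = "  phoneme \t";
    trim (s);       // s == "phoneme"; ltrim/rtrim modify in place and return the same string
}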
vector<string> sep_string(const string & str, const string & sep);
#endif // _FILEUTIL_

View file

@ -1,448 +0,0 @@
// TODO: this is a dup; use the one in Include/ instead
//
// <copyright file="fileutil.old.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#ifndef _FILEUTIL_
#define _FILEUTIL_
#include "basetypes.h"
#include <stdio.h>
#ifdef __WINDOWS__
#include <windows.h> // for mmreg.h and FILETIME
#include <mmreg.h>
#endif
#include <stdint.h>
using namespace std;
#define SAFE_CLOSE(f) (((f) == NULL) || (fcloseOrDie ((f)), (f) = NULL))
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and the stream
// is switched to binary or text mode if 'b' or 't' is given. If you use this,
// make sure not to fclose() such a handle.
// ----------------------------------------------------------------------------
FILE * fopenOrDie (const STRING & pathname, const char * mode);
FILE * fopenOrDie (const WSTRING & pathname, const wchar_t * mode);
#ifndef __unix__ // don't need binary/text distinction on unix
// ----------------------------------------------------------------------------
// fsetmode(): set mode to binary or text
// ----------------------------------------------------------------------------
void fsetmode (FILE * f, char type);
#endif
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie (void * ptr, size_t size, size_t count, FILE * f);
void freadOrDie (void * ptr, size_t size, size_t count, const HANDLE f);
template<class _T>
void freadOrDie (_T & data, int num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, size_t num, FILE * f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, int num, const HANDLE f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void freadOrDie (_T & data, size_t num, const HANDLE f) // template for vector<>
{ data.resize (num); if (data.size() > 0) freadOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fwriteOrDie (const void * ptr, size_t size, size_t count, FILE * f);
void fwriteOrDie (const void * ptr, size_t size, size_t count, const HANDLE f);
template<class _T>
void fwriteOrDie (const _T & data, FILE * f) // template for vector<>
{ if (data.size() > 0) fwriteOrDie (&data[0], sizeof (data[0]), data.size(), f); }
template<class _T>
void fwriteOrDie (const _T & data, const HANDLE f) // template for vector<>
{ if (data.size() > 0) fwriteOrDie (&data[0], sizeof (data[0]), data.size(), f); }
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fprintfOrDie (FILE * f, const char *format, ...);
// ----------------------------------------------------------------------------
// fcloseOrDie(): like fclose() but terminate with err msg in case of error
// not yet implemented, but we should
// ----------------------------------------------------------------------------
#define fcloseOrDie fclose
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie (FILE * f);
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes
// ----------------------------------------------------------------------------
size_t filesize (const wchar_t * pathname);
size_t filesize (FILE * f);
int64_t filesize64 (const wchar_t * pathname);
// ----------------------------------------------------------------------------
// fseekOrDie(),ftellOrDie(), fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
// 32-bit offsets only
long fseekOrDie (FILE * f, long offset, int mode = SEEK_SET);
#define ftellOrDie ftell
uint64_t fgetpos (FILE * f);
void fsetpos (FILE * f, uint64_t pos);
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie (const std::string & pathname);
void unlinkOrDie (const std::wstring & pathname);
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
void renameOrDie (const std::string & from, const std::string & to);
void renameOrDie (const std::wstring & from, const std::wstring & to);
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists (const char * pathname);
bool fexists (const wchar_t * pathname);
inline bool fexists (const std::string & pathname) { return fexists (pathname.c_str()); }
inline bool fexists (const std::wstring & pathname) { return fexists (pathname.c_str()); }
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode
// ----------------------------------------------------------------------------
bool funicode (FILE * f);
// ----------------------------------------------------------------------------
// fskipspace(): skip space characters
// ----------------------------------------------------------------------------
void fskipspace (FILE * F);
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets()), returned buffer is
// always 0-terminated; has second version that returns an STL string instead
// fgetstring(): read a 0-terminated string (terminate if error)
// fgetword(): read a space-terminated token (terminate if error)
// fskipNewLine(): skip all white space until end of line incl. the newline
// ----------------------------------------------------------------------------
template<class CHAR> CHAR * fgetline (FILE * f, CHAR * buf, int size);
template<class CHAR, size_t n> CHAR * fgetline (FILE * f, CHAR (& buf)[n]) { return fgetline (f, buf, n); }
STRING fgetline (FILE * f);
WSTRING fgetlinew (FILE * f);
void fgetline (FILE * f, std::string & s, std::vector<char> & buf);
void fgetline (FILE * f, std::wstring & s, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<char> & buf);
void fgetline (FILE * f, std::vector<wchar_t> & buf);
const char * fgetstring (FILE * f, char * buf, int size);
template<size_t n> const char * fgetstring (FILE * f, char (& buf)[n]) { return fgetstring (f, buf, n); }
const char * fgetstring (const HANDLE f, char * buf, int size);
template<size_t n> const char * fgetstring (const HANDLE f, char (& buf)[n]) { return fgetstring (f, buf, n); }
wstring fgetwstring (FILE * f);
const char * fgettoken (FILE * f, char * buf, int size);
template<size_t n> const char * fgettoken (FILE * f, char (& buf)[n]) { return fgettoken (f, buf, n); }
STRING fgettoken (FILE * f);
void fskipNewline (FILE * f);
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string (terminate if error)
// ----------------------------------------------------------------------------
void fputstring (FILE * f, const char *);
void fputstring (const HANDLE f, const char * str);
void fputstring (FILE * f, const std::string &);
void fputstring (FILE * f, const wchar_t *);
void fputstring (FILE * f, const std::wstring &);
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
STRING fgetTag (FILE * f);
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag (FILE * f, const char * expectedTag);
void fcheckTag (const HANDLE f, const char * expectedTag);
void fcheckTag_ascii (FILE * f, const STRING & expectedTag);
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag (const STRING & readTag, const STRING & expectedTag);
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag (FILE * f, const char * tag);
void fputTag(const HANDLE f, const char * tag);
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring (FILE * f);
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// ----------------------------------------------------------------------------
void fpad (FILE * f, int n);
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte (FILE * f);
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort (FILE * f);
short fgetshort_bigendian (FILE * f);
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24 (FILE * f);
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint (FILE * f);
int fgetint (const HANDLE f);
int fgetint_bigendian (FILE * f);
int fgetint_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat (FILE * f);
float fgetfloat_bigendian (FILE * f);
float fgetfloat_ascii (FILE * f);
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble (FILE * f);
// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file
// ----------------------------------------------------------------------------
void fgetwav (FILE * f, std::vector<short> & wav, int & sampleRate);
void fgetwav (const wstring & fn, std::vector<short> & wav, int & sampleRate);
// ----------------------------------------------------------------------------
// fputwav(): save data into a .wav file
// ----------------------------------------------------------------------------
void fputwav (FILE * f, const vector<short> & wav, int sampleRate, int nChannels = 1);
void fputwav (const wstring & fn, const vector<short> & wav, int sampleRate, int nChannels = 1);
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte (FILE * f, char val);
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort (FILE * f, short val);
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24 (FILE * f, int v);
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint (FILE * f, int val);
void fputint (const HANDLE f, int v);
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat (FILE * f, float val);
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble (FILE * f, double val);
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile (const WSTRING & pathname, const std::vector<char> & buffer);
void fputfile (const WSTRING & pathname, const std::wstring & string);
void fputfile (const WSTRING & pathname, const std::string & string);
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile (const WSTRING & pathname, std::vector<char> & buffer);
void fgetfile (FILE * f, std::vector<char> & buffer);
namespace msra { namespace files {
void fgetfilelines (const std::wstring & pathname, vector<char> & readbuffer, std::vector<std::string> & lines);
static inline std::vector<std::string> fgetfilelines (const std::wstring & pathname) { vector<char> buffer; std::vector<std::string> lines; fgetfilelines (pathname, buffer, lines); return lines; }
vector<char*> fgetfilelines (const wstring & pathname, vector<char> & readbuffer);
};};
// ----------------------------------------------------------------------------
// getfiletime(), setfiletime(): access modification time
// ----------------------------------------------------------------------------
bool getfiletime (const std::wstring & path, FILETIME & time);
void setfiletime (const std::wstring & path, const FILETIME & time);
// ----------------------------------------------------------------------------
// expand_wildcards() -- expand a path with wildcards (also intermediate ones)
// ----------------------------------------------------------------------------
void expand_wildcards (const wstring & path, vector<wstring> & paths);
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
namespace msra { namespace files {
void make_intermediate_dirs (const wstring & filepath);
};};
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
namespace msra { namespace files {
bool fuptodate (const wstring & target, const wstring & input, bool inputrequired = true);
};};
// ----------------------------------------------------------------------------
// simple support for WAV file I/O
// ----------------------------------------------------------------------------
typedef struct wavehder{
char riffchar[4];
unsigned int RiffLength;
char wavechar[8];
unsigned int FmtLength;
signed short wFormatTag;
signed short nChannels;
unsigned int nSamplesPerSec;
unsigned int nAvgBytesPerSec;
signed short nBlockAlign;
signed short wBitsPerSample;
char datachar[4];
unsigned int DataLength;
private:
void prepareRest (int SampleCount);
public:
void prepare (unsigned int Fs, int Bits, int Channels, int SampleCount);
void prepare (const WAVEFORMATEX & wfx, int SampleCount);
unsigned int read (FILE * f, signed short & wRealFormatTag, int & bytesPerSample);
void write (FILE * f);
static void update (FILE * f);
} WAVEHEADER;
// ----------------------------------------------------------------------------
// fgetwfx(), fputwfx(): I/O of wave file headers only
// ----------------------------------------------------------------------------
unsigned int fgetwfx (FILE *f, WAVEFORMATEX & wfx);
void fputwfx (FILE *f, const WAVEFORMATEX & wfx, unsigned int numSamples);
// ----------------------------------------------------------------------------
// fgetraw(): read data of .wav file, and separate data of multiple channels.
// For example, data[i][j]: i is channel index, 0 means the first
// channel. j is sample index.
// ----------------------------------------------------------------------------
void fgetraw (FILE *f,std::vector< std::vector<short> > & data,const WAVEHEADER & wavhd);
// ----------------------------------------------------------------------------
// temp functions -- clean these up
// ----------------------------------------------------------------------------
// split a pathname into directory and filename
static inline void splitpath (const wstring & path, wstring & dir, wstring & file)
{
size_t pos = path.find_last_of (L"\\:/"); // DOS drives, UNIX, Windows
if (pos == path.npos) // no directory found
{
dir.clear();
file = path;
}
else
{
dir = path.substr (0, pos);
file = path.substr (pos +1);
}
}
// test if a pathname is a relative path
// A relative path is one that can be appended to a directory.
// Drive-relative paths, such as D:file, are considered non-relative.
static inline bool relpath (const wchar_t * path)
{ // this is a wild collection of pathname conventions in Windows
if (path[0] == '/' || path[0] == '\\') // e.g. \WINDOWS
return false;
if (path[0] && path[1] == ':') // drive syntax
return false;
// ... TODO: handle long NT paths
return true; // all others
}
template<class CHAR>
static inline bool relpath (const std::basic_string<CHAR> & s) { return relpath (s.c_str()); }
#endif // _FILEUTIL_

The diff for this file is not shown because it is too large.

View file

@ -1,97 +0,0 @@
// itf/clusterable-itf.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_CLUSTERABLE_ITF_H_
#define KALDI_ITF_CLUSTERABLE_ITF_H_ 1
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
/** \addtogroup clustering_group
@{
A virtual class for clusterable objects; see \ref clustering for an
explanation of its function.
*/
class Clusterable {
public:
/// \name Functions that must be overridden
/// @{
/// Return a copy of this object.
virtual Clusterable *Copy() const = 0;
/// Return the objective function associated with the stats
/// [assuming ML estimation]
virtual BaseFloat Objf() const = 0;
/// Return the normalizer (typically, count) associated with the stats
virtual BaseFloat Normalizer() const = 0;
/// Set stats to empty.
virtual void SetZero() = 0;
/// Add other stats.
virtual void Add(const Clusterable &other) = 0;
/// Subtract other stats.
virtual void Sub(const Clusterable &other) = 0;
/// Scale the stats by a positive number f [not mandatory to supply this].
virtual void Scale(BaseFloat f) {
KALDI_ERR << "This Clusterable object does not implement Scale().";
}
/// Return a string that describes the inherited type.
virtual std::string Type() const = 0;
/// Write data to stream.
virtual void Write(std::ostream &os, bool binary) const = 0;
/// Read data from a stream and return the corresponding object (const
/// function; it's a class member because we need access to the vtable
/// so generic code can read derived types).
virtual Clusterable* ReadNew(std::istream &os, bool binary) const = 0;
virtual ~Clusterable() {}
/// @}
/// \name Functions that have default implementations
/// @{
// These functions have default implementations (but may be overridden for
// speed). Implementations are in tree/clusterable-classes.cc.
/// Return the objective function of the combined object this + other.
virtual BaseFloat ObjfPlus(const Clusterable &other) const;
/// Return the objective function of the subtracted object this - other.
virtual BaseFloat ObjfMinus(const Clusterable &other) const;
/// Return the objective function decrease from merging the two
/// clusters, negated to be a positive number (or zero).
virtual BaseFloat Distance(const Clusterable &other) const;
/// @}
};
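// A minimal sketch (not part of this header's original source) of a
// concrete Clusterable, to illustrate the contract above: the stats are a
// count, a sum, and a sum of squares over scalar samples, and Objf() is the
// negated sum of squared deviations from the mean. The class name and the
// AddSample() helper are illustrative assumptions; the real implementations
// live in tree/clusterable-classes.cc.
class ScalarSketchClusterable: public Clusterable {
 public:
  ScalarSketchClusterable(): count_(0.0), sum_(0.0), sumsq_(0.0) {}
  virtual Clusterable *Copy() const { return new ScalarSketchClusterable(*this); }
  virtual BaseFloat Objf() const {  // negated squared deviation from the mean
    if (count_ == 0.0) return 0.0;
    return -(sumsq_ - sum_ * sum_ / count_);
  }
  virtual BaseFloat Normalizer() const { return count_; }
  virtual void SetZero() { count_ = sum_ = sumsq_ = 0.0; }
  virtual void Add(const Clusterable &other) {
    const ScalarSketchClusterable &o =
        static_cast<const ScalarSketchClusterable&>(other);
    count_ += o.count_; sum_ += o.sum_; sumsq_ += o.sumsq_;
  }
  virtual void Sub(const Clusterable &other) {
    const ScalarSketchClusterable &o =
        static_cast<const ScalarSketchClusterable&>(other);
    count_ -= o.count_; sum_ -= o.sum_; sumsq_ -= o.sumsq_;
  }
  virtual std::string Type() const { return "scalar-sketch"; }
  virtual void Write(std::ostream &os, bool binary) const {  // (binary flag ignored in this sketch)
    os << count_ << ' ' << sum_ << ' ' << sumsq_ << '\n';
  }
  virtual Clusterable *ReadNew(std::istream &is, bool binary) const {
    ScalarSketchClusterable *s = new ScalarSketchClusterable();
    is >> s->count_ >> s->sum_ >> s->sumsq_;
    return s;
  }
  void AddSample(BaseFloat x) { count_ += 1.0; sum_ += x; sumsq_ += x * x; }
 private:
  BaseFloat count_, sum_, sumsq_;
};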
/// @} end of "ingroup clustering_group"
} // end namespace kaldi
#endif // KALDI_ITF_CLUSTERABLE_ITF_H_

View file

@ -1,80 +0,0 @@
// itf/context-dep-itf.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_CONTEXT_DEP_ITF_H_
#define KALDI_ITF_CONTEXT_DEP_ITF_H_
#include "base/kaldi-common.h"
namespace kaldi {
/// @ingroup tree_group
/// @{
/// context-dep-itf.h provides a link between
/// the tree-building code in ../tree/, and the FST code in ../fstext/
/// (particularly, ../fstext/context-dep.h). It is an abstract
/// interface that describes an object that can map from a
/// phone-in-context to a sequence of integer leaf-ids.
class ContextDependencyInterface {
public:
/// ContextWidth() returns the value N (e.g. 3 for triphone models) that says how many phones
/// are considered for computing context.
virtual int ContextWidth() const = 0;
/// Central position P of the phone context, in 0-based numbering, e.g. P = 1 for typical
/// triphone system. We have to see if we can do without this function.
virtual int CentralPosition() const = 0;
/// The "new" Compute interface. For typical topologies,
/// pdf_class would be 0, 1, 2.
/// Returns success or failure; outputs the pdf-id.
///
/// "Compute" is the main function of this interface, that takes a
/// sequence of N phones (and it must be N phones), possibly
/// including epsilons (symbol id zero) but only at positions other
/// than P [these represent unknown phone context due to end or
/// begin of sequence]. We do not insist that Compute must always
/// output (into stateseq) a nonempty sequence of states, but we
/// anticipate that stateseq will always be nonempty at output in
/// typical use cases. "Compute" returns false if expansion somehow
/// failed. Normally the calling code should raise an exception if
/// this happens. We can define a different interface later in
/// order to handle other kinds of information-- the underlying
/// data-structures from event-map.h are very flexible.
virtual bool Compute(const std::vector<int32> &phoneseq, int32 pdf_class,
int32 *pdf_id) const = 0;
/// NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
virtual int32 NumPdfs() const = 0;
virtual ~ContextDependencyInterface() {};
ContextDependencyInterface() {}
/// Returns pointer to new object which is copy of current one.
virtual ContextDependencyInterface *Copy() const = 0;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependencyInterface);
};
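// Usage sketch (illustrative, not part of the original header): for a
// triphone system (ContextWidth() == 3, CentralPosition() == 1), mapping a
// phone window with pdf-class 0 to a pdf-id looks like this:
//   std::vector<int32> window; // the 3 phone ids; 0 (epsilon) allowed except at position P
//   int32 pdf_id;
//   if (!ctx_dep.Compute (window, 0, &pdf_id))
//     KALDI_ERR << "Compute() failed"; // callers normally raise on failure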
/// @}
} // namespace kaldi
#endif

View file

@ -1,61 +0,0 @@
// itf/decodable-itf.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Mirko Hannemann; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_DECODABLE_ITF_H_
#define KALDI_ITF_DECODABLE_ITF_H_ 1
#include "base/kaldi-common.h"
namespace kaldi {
/// @ingroup Interfaces
/// @{
/// decodable-itf.h provides a link between the (acoustic-modeling and
/// feature-processing) code and the decoder. The idea is to make this
/// interface as small as possible, and to make it as agnostic as possible about
/// the form of the acoustic model (e.g. don't assume the probabilities are a
/// function of just a vector of floats), and about the decoder (e.g. don't
/// assume it accesses frames in strict left-to-right order). For normal
/// models, without on-line operation, the "decodable" sub-class will just be a
/// wrapper around a matrix of features and an acoustic model, and it will
/// answer the question 'what is the acoustic likelihood for this index and this
/// frame?'.
/// An interface for a feature-file and model; see \ref decodable_interface
class DecodableInterface {
public:
/// Returns the log likelihood, which will be negated in the decoder.
virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
/// Returns true if this is the last frame. Frames are one-based.
virtual bool IsLastFrame(int32 frame) = 0;
// virtual int32 NumFrames() = 0;
/// Returns the number of indices that the decodable object can accept;
/// Indices are one-based! This is for compatibility with OpenFst.
virtual int32 NumIndices() = 0;
virtual ~DecodableInterface() {}
};
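// A minimal sketch (not part of the original header) of the "wrapper around
// a matrix of likelihoods" described above, using a plain std::vector so the
// example stays self-contained. The class name is an illustrative assumption.
class DecodableMatrixSketch: public DecodableInterface {
 public:
  // loglikes[t][i] = log-likelihood of (one-based) index i+1 at (one-based) frame t+1
  explicit DecodableMatrixSketch(const std::vector<std::vector<BaseFloat> > &loglikes)
      : loglikes_(loglikes) {}
  virtual BaseFloat LogLikelihood(int32 frame, int32 index) {
    return loglikes_[frame - 1][index - 1];  // both arguments are one-based
  }
  virtual bool IsLastFrame(int32 frame) {
    return frame == static_cast<int32>(loglikes_.size());
  }
  virtual int32 NumIndices() {
    return loglikes_.empty() ? 0 : static_cast<int32>(loglikes_[0].size());
  }
 private:
  std::vector<std::vector<BaseFloat> > loglikes_;
};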
/// @}
} // namespace kaldi
#endif // KALDI_ITF_DECODABLE_ITF_H_

View file

@ -1,51 +0,0 @@
// itf/optimizable-itf.h
// Copyright 2009-2011 Go Vivace Inc.; Microsoft Corporation; Georg Stemmer
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_OPTIMIZABLE_ITF_H_
#define KALDI_ITF_OPTIMIZABLE_ITF_H_
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
namespace kaldi {
/// @ingroup Interfaces
/// @{
/// OptimizableInterface provides
/// a virtual class for optimizable objects.
/// E.g. a class that computed a likelihood function and
/// its gradient using some parameter
/// that has to be optimized on data
/// could inherit from it.
template<class Real>
class OptimizableInterface {
public:
/// computes gradient for a parameter params and returns it
/// in gradient_out
virtual void ComputeGradient(const Vector<Real> &params,
Vector<Real> *gradient_out) = 0;
/// computes the function value for a parameter params
/// and returns it
virtual Real ComputeValue(const Vector<Real> &params) = 0;
virtual ~OptimizableInterface() {}
};
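// A minimal sketch (not in the original header) of an optimizable object:
// f(p) = -(p - t)^T (p - t) for a fixed target t, whose gradient is 2 (t - p).
// Names are illustrative assumptions; the Vector operations come from
// matrix/matrix-lib.h included above.
template<class Real>
class QuadraticSketch: public OptimizableInterface<Real> {
 public:
  explicit QuadraticSketch(const Vector<Real> &target): target_(target) {}
  virtual void ComputeGradient(const Vector<Real> &params,
                               Vector<Real> *gradient_out) {
    gradient_out->Resize(params.Dim());
    gradient_out->CopyFromVec(target_);
    gradient_out->AddVec(-1.0, params);  // t - p
    gradient_out->Scale(2.0);            // 2 (t - p)
  }
  virtual Real ComputeValue(const Vector<Real> &params) {
    Vector<Real> diff(params);
    diff.AddVec(-1.0, target_);          // p - t
    return -VecVec(diff, diff);          // -(p - t)^T (p - t)
  }
 private:
  Vector<Real> target_;
};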
/// @} end of "Interfaces"
} // end namespace kaldi
#endif

View file

@ -1,49 +0,0 @@
// itf/options-itf.h
// Copyright 2013 Tanel Alumae, Tallinn University of Technology
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_ITF_OPTIONS_ITF_H_
#define KALDI_ITF_OPTIONS_ITF_H_ 1
#include "base/kaldi-common.h"
namespace kaldi {
class OptionsItf {
public:
virtual void Register(const std::string &name,
bool *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
int32 *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
uint32 *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
float *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
double *ptr, const std::string &doc) = 0;
virtual void Register(const std::string &name,
std::string *ptr, const std::string &doc) = 0;
virtual ~OptionsItf() {}
};
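// Usage sketch (illustrative, not from the original header): option structs
// conventionally expose a Register() that binds each member to a
// command-line flag through this interface. The struct and flag names below
// are hypothetical.
struct SketchOptions {
  int32 beam;
  bool binary;
  SketchOptions(): beam(16), binary(true) {}
  void Register(OptionsItf *po) {
    po->Register("beam", &beam, "Decoding beam (hypothetical example flag).");
    po->Register("binary", &binary, "Write output in binary mode.");
  }
};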
} // namespace kaldi
#endif // KALDI_ITF_OPTIONS_ITF_H_

View file

@ -1,743 +0,0 @@
//
// <copyright file="latticearchive.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "stdafx.h"
#include "basetypes.h"
#include "fileutil.h"
#include "htkfeatio.h" // for MLF reading for numer lattices
#include "latticearchive.h"
#include "msra_mgram.h" // for MLF reading for numer lattices
#include <stdio.h>
#include <stdint.h>
#include <vector>
#include <string>
#include <set>
#include <hash_map>
#include <regex>
#pragma warning(disable : 4996)
namespace msra { namespace lattices {
// helper to write a symbol hash (string -> int) to a file
// File has two sections:
// - physicalunitname // line number is mapping, starting with 0
// - logunitname physicalunitname // establishes a mapping; logunitname will get the same numeric index as physicalunitname
template<class UNITMAP>
static void writeunitmap (const wstring & symlistpath, const UNITMAP & unitmap)
{
std::vector<std::string> units;
units.reserve (unitmap.size());
std::vector<std::string> mappings;
mappings.reserve (unitmap.size());
for (auto iter = unitmap.cbegin(); iter != unitmap.cend(); iter++) // why would 'for (auto iter : unitmap)' not work?
{
const std::string label = iter->first;
const size_t unitid = iter->second;
if (units.size() <= unitid)
units.resize (unitid + 1); // we grow it on demand; the result must be compact (all entries filled), we check that later
if (!units[unitid].empty()) // many-to-one mapping: remember the unit; look it up while writing
mappings.push_back (label);
else
units[unitid] = label;
}
auto_file_ptr flist = fopenOrDie (symlistpath, L"wb");
// write (physical) units
foreach_index (k, units)
{
if (units[k].empty())
throw std::logic_error ("build: unitmap has gaps");
fprintfOrDie (flist, "%s\n", units[k].c_str());
}
// write log-phys mappings
foreach_index (k, mappings)
{
const std::string unit = mappings[k]; // logical name
const size_t unitid = unitmap.find (unit)->second; // get its unit id; this indexes the units array
const std::string tounit = units[unitid]; // and get the name from there
fprintfOrDie (flist, "%s %s\n", unit.c_str(), tounit.c_str());
}
fflushOrDie (flist);
}
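// Illustrative .symlist content as written by writeunitmap() (unit names
// here are made up). Physical units come first, one per line, where the
// line number is the numeric index; logical-to-physical mappings follow:
//   sil
//   s-ih+l
//   z-ih+l s-ih+l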
// (little helper to do a map::find() with default value)
template<typename MAPTYPE, typename KEYTYPE, typename VALTYPE>
static size_t tryfind (const MAPTYPE & map, const KEYTYPE & key, VALTYPE deflt)
{
auto iter = map.find (key);
if (iter == map.end())
return deflt;
else
return iter->second;
}
// archive format:
// - output files of build():
// - OUTPATH --the resulting archive (a huge file), simple concatenation of binary blocks
// - OUTPATH.toc --contains keys and offsets; this is how content in archive is found
// KEY=ARCHIVE[BYTEOFFSET] // where ARCHIVE can be empty, meaning same as previous
// - OUTPATH.symlist --list of all unit names encountered, in order of numeric index used in archive (first = index 0)
// This file is suitable as an input to HHEd's AU command.
// - in actual use,
// - .toc files can be concatenated
// - .symlist files must remain paired with the archive file
// - for actual training, user also needs to provide, typically from an HHEd AU run:
// - OUTPATH.tying --map from triphone units to senone sequence by name; get full phone set from .symlist above
// UNITNAME SENONE[2] SENONE[3] SENONE[4]
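// Illustrative .toc content (keys borrowed from the unit-test helper below;
// the archive path and byte offset are made up). An empty ARCHIVE part means
// "same archive as the previous line":
//   sw2001_A_1263622500_1374610000=\\server\lats\train.lats[0]
//   sw2001_A_1391162500_1409287500=[73124]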
/*static*/ void archive::build (const std::vector<std::wstring> & infiles, const std::wstring & outpath,
const std::unordered_map<std::string,size_t> & modelsymmap,
const msra::asr::htkmlfreader<msra::asr::htkmlfentry,msra::lattices::lattice::htkmlfwordsequence> & labels, // non-empty: build numer lattices
const msra::lm::CMGramLM & unigram, const msra::lm::CSymbolSet & unigramsymbols) // for numer lattices
{
#if 0 // little unit test helper for testing the read function
bool test = true;
if (test)
{
archive a;
a.open (outpath + L".toc");
lattice L;
std::hash_map<string,size_t> symmap;
a.getlattice (L"sw2001_A_1263622500_1374610000", L, symmap);
a.getlattice (L"sw2001_A_1391162500_1409287500", L, symmap);
return;
}
#endif
const bool numermode = !labels.empty(); // if labels are passed then we shall convert the MLFs to lattices, and 'infiles' are regular keys
const std::wstring tocpath = outpath + L".toc";
const std::wstring symlistpath = outpath + L".symlist";
// process all files
std::set<std::wstring> seenkeys; // (keep track of seen keys; throw error for duplicate keys)
msra::files::make_intermediate_dirs (outpath);
auto_file_ptr f = fopenOrDie (outpath, L"wb");
auto_file_ptr ftoc = fopenOrDie (tocpath, L"wb");
size_t brokeninputfiles = 0;
foreach_index (i, infiles)
{
const std::wstring & inlatpath = infiles[i];
fprintf (stderr, "build: processing lattice '%S'\n", inlatpath.c_str());
// get key
std::wstring key = regex_replace (inlatpath, wregex (L"=.*"), wstring()); // delete mapping
key = regex_replace (key, wregex (L".*[\\\\/]"), wstring()); // delete path
key = regex_replace (key, wregex (L"\\.[^\\.\\\\/:]*$"), wstring()); // delete extension (or not if none)
if (!seenkeys.insert (key).second)
throw std::runtime_error (msra::strfun::strprintf ("build: duplicate key for lattice '%S'", inlatpath.c_str()));
// we fail all the time due to totally broken HDecode/copy process, OK if not too many files are missing
bool latticeread = false;
try
{
// fetch lattice
lattice L;
if (!numermode)
L.fromhtklattice (inlatpath, modelsymmap); // read HTK lattice
else
L.frommlf (key, modelsymmap, labels, unigram, unigramsymbols); // read MLF into a numerator lattice
latticeread = true;
// write to archive
uint64_t offset = fgetpos (f);
L.fwrite (f);
fflushOrDie (f);
// write reference to TOC file --note: TOC file is a headerless UTF8 file; so don't use fprintf %S format (default code page)
fprintfOrDie (ftoc, "%s=%s[%llu]\n", msra::strfun::utf8 (key).c_str(), ((i - brokeninputfiles) == 0) ? msra::strfun::utf8 (outpath).c_str() : "", offset);
fflushOrDie (ftoc);
fprintf (stderr, "written lattice to offset %llu as '%S'\n", offset, key.c_str());
}
catch (const exception & e)
{
if (latticeread) throw; // write failure
// we ignore read failures
fprintf (stderr, "ERROR: skipping unreadable lattice '%S': %s\n", inlatpath.c_str(), e.what());
brokeninputfiles++;
}
}
// write out the unit map
// TODO: This is sort of redundant now--it gets the symmap from the HMM, i.e. always the same for all archives.
writeunitmap (symlistpath, modelsymmap);
fprintf (stderr, "completed %lu out of %lu lattices (%lu read failures, %.1f%%)\n", infiles.size(), infiles.size()-brokeninputfiles, brokeninputfiles, 100.0f * brokeninputfiles / infiles.size());
}
// helper to set a context value (left, right) with checking of uniqueness
void lattice::nodecontext::setcontext (int & lr, int val)
{
if (lr == unknown)
lr = val;
else if (lr != val)
lr = (signed short) ambiguous;
}
// helper for merge() to determine the unique node contexts
vector<lattice::nodecontext> lattice::determinenodecontexts (const msra::asr::simplesenonehmm & hset) const
{
const size_t spunit = tryfind (hset.getsymmap(), "sp", SIZE_MAX);
const size_t silunit = tryfind (hset.getsymmap(), "sil", SIZE_MAX);
vector<lattice::nodecontext> nodecontexts (nodes.size());
nodecontexts.front().left = nodecontext::start;
nodecontexts.front().right = nodecontext::ambiguous; // (should not happen, but won't harm either)
nodecontexts.back().right = nodecontext::end;
nodecontexts.back().left = nodecontext::ambiguous; // (should not happen--we require !sent_end; but who knows)
size_t multispseen = 0; // bad entries with multi-sp
foreach_index (j, edges)
{
const auto & e = edges[j];
const size_t S = e.S;
const size_t E = e.E;
auto a = getaligninfo (j);
if (a.size() == 0) // !NULL edge
throw std::logic_error ("determinenodecontexts: !NULL edges not allowed in merging, should be removed before");
size_t A = a[0].unit;
size_t Z = a[a.size()-1].unit;
if (Z == spunit)
{
if (a.size() < 2)
throw std::runtime_error ("determinenodecontexts: context-free unit (/sp/) found as a single-phone word");
else
{
Z = a[a.size()-2].unit;
if (Z == spunit) // a buggy lattice --I got this from HVite, to be tracked down
{
// search from end once again, to print a warning
int n;
for (n = (int) a.size() -1; n >= 0; n--)
if (a[n].unit != spunit)
break;
// ends with n = position of furthest non-sp
if (n < 0) // only sp?
throw std::runtime_error ("determinenodecontexts: word consists only of /sp/");
fprintf (stderr, "determinenodecontexts: word with %lu /sp/ at the end found, edge %d\n", a.size() -1 - n, j);
multispseen++;
Z = a[n].unit;
}
}
}
if (A == spunit || Z == spunit)
{
#if 0
fprintf (stderr, "A=%d Z=%d fa=%d j=%d/N=%d L=%d n=%d totalalign=%d ts/te=%d/%d\n", (int) A, (int) Z, (int) e.firstalign,(int) j, (int) edges.size(), (int) nodes.size(), (int) a.size(), (int) align.size(),
nodes[S].t, nodes[E].t);
foreach_index (kk, a)
fprintf (stderr, "a[%d] = %d\n", kk, a[kk].unit);
dump (stderr, [&] (size_t i) { return hset.gethmm (i).getname(); });
#endif
throw std::runtime_error ("determinenodecontexts: context-free unit (/sp/) found as a start phone or second last phone");
}
const auto & Ahmm = hset.gethmm (A);
const auto & Zhmm = hset.gethmm (Z);
int Aid = (int) Ahmm.gettransPindex();
int Zid = (int) Zhmm.gettransPindex();
nodecontexts[S].setright (Aid);
nodecontexts[E].setleft (Zid);
}
if (multispseen > 0)
fprintf (stderr, "determinenodecontexts: %lu broken edges in %lu with multiple /sp/ at the end seen\n", multispseen, edges.size());
// check CI conditions and put in 't'
// We make the hard assumption that there is only one CI phone, /sil/.
const auto & silhmm = hset.gethmm (silunit);
int silid = silhmm.gettransPindex();
foreach_index (i, nodecontexts)
{
auto & nc = nodecontexts[i];
if ((nc.left == nodecontext::unknown) ^ (nc.right == nodecontext::unknown))
throw std::runtime_error ("determinenodecontexts: invalid dead-end node in lattice");
if (nc.left == nodecontext::ambiguous && nc.right != silid && nc.right != nodecontext::end)
throw std::runtime_error ("determinenodecontexts: invalid ambiguous left context (right context is not CI)");
if (nc.right == nodecontext::ambiguous && nc.left != silid && nc.left != nodecontext::start)
throw std::runtime_error ("determinenodecontexts: invalid ambiguous right context (left context is not CI)");
nc.t = nodes[i].t;
}
return nodecontexts; // (will this use a move constructor??)
}
// compar function for sorting and merging
bool lattice::nodecontext::operator< (const nodecontext & other) const
{
// sort by t, left, right, i --sort by i to make i appear before iother, as assumed in merge function
int diff = (int) t - (int) other.t;
if (diff == 0)
{
diff = left - other.left;
if (diff == 0)
{
diff = right - other.right;
if (diff == 0)
return i < other.i; // (cannot use 'diff=' pattern since unsigned but may be SIZE_MAX)
}
}
return diff < 0;
}
// remove that final !NULL edge
// We have that in HAPI lattices, but there can be only one at the end.
void lattice::removefinalnull()
{
const auto & lastedge = edges.back();
// last edge can be !NULL, recognized as having 0 alignment records
if (lastedge.firstalign < align.size()) // has alignment records --not !NULL
return;
if (lastedge.S != nodes.size() -2 || lastedge.E != nodes.size() -1)
throw std::runtime_error ("removefinalnull: malformed final !NULL edge");
edges.resize (edges.size() -1); // remove it
nodes.resize (nodes.size() -1); // its start node is now the new end node
foreach_index (j, edges)
if (edges[j].E >= nodes.size())
throw std::runtime_error ("removefinalnull: cannot have final !NULL edge and other edges connecting to end node at the same time");
}
// merge a secondary lattice into the first
// With lots of caveats:
// - this optimizes lattices to true unigram lattices where the only unique node condition is acoustic context
// - no !NULL edge at the end, call removefinalnull() before
// - this function returns an unsorted edges[] array, i.e. invalid. We sort in uniq'ed representation, which is easier.
// This function is not elegant at all, just hard labor!
void lattice::merge (const lattice & other, const msra::asr::simplesenonehmm & hset)
{
if (!edges2.empty() || !other.edges2.empty())
throw std::logic_error ("merge: lattice(s) must be in non-uniq'ed format (V1)");
if (!info.numframes || !other.info.numframes)
throw std::logic_error ("merge: lattice(s) must have identical number of frames");
// establish node contexts
auto contexts = determinenodecontexts (hset);
auto othercontexts = other.determinenodecontexts (hset);
// create joint node space and node mapping
// This also collapses non-unique nodes.
// Note the edge case sil-sil in one lattice which may be sil-ambiguous or ambiguous-sil on the other.
// We ignore this, keeping such nodes unmerged. That's OK since middle /sil/ words have zero LM, and thus it's OK to keep them non-connected.
foreach_index (i, contexts) contexts[i].i = i;
foreach_index (i, othercontexts) othercontexts[i].iother = i;
contexts.insert (contexts.end(), othercontexts.begin(), othercontexts.end()); // append othercontext
sort (contexts.begin(), contexts.end());
vector<size_t> nodemap (nodes.size(), SIZE_MAX);
vector<size_t> othernodemap (other.nodes.size(), SIZE_MAX);
int j = 0;
foreach_index (i, contexts) // merge identical nodes --this is the critical step
{
if (j == 0 || contexts[j-1].t != contexts[i].t || contexts[j-1].left != contexts[i].left || contexts[j-1].right != contexts[i].right)
contexts[j++] = contexts[i]; // entered a new one
// node map
if (contexts[i].i != SIZE_MAX)
nodemap[contexts[i].i] = j-1;
if (contexts[i].iother != SIZE_MAX)
othernodemap[contexts[i].iother] = j-1;
}
fprintf (stderr, "merge: joint node space uniq'ed to %d from %d\n", j, contexts.size());
contexts.resize (j);
// create a new node array (just copy the contexts[].t fields)
nodes.resize (contexts.size());
foreach_index (inew, nodes)
nodes[inew].t = (unsigned short) contexts[inew].t;
info.numnodes = nodes.size();
// incorporate the alignment records
const size_t alignoffset = align.size();
align.insert (align.end(), other.align.begin(), other.align.end());
// map existing edges' S and E fields, and also 'firstalign'
foreach_index (j, edges)
{
edges[j].S = nodemap[edges[j].S];
edges[j].E = nodemap[edges[j].E];
}
auto otheredges = other.edges;
foreach_index (j, otheredges)
{
otheredges[j].S = othernodemap[otheredges[j].S];
otheredges[j].E = othernodemap[otheredges[j].E];
otheredges[j].firstalign += alignoffset; // that's where they are now
}
// at this point, a new 'nodes' array exists, and the edges already are w.r.t. the new node space and align space
// now we are ready to merge 'other' edges into this, simply by concatenation
edges.insert (edges.end(), otheredges.begin(), otheredges.end());
// remove acoustic scores --they are likely not identical if they come from different decoders
// If we don't do that, this will break the sorting in builduniquealignments()
info.hasacscores = 0;
foreach_index (j, edges)
edges[j].a = 0.0f;
// Note: we have NOT sorted or de-duplicated yet. That is best done after conversion to the uniq'ed format.
}
// remove duplicates
// This must be called in uniq'ed format.
void lattice::dedup()
{
if (edges2.empty())
throw std::logic_error ("dedup: lattice must be in uniq'ed format (V2)");
size_t k = 0;
foreach_index (j, edges2)
{
if (k > 0 && edges2[k-1].S == edges2[j].S && edges2[k-1].E == edges2[j].E && edges2[k-1].firstalign == edges2[j].firstalign)
{
if (edges2[k-1].implysp != edges2[j].implysp)
throw std::logic_error ("dedup: inconsistent 'implysp' flag for otherwise identical edges");
continue;
}
edges2[k++] = edges2[j];
}
fprintf (stderr, "dedup: edges reduced to %d from %d\n", k, edges2.size());
edges2.resize (k);
info.numedges = edges2.size();
edges.clear(); // (should already be, but isn't; make sure we no longer use it)
}
// load all lattices from a TOC file and write them to a new archive
// Use this to
// - upgrade the file format to latest in case of format changes
// - check consistency (read only; don't write out)
// - dump to stdout
// - merge two lattices (for merging numer into denom lattices)
// Input path is an actual TOC path, output is the stem (.TOC will be added). --yes, not nice, maybe fix it later
// Example command:
// convertlatticearchive --latticetocs dummy c:\smbrdebug\sw20_small.den.lats.toc.10 -w c:\smbrdebug\sw20_small.den.lats.converted --cdphonetying c:\smbrdebug\combined.tying --statelist c:\smbrdebug\swb300h.9304.aligned.statelist --transprobs c:\smbrdebug\MMF.9304.transprobs
// How to regenerate from my test lattices:
// buildlatticearchive c:\smbrdebug\sw20_small.den.lats.regenerated c:\smbrdebug\hvitelat\*lat
// We support two special output path syntaxes:
// - empty ("") -> don't output, just check the format
// - dash ("-") -> dump lattice to stdout instead
/*static*/ void archive::convert (const std::wstring & intocpath, const std::wstring & intocpath2, const std::wstring & outpath,
const msra::asr::simplesenonehmm & hset)
{
const auto & modelsymmap = hset.getsymmap();
const std::wstring tocpath = outpath + L".toc";
const std::wstring symlistpath = outpath + L".symlist";
// open input archive
// TODO: I find that HVite emits redundant physical triphones, and even HHEd seems so (in .tying file).
// Thus, we should uniq the units before sorting. We can do that here if we have the .tying file.
// And then use the modelsymmap to map them down.
// Do this directly in the hset module (it will be transparent).
std::vector<std::wstring> intocpaths (1, intocpath); // set of paths consisting of 1
msra::lattices::archive archive (intocpaths, modelsymmap);
// secondary archive for optional merging operation
const bool mergemode = !intocpath2.empty(); // true if merging two lattices
std::vector<std::wstring> intocpaths2;
if (mergemode)
intocpaths2.push_back (intocpath2);
msra::lattices::archive archive2 (intocpaths2, modelsymmap); // (if no merging then this archive2 is empty)
// read the intocpath file once again to get the keys in original order
std::vector<char> textbuffer;
auto toclines = msra::files::fgetfilelines (intocpath, textbuffer);
auto_file_ptr f = NULL;
auto_file_ptr ftoc = NULL;
// process all files
if (outpath != L"" && outpath != L"-") // test for special syntaxes that bypass to actually create an output archive
{
msra::files::make_intermediate_dirs (outpath);
f = fopenOrDie (outpath, L"wb");
ftoc = fopenOrDie (tocpath, L"wb");
}
vector<const char *> invmodelsymmap; // only used for dump() mode
// we must parse the toc file once again to get the keys in original order
size_t skippedmerges = 0;
foreach_index (i, toclines)
{
const char * line = toclines[i];
const char * p = strchr (line, '=');
if (p == NULL)
throw std::runtime_error ("open: invalid TOC line (no = sign): " + std::string (line));
const std::wstring key = msra::strfun::utf16 (std::string (line, p - line));
fprintf (stderr, "convert: processing lattice '%S'\n", key.c_str());
// fetch lattice --this performs any necessary format conversions already
lattice L;
archive.getlattice (key, L);
lattice L2;
if (mergemode)
{
if (!archive2.haslattice (key))
{
fprintf (stderr, "convert: cannot merge because lattice '%S' missing in secondary archive; skipping\n", key.c_str());
skippedmerges++;
continue;
}
archive2.getlattice (key, L2);
// merge it in
// This will connect each node with matching 1-phone context conditions; aimed at merging numer lattices.
L.removefinalnull(); // get rid of that final !NULL headache
L2.removefinalnull();
L.merge (L2, hset);
// note: we are left with dups due to true unigram merging (HTK lattices cannot represent true unigram lattices since id is on the nodes)
}
//L.removefinalnull();
//L.determinenodecontexts (hset);
// convert it --TODO: once we permanently use the new format, do this in fread() for V1
// Note: Merging may have left this in unsorted format; we need to be robust against that.
const size_t spunit = tryfind (modelsymmap, "sp", SIZE_MAX);
L.builduniquealignments (spunit);
if (mergemode)
L.dedup();
if (f && ftoc)
{
// write to archive
uint64_t offset = fgetpos (f);
L.fwrite (f);
fflushOrDie (f);
// write reference to TOC file --note: TOC file is a headerless UTF8 file; so don't use fprintf %S format (default code page)
fprintfOrDie (ftoc, "%s=%s[%llu]\n", msra::strfun::utf8 (key).c_str(), (i == 0) ? msra::strfun::utf8 (outpath).c_str() : "", offset);
fflushOrDie (ftoc);
fprintf (stderr, "written converted lattice to offset %llu as '%S'\n", offset, key.c_str());
}
else if (outpath == L"-")
{
if (invmodelsymmap.empty()) // build this lazily
{
invmodelsymmap.resize (modelsymmap.size());
for (auto iter = modelsymmap.begin(); iter != modelsymmap.end(); iter++)
invmodelsymmap[iter->second] = iter->first.c_str();
}
L.rebuildedges (false);
L.dump (stdout, [&] (size_t i) { return invmodelsymmap[i]; } );
}
} // end for (toclines)
if (skippedmerges > 0)
fprintf (stderr, "convert: %d out of %d merge operations skipped due to secondary lattice missing\n", skippedmerges, toclines.size());
// write out the updated unit map
if (f && ftoc)
writeunitmap (symlistpath, modelsymmap);
fprintf (stderr, "converted %d lattices\n", toclines.size());
}
// ---------------------------------------------------------------------------
// reading lattices from external formats (HTK lat, MLF)
// ---------------------------------------------------------------------------
// read an HTK lattice
// The lattice is expected to be freshly constructed (I did not bother to check).
void lattice::fromhtklattice (const wstring & path, const std::unordered_map<std::string,size_t> & unitmap)
{
vector<char> textbuffer;
auto lines = msra::files::fgetfilelines (path, textbuffer);
if (lines.empty())
throw std::runtime_error ("lattice: mal-formed lattice--empty input file (or all-zeroes)");
auto iter = lines.begin();
// parse out LMF and WP
char dummychar = 0; // dummy for sscanf() end checking
for ( ; iter != lines.end() && strncmp (*iter, "N=", 2); iter++)
{
if (strncmp (*iter, "lmscale=", 8) == 0) // note: HTK sometimes generates extra garbage space at the end of this line
if (sscanf_s (*iter, "lmscale=%f wdpenalty=%f%c", &info.lmf, &info.wp, &dummychar, sizeof (dummychar)) != 2 && dummychar != ' ')
throw std::runtime_error ("lattice: mal-formed lmscale/wdpenalty line in lattice: " + string (*iter));
}
// parse N and L
if (iter != lines.end())
{
unsigned long N, L;
if (sscanf_s (*iter, "N=%lu L=%lu %c", &N, &L, &dummychar, sizeof (dummychar)) != 2)
throw std::runtime_error ("lattice: mal-formed N=/L= line in lattice: " + string (*iter));
info.numnodes = N;
info.numedges = L;
iter++;
}
else
throw std::runtime_error ("lattice: mal-formed before parse N=/L= line in lattice.");
assert(info.numnodes > 0);
nodes.reserve (info.numnodes);
// parse the nodes
for (size_t i = 0; i < info.numnodes; i++, iter++)
{
if (iter == lines.end())
throw std::runtime_error ("lattice: not enough I lines in lattice");
unsigned long itest;
float t;
if (sscanf_s (*iter, "I=%lu t=%f%c", &itest, &t, &dummychar, sizeof (dummychar)) < 2)
throw std::runtime_error ("lattice: mal-formed node line in lattice: " + string (*iter));
if (i != (size_t) itest)
throw std::runtime_error ("lattice: out-of-sequence node line in lattice: " + string (*iter));
nodes.push_back (nodeinfo ((unsigned int) (t / info.frameduration + 0.5)));
info.numframes = max (info.numframes, (size_t) nodes.back().t);
}
// parse the edges
assert(info.numedges > 0);
edges.reserve (info.numedges);
align.reserve (info.numedges * 10); // 10 phones per word on average should be enough
std::string label;
for (size_t j = 0; j < info.numedges; j++, iter++)
{
if (iter == lines.end())
throw std::runtime_error ("lattice: not enough J lines in lattice");
unsigned long jtest;
unsigned long S, E;
float a, l;
char d[1024];
// example:
// J=12 S=1 E=13 a=-326.81 l=-5.090 d=:sil-t:s+k:e,0.03:dh:m-ax:m+sil,0.03:sil,0.02:
int nvals = sscanf_s (*iter, "J=%lu S=%lu E=%lu a=%f l=%f d=%s", &jtest, &S, &E, &a, &l, d, sizeof (d)); // (pass d, not &d: %s expects the char array itself)
if (nvals == 5 && j == info.numedges - 1) // special case: last edge is a !NULL and thus may have the d= record missing
strcpy (d, ":");
else if (nvals != 6)
throw std::runtime_error ("lattice: mal-formed edge line in lattice: " + string (*iter));
if (j != (size_t) jtest)
throw std::runtime_error ("lattice: out-of-sequence edge line in lattice: " + string (*iter));
edges.push_back (edgeinfowithscores (S, E, a, l, align.size()));
// build align array
size_t edgeframes = 0; // (for checking whether the alignment sums up right)
const char * p = d;
if (p[0] != ':' || (p[1] == 0 && j < info.numedges-1)) // last edge may be empty
throw std::runtime_error ("lattice: alignment info must start with a colon and must have at least one entry: " + string (*iter));
p++;
while (*p)
{
// p points to an entry of the form TRIPHONE,DURATION
const char * q = strchr (p, ',');
if (q == NULL)
throw std::runtime_error ("lattice: alignment entry lacking a comma: " + string (*iter));
if (q == p)
throw std::runtime_error ("lattice: alignment entry label empty: " + string (*iter));
label.assign (p, q-p); // the triphone label
q++;
char * ep;
double duration = strtod (q, &ep); // (weird--returns a non-const ptr in ep to a const object)
p = ep;
if (*p != ':')
throw std::runtime_error ("lattice: alignment entry not ending with a colon: " + string (*iter));
p++;
// create the alignment entry
const size_t frames = (unsigned int) (duration / info.frameduration + 0.5);
auto it = unitmap.find (label);
if (it == unitmap.end())
throw std::runtime_error ("lattice: unit in alignment that is not in model: " + label);
const size_t unitid = it->second;
//const size_t unitid = unitmap.insert (make_pair (label, unitmap.size())).first->second; // may create a new entry with index = #entries
align.push_back (aligninfo (unitid, frames));
edgeframes += frames;
}
if (edgeframes != nodes[E].t - (size_t) nodes[S].t)
{
char msg[128];
sprintf (msg, "\n-- where edgeframes=%d != (nodes[E].t - nodes[S].t=%d), the gap is %d.", (int) edgeframes, (int) (nodes[E].t - (size_t) nodes[S].t), (int) (edgeframes + nodes[S].t - nodes[E].t));
throw std::runtime_error ("lattice: alignment info duration mismatches edge duration: " + string (*iter) + msg);
}
}
if (iter != lines.end())
throw std::runtime_error ("lattice: unexpected garbage at end of lattice: " + string (*iter));
checklattice();
// create more efficient storage for alignments
const size_t spunit = tryfind (unitmap, "sp", SIZE_MAX);
builduniquealignments (spunit);
showstats();
}
// construct a numerator lattice from an MLF entry
// The lattice is expected to be freshly constructed (I did not bother to check).
void lattice::frommlf (const wstring & key, const std::unordered_map<std::string,size_t> & unitmap,
const msra::asr::htkmlfreader<msra::asr::htkmlfentry,lattice::htkmlfwordsequence> & labels,
const msra::lm::CMGramLM & unigram, const msra::lm::CSymbolSet & unigramsymbols)
{
const auto & transcripts = labels.allwordtranscripts(); // (TODO: we could just pass the transcripts map--does not really matter)
// get the labels (state and word)
auto iter = transcripts.find (key);
if (iter == transcripts.end())
throw std::runtime_error ("frommlf: no reference word sequence in MLF for lattice with key " + strfun::utf8 (key));
const auto & transcript = iter->second;
if (transcript.words.size() == 0)
throw std::runtime_error ("frommlf: empty reference word sequence for lattice with key " + strfun::utf8 (key));
// determine unigram scores for all words
vector<float> lmscores (transcript.words.size());
size_t silence = unigramsymbols["!silence"];
size_t lmend = unigramsymbols["</s>"];
size_t sentstart = unigramsymbols["!sent_start"];
size_t sentend = unigramsymbols["!sent_end"];
// create the lattice
nodes.resize (transcript.words.size() +1);
edges.resize (transcript.words.size());
align.reserve (transcript.align.size());
size_t numframes = 0;
foreach_index (j, transcript.words)
{
const auto & w = transcript.words[j];
nodes[j].t = w.firstframe;
auto & e = edges[j];
e.unused = 0;
e.S = j;
e.E = j+1;
if (e.E != j+1)
throw std::runtime_error (msra::strfun::strprintf ("frommlf: too many tokens to be represented as edgeinfo::E in label set: %S", key.c_str()));
e.a = 0.0f; // no ac score
// LM score
// !sent_start and !silence are patched to LM score 0
size_t wid = w.wordindex;
if (wid == sentstart)
{
if (j != 0)
throw std::logic_error ("frommlf: found an !sent_start token not at the first position");
}
else if (wid == sentend)
{
if (j != (int) transcript.words.size()-1)
throw std::logic_error ("frommlf: found an !sent_end token not at the end position");
wid = lmend; // use </s> for score lookup
}
const int iwid = (int) wid;
e.l = (wid != sentstart && wid != silence) ? (float) unigram.score (&iwid, 1) : 0.0f;
// alignment
e.implysp = 0;
e.firstalign = align.size();
auto a = transcript.getaligninfo (j);
align.insert (align.end(), a.begin(), a.end());
foreach_index (k, a)
numframes += a[k].frames;
}
nodes[transcript.words.size()].t = (unsigned short) numframes;
if (nodes[transcript.words.size()].t != numframes)
throw std::runtime_error (msra::strfun::strprintf ("frommlf: too many frames to be represented as nodeinfo::t in label set: %S", key.c_str()));
info.lmf = -1.0f; // indicates not set
info.wp = 0.0f; // not set indicated by lmf < 0
info.numedges = edges.size();
info.numnodes = nodes.size();
info.numframes = numframes;
checklattice();
// create more efficient storage for alignments
const size_t spunit = tryfind (unitmap, "sp", SIZE_MAX);
builduniquealignments (spunit);
showstats();
}
};};

View file

@ -1,479 +0,0 @@
// matrix/cblas-wrappers.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey);
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_
#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1
#include <limits>
#include "matrix/sp-matrix.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/matrix-functions.h"
// Do not include this file directly. It is to be included
// by .cc files in this directory.
namespace kaldi {
inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
const int incY) {
cblas_scopy(N, X, incX, Y, incY);
}
inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
const int incY) {
cblas_dcopy(N, X, incX, Y, incY);
}
inline float cblas_Xasum(const int N, const float *X, const int incX) {
return cblas_sasum(N, X, incX);
}
inline double cblas_Xasum(const int N, const double *X, const int incX) {
return cblas_dasum(N, X, incX);
}
inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
const int incY, const float c, const float s) {
cblas_srot(N, X, incX, Y, incY, c, s);
}
inline void cblas_Xrot(const int N, double *X, const int incX, double *Y,
const int incY, const double c, const double s) {
cblas_drot(N, X, incX, Y, incY, c, s);
}
inline float cblas_Xdot(const int N, const float *const X,
const int incX, const float *const Y,
const int incY) {
return cblas_sdot(N, X, incX, Y, incY);
}
inline double cblas_Xdot(const int N, const double *const X,
const int incX, const double *const Y,
const int incY) {
return cblas_ddot(N, X, incX, Y, incY);
}
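// Why the Xfoo overloads above exist (a sketch, not original source): code
// templated on Real in {float, double} can call BLAS without manual
// dispatch, e.g.
//   template<typename Real>
//   Real SimpleDot(const Real *x, const Real *y, int n) { // hypothetical helper
//     return cblas_Xdot(n, x, 1, y, 1); // resolves to cblas_sdot or cblas_ddot
//   }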
inline void cblas_Xaxpy(const int N, const float alpha, const float *X,
const int incX, float *Y, const int incY) {
cblas_saxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_Xaxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY) {
cblas_daxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_Xscal(const int N, const float alpha, float *data,
const int inc) {
cblas_sscal(N, alpha, data, inc);
}
inline void cblas_Xscal(const int N, const double alpha, double *data,
const int inc) {
cblas_dscal(N, alpha, data, inc);
}
inline void cblas_Xspmv(const float alpha, const int num_rows, const float *Mdata,
const float *v, const int v_inc,
const float beta, float *y, const int y_inc) {
cblas_sspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
}
inline void cblas_Xspmv(const double alpha, const int num_rows, const double *Mdata,
const double *v, const int v_inc,
const double beta, double *y, const int y_inc) {
cblas_dspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
}
inline void cblas_Xtpmv(MatrixTransposeType trans, const float *Mdata,
const int num_rows, float *y, const int y_inc) {
cblas_stpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
inline void cblas_Xtpmv(MatrixTransposeType trans, const double *Mdata,
const int num_rows, double *y, const int y_inc) {
cblas_dtpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
// x = alpha * M * y + beta * x
inline void cblas_Xspmv(MatrixIndexT dim, float alpha, const float *Mdata,
const float *ydata, MatrixIndexT ystride,
float beta, float *xdata, MatrixIndexT xstride) {
cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
ydata, ystride, beta, xdata, xstride);
}
inline void cblas_Xspmv(MatrixIndexT dim, double alpha, const double *Mdata,
const double *ydata, MatrixIndexT ystride,
double beta, double *xdata, MatrixIndexT xstride) {
cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
ydata, ystride, beta, xdata, xstride);
}
// Implements A += alpha * (x y' + y x'); A is symmetric matrix.
inline void cblas_Xspr2(MatrixIndexT dim, float alpha, const float *Xdata,
MatrixIndexT incX, const float *Ydata, MatrixIndexT incY,
float *Adata) {
cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
incX, Ydata, incY, Adata);
}
inline void cblas_Xspr2(MatrixIndexT dim, double alpha, const double *Xdata,
MatrixIndexT incX, const double *Ydata, MatrixIndexT incY,
double *Adata) {
cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
incX, Ydata, incY, Adata);
}
// Implements A += alpha * (x x'); A is symmetric matrix.
inline void cblas_Xspr(MatrixIndexT dim, float alpha, const float *Xdata,
MatrixIndexT incX, float *Adata) {
cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
}
inline void cblas_Xspr(MatrixIndexT dim, double alpha, const double *Xdata,
MatrixIndexT incX, double *Adata) {
cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
}
// sgemv, dgemv: y = alpha M x + beta y.
inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, float alpha, const float *Mdata,
MatrixIndexT stride, const float *xdata,
MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
cblas_sgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
}
inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, double alpha, const double *Mdata,
MatrixIndexT stride, const double *xdata,
MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
cblas_dgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
}
// sgbmv, dgbmv: y = alpha M x + beta y.
inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, MatrixIndexT num_below,
MatrixIndexT num_above, float alpha, const float *Mdata,
MatrixIndexT stride, const float *xdata,
MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
cblas_sgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
incX, beta, ydata, incY);
}
inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, MatrixIndexT num_below,
MatrixIndexT num_above, double alpha, const double *Mdata,
MatrixIndexT stride, const double *xdata,
MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
cblas_dgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
incX, beta, ydata, incY);
}
template<typename Real>
inline void Xgemv_sparsevec(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, Real alpha, const Real *Mdata,
MatrixIndexT stride, const Real *xdata,
MatrixIndexT incX, Real beta, Real *ydata,
MatrixIndexT incY) {
if (trans == kNoTrans) {
if (beta != 1.0) cblas_Xscal(num_rows, beta, ydata, incY);
for (MatrixIndexT i = 0; i < num_cols; i++) {
Real x_i = xdata[i * incX];
if (x_i == 0.0) continue;
// Add to ydata, the i'th column of M, times alpha * x_i
cblas_Xaxpy(num_rows, x_i * alpha, Mdata + i, stride, ydata, incY);
}
} else {
if (beta != 1.0) cblas_Xscal(num_cols, beta, ydata, incY);
for (MatrixIndexT i = 0; i < num_rows; i++) {
Real x_i = xdata[i * incX];
if (x_i == 0.0) continue;
// Add to ydata, the i'th row of M, times alpha * x_i
cblas_Xaxpy(num_cols, x_i * alpha,
Mdata + (i * stride), 1, ydata, incY);
}
}
}
inline void cblas_Xgemm(const float alpha,
MatrixTransposeType transA,
const float *Adata,
MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
MatrixTransposeType transB,
const float *Bdata, MatrixIndexT b_stride,
const float beta,
float *Mdata,
MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
cblas_sgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA),
static_cast<CBLAS_TRANSPOSE>(transB),
num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
alpha, Adata, a_stride, Bdata, b_stride,
beta, Mdata, stride);
}
inline void cblas_Xgemm(const double alpha,
MatrixTransposeType transA,
const double *Adata,
MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
MatrixTransposeType transB,
const double *Bdata, MatrixIndexT b_stride,
const double beta,
double *Mdata,
MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
cblas_dgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA),
static_cast<CBLAS_TRANSPOSE>(transB),
num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
alpha, Adata, a_stride, Bdata, b_stride,
beta, Mdata, stride);
}
inline void cblas_Xsymm(const float alpha,
MatrixIndexT sz,
const float *Adata,MatrixIndexT a_stride,
const float *Bdata,MatrixIndexT b_stride,
const float beta,
float *Mdata, MatrixIndexT stride) {
cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
a_stride, Bdata, b_stride, beta, Mdata, stride);
}
inline void cblas_Xsymm(const double alpha,
MatrixIndexT sz,
const double *Adata,MatrixIndexT a_stride,
const double *Bdata,MatrixIndexT b_stride,
const double beta,
double *Mdata, MatrixIndexT stride) {
cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
a_stride, Bdata, b_stride, beta, Mdata, stride);
}
// ger: M += alpha x y^T.
inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, float alpha,
const float *xdata, MatrixIndexT incX, const float *ydata,
MatrixIndexT incY, float *Mdata, MatrixIndexT stride) {
cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
Mdata, stride);
}
inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, double alpha,
const double *xdata, MatrixIndexT incX, const double *ydata,
MatrixIndexT incY, double *Mdata, MatrixIndexT stride) {
cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
Mdata, stride);
}
// syrk: symmetric rank-k update.
// if trans==kNoTrans, then C = alpha A A^T + beta C
// else C = alpha A^T A + beta C.
// note: dim_c is dim(C), other_dim_a is the "other" dimension of A, i.e.
// num-cols(A) if kNoTrans, or num-rows(A) if kTrans.
// We only need the row-major and lower-triangular option of this, and this
// is hard-coded.
inline void cblas_Xsyrk (
const MatrixTransposeType trans, const MatrixIndexT dim_c,
const MatrixIndexT other_dim_a, const float alpha, const float *A,
const MatrixIndexT a_stride, const float beta, float *C,
const MatrixIndexT c_stride) {
cblas_ssyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
}
inline void cblas_Xsyrk(
const MatrixTransposeType trans, const MatrixIndexT dim_c,
const MatrixIndexT other_dim_a, const double alpha, const double *A,
const MatrixIndexT a_stride, const double beta, double *C,
const MatrixIndexT c_stride) {
cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
}
/// matrix-vector multiply using a banded matrix; we always call this
/// with b = 1 meaning we're multiplying by a diagonal matrix. This is used for
/// elementwise multiplication. We leave some of the arguments out of this
/// wrapper.
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const double *A,
const double alpha,
const double *x,
const double beta,
double *y) {
cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const float *A,
const float alpha,
const float *x,
const float beta,
float *y) {
cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could
/// extend this somehow.
inline void mul_elements(
const MatrixIndexT dim,
const double *a,
double *b) { // does b *= a, elementwise.
double c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
inline void mul_elements(
const MatrixIndexT dim,
const float *a,
float *b) { // does b *= a, elementwise.
float c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
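// Note on the 4-way unrolling above (added commentary): the temporaries
// c1..c4 let four multiplies issue before any store, reducing loop overhead
// and exposing instruction-level parallelism; the trailing loop handles the
// dim % 4 leftover elements.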
// add clapack here
#if !defined(HAVE_ATLAS)
inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {
stptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
}
inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *result) {
dtptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
}
//
inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols,
float *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot,
KaldiBlasInt *result) {
sgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
}
inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols,
double *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot,
KaldiBlasInt *result) {
dgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
}
//
inline void clapack_Xgetri2(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
KaldiBlasInt *pivot, float *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
sgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
}
inline void clapack_Xgetri2(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
KaldiBlasInt *pivot, double *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
dgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
}
//
inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
float *sv, float *Vdata, KaldiBlasInt *vstride,
float *Udata, KaldiBlasInt *ustride, float *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
sgesvd_(v, u,
num_cols, num_rows, Mdata, stride,
sv, Vdata, vstride, Udata, ustride,
p_work, l_work, result);
}
inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
double *sv, double *Vdata, KaldiBlasInt *vstride,
double *Udata, KaldiBlasInt *ustride, double *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
dgesvd_(v, u,
num_cols, num_rows, Mdata, stride,
sv, Vdata, vstride, Udata, ustride,
p_work, l_work, result);
}
//
void inline clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata,
KaldiBlasInt *ipiv, float *work, KaldiBlasInt *result) {
ssptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
}
void inline clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata,
KaldiBlasInt *ipiv, double *work, KaldiBlasInt *result) {
dsptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
}
//
void inline clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata,
KaldiBlasInt *ipiv, KaldiBlasInt *result) {
ssptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
}
void inline clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata,
KaldiBlasInt *ipiv, KaldiBlasInt *result) {
dsptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
}
#else
inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
float *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_sgetrf(CblasColMajor, num_rows, num_cols,
Mdata, stride, pivot);
}
inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
double *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_dgetrf(CblasColMajor, num_rows, num_cols,
Mdata, stride, pivot);
}
//
inline int clapack_Xtrtri(int num_rows, float *Mdata, MatrixIndexT stride) {
return clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
Mdata, stride);
}
inline int clapack_Xtrtri(int num_rows, double *Mdata, MatrixIndexT stride) {
return clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
Mdata, stride);
}
//
inline void clapack_Xgetri(MatrixIndexT num_rows, float *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
}
inline void clapack_Xgetri(MatrixIndexT num_rows, double *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
}
#endif
}
// namespace kaldi
#endif

View file

@ -1,558 +0,0 @@
// matrix/compressed-matrix.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Frantisek Skala
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "matrix/compressed-matrix.h"
#include <algorithm>
namespace kaldi {
template<typename Real>
void CompressedMatrix::CopyFromMat(
const MatrixBase<Real> &mat) {
if (data_ != NULL) {
delete [] static_cast<float*>(data_); // call delete [] because was allocated with new float[]
data_ = NULL;
}
if (mat.NumRows() == 0) { return; } // Zero-size matrix stored as zero pointer.
GlobalHeader global_header;
KALDI_COMPILE_TIME_ASSERT(sizeof(global_header) == 16); // otherwise
// something weird is happening and our code probably won't work or
// won't be robust across platforms.
// Below, the point of the "safety_margin" is that the minimum
// and maximum values in the matrix shouldn't coincide with
// the minimum and maximum ranges of the 16-bit range, because
// this could cause certain problems in ComputeColHeader, where
// we need to ensure that the percentile_0 through percentile_100
// are in strictly increasing order.
float min_value = mat.Min(), max_value = mat.Max();
if (max_value == min_value)
max_value = min_value + (1.0 + fabs(min_value)); // ensure it's strictly
// greater than min_value,
// even if matrix is
// constant.
global_header.min_value = min_value;
global_header.range = max_value - min_value;
// We can't compress the matrix if there are inf's or nan's.
// The caller should check for this first.
KALDI_ASSERT(KALDI_ISFINITE(global_header.min_value) &&
KALDI_ISFINITE(global_header.range));
// Avoid division by zero if the matrix is just a constant:
// make sure max_value > min_value.
if (global_header.range <= 0.0)
global_header.range = 1.0e-05;
global_header.num_rows = mat.NumRows();
global_header.num_cols = mat.NumCols();
int32 data_size = DataSize(global_header);
data_ = AllocateData(data_size);
*(reinterpret_cast<GlobalHeader*>(data_)) = global_header;
PerColHeader *header_data =
reinterpret_cast<PerColHeader*>(static_cast<char*>(data_) +
sizeof(GlobalHeader));
unsigned char *byte_data =
reinterpret_cast<unsigned char*>(header_data + global_header.num_cols);
const Real *matrix_data = mat.Data();
for (int32 col = 0; col < global_header.num_cols; col++) {
CompressColumn(global_header,
matrix_data + col, mat.Stride(),
global_header.num_rows,
header_data, byte_data);
header_data++;
byte_data += global_header.num_rows;
}
}
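For reference, the layout that CopyFromMat just wrote into data_ (and that Write, Read and CopyToMat below all assume) is:
// [GlobalHeader: min_value, range, num_rows, num_cols -- 16 bytes]
// [PerColHeader x num_cols -- four uint16 percentiles, 8 bytes per column]
// [num_rows bytes of quantized values for column 0][column 1] ...
This restates what the pointer arithmetic on header_data and byte_data implements; it adds no new behavior.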
// Instantiate the template for float and double.
template
void CompressedMatrix::CopyFromMat(const MatrixBase<float> &mat);
template
void CompressedMatrix::CopyFromMat(const MatrixBase<double> &mat);
template<typename Real>
CompressedMatrix &CompressedMatrix::operator =(const MatrixBase<Real> &mat) {
this->CopyFromMat(mat);
return *this;
}
// Instantiate the template for float and double.
template
CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<float> &mat);
template
CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<double> &mat);
inline uint16 CompressedMatrix::FloatToUint16(
const GlobalHeader &global_header,
float value) {
float f = (value - global_header.min_value) /
global_header.range;
if (f > 1.0) f = 1.0; // Note: this should not happen.
if (f < 0.0) f = 0.0; // Note: this should not happen.
return static_cast<int>(f * 65535 + 0.499); // + 0.499 is to
// round to closest int; avoids bias.
}
inline float CompressedMatrix::Uint16ToFloat(
const GlobalHeader &global_header,
uint16 value) {
// the constant 1.52590218966964e-05 is 1/65535.
return global_header.min_value
+ global_header.range * 1.52590218966964e-05 * value;
}
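FloatToUint16 and Uint16ToFloat quantize a float into 65536 evenly spaced levels across [min_value, min_value + range], so the round-trip error is at most about half a step, range/(2*65535). A self-contained sketch (illustration only, not part of the original file) that checks this numerically:
#include <algorithm>
#include <cmath>
#include <cstdio>
int main() {
  const float min_value = -1.0f, range = 2.0f;  // a GlobalHeader-style (min, range)
  float worst = 0.0f;
  for (float v = -1.0f; v <= 1.0f; v += 0.001f) {
    float f = (v - min_value) / range;  // normalize to [0, 1]
    unsigned short q = static_cast<unsigned short>(f * 65535 + 0.499f);  // FloatToUint16 logic
    float back = min_value + range * 1.52590218966964e-05f * q;  // Uint16ToFloat logic
    worst = std::max(worst, std::fabs(back - v));
  }
  std::printf("worst error %g, half-step %g\n", worst, range / (2.0f * 65535.0f));
  return 0;
}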
template<typename Real> // static
void CompressedMatrix::ComputeColHeader(
const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, CompressedMatrix::PerColHeader *header) {
KALDI_ASSERT(num_rows > 0);
std::vector<Real> sdata(num_rows); // the sorted data.
for (size_t i = 0, size = sdata.size(); i < size; i++)
sdata[i] = data[i*stride];
if (num_rows >= 5) {
int quarter_nr = num_rows/4;
// std::sort(sdata.begin(), sdata.end());
// The elements at positions 0, quarter_nr,
// 3*quarter_nr, and num_rows-1 need to be in sorted order.
std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
// Now, sdata.begin() + quarter_nr contains the element that would appear
// in sorted order, in that position.
std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
// Now, sdata.begin() and sdata.begin() + quarter_nr contain the elements
// that would appear at those positions in sorted order.
std::nth_element(sdata.begin() + quarter_nr + 1,
sdata.begin() + (3*quarter_nr), sdata.end());
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
// 3*quarter_nr, contain the elements that would appear at those positions
// in sorted order.
std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
sdata.end());
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
// 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear
// at those positions in sorted order.
header->percentile_0 = FloatToUint16(global_header, sdata[0]);
header->percentile_25 = std::max<uint16>(
FloatToUint16(global_header, sdata[quarter_nr]),
header->percentile_0 + static_cast<uint16>(1));
header->percentile_75 = std::max<uint16>(
FloatToUint16(global_header, sdata[3*quarter_nr]),
header->percentile_25 + static_cast<uint16>(1));
header->percentile_100 = std::max<uint16>(
FloatToUint16(global_header, sdata[num_rows-1]),
header->percentile_75 + static_cast<uint16>(1));
} else { // handle this pathological case.
std::sort(sdata.begin(), sdata.end());
// Note: we know num_rows is at least 1.
header->percentile_0 = FloatToUint16(global_header, sdata[0]);
if (num_rows > 1)
header->percentile_25 =
std::max<uint16>(FloatToUint16(global_header, sdata[1]),
header->percentile_0 + 1);
else
header->percentile_25 = header->percentile_0 + 1;
if (num_rows > 2)
header->percentile_75 =
std::max<uint16>(FloatToUint16(global_header, sdata[2]),
header->percentile_25 + 1);
else
header->percentile_75 = header->percentile_25 + 1;
if (num_rows > 3)
header->percentile_100 =
std::max<uint16>(FloatToUint16(global_header, sdata[3]),
header->percentile_75 + 1);
else
header->percentile_100 = header->percentile_75 + 1;
}
}
// static
inline unsigned char CompressedMatrix::FloatToChar(
float p0, float p25, float p75, float p100,
float value) {
int ans;
if (value < p25) { // range [ p0, p25 ) covered by
// characters 0 .. 64. We round to the closest int.
float f = (value - p0) / (p25 - p0);
ans = static_cast<int>(f * 64 + 0.5);
// Note: the checks on the next two lines
// are necessary in pathological cases when all the elements in a row
// are the same and the percentile_* values are separated by one.
if (ans < 0) ans = 0;
if (ans > 64) ans = 64;
} else if (value < p75) { // range [ p25, p75 ) covered
// by characters 64 .. 192. We round to the closest int.
float f = (value - p25) / (p75 - p25);
ans = 64 + static_cast<int>(f * 128 + 0.5);
if (ans < 64) ans = 64;
if (ans > 192) ans = 192;
} else { // range [ p75, p100 ] covered by
// characters 192 .. 255. Note: this last range
// has fewer characters than the left range, because
// we go up to 255, not 256.
float f = (value - p75) / (p100 - p75);
ans = 192 + static_cast<int>(f * 63 + 0.5);
if (ans < 192) ans = 192;
if (ans > 255) ans = 255;
}
return static_cast<unsigned char>(ans);
}
// static
inline float CompressedMatrix::CharToFloat(
float p0, float p25, float p75, float p100,
unsigned char value) {
if (value <= 64) {
return p0 + (p25 - p0) * value * (1/64.0);
} else if (value <= 192) {
return p25 + (p75 - p25) * (value - 64) * (1/128.0);
} else {
return p75 + (p100 - p75) * (value - 192) * (1/63.0);
}
}
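FloatToChar and CharToFloat form a piecewise-linear 8-bit code: 65 levels for [p0, p25), 128 for [p25, p75) and 63 for [p75, p100], so resolution is finest in the middle two quartiles where most of the data lies. A standalone round-trip sketch (illustration only; it duplicates the logic above outside the class):
#include <cstdio>
// Standalone copies of the encode/decode logic above, for illustration.
static unsigned char Encode(float p0, float p25, float p75, float p100, float v) {
  int ans;
  if (v < p25) {
    ans = static_cast<int>((v - p0) / (p25 - p0) * 64 + 0.5);
    ans = ans < 0 ? 0 : (ans > 64 ? 64 : ans);
  } else if (v < p75) {
    ans = 64 + static_cast<int>((v - p25) / (p75 - p25) * 128 + 0.5);
    ans = ans < 64 ? 64 : (ans > 192 ? 192 : ans);
  } else {
    ans = 192 + static_cast<int>((v - p75) / (p100 - p75) * 63 + 0.5);
    ans = ans < 192 ? 192 : (ans > 255 ? 255 : ans);
  }
  return static_cast<unsigned char>(ans);
}
static float Decode(float p0, float p25, float p75, float p100, unsigned char c) {
  if (c <= 64) return p0 + (p25 - p0) * c * (1 / 64.0f);
  if (c <= 192) return p25 + (p75 - p25) * (c - 64) * (1 / 128.0f);
  return p75 + (p100 - p75) * (c - 192) * (1 / 63.0f);
}
int main() {
  const float p0 = 0.0f, p25 = 1.0f, p75 = 3.0f, p100 = 4.0f;
  const float tests[] = {0.1f, 1.7f, 3.9f};
  for (float v : tests) {
    unsigned char c = Encode(p0, p25, p75, p100, v);
    std::printf("%.3f -> %3d -> %.4f\n", v, c, Decode(p0, p25, p75, p100, c));
  }
  return 0;
}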
template<typename Real> // static
void CompressedMatrix::CompressColumn(
const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, CompressedMatrix::PerColHeader *header,
unsigned char *byte_data) {
ComputeColHeader(global_header, data, stride,
num_rows, header);
float p0 = Uint16ToFloat(global_header, header->percentile_0),
p25 = Uint16ToFloat(global_header, header->percentile_25),
p75 = Uint16ToFloat(global_header, header->percentile_75),
p100 = Uint16ToFloat(global_header, header->percentile_100);
for (int32 i = 0; i < num_rows; i++) {
Real this_data = data[i * stride];
byte_data[i] = FloatToChar(p0, p25, p75, p100, this_data);
}
}
// static
void* CompressedMatrix::AllocateData(int32 num_bytes) {
KALDI_ASSERT(num_bytes > 0);
KALDI_COMPILE_TIME_ASSERT(sizeof(float) == 4);
// Round up to a whole number of floats. Note: num_bytes/3 floats is
// 4*num_bytes/3 bytes, so this allocates about a third more than needed
// beyond simple rounding; safe, just not tight.
return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);
}
#define DEBUG_COMPRESSED_MATRIX 0 // Must be zero for Kaldi to work; use 1 only
// for debugging.
void CompressedMatrix::Write(std::ostream &os, bool binary) const {
if (binary) { // Binary-mode write:
WriteToken(os, binary, "CM");
if (data_ != NULL) {
GlobalHeader &h = *reinterpret_cast<GlobalHeader*>(data_);
MatrixIndexT size = DataSize(h); // total size of data in data_
os.write(reinterpret_cast<const char*>(data_), size);
} else { // special case: where data_ == NULL, we treat it as an empty
// matrix.
GlobalHeader h;
h.range = h.min_value = 0.0;
h.num_rows = h.num_cols = 0;
os.write(reinterpret_cast<const char*>(&h), sizeof(h));
}
} else {
// In text mode, just use the same format as a regular matrix.
// This is not compressed.
#if DEBUG_COMPRESSED_MATRIX == 0
Matrix<BaseFloat> temp_mat(this->NumRows(), this->NumCols(),
kUndefined);
this->CopyToMat(&temp_mat);
temp_mat.Write(os, binary);
#else
// Text-mode writing. Only really useful for debug, but we'll implement it.
if (data_ == NULL) {
os << 0.0 << ' ' << 0.0 << ' ' << 0 << ' ' << 0 << '\n';
} else {
GlobalHeader &h = *reinterpret_cast<GlobalHeader*>(data_);
KALDI_ASSERT(h.num_cols != 0);
os << h.min_value << ' ' << h.range << ' ' << h.num_rows << ' ' << h.num_cols << '\n';
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(&h + 1);
unsigned char *c = reinterpret_cast<unsigned char*>(per_col_header + h.num_cols);
for (int32 i = 0; i < h.num_cols; i++, per_col_header++) {
os << per_col_header->percentile_0 << ' ' << per_col_header->percentile_25
<< ' ' << per_col_header->percentile_75
<< ' ' << per_col_header->percentile_100 << '\n';
for (int32 j = 0; j < h.num_rows; j++, c++)
os << static_cast<int>(*c) << ' ';
os << '\n';
}
}
#endif
}
if (os.fail())
KALDI_ERR << "Error writing compressed matrix to stream.";
}
void CompressedMatrix::Read(std::istream &is, bool binary) {
if (data_ != NULL) {
delete [] (static_cast<float*>(data_));
data_ = NULL;
}
if (binary) { // Binary-mode read.
// Caution: the following is not backward compatible; if you were using
// CompressedMatrix before, the old format will not be readable.
int peekval = Peek(is, binary);
if (peekval == 'C') {
ExpectToken(is, binary, "CM");
GlobalHeader h;
is.read(reinterpret_cast<char*>(&h), sizeof(h));
if (is.fail())
KALDI_ERR << "Failed to read header";
if (h.num_cols == 0) { // empty matrix.
return;
}
int32 size = DataSize(h), remaining_size = size - sizeof(GlobalHeader);
data_ = AllocateData(size);
*(reinterpret_cast<GlobalHeader*>(data_)) = h;
is.read(reinterpret_cast<char*>(data_) + sizeof(GlobalHeader),
remaining_size);
} else {
// Assume that what we're reading is a regular Matrix. This might be the
// case if you changed your code, making a Matrix into a CompressedMatrix,
// and you want back-compatibility for reading.
Matrix<BaseFloat> M;
M.Read(is, binary); // This will crash if it was not a Matrix. This might happen,
// for instance, if the CompressedMatrix was written using the
// older code where we didn't write the token "CM", we just
// wrote the binary data directly.
this->CopyFromMat(M);
}
} else { // Text-mode read.
#if DEBUG_COMPRESSED_MATRIX == 0
Matrix<BaseFloat> temp;
temp.Read(is, binary);
this->CopyFromMat(temp);
#else
// The old reading code...
GlobalHeader h;
is >> h.min_value >> h.range >> h.num_rows >> h.num_cols;
if (is.fail())
KALDI_ERR << "Failed to read header.";
if (h.num_cols == 0) { // Empty matrix; null data_ pointer.
return;
}
int32 size = DataSize(h);
data_ = AllocateData(size);
*(reinterpret_cast<GlobalHeader*>(data_)) = h;
PerColHeader *per_col_header =
reinterpret_cast<PerColHeader*>(static_cast<char*>(data_)
+ sizeof(GlobalHeader));
unsigned char *c =
reinterpret_cast<unsigned char*>(per_col_header + h.num_cols);
for (int32 i = 0; i < h.num_cols; i++, per_col_header++) {
is >> per_col_header->percentile_0 >> per_col_header->percentile_25
>> per_col_header->percentile_75 >> per_col_header->percentile_100;
for (int32 j = 0; j < h.num_rows; j++, c++) {
int i;
is >> i;
KALDI_ASSERT(i >= 0 && i <= 255);
*c = static_cast<unsigned char>(i);
}
}
#endif
}
if (is.fail())
KALDI_ERR << "Failed to read data.";
}
template<typename Real>
void CompressedMatrix::CopyToMat(MatrixBase<Real> *mat) const {
if (data_ == NULL) {
KALDI_ASSERT(mat->NumRows() == 0);
KALDI_ASSERT(mat->NumCols() == 0);
} else {
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
int32 num_cols = h->num_cols, num_rows = h->num_rows;
KALDI_ASSERT(mat->NumRows() == num_rows);
KALDI_ASSERT(mat->NumCols() == num_cols);
for (int32 i = 0; i < num_cols; i++, per_col_header++) {
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 j = 0; j < num_rows; j++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*mat)(j, i) = f;
}
}
}
}
// Instantiate the template for float and double.
template
void CompressedMatrix::CopyToMat(MatrixBase<float> *mat) const;
template
void CompressedMatrix::CopyToMat(MatrixBase<double> *mat) const;
template<typename Real>
void CompressedMatrix::CopyRowToVec(MatrixIndexT row,
VectorBase<Real> *v) const {
KALDI_ASSERT(row < this->NumRows());
KALDI_ASSERT(row >= 0);
KALDI_ASSERT(v->Dim() == this->NumCols());
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
byte_data += row; // point to first value we are interested in
for (int32 i = 0; i < h->num_cols;
i++, per_col_header++, byte_data+=h->num_rows) {
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*v)(i) = f;
}
}
template<typename Real>
void CompressedMatrix::CopyColToVec(MatrixIndexT col,
VectorBase<Real> *v) const {
KALDI_ASSERT(col < this->NumCols());
KALDI_ASSERT(col >= 0);
KALDI_ASSERT(v->Dim() == this->NumRows());
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
byte_data += col*h->num_rows; // point to first value in the column we want
per_col_header += col;
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 i = 0; i < h->num_rows; i++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*v)(i) = f;
}
}
// instantiate the templates.
template void
CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<double> *) const;
template void
CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<float> *) const;
template void
CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<double> *) const;
template void
CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<float> *) const;
template<typename Real>
void CompressedMatrix::CopyToMat(int32 row_offset,
int32 column_offset,
MatrixBase<Real> *dest) const {
KALDI_PARANOID_ASSERT(row_offset < this->NumRows());
KALDI_PARANOID_ASSERT(column_offset < this->NumCols());
KALDI_PARANOID_ASSERT(row_offset >= 0);
KALDI_PARANOID_ASSERT(column_offset >= 0);
// "<=", not "<": a full-size copy at offset zero must be allowed.
KALDI_ASSERT(row_offset+dest->NumRows() <= this->NumRows());
KALDI_ASSERT(column_offset+dest->NumCols() <= this->NumCols());
// everything is OK
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
unsigned char *byte_data = reinterpret_cast<unsigned char*>(per_col_header +
h->num_cols);
int32 num_rows = h->num_rows;
int32 tgt_cols = dest->NumCols(), tgt_rows = dest->NumRows();
unsigned char *start_of_subcol = byte_data+row_offset; // skip appropriate
// number of columns
start_of_subcol += column_offset*num_rows; // skip appropriate number of rows
per_col_header += column_offset; // skip the appropriate number of headers
for (int32 i = 0;
i < tgt_cols;
i++, per_col_header++, start_of_subcol+=num_rows) {
byte_data = start_of_subcol;
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 j = 0; j < tgt_rows; j++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*dest)(j, i) = f;
}
}
}
// instantiate the templates.
template void CompressedMatrix::CopyToMat(int32,
int32,
MatrixBase<float> *dest) const;
template void CompressedMatrix::CopyToMat(int32,
int32,
MatrixBase<double> *dest) const;
void CompressedMatrix::Destroy() {
if (data_ != NULL) {
delete [] static_cast<float*>(data_);
data_ = NULL;
}
}
CompressedMatrix::CompressedMatrix(const CompressedMatrix &mat): data_(NULL) {
*this = mat; // use assignment operator.
}
CompressedMatrix &CompressedMatrix::operator = (const CompressedMatrix &mat) {
Destroy(); // now this->data_ == NULL.
if (mat.data_ != NULL) {
MatrixIndexT data_size = DataSize(*static_cast<GlobalHeader*>(mat.data_));
data_ = AllocateData(data_size);
memcpy(static_cast<void*>(data_),
static_cast<void*>(mat.data_),
data_size);
}
return *this;
}
} // namespace kaldi

View file

@ -1,166 +0,0 @@
// matrix/compressed-matrix.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Frantisek Skala
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_
#define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1
#include "kaldi-matrix.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
/// This class does lossy compression of a matrix. It only
/// supports copying to-from a KaldiMatrix. For large matrices,
/// each element is compressed into about one byte, but there
/// is a little overhead on top of that (globally, and also per
/// column).
/// The basic idea is for each column (in the normal configuration)
/// we work out the values at the 0th, 25th, 75th and 100th percentiles
/// and store them as 16-bit integers; we then encode each value in
/// the column as a single byte, in 3 separate ranges with different
/// linear encodings (0th-25th, 25th-75th, 75th-100th).
class CompressedMatrix {
public:
CompressedMatrix(): data_(NULL) { }
~CompressedMatrix() { Destroy(); }
template<typename Real>
CompressedMatrix(const MatrixBase<Real> &mat): data_(NULL) { CopyFromMat(mat); }
/// This will resize *this and copy the contents of mat to *this.
template<typename Real>
void CopyFromMat(const MatrixBase<Real> &mat);
CompressedMatrix(const CompressedMatrix &mat);
CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator.
template<typename Real>
CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator.
// Note: mat must have the correct size, CopyToMat no longer attempts
// to resize the matrix
template<typename Real>
void CopyToMat(MatrixBase<Real> *mat) const;
void Write(std::ostream &os, bool binary) const;
void Read(std::istream &is, bool binary);
/// Returns number of rows (or zero for empty matrix).
inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 :
(*reinterpret_cast<GlobalHeader*>(data_)).num_rows; }
/// Returns number of columns (or zero for empty matrix).
inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 :
(*reinterpret_cast<GlobalHeader*>(data_)).num_cols; }
/// Copies row #row of the matrix into vector v.
/// Note: v must have same size as #cols.
template<typename Real>
void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const;
/// Copies column #col of the matrix into vector v.
/// Note: v must have same size as #rows.
template<typename Real>
void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const;
/// Copies submatrix of compressed matrix into matrix dest.
/// The submatrix starts at row row_offset and column column_offset, and its
/// size is defined by the size of the provided matrix dest.
template<typename Real>
void CopyToMat(int32 row_offset,
int32 column_offset,
MatrixBase<Real> *dest) const;
void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); }
friend class Matrix<float>;
friend class Matrix<double>;
private:
// allocates data using new [], ensures byte alignment
// sufficient for float.
static void *AllocateData(int32 num_bytes);
struct GlobalHeader {
float min_value;
float range;
int32 num_rows;
int32 num_cols;
};
static MatrixIndexT DataSize(const GlobalHeader &header) {
// Returns size in bytes of the data.
return sizeof(GlobalHeader) +
header.num_cols * (sizeof(PerColHeader) + header.num_rows);
}
struct PerColHeader {
uint16 percentile_0;
uint16 percentile_25;
uint16 percentile_75;
uint16 percentile_100;
};
template<typename Real>
static void CompressColumn(const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, PerColHeader *header,
unsigned char *byte_data);
template<typename Real>
static void ComputeColHeader(const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, PerColHeader *header);
static inline uint16 FloatToUint16(const GlobalHeader &global_header,
float value);
static inline float Uint16ToFloat(const GlobalHeader &global_header,
uint16 value);
static inline unsigned char FloatToChar(float p0, float p25,
float p75, float p100,
float value);
static inline float CharToFloat(float p0, float p25,
float p75, float p100,
unsigned char value);
void Destroy();
void *data_; // first GlobalHeader, then PerColHeader (repeated), then
// the byte data for each column (repeated). Note: don't intersperse
// the byte data with the PerColHeaders, because of alignment issues.
};
/// @} end of \addtogroup matrix_group
} // namespace kaldi
#endif // KALDI_MATRIX_COMPRESSED_MATRIX_H_
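A hedged usage sketch of the interface above (not part of the original sources; it assumes the usual Kaldi headers and the SetRandn() fill helper from kaldi-matrix.h):
#include "matrix/compressed-matrix.h"
void Demo() {
  kaldi::Matrix<float> mat(100, 40);
  mat.SetRandn();  // fill with Gaussian noise (assumed Kaldi helper)
  kaldi::CompressedMatrix cmat(mat);  // lossy compression via CopyFromMat
  kaldi::Matrix<float> out(cmat.NumRows(), cmat.NumCols());
  cmat.CopyToMat(&out);  // note: out must already have the right size
}
For this 100x40 example, DataSize gives 16 + 40*(8 + 100) = 4336 bytes, about 1.08 bytes per element, versus 16000 bytes for the uncompressed floats.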

View file

@ -1,924 +0,0 @@
// matrix/jama-eig.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This file consists of a port and modification of materials from
// JAMA: A Java Matrix Package
// under the following notice: This software is a cooperative product of
// The MathWorks and the National Institute of Standards and Technology (NIST)
// which has been released to the public. This notice and the original code are
// available at http://math.nist.gov/javanumerics/jama/domain.notice
#ifndef KALDI_MATRIX_JAMA_EIG_H_
#define KALDI_MATRIX_JAMA_EIG_H_ 1
#include "matrix/kaldi-matrix.h"
namespace kaldi {
// This class is not to be used externally. See the Eig function in the Matrix
// class in kaldi-matrix.h. This is the external interface.
template<typename Real> class EigenvalueDecomposition {
// This class is based on the EigenvalueDecomposition class from the JAMA
// library (version 1.0.2).
public:
EigenvalueDecomposition(const MatrixBase<Real> &A);
~EigenvalueDecomposition(); // free memory.
void GetV(MatrixBase<Real> *V_out) { // V is what we call P externally; it's the matrix of
// eigenvectors.
KALDI_ASSERT(V_out->NumRows() == static_cast<MatrixIndexT>(n_)
&& V_out->NumCols() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
(*V_out)(i, j) = V(i, j); // V(i, j) is member function.
}
void GetRealEigenvalues(VectorBase<Real> *r_out) {
// returns real part of eigenvalues.
KALDI_ASSERT(r_out->Dim() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
(*r_out)(i) = d_[i];
}
void GetImagEigenvalues(VectorBase<Real> *i_out) {
// returns imaginary part of eigenvalues.
KALDI_ASSERT(i_out->Dim() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
(*i_out)(i) = e_[i];
}
private:
inline Real &H(int r, int c) { return H_[r*n_ + c]; }
inline Real &V(int r, int c) { return V_[r*n_ + c]; }
// complex division
inline static void cdiv(Real xr, Real xi, Real yr, Real yi, Real *cdivr, Real *cdivi) {
Real r, d;
if (std::abs(yr) > std::abs(yi)) {
r = yi/yr;
d = yr + r*yi;
*cdivr = (xr + r*xi)/d;
*cdivi = (xi - r*xr)/d;
} else {
r = yr/yi;
d = yi + r*yr;
*cdivr = (r*xr + xi)/d;
*cdivi = (r*xi - xr)/d;
}
}
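cdiv is the classic scaled complex division (often attributed to Smith): dividing through by whichever of yr, yi is larger in magnitude keeps the intermediates in range and avoids the overflow/underflow of the naive formula. As an illustration of what a call inside Hqr2 computes (cdiv is private to the class):
Real cr, ci;
cdiv(1.0, 2.0, 3.0, 4.0, &cr, &ci);  // (1+2i)/(3+4i) = (11+2i)/25: cr == 0.44, ci == 0.08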
// Nonsymmetric reduction from Hessenberg to real Schur form.
void Hqr2 ();
int n_; // matrix dimension.
Real *d_, *e_; // real and imaginary parts of eigenvalues.
Real *V_; // the eigenvectors (P in our external notation)
Real *H_; // the nonsymmetric Hessenberg form.
Real *ort_; // working storage for nonsymmetric algorithm.
// Symmetric Householder reduction to tridiagonal form.
void Tred2 ();
// Symmetric tridiagonal QL algorithm.
void Tql2 ();
// Nonsymmetric reduction to Hessenberg form.
void Orthes ();
};
template class EigenvalueDecomposition<float>; // force instantiation.
template class EigenvalueDecomposition<double>; // force instantiation.
template<typename Real> void EigenvalueDecomposition<Real>::Tred2() {
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int j = 0; j < n_; j++) {
d_[j] = V(n_-1, j);
}
// Householder reduction to tridiagonal form.
for (int i = n_-1; i > 0; i--) {
// Scale to avoid under/overflow.
Real scale = 0.0;
Real h = 0.0;
for (int k = 0; k < i; k++) {
scale = scale + std::abs(d_[k]);
}
if (scale == 0.0) {
e_[i] = d_[i-1];
for (int j = 0; j < i; j++) {
d_[j] = V(i-1, j);
V(i, j) = 0.0;
V(j, i) = 0.0;
}
} else {
// Generate Householder vector.
for (int k = 0; k < i; k++) {
d_[k] /= scale;
h += d_[k] * d_[k];
}
Real f = d_[i-1];
Real g = std::sqrt(h);
if (f > 0) {
g = -g;
}
e_[i] = scale * g;
h = h - f * g;
d_[i-1] = f - g;
for (int j = 0; j < i; j++) {
e_[j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for (int j = 0; j < i; j++) {
f = d_[j];
V(j, i) = f;
g = e_[j] + V(j, j) * f;
for (int k = j+1; k <= i-1; k++) {
g += V(k, j) * d_[k];
e_[k] += V(k, j) * f;
}
e_[j] = g;
}
f = 0.0;
for (int j = 0; j < i; j++) {
e_[j] /= h;
f += e_[j] * d_[j];
}
Real hh = f / (h + h);
for (int j = 0; j < i; j++) {
e_[j] -= hh * d_[j];
}
for (int j = 0; j < i; j++) {
f = d_[j];
g = e_[j];
for (int k = j; k <= i-1; k++) {
V(k, j) -= (f * e_[k] + g * d_[k]);
}
d_[j] = V(i-1, j);
V(i, j) = 0.0;
}
}
d_[i] = h;
}
// Accumulate transformations.
for (int i = 0; i < n_-1; i++) {
V(n_-1, i) = V(i, i);
V(i, i) = 1.0;
Real h = d_[i+1];
if (h != 0.0) {
for (int k = 0; k <= i; k++) {
d_[k] = V(k, i+1) / h;
}
for (int j = 0; j <= i; j++) {
Real g = 0.0;
for (int k = 0; k <= i; k++) {
g += V(k, i+1) * V(k, j);
}
for (int k = 0; k <= i; k++) {
V(k, j) -= g * d_[k];
}
}
}
for (int k = 0; k <= i; k++) {
V(k, i+1) = 0.0;
}
}
for (int j = 0; j < n_; j++) {
d_[j] = V(n_-1, j);
V(n_-1, j) = 0.0;
}
V(n_-1, n_-1) = 1.0;
e_[0] = 0.0;
}
template<typename Real> void EigenvalueDecomposition<Real>::Tql2() {
// This is derived from the Algol procedures tql2, by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int i = 1; i < n_; i++) {
e_[i-1] = e_[i];
}
e_[n_-1] = 0.0;
Real f = 0.0;
Real tst1 = 0.0;
Real eps = std::numeric_limits<Real>::epsilon();
for (int l = 0; l < n_; l++) {
// Find small subdiagonal element
tst1 = std::max(tst1, std::abs(d_[l]) + std::abs(e_[l]));
int m = l;
while (m < n_) {
if (std::abs(e_[m]) <= eps*tst1) {
break;
}
m++;
}
// If m == l, d_[l] is an eigenvalue,
// otherwise, iterate.
if (m > l) {
int iter = 0;
do {
iter = iter + 1; // (Could check iteration count here.)
// Compute implicit shift
Real g = d_[l];
Real p = (d_[l+1] - g) / (2.0 * e_[l]);
Real r = Hypot(p, static_cast<Real>(1.0)); // This is a Kaldi version of hypot that works with templates.
if (p < 0) {
r = -r;
}
d_[l] = e_[l] / (p + r);
d_[l+1] = e_[l] * (p + r);
Real dl1 = d_[l+1];
Real h = g - d_[l];
for (int i = l+2; i < n_; i++) {
d_[i] -= h;
}
f = f + h;
// Implicit QL transformation.
p = d_[m];
Real c = 1.0;
Real c2 = c;
Real c3 = c;
Real el1 = e_[l+1];
Real s = 0.0;
Real s2 = 0.0;
for (int i = m-1; i >= l; i--) {
c3 = c2;
c2 = c;
s2 = s;
g = c * e_[i];
h = c * p;
r = Hypot(p, e_[i]); // This is a Kaldi version of Hypot that works with templates.
e_[i+1] = s * r;
s = e_[i] / r;
c = p / r;
p = c * d_[i] - s * g;
d_[i+1] = h + s * (c * g + s * d_[i]);
// Accumulate transformation.
for (int k = 0; k < n_; k++) {
h = V(k, i+1);
V(k, i+1) = s * V(k, i) + c * h;
V(k, i) = c * V(k, i) - s * h;
}
}
p = -s * s2 * c3 * el1 * e_[l] / dl1;
e_[l] = s * p;
d_[l] = c * p;
// Check for convergence.
} while (std::abs(e_[l]) > eps*tst1);
}
d_[l] = d_[l] + f;
e_[l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
for (int i = 0; i < n_-1; i++) {
int k = i;
Real p = d_[i];
for (int j = i+1; j < n_; j++) {
if (d_[j] < p) {
k = j;
p = d_[j];
}
}
if (k != i) {
d_[k] = d_[i];
d_[i] = p;
for (int j = 0; j < n_; j++) {
p = V(j, i);
V(j, i) = V(j, k);
V(j, k) = p;
}
}
}
}
template<typename Real>
void EigenvalueDecomposition<Real>::Orthes() {
// This is derived from the Algol procedures orthes and ortran,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutines in EISPACK.
int low = 0;
int high = n_-1;
for (int m = low+1; m <= high-1; m++) {
// Scale column.
Real scale = 0.0;
for (int i = m; i <= high; i++) {
scale = scale + std::abs(H(i, m-1));
}
if (scale != 0.0) {
// Compute Householder transformation.
Real h = 0.0;
for (int i = high; i >= m; i--) {
ort_[i] = H(i, m-1)/scale;
h += ort_[i] * ort_[i];
}
Real g = std::sqrt(h);
if (ort_[m] > 0) {
g = -g;
}
h = h - ort_[m] * g;
ort_[m] = ort_[m] - g;
// Apply Householder similarity transformation
// H = (I-u*u'/h)*H*(I-u*u'/h)
for (int j = m; j < n_; j++) {
Real f = 0.0;
for (int i = high; i >= m; i--) {
f += ort_[i]*H(i, j);
}
f = f/h;
for (int i = m; i <= high; i++) {
H(i, j) -= f*ort_[i];
}
}
for (int i = 0; i <= high; i++) {
Real f = 0.0;
for (int j = high; j >= m; j--) {
f += ort_[j]*H(i, j);
}
f = f/h;
for (int j = m; j <= high; j++) {
H(i, j) -= f*ort_[j];
}
}
ort_[m] = scale*ort_[m];
H(m, m-1) = scale*g;
}
}
// Accumulate transformations (Algol's ortran).
for (int i = 0; i < n_; i++) {
for (int j = 0; j < n_; j++) {
V(i, j) = (i == j ? 1.0 : 0.0);
}
}
for (int m = high-1; m >= low+1; m--) {
if (H(m, m-1) != 0.0) {
for (int i = m+1; i <= high; i++) {
ort_[i] = H(i, m-1);
}
for (int j = m; j <= high; j++) {
Real g = 0.0;
for (int i = m; i <= high; i++) {
g += ort_[i] * V(i, j);
}
// Double division avoids possible underflow
g = (g / ort_[m]) / H(m, m-1);
for (int i = m; i <= high; i++) {
V(i, j) += g * ort_[i];
}
}
}
}
}
template<typename Real> void EigenvalueDecomposition<Real>::Hqr2() {
// This is derived from the Algol procedure hqr2,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
int nn = n_;
int n = nn-1;
int low = 0;
int high = nn-1;
Real eps = std::numeric_limits<Real>::epsilon();
Real exshift = 0.0;
Real p = 0, q = 0, r = 0, s = 0, z = 0, t, w, x, y;
// Store roots isolated by balanc and compute matrix norm
Real norm = 0.0;
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
d_[i] = H(i, i);
e_[i] = 0.0;
}
for (int j = std::max(i-1, 0); j < nn; j++) {
norm = norm + std::abs(H(i, j));
}
}
// Outer loop over eigenvalue index
int iter = 0;
while (n >= low) {
// Look for single small sub-diagonal element
int l = n;
while (l > low) {
s = std::abs(H(l-1, l-1)) + std::abs(H(l, l));
if (s == 0.0) {
s = norm;
}
if (std::abs(H(l, l-1)) < eps * s) {
break;
}
l--;
}
// Check for convergence
// One root found
if (l == n) {
H(n, n) = H(n, n) + exshift;
d_[n] = H(n, n);
e_[n] = 0.0;
n--;
iter = 0;
// Two roots found
} else if (l == n-1) {
w = H(n, n-1) * H(n-1, n);
p = (H(n-1, n-1) - H(n, n)) / 2.0;
q = p * p + w;
z = std::sqrt(std::abs(q));
H(n, n) = H(n, n) + exshift;
H(n-1, n-1) = H(n-1, n-1) + exshift;
x = H(n, n);
// Real pair
if (q >= 0) {
if (p >= 0) {
z = p + z;
} else {
z = p - z;
}
d_[n-1] = x + z;
d_[n] = d_[n-1];
if (z != 0.0) {
d_[n] = x - w / z;
}
e_[n-1] = 0.0;
e_[n] = 0.0;
x = H(n, n-1);
s = std::abs(x) + std::abs(z);
p = x / s;
q = z / s;
r = std::sqrt(p * p+q * q);
p = p / r;
q = q / r;
// Row modification
for (int j = n-1; j < nn; j++) {
z = H(n-1, j);
H(n-1, j) = q * z + p * H(n, j);
H(n, j) = q * H(n, j) - p * z;
}
// Column modification
for (int i = 0; i <= n; i++) {
z = H(i, n-1);
H(i, n-1) = q * z + p * H(i, n);
H(i, n) = q * H(i, n) - p * z;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
z = V(i, n-1);
V(i, n-1) = q * z + p * V(i, n);
V(i, n) = q * V(i, n) - p * z;
}
// Complex pair
} else {
d_[n-1] = x + p;
d_[n] = x + p;
e_[n-1] = z;
e_[n] = -z;
}
n = n - 2;
iter = 0;
// No convergence yet
} else {
// Form shift
x = H(n, n);
y = 0.0;
w = 0.0;
if (l < n) {
y = H(n-1, n-1);
w = H(n, n-1) * H(n-1, n);
}
// Wilkinson's original ad hoc shift
if (iter == 10) {
exshift += x;
for (int i = low; i <= n; i++) {
H(i, i) -= x;
}
s = std::abs(H(n, n-1)) + std::abs(H(n-1, n-2));
x = y = 0.75 * s;
w = -0.4375 * s * s;
}
// MATLAB's new ad hoc shift
if (iter == 30) {
s = (y - x) / 2.0;
s = s * s + w;
if (s > 0) {
s = std::sqrt(s);
if (y < x) {
s = -s;
}
s = x - w / ((y - x) / 2.0 + s);
for (int i = low; i <= n; i++) {
H(i, i) -= s;
}
exshift += s;
x = y = w = 0.964;
}
}
iter = iter + 1; // (Could check iteration count here.)
// Look for two consecutive small sub-diagonal elements
int m = n-2;
while (m >= l) {
z = H(m, m);
r = x - z;
s = y - z;
p = (r * s - w) / H(m+1, m) + H(m, m+1);
q = H(m+1, m+1) - z - r - s;
r = H(m+2, m+1);
s = std::abs(p) + std::abs(q) + std::abs(r);
p = p / s;
q = q / s;
r = r / s;
if (m == l) {
break;
}
if (std::abs(H(m, m-1)) * (std::abs(q) + std::abs(r)) <
eps * (std::abs(p) * (std::abs(H(m-1, m-1)) + std::abs(z) +
std::abs(H(m+1, m+1))))) {
break;
}
m--;
}
for (int i = m+2; i <= n; i++) {
H(i, i-2) = 0.0;
if (i > m+2) {
H(i, i-3) = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for (int k = m; k <= n-1; k++) {
bool notlast = (k != n-1);
if (k != m) {
p = H(k, k-1);
q = H(k+1, k-1);
r = (notlast ? H(k+2, k-1) : 0.0);
x = std::abs(p) + std::abs(q) + std::abs(r);
if (x != 0.0) {
p = p / x;
q = q / x;
r = r / x;
}
}
if (x == 0.0) {
break;
}
s = std::sqrt(p * p + q * q + r * r);
if (p < 0) {
s = -s;
}
if (s != 0) {
if (k != m) {
H(k, k-1) = -s * x;
} else if (l != m) {
H(k, k-1) = -H(k, k-1);
}
p = p + s;
x = p / s;
y = q / s;
z = r / s;
q = q / p;
r = r / p;
// Row modification
for (int j = k; j < nn; j++) {
p = H(k, j) + q * H(k+1, j);
if (notlast) {
p = p + r * H(k+2, j);
H(k+2, j) = H(k+2, j) - p * z;
}
H(k, j) = H(k, j) - p * x;
H(k+1, j) = H(k+1, j) - p * y;
}
// Column modification
for (int i = 0; i <= std::min(n, k+3); i++) {
p = x * H(i, k) + y * H(i, k+1);
if (notlast) {
p = p + z * H(i, k+2);
H(i, k+2) = H(i, k+2) - p * r;
}
H(i, k) = H(i, k) - p;
H(i, k+1) = H(i, k+1) - p * q;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
p = x * V(i, k) + y * V(i, k+1);
if (notlast) {
p = p + z * V(i, k+2);
V(i, k+2) = V(i, k+2) - p * r;
}
V(i, k) = V(i, k) - p;
V(i, k+1) = V(i, k+1) - p * q;
}
} // (s != 0)
} // k loop
} // check convergence
} // while (n >= low)
// Backsubstitute to find vectors of upper triangular form
if (norm == 0.0) {
return;
}
for (n = nn-1; n >= 0; n--) {
p = d_[n];
q = e_[n];
// Real vector
if (q == 0) {
int l = n;
H(n, n) = 1.0;
for (int i = n-1; i >= 0; i--) {
w = H(i, i) - p;
r = 0.0;
for (int j = l; j <= n; j++) {
r = r + H(i, j) * H(j, n);
}
if (e_[i] < 0.0) {
z = w;
s = r;
} else {
l = i;
if (e_[i] == 0.0) {
if (w != 0.0) {
H(i, n) = -r / w;
} else {
H(i, n) = -r / (eps * norm);
}
// Solve real equations
} else {
x = H(i, i+1);
y = H(i+1, i);
q = (d_[i] - p) * (d_[i] - p) + e_[i] * e_[i];
t = (x * s - z * r) / q;
H(i, n) = t;
if (std::abs(x) > std::abs(z)) {
H(i+1, n) = (-r - w * t) / x;
} else {
H(i+1, n) = (-s - y * t) / z;
}
}
// Overflow control
t = std::abs(H(i, n));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
H(j, n) = H(j, n) / t;
}
}
}
}
// Complex vector
} else if (q < 0) {
int l = n-1;
// Last vector component imaginary so matrix is triangular
if (std::abs(H(n, n-1)) > std::abs(H(n-1, n))) {
H(n-1, n-1) = q / H(n, n-1);
H(n-1, n) = -(H(n, n) - p) / H(n, n-1);
} else {
Real cdivr, cdivi;
cdiv(0.0, -H(n-1, n), H(n-1, n-1)-p, q, &cdivr, &cdivi);
H(n-1, n-1) = cdivr;
H(n-1, n) = cdivi;
}
H(n, n-1) = 0.0;
H(n, n) = 1.0;
for (int i = n-2; i >= 0; i--) {
Real ra, sa, vr, vi;
ra = 0.0;
sa = 0.0;
for (int j = l; j <= n; j++) {
ra = ra + H(i, j) * H(j, n-1);
sa = sa + H(i, j) * H(j, n);
}
w = H(i, i) - p;
if (e_[i] < 0.0) {
z = w;
r = ra;
s = sa;
} else {
l = i;
if (e_[i] == 0) {
Real cdivr, cdivi;
cdiv(-ra, -sa, w, q, &cdivr, &cdivi);
H(i, n-1) = cdivr;
H(i, n) = cdivi;
} else {
Real cdivr, cdivi;
// Solve complex equations
x = H(i, i+1);
y = H(i+1, i);
vr = (d_[i] - p) * (d_[i] - p) + e_[i] * e_[i] - q * q;
vi = (d_[i] - p) * 2.0 * q;
if (vr == 0.0 && vi == 0.0) {
vr = eps * norm * (std::abs(w) + std::abs(q) +
std::abs(x) + std::abs(y) + std::abs(z));
}
cdiv(x*r-z*ra+q*sa, x*s-z*sa-q*ra, vr, vi, &cdivr, &cdivi);
H(i, n-1) = cdivr;
H(i, n) = cdivi;
if (std::abs(x) > (std::abs(z) + std::abs(q))) {
H(i+1, n-1) = (-ra - w * H(i, n-1) + q * H(i, n)) / x;
H(i+1, n) = (-sa - w * H(i, n) - q * H(i, n-1)) / x;
} else {
cdiv(-r-y*H(i, n-1), -s-y*H(i, n), z, q, &cdivr, &cdivi);
H(i+1, n-1) = cdivr;
H(i+1, n) = cdivi;
}
}
// Overflow control
t = std::max(std::abs(H(i, n-1)), std::abs(H(i, n)));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
H(j, n-1) = H(j, n-1) / t;
H(j, n) = H(j, n) / t;
}
}
}
}
}
}
// Vectors of isolated roots
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
for (int j = i; j < nn; j++) {
V(i, j) = H(i, j);
}
}
}
// Back transformation to get eigenvectors of original matrix
for (int j = nn-1; j >= low; j--) {
for (int i = low; i <= high; i++) {
z = 0.0;
for (int k = low; k <= std::min(j, high); k++) {
z = z + V(i, k) * H(k, j);
}
V(i, j) = z;
}
}
}
template<typename Real>
EigenvalueDecomposition<Real>::EigenvalueDecomposition(const MatrixBase<Real> &A) {
KALDI_ASSERT(A.NumCols() == A.NumRows() && A.NumCols() >= 1);
n_ = A.NumRows();
V_ = new Real[n_*n_];
d_ = new Real[n_];
e_ = new Real[n_];
H_ = NULL;
ort_ = NULL;
if (A.IsSymmetric(0.0)) {
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
V(i, j) = A(i, j); // Note that V(i, j) is a member function; A(i, j) is an operator
// of the matrix A.
// Tridiagonalize.
Tred2();
// Diagonalize.
Tql2();
} else {
H_ = new Real[n_*n_];
ort_ = new Real[n_];
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
H(i, j) = A(i, j); // as before: H is member function, A(i, j) is operator of matrix.
// Reduce to Hessenberg form.
Orthes();
// Reduce Hessenberg to real Schur form.
Hqr2();
}
}
template<typename Real>
EigenvalueDecomposition<Real>::~EigenvalueDecomposition() {
delete [] d_;
delete [] e_;
delete [] V_;
if (H_) delete [] H_;
if (ort_) delete [] ort_;
}
// see function MatrixBase<Real>::Eig in kaldi-matrix.cc
} // namespace kaldi
#endif // KALDI_MATRIX_JAMA_EIG_H_
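A hedged sketch of how the class above is driven (illustration only; as the comment at the top says, user code should normally go through MatrixBase<Real>::Eig() in kaldi-matrix.h rather than use this directly):
kaldi::Matrix<double> A(4, 4);
A.SetRandn();  // assumed Kaldi helper: random square input
kaldi::EigenvalueDecomposition<double> eig(A);  // runs Tred2/Tql2 or Orthes/Hqr2
kaldi::Matrix<double> P(4, 4);
kaldi::Vector<double> re(4), im(4);
eig.GetV(&P);  // eigenvectors (the matrix called P externally)
eig.GetRealEigenvalues(&re);  // real parts of the eigenvalues
eig.GetImagEigenvalues(&im);  // imaginary parts; zero when A is symmetric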

View file

@ -1,531 +0,0 @@
// matrix/jama-svd.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This file consists of a port and modification of materials from
// JAMA: A Java Matrix Package
// under the following notice: This software is a cooperative product of
// The MathWorks and the National Institute of Standards and Technology (NIST)
// which has been released to the public. This notice and the original code are
// available at http://math.nist.gov/javanumerics/jama/domain.notice
#ifndef KALDI_MATRIX_JAMA_SVD_H_
#define KALDI_MATRIX_JAMA_SVD_H_ 1
#include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/cblas-wrappers.h"
namespace kaldi {
#if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
// using ATLAS as our math library, which doesn't have SVD -> need
// to implement it.
// This routine is a modified form of jama_svd.h, which is part of the TNT
// distribution (it originally comes from JAMA).
/** Singular Value Decomposition.
* <P>
* For an m-by-n matrix A with m >= n, the singular value decomposition is
* an m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and
* an n-by-n orthogonal matrix V so that A = U*S*V'.
* <P>
* The singular values, sigma[k] = S(k, k), are ordered so that
* sigma[0] >= sigma[1] >= ... >= sigma[n-1].
* <P>
* The singular value decomposition always exists, so the constructor will
* never fail. The matrix condition number and the effective numerical
* rank can be computed from this decomposition.
* <p>
* (Adapted from JAMA, a Java Matrix Library, developed jointly
* by The MathWorks and NIST; see http://math.nist.gov/javanumerics/jama).
*/
template<typename Real>
bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
MatrixBase<Real> *U_in,
MatrixBase<Real> *V_in) { // Destructive!
KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
int wantu = (U_in != NULL), wantv = (V_in != NULL);
Matrix<Real> Utmp, Vtmp;
MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
VectorBase<Real> &s = *s_in;
int m = num_rows_, n = num_cols_;
KALDI_ASSERT(m >= n && m != 0 && n != 0);
if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
KALDI_ASSERT((int)s.Dim() == n); // n<=m so n is min.
int nu = n;
U.SetZero(); // make sure all zero.
Vector<Real> e(n);
Vector<Real> work(m);
MatrixBase<Real> &A(*this);
Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
*udata = U.Data(), *vdata = V.Data();
int astride = static_cast<int>(A.Stride()),
ustride = static_cast<int>(U.Stride()),
vstride = static_cast<int>(V.Stride());
int i = 0, j = 0, k = 0;
// Reduce A to bidiagonal form, storing the diagonal elements
// in s and the super-diagonal elements in e.
int nct = std::min(m-1, n);
int nrt = std::max(0, std::min(n-2, m));
for (k = 0; k < std::max(nct, nrt); k++) {
if (k < nct) {
// Compute the transformation for the k-th column and
// place the k-th diagonal in s(k).
// Compute 2-norm of k-th column without under/overflow.
s(k) = 0;
for (i = k; i < m; i++) {
s(k) = hypot(s(k), A(i, k));
}
if (s(k) != 0.0) {
if (A(k, k) < 0.0) {
s(k) = -s(k);
}
for (i = k; i < m; i++) {
A(i, k) /= s(k);
}
A(k, k) += 1.0;
}
s(k) = -s(k);
}
for (j = k+1; j < n; j++) {
if ((k < nct) && (s(k) != 0.0)) {
// Apply the transformation.
Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
adata + astride*k + j, astride);
/*for (i = k; i < m; i++) {
t += adata[i*astride + k]*adata[i*astride + j]; // A(i, k)*A(i, j); // 3
}*/
t = -t/A(k, k);
cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
adata + k*astride + j, astride);
/*for (i = k; i < m; i++) {
adata[i*astride + j] += t*adata[i*astride + k]; // A(i, j) += t*A(i, k); // 5
}*/
}
// Place the k-th row of A into e for the
// subsequent calculation of the row transformation.
e(j) = A(k, j);
}
if (wantu & (k < nct)) {
// Place the transformation in U for subsequent back
// multiplication.
for (i = k; i < m; i++) {
U(i, k) = A(i, k);
}
}
if (k < nrt) {
// Compute the k-th row transformation and place the
// k-th super-diagonal in e(k).
// Compute 2-norm without under/overflow.
e(k) = 0;
for (i = k+1; i < n; i++) {
e(k) = hypot(e(k), e(i));
}
if (e(k) != 0.0) {
if (e(k+1) < 0.0) {
e(k) = -e(k);
}
for (i = k+1; i < n; i++) {
e(i) /= e(k);
}
e(k+1) += 1.0;
}
e(k) = -e(k);
if ((k+1 < m) & (e(k) != 0.0)) {
// Apply the transformation.
for (i = k+1; i < m; i++) {
work(i) = 0.0;
}
for (j = k+1; j < n; j++) {
for (i = k+1; i < m; i++) {
workdata[i] += edata[j] * adata[i*astride + j]; // work(i) += e(j)*A(i, j); // 5
}
}
for (j = k+1; j < n; j++) {
Real t(-e(j)/e(k+1));
cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
adata + (k+1)*astride + j, astride);
/*
for (i = k+1; i < m; i++) {
adata[i*astride + j] += t*workdata[i]; // A(i, j) += t*work(i); // 5
}*/
}
}
if (wantv) {
// Place the transformation in V for subsequent
// back multiplication.
for (i = k+1; i < n; i++) {
V(i, k) = e(i);
}
}
}
}
// Set up the final bidiagonal matrix of order p.
int p = std::min(n, m+1);
if (nct < n) {
s(nct) = A(nct, nct);
}
if (m < p) {
s(p-1) = 0.0;
}
if (nrt+1 < p) {
e(nrt) = A(nrt, p-1);
}
e(p-1) = 0.0;
// If required, generate U.
if (wantu) {
for (j = nct; j < nu; j++) {
for (i = 0; i < m; i++) {
U(i, j) = 0.0;
}
U(j, j) = 1.0;
}
for (k = nct-1; k >= 0; k--) {
if (s(k) != 0.0) {
for (j = k+1; j < nu; j++) {
Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
//for (i = k; i < m; i++) {
// t += udata[i*ustride + k]*udata[i*ustride + j]; // t += U(i, k)*U(i, j); // 8
// }
t = -t/U(k, k);
cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
udata + k*ustride + j, ustride);
/*for (i = k; i < m; i++) {
udata[i*ustride + j] += t*udata[i*ustride + k]; // U(i, j) += t*U(i, k); // 4
}*/
}
for (i = k; i < m; i++ ) {
U(i, k) = -U(i, k);
}
U(k, k) = 1.0 + U(k, k);
for (i = 0; i < k-1; i++) {
U(i, k) = 0.0;
}
} else {
for (i = 0; i < m; i++) {
U(i, k) = 0.0;
}
U(k, k) = 1.0;
}
}
}
// If required, generate V.
if (wantv) {
for (k = n-1; k >= 0; k--) {
if ((k < nrt) & (e(k) != 0.0)) {
for (j = k+1; j < nu; j++) {
Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
vdata + (k+1)*vstride + j, vstride);
/*Real t (0.0);
for (i = k+1; i < n; i++) {
t += vdata[i*vstride + k]*vdata[i*vstride + j]; // t += V(i, k)*V(i, j); // 7
}*/
t = -t/V(k+1, k);
cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
vdata + (k+1)*vstride + j, vstride);
/*for (i = k+1; i < n; i++) {
vdata[i*vstride + j] += t*vdata[i*vstride + k]; // V(i, j) += t*V(i, k); // 7
}*/
}
}
for (i = 0; i < n; i++) {
V(i, k) = 0.0;
}
V(k, k) = 1.0;
}
}
// Main iteration loop for the singular values.
int pp = p-1;
int iter = 0;
// note: -52.0 is from Jama code; the -23 is the extension
// to float, because mantissa length in (double, float)
// is (52, 23) bits respectively.
Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
// Note: the -966 was taken from Jama code, but the -120 is a guess
// of how to extend this to float... the exponent in double goes
// from -1022 .. 1023, and in float from -126..127. I'm not sure
// what the significance of 966 is, so -120 just represents a number
// that's a bit less negative than -126. If we get convergence
// failure in float only, this may mean that we have to make the
// -120 value less negative.
Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 ));
while (p > 0) {
int k = 0;
int kase = 0;
if (iter == 500 || iter == 750) {
KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
eps = pow(static_cast<Real>(0.8), eps);
tiny = pow(static_cast<Real>(0.8), tiny);
}
if (iter > 1000) {
KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
return false;
}
// This section of the program inspects for
// negligible elements in the s and e arrays. On
// completion the variables kase and k are set as follows.
// kase = 1 if s(p) and e(k-1) are negligible and k < p
// kase = 2 if s(k) is negligible and k < p
// kase = 3 if e(k-1) is negligible, k < p, and
// s(k), ..., s(p) are not negligible (qr step).
// kase = 4 if e(p-1) is negligible (convergence).
for (k = p-2; k >= -1; k--) {
if (k == -1) {
break;
}
if (std::abs(e(k)) <=
tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
e(k) = 0.0;
break;
}
}
if (k == p-2) {
kase = 4;
} else {
int ks;
for (ks = p-1; ks >= k; ks--) {
if (ks == k) {
break;
}
Real t( (ks != p ? std::abs(e(ks)) : 0.) +
(ks != k+1 ? std::abs(e(ks-1)) : 0.));
if (std::abs(s(ks)) <= tiny + eps*t) {
s(ks) = 0.0;
break;
}
}
if (ks == k) {
kase = 3;
} else if (ks == p-1) {
kase = 1;
} else {
kase = 2;
k = ks;
}
}
k++;
// Perform the task indicated by kase.
switch (kase) {
// Deflate negligible s(p).
case 1: {
Real f(e(p-2));
e(p-2) = 0.0;
for (j = p-2; j >= k; j--) {
Real t( hypot(s(j), f));
Real cs(s(j)/t);
Real sn(f/t);
s(j) = t;
if (j != k) {
f = -sn*e(j-1);
e(j-1) = cs*e(j-1);
}
if (wantv) {
for (i = 0; i < n; i++) {
t = cs*V(i, j) + sn*V(i, p-1);
V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
V(i, j) = t;
}
}
}
}
break;
// Split at negligible s(k).
case 2: {
Real f(e(k-1));
e(k-1) = 0.0;
for (j = k; j < p; j++) {
Real t(hypot(s(j), f));
Real cs( s(j)/t);
Real sn(f/t);
s(j) = t;
f = -sn*e(j);
e(j) = cs*e(j);
if (wantu) {
for (i = 0; i < m; i++) {
t = cs*U(i, j) + sn*U(i, k-1);
U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
U(i, j) = t;
}
}
}
}
break;
// Perform one qr step.
case 3: {
// Calculate the shift.
Real scale = std::max(std::max(std::max(std::max(
std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
std::abs(s(k))), std::abs(e(k)));
Real sp = s(p-1)/scale;
Real spm1 = s(p-2)/scale;
Real epm1 = e(p-2)/scale;
Real sk = s(k)/scale;
Real ek = e(k)/scale;
Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
Real c = (sp*epm1)*(sp*epm1);
Real shift = 0.0;
if ((b != 0.0) || (c != 0.0)) {
shift = std::sqrt(b*b + c);
if (b < 0.0) {
shift = -shift;
}
shift = c/(b + shift);
}
Real f = (sk + sp)*(sk - sp) + shift;
Real g = sk*ek;
// Chase zeros.
for (j = k; j < p-1; j++) {
Real t = hypot(f, g);
Real cs = f/t;
Real sn = g/t;
if (j != k) {
e(j-1) = t;
}
f = cs*s(j) + sn*e(j);
e(j) = cs*e(j) - sn*s(j);
g = sn*s(j+1);
s(j+1) = cs*s(j+1);
if (wantv) {
cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
/*for (i = 0; i < n; i++) {
t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1]; // t = cs*V(i, j) + sn*V(i, j+1); // 13
vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1]; // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
vdata[i*vstride + j] = t; // V(i, j) = t; // 4
}*/
}
t = hypot(f, g);
cs = f/t;
sn = g/t;
s(j) = t;
f = cs*e(j) + sn*s(j+1);
s(j+1) = -sn*e(j) + cs*s(j+1);
g = sn*e(j+1);
e(j+1) = cs*e(j+1);
if (wantu && (j < m-1)) {
cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
/*for (i = 0; i < m; i++) {
t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1]; // t = cs*U(i, j) + sn*U(i, j+1); // 7
udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1]; // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
udata[i*ustride + j] = t; // U(i, j) = t; // 1
}*/
}
}
e(p-2) = f;
iter = iter + 1;
}
break;
// Convergence.
case 4: {
// Make the singular values positive.
if (s(k) <= 0.0) {
s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
if (wantv) {
for (i = 0; i <= pp; i++) {
V(i, k) = -V(i, k);
}
}
}
// Order the singular values.
while (k < pp) {
if (s(k) >= s(k+1)) {
break;
}
Real t = s(k);
s(k) = s(k+1);
s(k+1) = t;
if (wantv && (k < n-1)) {
for (i = 0; i < n; i++) {
t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
}
}
if (wantu && (k < m-1)) {
for (i = 0; i < m; i++) {
t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
}
}
k++;
}
iter = 0;
p--;
}
break;
}
}
return true;
}
#endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
} // namespace kaldi
#endif // KALDI_MATRIX_JAMA_SVD_H_

View file

@ -1,129 +0,0 @@
// matrix/kaldi-blas.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_BLAS_H_
#define KALDI_MATRIX_KALDI_BLAS_H_
// This file handles the #includes for BLAS, LAPACK and so on.
// It manipulates the declarations into a common format that kaldi can handle.
// However, the kaldi code will check whether HAVE_ATLAS is defined as that
// code is called a bit differently from CLAPACK that comes from other sources.
// There are four alternatives:
// (i) you have ATLAS, which includes the ATLAS implementation of CBLAS
// plus a subset of CLAPACK (but with clapack_ in the function declarations).
// In this case, define HAVE_ATLAS and make sure the relevant directories are
// in the include path.
// (ii) you have CBLAS (some implementation thereof) plus CLAPACK.
// In this case, define HAVE_CLAPACK.
// [Since CLAPACK depends on BLAS, the presence of BLAS is implicit].
// (iii) you have the MKL library, which includes CLAPACK and CBLAS.
// (iv) you have OpenBLAS, which provides CBLAS together with LAPACKE
// (see the HAVE_OPENBLAS branch below). In this case, define HAVE_OPENBLAS.
// Note that if we are using ATLAS, no Svd implementation is supplied,
// so we define HAVE_Svd to be zero and this directs our implementation to
// supply its own "by hand" implementation which is based on TNT code.
#if (defined(HAVE_CLAPACK) && (defined(HAVE_ATLAS) || defined(HAVE_MKL))) \
|| (defined(HAVE_ATLAS) && defined(HAVE_MKL))
#error "Do not define more than one of HAVE_CLAPACK, HAVE_ATLAS and HAVE_MKL"
#endif
#ifdef HAVE_ATLAS
extern "C" {
#include <cblas.h>
#include <clapack.h>
}
#elif defined(HAVE_CLAPACK)
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
typedef __CLPK_integer integer;
typedef __CLPK_logical logical;
typedef __CLPK_real real;
typedef __CLPK_doublereal doublereal;
typedef __CLPK_complex complex;
typedef __CLPK_doublecomplex doublecomplex;
typedef __CLPK_ftnlen ftnlen;
#else
extern "C" {
// May be in /usr/[local]/include if installed; else this uses the one
// from the tools/CLAPACK_include directory.
#include <cblas.h>
#include <f2c.h>
#include <clapack.h>
// get rid of macros from f2c.h -- these are dangerous.
#undef abs
#undef dabs
#undef min
#undef max
#undef dmin
#undef dmax
#undef bit_test
#undef bit_clear
#undef bit_set
}
#endif
#elif defined(HAVE_MKL)
extern "C" {
#include <mkl.h>
}
#elif defined(HAVE_OPENBLAS)
extern "C" {
// getting cblas.h and lapacke.h from <openblas-install-dir>/.
// putting in "" not <> to search -I before system libraries.
#include "cblas.h"
#include "lapacke.h"
#undef I
#undef complex
// get rid of macros from f2c.h -- these are dangerous.
#undef abs
#undef dabs
#undef min
#undef max
#undef dmin
#undef dmax
#undef bit_test
#undef bit_clear
#undef bit_set
}
#else
#error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)"
#endif
#ifdef HAVE_OPENBLAS
typedef int KaldiBlasInt; // try int.
#endif
#ifdef HAVE_CLAPACK
typedef integer KaldiBlasInt;
#endif
#ifdef HAVE_MKL
typedef MKL_INT KaldiBlasInt;
#endif
#ifdef HAVE_ATLAS
// in this case there is no need for KaldiBlasInt-- this typedef is only needed
// for Svd code which is not included in ATLAS (we re-implement it).
#endif
#endif // KALDI_MATRIX_KALDI_BLAS_H_
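A minimal consumer sketch (not from the removed file; the file name and function are hypothetical): whichever backend macro is defined, the header above exposes the same CBLAS C interface, so the call below compiles unchanged against ATLAS, CLAPACK, MKL or OpenBLAS.
// sketch.cc -- compile with exactly one of -DHAVE_ATLAS, -DHAVE_CLAPACK,
// -DHAVE_MKL or -DHAVE_OPENBLAS, and link the matching library.
#include "matrix/kaldi-blas.h"
// cblas_ddot has this same C signature in all four supported backends.
double DotProduct(const double *x, const double *y, int n) {
return cblas_ddot(n, x, 1, y, 1);
}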

View file

@@ -1,103 +0,0 @@
// matrix/kaldi-gpsr-test.cc
// Copyright 2012 Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "gmm/model-test-common.h"
#include "matrix/kaldi-gpsr.h"
#include "util/kaldi-io.h"
using kaldi::int32;
using kaldi::BaseFloat;
namespace ut = kaldi::unittest;
namespace kaldi {
template<typename Real> static void InitRand(VectorBase<Real> *v) {
for (MatrixIndexT i = 0; i < v->Dim(); i++)
(*v)(i) = RandGauss();
}
template<typename Real> static void InitRand(MatrixBase<Real> *M) {
start:
for (MatrixIndexT i = 0; i < M->NumRows(); i++)
for (MatrixIndexT j = 0; j < M->NumCols(); j++)
(*M)(i, j) = RandGauss();
if (M->NumRows() != 0 && M->Cond() > 100) {
KALDI_WARN << "Condition number of random matrix large" << M->Cond()
<< ": trying again (this is normal)";
goto start;
}
}
template<typename Real> static void InitRand(SpMatrix<Real> *M) {
start_sp:
for (MatrixIndexT i = 0; i < M->NumRows(); i++)
for (MatrixIndexT j = 0; j <= i; j++)
(*M)(i, j) = RandGauss();
if (M->NumRows() != 0 && M->Cond() > 100) {
KALDI_WARN << "Condition number of random matrix large" << M->Cond()
<< ": trying again (this is normal)";
goto start_sp;
}
}
template<typename Real> static void UnitTestGpsr() {
for (int32 i = 0; i < 5; i++) {
MatrixIndexT dim1 = (rand() % 10) + 10;
MatrixIndexT dim2 = (rand() % 10) + 10;
Matrix<Real> M(dim1, dim2);
InitRand(&M);
SpMatrix<Real> H(dim2);
H.AddMat2(1.0, M, kTrans, 0.0); // H = M^T M
// InitRand(&H);
// KALDI_LOG << "dim 1 " << dim1 << "; dim 2 " << dim2 << " LD " << H.LogDet()
// << " Cond " << H.Cond() << "\nH " << H;
// KALDI_ASSERT(H.IsPosDef());
Vector<Real> x(dim2);
InitRand(&x);
Vector<Real> g(dim2);
InitRand(&g);
GpsrConfig opts;
opts.debias = (rand()%2 == 0);
Real objf_old = 0.5* VecSpVec(x, H, x) - VecVec(x, g) +
opts.gpsr_tau * x.Norm(1.0);
GpsrBasic(opts, H, g, &x);
Real objf_new = 0.5* VecSpVec(x, H, x) - VecVec(x, g) +
opts.gpsr_tau * x.Norm(1.0);
KALDI_ASSERT(objf_old >= objf_new); // since we are minimizing
KALDI_LOG << "GPSR-basic: objf old = " << objf_old << "; new = " << objf_new;
Vector<Real> x2(x);
GpsrBB(opts, H, g, &x);
Real objf_new_bb = 0.5* VecSpVec(x, H, x) - VecVec(x, g) +
opts.gpsr_tau * x.Norm(1.0);
KALDI_ASSERT(objf_old >= objf_new_bb); // since we are minimizing
KALDI_LOG << "GPSR-BB: objf old = " << objf_old << "; new = " << objf_new_bb;
}
}
}
int main() {
kaldi::g_kaldi_verbose_level = 1;
kaldi::UnitTestGpsr<float>();
kaldi::UnitTestGpsr<double>();
std::cout << "Test OK.\n";
return 0;
}

Просмотреть файл

@ -1,496 +0,0 @@
// matrix/kaldi-gpsr.cc
// Copyright 2010-2012 Liang Lu, Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This is an implementation of the GPSR algorithm. See, Figueiredo, Nowak and
// Wright, "Gradient Projection for Sparse Reconstruction: Application to
// Compressed Sensing and Other Inverse Problems," IEEE Journal of Selected
// Topics in Signal Processing, vol. 1, no. 4, pp. 586-597, 2007.
// http://dx.doi.org/10.1109/JSTSP.2007.910281
#include <algorithm>
#include <string>
#include <vector>
using std::vector;
#include "matrix/kaldi-gpsr.h"
namespace kaldi {
/// This calculates the objective function: \f$ c^T z + 0.5 * z^T B z, \f$
/// where z is formed by stacking u and v, and B = [H -H; -H H].
double GpsrObjective(const SpMatrix<double> &H, const Vector<double> &c,
const Vector<double> &u, const Vector<double> &v) {
KALDI_ASSERT(u.Dim() == v.Dim() && u.Dim() > 0);
KALDI_ASSERT(c.Dim() == 2 * u.Dim());
KALDI_VLOG(2) << "u dim = " << u.Dim() << ", v dim = " << v.Dim()
<< ", c dim = " << c.Dim();
MatrixIndexT dim = u.Dim();
Vector<double> H_x(dim), x(dim);
// x = u - v, where u_i = (x_i)_+; v_i = (-x_i)_+; and (x)_+ = max{0,x}
x.CopyFromVec(u);
x.AddVec(-1.0, v);
// Calculate c^T z = c^T [u^T v^T]^T
double objf = VecVec(c.Range(0, dim), u);
objf += VecVec(c.Range(dim, dim), v);
// Now, calculate the quadratic term: z^T B z = (u-v)^T H (u-v) = x^T H x
H_x.AddSpVec(1.0, H, x, 0.0);
objf += 0.5 * VecVec(x, H_x);
return objf;
}
/// This calculates the gradient: \f$ c + B z, \f$
/// where z is formed by stacking u and v, and B = [H -H; -H H].
void GpsrGradient(const SpMatrix<double> &H, const Vector<double> &c,
const Vector<double> &u, const Vector<double> &v,
Vector<double> *grad_u, Vector<double> *grad_v) {
KALDI_ASSERT(u.Dim() == v.Dim() && u.Dim() > 0);
KALDI_ASSERT(u.Dim() == grad_u->Dim() && v.Dim() == grad_v->Dim());
KALDI_ASSERT(c.Dim() == 2 * u.Dim());
KALDI_VLOG(2) << "u dim = " << u.Dim() << ", v dim = " << v.Dim()
<< ", c dim = " << c.Dim();
MatrixIndexT dim = u.Dim();
Vector<double> H_x(dim), x(dim);
// x = u - v, where u_i = (x_i)_+; v_i = (-x_i)_+; and (x)_+ = max{0,x}
x.CopyFromVec(u);
x.AddVec(-1.0, v);
// To calculate B z = [ H (u-v); -H (u-v) ] = [ H x; -H x ], we only need H x
H_x.AddSpVec(1.0, H, x, 0.0);
grad_u->CopyFromVec(c.Range(0, dim));
grad_u->AddVec(1.0, H_x);
grad_v->CopyFromVec(c.Range(dim, dim));
grad_v->AddVec(-1.0, H_x);
}
/// Returns the initial guess of step size in the feasible direction.
/// This is the exact minimizer of the objective function along the feasible
/// direction, which is the negative gradient projected on to the constraint
/// set, or the non-negative orthant, in this case:
/// \f[ \alpha = \frac{g^T g}{g^T B g}, \f]
/// where g is the projected gradient, formed by stacking the projected
/// gradients for the positive & negative parts (u & v); and B = [H -H; -H H].
double GpsrBasicAlpha(const SpMatrix<double> &H, const Vector<double> &u,
const Vector<double> &v, const Vector<double> &grad_u,
const Vector<double> &grad_v) {
KALDI_ASSERT(H.NumRows() == grad_u.Dim() && grad_u.Dim() == grad_v.Dim() &&
grad_u.Dim() > 0);
KALDI_VLOG(2) << "grad_u dim = " << grad_u.Dim() << ", grad_v dim = "
<< grad_v.Dim() << ", H rows = " << H.NumRows();
MatrixIndexT dim = grad_u.Dim();
// Find the projection of the gradient on the nonnegative orthant, or, more
// precisely, the projection s.t. the next iterate will be in the orthant.
Vector<double> proj_grad_u(dim);
Vector<double> proj_grad_v(dim);
for (MatrixIndexT i = 0; i < dim; i++) {
proj_grad_u(i) = (u(i) > 0 || grad_u(i) < 0)? grad_u(i) : 0;
proj_grad_v(i) = (v(i) > 0 || grad_v(i) < 0)? grad_v(i) : 0;
}
// The numerator: g^T g = g_u^T g_u + g_v^T g_v
double alpha = VecVec(proj_grad_u, proj_grad_u);
alpha += VecVec(proj_grad_v, proj_grad_v);
// The denominator: g^T B g = (g_u - g_v)^T H (g_u - g_v)
Vector<double> diff_g(proj_grad_u);
diff_g.AddVec(-1.0, proj_grad_v);
Vector<double> H_diff_g(dim);
H_diff_g.AddSpVec(1.0, H, diff_g, 0.0);
alpha /= (VecVec(diff_g, H_diff_g) + DBL_EPSILON);
return alpha;
}
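/// A sketch of where the step-size formula above comes from (not in the
/// original source): for the quadratic \f$ F(z) = c^T z + 0.5 z^T B z \f$,
/// moving along the projected gradient g gives \f$ F(z - \alpha g) \f$ with
/// derivative \f$ -g^T (c + B z) + \alpha g^T B g = -g^T g + \alpha g^T B g \f$
/// (treating g as the exact gradient \f$ c + B z \f$), which vanishes at
/// \f$ \alpha = g^T g / (g^T B g) \f$, the value returned above.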
/// This calculates the coefficient for the linear term used in the
/// bound-constrained quadratic program: c = \tau 1_{2n} + [-g; g]
void GpsrCalcLinearCoeff(double tau, const Vector<double> &g,
Vector<double> *c) {
KALDI_ASSERT(c->Dim() == 2 * g.Dim() && g.Dim() != 0);
MatrixIndexT dim = g.Dim();
c->Set(tau);
c->Range(0, dim).AddVec(-1.0, g);
c->Range(dim, dim).AddVec(1.0, g);
}
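/// Why this choice of c recovers the L1-regularized problem (a sketch, not in
/// the original source): with \f$ z = [u; v] \f$, \f$ u, v \ge 0 \f$,
/// \f$ x = u - v \f$ and \f$ B = [H, -H; -H, H] \f$,
/// \f[ c^T z + 0.5 z^T B z = \tau 1^T (u + v) - g^T (u - v)
///     + 0.5 (u - v)^T H (u - v), \f]
/// and at a minimizer u and v have disjoint support, so
/// \f$ 1^T (u + v) = \|x\|_1 \f$ and the whole expression equals
/// \f$ 0.5 x^T H x - g^T x + \tau \|x\|_1 \f$, the objective quoted in
/// kaldi-gpsr.h.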
// This removes the L1 penalty term, and uses conjugate gradient to solve the
// resulting quadratic problem while keeping the zero elements fixed at 0.
double Debias(const GpsrConfig &opts, const SpMatrix<double> &H,
const Vector<double> &g, Vector<double> *x) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
// KALDI_ASSERT(H.IsPosDef() &&
// "Must have positive definite matrix for conjugate gradient.");
MatrixIndexT dim = x->Dim();
Vector<double> x_bias(*x);
Vector<double> nonzero_indices(dim);
// Initialize the index of non-zero elements in x
for (MatrixIndexT i = 0; i < dim; i++)
nonzero_indices(i) = (x_bias(i) == 0)? 0.0 : 1.0;
Vector<double> residual(dim);
Vector<double> conj_direction(dim);
Vector<double> resid_change(dim);
double alpha_cg; // CG step size for iterate: x <- x + \alpha p
double beta_cg; // CG step size for conj. direction: p <- \beta p - r
double resid_prod, resid_prod_new; // inner product of residual vectors
// Calculate the initial residual: r = H x_0 - g
residual.AddSpVec(1.0, H, x_bias, 0.0);
residual.AddVec(-1.0, g);
residual.MulElements(nonzero_indices); // only change non-zero elements of x
conj_direction.CopyFromVec(residual);
conj_direction.Scale(-1.0); // Initial conjugate direction p = -r
resid_prod = VecVec(residual, residual);
// set the convergence threshold for residual
double tol_debias = opts.stop_thresh_debias * VecVec(residual, residual);
for (int32 iter = 0; iter < opts.max_iters_debias; iter++) {
resid_change.AddSpVec(1.0, H, conj_direction, 0.0);
resid_change.MulElements(nonzero_indices); // only change non-zero elements
alpha_cg = resid_prod / VecVec(conj_direction, resid_change);
x_bias.AddVec(alpha_cg, conj_direction);
residual.AddVec(alpha_cg, resid_change);
resid_prod_new = VecVec(residual, residual);
beta_cg = resid_prod_new / resid_prod;
conj_direction.Scale(beta_cg);
conj_direction.AddVec(-1.0, residual);
resid_prod = resid_prod_new;
if (resid_prod < tol_debias) {
KALDI_VLOG(1) << "iter=" << iter << "\t residual =" << resid_prod
<< "\t tol_debias=" << tol_debias;
break;
}
} // end CG iters
x->CopyFromVec(x_bias);
return resid_prod;
}
template<>
double GpsrBasic(const GpsrConfig &opts, const SpMatrix<double> &H,
const Vector<double> &g, Vector<double> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
MatrixIndexT dim = x->Dim();
if (H.IsZero(0.0)) {
KALDI_WARN << "Zero quadratic term in GPSR for " << debug_str
<< ": leaving it unchanged.";
return 0.0;
}
// initialize the positive (u) and negative (v) parts of x, s.t. x = u - v
Vector<double> u(dim, kSetZero);
Vector<double> v(dim, kSetZero);
for (MatrixIndexT i = 0; i < dim; i++) {
if ((*x)(i) > 0) {
u(i) = (*x)(i);
} else {
v(i) = -(*x)(i);
}
}
double tau = opts.gpsr_tau; // May be modified later.
Vector<double> c(2*dim);
GpsrCalcLinearCoeff(tau, g, &c);
double objf_ori = GpsrObjective(H, c, u, v); // the obj. function at start
KALDI_VLOG(2) << "GPSR for " << debug_str << ": tau = " << tau
<< ";\t objf = " << objf_ori;
Vector<double> grad_u(dim);
Vector<double> grad_v(dim);
Vector<double> delta_u(dim);
Vector<double> delta_v(dim);
Vector<double> u_new(dim);
Vector<double> v_new(dim);
double objf_old, objf_new, num_zeros;
bool keep_going = true;
for (int32 iter = 0; keep_going; iter++) {
objf_old = GpsrObjective(H, c, u, v);
GpsrGradient(H, c, u, v, &grad_u, &grad_v);
double alpha = GpsrBasicAlpha(H, u, v, grad_u, grad_v);
if (alpha < opts.alpha_min) alpha = opts.alpha_min;
if (alpha > opts.alpha_max) alpha = opts.alpha_max;
// This is the backtracking line search part:
for (int32 k = 0; k < opts.max_iters_backtrak; k++) {
// Calculate the potential new iterate: [z_k - \alpha_k \grad F(z_k)]_+
u_new.CopyFromVec(u);
u_new.AddVec(-alpha, grad_u);
u_new.ApplyFloor(0.0);
v_new.CopyFromVec(v);
v_new.AddVec(-alpha, grad_v);
v_new.ApplyFloor(0.0);
delta_u.CopyFromVec(u_new);
delta_v.CopyFromVec(v_new);
delta_u.AddVec(-1.0, u);
delta_v.AddVec(-1.0, v);
double delta_objf_apx = opts.gpsr_mu * (VecVec(grad_u, delta_u) +
VecVec(grad_v, delta_v));
objf_new = GpsrObjective(H, c, u_new, v_new);
double delta_objf_real = objf_new - objf_old;
KALDI_VLOG(2) << "GPSR for " << debug_str << ": iter " << iter
<< "; tau = " << tau << ";\t objf = " << objf_new
<< ";\t alpha = " << alpha << ";\t delta_apx = "
<< delta_objf_apx << ";\t delta_real = " << delta_objf_real;
if (delta_objf_real < delta_objf_apx + DBL_EPSILON)
break;
else
alpha *= opts.gpsr_beta;
if (k == opts.max_iters_backtrak - 1) { // Stop further optimization
KALDI_WARN << "Backtracking line search did not decrease objective.";
u_new.CopyFromVec(u);
u_new.ApplyFloor(0.0);
v_new.CopyFromVec(v);
v_new.ApplyFloor(0.0);
delta_u.SetZero();
delta_v.SetZero();
}
} // end of backtracking line search
x->CopyFromVec(u_new);
x->AddVec(-1.0, v_new);
num_zeros = 0;
for (MatrixIndexT i = 0; i < dim; i++)
if ((*x)(i) == 0)
num_zeros++;
// ad hoc way to modify tau, if the solution is too sparse
if ((num_zeros / static_cast<double>(dim)) > opts.max_sparsity) {
std::ostringstream msg;
msg << num_zeros << " out of " << dim << " dimensions set to 0. "
<< "Changing tau from " << tau;
tau *= opts.tau_reduction;
GpsrCalcLinearCoeff(tau, g, &c); // Recalculate c with new tau
double tmp_objf = GpsrObjective(H, c, u, v);
msg << " to " << tau << ".\n\tStarting objective function changed from "
<< objf_ori << " to " << tmp_objf << ".";
KALDI_LOG << "GPSR for " << debug_str << ": " << msg.str();
iter = 0;
keep_going = true;
continue;
}
u.CopyFromVec(u_new);
v.CopyFromVec(v_new);
double delta = (delta_u.Norm(2.0) + delta_v.Norm(2.0)) / x->Norm(2.0);
KALDI_VLOG(1) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", delta = " << delta;
keep_going = (iter < opts.max_iters) && (delta > opts.stop_thresh);
KALDI_VLOG(3) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", value = " << x;
}
if (num_zeros != 0) {
KALDI_LOG << "GPSR for " << debug_str << ": number of 0's = " << num_zeros
<< " out of " << dim << " dimensions.";
}
if (opts.debias && num_zeros != 0) {
double residual = Debias(opts, H, g, x);
KALDI_LOG << "Debiasing: new residual = " << residual;
}
return objf_new - objf_ori;
}
template<>
float GpsrBasic(const GpsrConfig &opts, const SpMatrix<float> &H,
const Vector<float> &g, Vector<float> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
SpMatrix<double> Hd(H);
Vector<double> gd(g);
Vector<double> xd(*x);
float ans = GpsrBasic(opts, Hd, gd, &xd, debug_str);
x->CopyFromVec(xd);
return ans;
}
template<>
double GpsrBB(const GpsrConfig &opts, const SpMatrix<double> &H,
const Vector<double> &g, Vector<double> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
MatrixIndexT dim = x->Dim();
if (H.IsZero(0.0)) {
KALDI_WARN << "Zero quadratic term in GPSR for " << debug_str
<< ": leaving it unchanged.";
return 0.0;
}
// initialize the positive (u) and negative (v) parts of x, s.t. x = u - v
Vector<double> u(dim, kSetZero);
Vector<double> v(dim, kSetZero);
for (MatrixIndexT i = 0; i < dim; i++) {
if ((*x)(i) > 0) {
u(i) = (*x)(i);
} else {
v(i) = -(*x)(i);
}
}
double tau = opts.gpsr_tau; // May be modified later.
Vector<double> c(2*dim);
GpsrCalcLinearCoeff(tau, g, &c);
double objf_ori = GpsrObjective(H, c, u, v); // the obj. function at start
KALDI_VLOG(2) << "GPSR for " << debug_str << ": tau = " << tau
<< ";\t objf = " << objf_ori;
Vector<double> grad_u(dim);
Vector<double> grad_v(dim);
Vector<double> delta_u(dim);
Vector<double> delta_v(dim);
Vector<double> delta_x(dim);
Vector<double> H_delta_x(dim);
Vector<double> u_new(dim);
Vector<double> v_new(dim);
double objf_old, objf_new, num_zeros;
bool keep_going = true;
double alpha = 1.0;
for (int32 iter = 0; keep_going; iter++) {
objf_old = GpsrObjective(H, c, u, v);
GpsrGradient(H, c, u, v, &grad_u, &grad_v);
// Calculate the new step: [z_k - \alpha_k \grad F(z_k)]_+ - z_k
delta_u.CopyFromVec(u);
delta_u.AddVec(-alpha, grad_u);
delta_u.ApplyFloor(0.0);
delta_u.AddVec(-1.0, u);
delta_v.CopyFromVec(v);
delta_v.AddVec(-alpha, grad_v);
delta_v.ApplyFloor(0.0);
delta_v.AddVec(-1.0, v);
delta_x.CopyFromVec(delta_u);
delta_x.AddVec(-1.0, delta_v);
H_delta_x.AddSpVec(1.0, H, delta_x, 0.0);
double dx_H_dx = VecVec(delta_x, H_delta_x);
double lambda = -(VecVec(delta_u, grad_u) + VecVec(delta_v, grad_v))
/ (dx_H_dx + DBL_EPSILON); // step length
if (lambda < 0)
KALDI_WARN << "lambda is less than zero";
if (lambda > 1.0) lambda = 1.0;
// update alpha
alpha = (VecVec(delta_u, delta_u) + VecVec(delta_v, delta_v))
/ (dx_H_dx + DBL_EPSILON);
if (dx_H_dx <= 0) {
KALDI_WARN << "nonpositive curvature detected";
alpha = opts.alpha_max;
}
else if (alpha < opts.alpha_min)
alpha = opts.alpha_min;
else if (alpha > opts.alpha_max) alpha = opts.alpha_max;
u_new.CopyFromVec(delta_u);
u_new.Scale(lambda);
v_new.CopyFromVec(delta_v);
v_new.Scale(lambda);
u_new.AddVec(1.0, u);
v_new.AddVec(1.0, v);
objf_new = GpsrObjective(H, c, u_new, v_new);
double delta_objf = objf_old - objf_new;
KALDI_VLOG(2) << "GPSR for " << debug_str << ": iter " << iter
<< "; tau = " << tau << ";\t objf = " << objf_new
<< ";\t alpha = " << alpha << ";\t delta_real = "
<< delta_objf;
u.CopyFromVec(u_new);
v.CopyFromVec(v_new);
x->CopyFromVec(u);
x->AddVec(-1.0, v);
num_zeros = 0;
for (MatrixIndexT i = 0; i < dim; i++)
if ((*x)(i) == 0)
num_zeros++;
// ad hoc way to modify tau, if the solution is too sparse
if ((num_zeros / static_cast<double>(dim)) > opts.max_sparsity) {
std::ostringstream msg;
msg << num_zeros << " out of " << dim << " dimensions set to 0. "
<< "Changing tau from " << tau;
tau *= opts.tau_reduction;
GpsrCalcLinearCoeff(tau, g, &c); // Recalculate c with new tau
double tmp_objf = GpsrObjective(H, c, u, v);
msg << " to " << tau << ".\n\tStarting objective function changed from "
<< objf_ori << " to " << tmp_objf << ".";
KALDI_LOG << "GPSR for " << debug_str << ": " << msg.str();
iter = 0;
keep_going = true;
continue;
}
double delta = (delta_u.Norm(2.0) + delta_v.Norm(2.0)) / x->Norm(2.0);
KALDI_VLOG(1) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", delta = " << delta;
keep_going = (iter < opts.max_iters) && (delta > opts.stop_thresh);
KALDI_VLOG(3) << "GPSR for " << debug_str << ": iter " << iter
<< ", objf = " << objf_new << ", value = " << x;
}
if (num_zeros != 0) {
KALDI_LOG << "GPSR for " << debug_str << ": number of 0's = " << num_zeros
<< " out of " << dim << " dimensions.";
}
if (opts.debias && num_zeros != 0) {
double residual = Debias(opts, H, g, x);
KALDI_LOG << "Debiasing: new residual = " << residual;
}
return objf_new - objf_ori;
}
template<>
float GpsrBB(const GpsrConfig &opts, const SpMatrix<float> &H,
const Vector<float> &g, Vector<float> *x,
const char *debug_str) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
SpMatrix<double> Hd(H);
Vector<double> gd(g);
Vector<double> xd(*x);
float ans = GpsrBB(opts, Hd, gd, &xd, debug_str);
x->CopyFromVec(xd);
return ans;
}
} // namespace kaldi

Просмотреть файл

@ -1,166 +0,0 @@
// matrix/kaldi-gpsr.h
// Copyright 2012 Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_GPSR_H_
#define KALDI_MATRIX_KALDI_GPSR_H_
#include <string>
#include <vector>
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
#include "itf/options-itf.h"
namespace kaldi {
/// This is an implementation of the GPSR algorithm. See, Figueiredo, Nowak and
/// Wright, "Gradient Projection for Sparse Reconstruction: Application to
/// Compressed Sensing and Other Inverse Problems," IEEE Journal of Selected
/// Topics in Signal Processing, vol. 1, no. 4, pp. 586-597, 2007.
/// http://dx.doi.org/10.1109/JSTSP.2007.910281
/// The GPSR algorithm, described in Figueiredo, et al., 2007, solves:
/// \f[ \min_x 0.5 * ||y - Ax||_2^2 + \tau ||x||_1, \f]
/// where \f$ x \in R^n, y \in R^k \f$, and \f$ A \in R^{n \times k} \f$.
/// In this implementation, we solve:
/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1, \f]
/// which is the more natural form in which such problems arise in our case.
/// Here, \f$ H = A^T A \in R^{n \times n} \f$ and \f$ g = A^T y \in R^n \f$.
/** \struct GpsrConfig
* Configuration variables needed in the GPSR algorithm.
*/
struct GpsrConfig {
bool use_gpsr_bb; ///< Use the Barzilai-Borwein gradient projection method
/// The following options are common to both the basic & Barzilai-Borwein
/// versions of GPSR
double stop_thresh; ///< Stopping threshold
int32 max_iters; ///< Maximum number of iterations
double gpsr_tau; ///< Regularization scale
double alpha_min; ///< Minimum step size in the feasible direction
double alpha_max; ///< Maximum step size in the feasible direction
double max_sparsity; ///< Maximum percentage of dimensions set to 0
double tau_reduction; ///< Multiply tau by this if max_sparsity reached
/// The following options are for the backtracking line search in basic GPSR.
/// Step size reduction factor in backtracking line search. 0 < beta < 1
double gpsr_beta;
/// Improvement factor in backtracking line search, i.e. the new objective
/// function must be less than the old one by mu times the gradient in the
/// direction of the change in x. 0 < mu < 1
double gpsr_mu;
int32 max_iters_backtrak; ///< Max iterations for backtracking line search
bool debias; ///< Do debiasing, i.e. unconstrained optimization at the end
double stop_thresh_debias; ///< Stopping threshold for debiasing stage
int32 max_iters_debias; ///< Maximum number of iterations for debiasing stage
GpsrConfig() {
use_gpsr_bb = true;
stop_thresh = 0.005;
max_iters = 100;
gpsr_tau = 10;
alpha_min = 1.0e-10;
alpha_max = 1.0e+20;
max_sparsity = 0.9;
tau_reduction = 0.8;
gpsr_beta = 0.5;
gpsr_mu = 0.1;
max_iters_backtrak = 50;
debias = false;
stop_thresh_debias = 0.001;
max_iters_debias = 50;
}
void Register(OptionsItf *po);
};
inline void GpsrConfig::Register(OptionsItf *po) {
std::string module = "GpsrConfig: ";
po->Register("use-gpsr-bb", &use_gpsr_bb, module+
"Use the Barzilai-Borwein gradient projection method.");
po->Register("stop-thresh", &stop_thresh, module+
"Stopping threshold for GPSR.");
po->Register("max-iters", &max_iters, module+
"Maximum number of iterations of GPSR.");
po->Register("gpsr-tau", &gpsr_tau, module+
"Regularization scale for GPSR.");
po->Register("alpha-min", &alpha_min, module+
"Minimum step size in feasible direction.");
po->Register("alpha-max", &alpha_max, module+
"Maximum step size in feasible direction.");
po->Register("max-sparsity", &max_sparsity, module+
"Maximum percentage of dimensions set to 0.");
po->Register("tau-reduction", &tau_reduction, module+
"Multiply tau by this if maximum sparsity is reached.");
po->Register("gpsr-beta", &gpsr_beta, module+
"Step size reduction factor in backtracking line search (0<beta<1).");
po->Register("gpsr-mu", &gpsr_mu, module+
"Improvement factor in backtracking line search (0<mu<1).");
po->Register("max-iters-backtrack", &max_iters_backtrak, module+
"Maximum number of iterations of backtracking line search.");
po->Register("debias", &debias, module+
"Do final debiasing step.");
po->Register("stop-thresh-debias", &stop_thresh_debias, module+
"Stopping threshold for debiaisng step.");
po->Register("max-iters-debias", &max_iters_debias, module+
"Maximum number of iterations of debiasing.");
}
/// Solves a quadratic program in \f$ x \f$, with L_1 regularization:
/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1. \f]
/// This is similar to SolveQuadraticProblem() in sp-matrix.h with an added
/// L_1 term.
template<typename Real>
Real Gpsr(const GpsrConfig &opts, const SpMatrix<Real> &H,
const Vector<Real> &g, Vector<Real> *x,
const char *debug_str = "[unknown]") {
if (opts.use_gpsr_bb)
return GpsrBB(opts, H, g, x, debug_str);
else
return GpsrBasic(opts, H, g, x, debug_str);
}
/// This is the basic GPSR algorithm, where the step size is determined by a
/// backtracking line search. The line search is called "Armijo rule along the
/// projection arc" in Bertsekas, Nonlinear Programming, 2nd ed. page 230.
template<typename Real>
Real GpsrBasic(const GpsrConfig &opts, const SpMatrix<Real> &H,
const Vector<Real> &g, Vector<Real> *x,
const char *debug_str = "[unknown]");
/// This is what the paper calls the Barzilai-Borwein variant. It is a
/// constrained Newton's method where the Hessian is approximated by a scaled
/// identity matrix.
template<typename Real>
Real GpsrBB(const GpsrConfig &opts, const SpMatrix<Real> &H,
const Vector<Real> &g, Vector<Real> *x,
const char *debug_str = "[unknown]");
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_GPSR_H_
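A usage sketch for the interface above (the helper name and the numeric values are illustrative, not from the original sources):
// Solve min_x 0.5 x^T H x - g^T x + tau ||x||_1 for a small random problem.
#include "matrix/kaldi-gpsr.h"
void GpsrExample() {
kaldi::GpsrConfig opts;
opts.gpsr_tau = 5.0;       // regularization scale
opts.use_gpsr_bb = false;  // use the basic variant with line search
kaldi::Matrix<double> A(20, 10);
A.SetRandn();
kaldi::SpMatrix<double> H(10);
H.AddMat2(1.0, A, kaldi::kTrans, 0.0);  // H = A^T A, positive semi-definite
kaldi::Vector<double> g(10), x(10);     // x = 0 is the initial guess
g.SetRandn();
kaldi::Gpsr(opts, H, g, &x, "example"); // x now holds the sparse minimizer
}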

View file

@@ -1,62 +0,0 @@
// matrix/kaldi-matrix-inl.h
// Copyright 2009-2011 Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_MATRIX_INL_H_
#define KALDI_MATRIX_KALDI_MATRIX_INL_H_ 1
#include "matrix/kaldi-vector.h"
namespace kaldi {
/// Empty constructor
template<typename Real>
Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
template<>
template<>
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
template<>
template<>
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
template<typename Real>
inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
M.Write(os, false);
return os;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
M.Read(is, false);
return is;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
M.Read(is, false);
return is;
}
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_

Diff not shown because of its large size.

View file

@@ -1,960 +0,0 @@
// matrix/kaldi-matrix.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Lukas Burget;
// Saarland University; Petr Schwarz; Yanmin Qian;
// Karel Vesely; Go Vivace Inc.; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_MATRIX_H_
#define KALDI_MATRIX_KALDI_MATRIX_H_ 1
#include "matrix-common.h"
namespace kaldi {
/// @{ \addtogroup matrix_funcs_scalar
/// We need to declare this here as it will be a friend function.
/// tr(A B), or tr(A B^T).
template<typename Real>
Real TraceMatMat(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
MatrixTransposeType trans = kNoTrans);
/// @}
/// \addtogroup matrix_group
/// @{
/// Base class which provides matrix operations not involving resizing
/// or allocation. Classes Matrix and SubMatrix inherit from it and take care
/// of allocation and resizing.
template<typename Real>
class MatrixBase {
public:
// so this child can access protected members of other instances.
friend class Matrix<Real>;
// friend declarations for CUDA matrices (see ../cudamatrix/)
friend class CuMatrixBase<Real>;
friend class CuMatrix<Real>;
friend class CuSubMatrix<Real>;
friend class CuPackedMatrix<Real>;
friend class PackedMatrix<Real>;
/// Returns number of rows (or zero for empty matrix).
inline MatrixIndexT NumRows() const { return num_rows_; }
/// Returns number of columns (or zero for empty matrix).
inline MatrixIndexT NumCols() const { return num_cols_; }
/// Stride (distance in memory between each row). Will be >= NumCols.
inline MatrixIndexT Stride() const { return stride_; }
/// Returns size in bytes of the data held by the matrix.
size_t SizeInBytes() const {
return static_cast<size_t>(num_rows_) * static_cast<size_t>(stride_) *
sizeof(Real);
}
/// Gives pointer to raw data (const).
inline const Real* Data() const {
return data_;
}
/// Gives pointer to raw data (non-const).
inline Real* Data() { return data_; }
/// Returns pointer to data for one row (non-const)
inline Real* RowData(MatrixIndexT i) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_;
}
/// Returns pointer to data for one row (const)
inline const Real* RowData(MatrixIndexT i) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_;
}
/// Indexing operator, non-const
/// (only checks sizes if compiled with -DKALDI_PARANOID)
inline Real& operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_));
return *(data_ + r * stride_ + c);
}
/// Indexing operator, provided for ease of debugging (gdb doesn't work
/// with parenthesis operator).
Real &Index (MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); }
/// Indexing operator, const
/// (only checks sizes if compiled with -DKALDI_PARANOID)
inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_));
return *(data_ + r * stride_ + c);
}
/* Basic setting-to-special values functions. */
/// Sets matrix to zero.
void SetZero();
/// Sets all elements to a specific value.
void Set(Real);
/// Sets to zero, except ones along diagonal [for non-square matrices too]
void SetUnit();
/// Sets to random values of a normal distribution
void SetRandn();
/// Sets to numbers uniformly distributed on (0, 1)
void SetRandUniform();
/* Copying functions. These do not resize the matrix! */
/// Copy given matrix. (no resize is done).
template<typename OtherReal>
void CopyFromMat(const MatrixBase<OtherReal> & M,
MatrixTransposeType trans = kNoTrans);
/// Copy from compressed matrix.
void CopyFromMat(const CompressedMatrix &M);
/// Copy given spmatrix. (no resize is done).
template<typename OtherReal>
void CopyFromSp(const SpMatrix<OtherReal> &M);
/// Copy given tpmatrix. (no resize is done).
template<typename OtherReal>
void CopyFromTp(const TpMatrix<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);
/// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h
template<typename OtherReal>
void CopyFromMat(const CuMatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);
/// Inverse of vec() operator. Copies vector into matrix, row-by-row.
/// Note that v.Dim() must either equal NumRows()*NumCols() or
/// NumCols()-- this has two modes of operation.
void CopyRowsFromVec(const VectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc
void CopyRowsFromVec(const CuVectorBase<Real> &v);
template<typename OtherReal>
void CopyRowsFromVec(const VectorBase<OtherReal> &v);
/// Copies vector into matrix, column-by-column.
/// Note that v.Dim() must either equal NumRows()*NumCols() or NumRows();
/// this has two modes of operation.
void CopyColsFromVec(const VectorBase<Real> &v);
/// Copy vector into specific column of matrix.
void CopyColFromVec(const VectorBase<Real> &v, const MatrixIndexT col);
/// Copy vector into specific row of matrix.
void CopyRowFromVec(const VectorBase<Real> &v, const MatrixIndexT row);
/// Copy vector into diagonal of matrix.
void CopyDiagFromVec(const VectorBase<Real> &v);
/* Accessing of sub-parts of the matrix. */
/// Return specific row of matrix [const].
inline const SubVector<Real> Row(MatrixIndexT i) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return SubVector<Real>(data_ + (i * stride_), NumCols());
}
/// Return specific row of matrix.
inline SubVector<Real> Row(MatrixIndexT i) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return SubVector<Real>(data_ + (i * stride_), NumCols());
}
/// Return a sub-part of matrix.
inline SubMatrix<Real> Range(const MatrixIndexT row_offset,
const MatrixIndexT num_rows,
const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, row_offset, num_rows,
col_offset, num_cols);
}
inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
const MatrixIndexT num_rows) const {
return SubMatrix<Real>(*this, row_offset, num_rows, 0, num_cols_);
}
inline SubMatrix<Real> ColRange(const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
}
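/// A usage sketch for the three accessors above (values are illustrative):
/// for Matrix<float> M(6, 8), M.RowRange(0, 2) views the first two rows,
/// M.ColRange(4, 4) the last four columns, and M.Range(1, 3, 2, 5) a 3x5
/// block whose top-left corner is M(1, 2). All of these share storage with
/// M, so writing through the SubMatrix modifies M itself.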
/* Various special functions. */
/// Returns sum of all elements in matrix.
Real Sum() const;
/// Returns trace of matrix.
Real Trace(bool check_square = true) const;
// If check_square = true, will crash if matrix is not square.
/// Returns maximum element of matrix.
Real Max() const;
/// Returns minimum element of matrix.
Real Min() const;
/// Element by element multiplication with a given matrix.
void MulElements(const MatrixBase<Real> &A);
/// Divide each element by the corresponding element of a given matrix.
void DivElements(const MatrixBase<Real> &A);
/// Multiply each element with a scalar value.
void Scale(Real alpha);
/// Set, element-by-element, *this = max(*this, A)
void Max(const MatrixBase<Real> &A);
/// Equivalent to (*this) = (*this) * diag(scale). Scaling
/// each column by a scalar taken from that dimension of the vector.
void MulColsVec(const VectorBase<Real> &scale);
/// Equivalent to (*this) = diag(scale) * (*this). Scaling
/// each row by a scalar taken from that dimension of the vector.
void MulRowsVec(const VectorBase<Real> &scale);
/// Divide each row into src.NumCols() groups, and then scale the i'th row's
/// j'th group of elements by src(i, j).
void MulRowsGroupMat(const MatrixBase<Real> &src);
/// Returns logdet of matrix.
Real LogDet(Real *det_sign = NULL) const;
/// matrix inverse.
/// if inverse_needed = false, will fill matrix with garbage.
/// (only useful if logdet wanted).
void Invert(Real *log_det = NULL, Real *det_sign = NULL,
bool inverse_needed = true);
/// matrix inverse [double].
/// if inverse_needed = false, will fill matrix with garbage
/// (only useful if logdet wanted).
/// Does inversion in double precision even if matrix was not double.
void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
bool inverse_needed = true);
/// Inverts all the elements of the matrix
void InvertElements();
/// Transpose the matrix. This one is only
/// applicable to square matrices (the one in the
/// Matrix child class works also for non-square).
void Transpose();
/// Copies column r from column indices[r] of src.
/// As a special case, if indices[r] == -1, sets column r to zero.
/// indices.size() must equal this->NumCols(),
/// all elements of indices must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this->NumRows().
void CopyCols(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Copies row r from row indices[r] of src.
/// As a special case, if indices[r] == -1, sets row r to zero.
/// indices.size() must equal this->NumRows(),
/// all elements of indices must be in [-1, src.NumRows()-1],
/// and src.NumCols() must equal this->NumCols().
void CopyRows(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Applies floor to all matrix elements
void ApplyFloor(Real floor_val);
/// Applies ceiling to all matrix elements
void ApplyCeiling(Real ceiling_val);
/// Calculates log of all the matrix elements
void ApplyLog();
/// Exponentiate each of the elements.
void ApplyExp();
/// Applies power to all matrix elements
void ApplyPow(Real power);
/// Applies the Heaviside step function (x > 0 ? 1 : 0) to all matrix elements
/// Note: in general you can make different choices for x = 0, but for now
/// please leave it as is (i.e. returning zero) because it affects the
/// RectifiedLinearComponent in the neural net code.
void ApplyHeaviside();
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is
/// slightly complicated, due to the need for P to be real. In the symmetric
/// case D is diagonal and real, but in
/// the non-symmetric case there may be complex-conjugate pairs of eigenvalues.
/// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a
/// pair is lambda +- i*mu, D will have a corresponding 2x2 block
/// [lambda, mu; -mu, lambda].
/// Note that if the input matrix (*this) is non-invertible, P may not be invertible
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we have
/// instead (*this) P = P D.
///
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag.
void Eig(MatrixBase<Real> *P,
VectorBase<Real> *eigs_real,
VectorBase<Real> *eigs_imag) const;
/// The Power method attempts to take the matrix to a power using a method that
/// works in general for fractional and negative powers. The input matrix must
/// be invertible and have reasonable condition (or we don't guarantee the
/// results). The method is based on the eigenvalue decomposition. It will
/// return false and leave the matrix unchanged, if at entry the matrix had
/// real negative eigenvalues (or if it had zero eigenvalues and the power was
/// negative).
bool Power(Real pow);
/** Singular value decomposition
Major limitations:
For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return
the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
one on the left is rectangular.
In Svd, *this = U*diag(S)*Vt.
Null pointers for U and/or Vt at input mean we do not want that output. We
expect that S.Dim() == n, U is either NULL or m by n,
and v is either NULL or n by n.
The singular values are not sorted (use SortSvd for that). */
void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt); // Destroys calling matrix.
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already
/// transposed; the normal formulation is U diag(s) V^T.
/// Null pointers for U or V mean we don't want that output (this saves
/// compute). The singular values are not sorted (use SortSvd for that).
void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt) const;
/// Compute SVD but only retain the singular values.
void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
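/// A usage sketch (values are illustrative): for Matrix<double> M(20, 10),
/// declare Vector<double> s(10) and Matrix<double> U(20, 10), Vt(10, 10);
/// M.Svd(&s, &U, &Vt) then satisfies M = U * diag(s) * Vt, and
/// SortSvd(&s, &U, &Vt), declared later in this header, orders the
/// singular values from greatest to least.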
/// Returns smallest singular value.
Real MinSingularValue() const {
Vector<Real> tmp(std::min(NumRows(), NumCols()));
Svd(&tmp);
return tmp.Min();
}
void TestUninitialized() const; // This function is designed so that if any
// element of the matrix is uninitialized memory, valgrind will complain.
/// returns condition number by computing Svd. Works even if cols > rows.
Real Cond() const;
/// Returns true if matrix is Symmetric.
bool IsSymmetric(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is Diagonal.
bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number
/// returns true if matrix is all zeros, but ones on diagonal
/// (not necessarily square).
bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number
/// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm,
/// or just "2-norm".
Real FrobeniusNorm() const;
/// Returns true if ((*this)-other).FrobeniusNorm()
/// <= tol * (*this).FrobeniusNorm().
bool ApproxEqual(const MatrixBase<Real> &other, float tol = 0.01) const;
/// Tests for exact equality. It's usually preferable to use ApproxEqual.
bool Equal(const MatrixBase<Real> &other) const;
/// largest absolute value.
Real LargestAbsElem() const;
/// Returns log(sum(exp())) without exp overflow
/// If prune > 0.0, it uses a pruning beam, discarding
/// terms less than (max - prune). Note: in future
/// we may change this so that if prune = 0.0, it takes
/// the max, so use -1 if you don't want to prune.
Real LogSumExp(Real prune = -1.0) const;
/// Apply soft-max to the collection of all elements of the
/// matrix and return normalizer (log sum of exponentials).
Real ApplySoftMax();
/// Set each element to the sigmoid of the corresponding element of "src".
void Sigmoid(const MatrixBase<Real> &src);
/// Set each element to y = log(1 + exp(x))
void SoftHinge(const MatrixBase<Real> &src);
/// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1/power)
/// where G = x.NumCols() / y.NumCols() must be an integer.
void GroupPnorm(const MatrixBase<Real> &src, Real power);
/// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this has the same dimension as "input", then it sets each element
/// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where
/// "output-elem" is whichever element of output depends on that input element.
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output,
Real power);
/// Set each element to the tanh of the corresponding element of "src".
void Tanh(const MatrixBase<Real> &src);
// Function used in backpropagating derivatives of the sigmoid function:
// element-by-element, set *this = diff * value * (1.0 - value).
void DiffSigmoid(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff);
// Function used in backpropagating derivatives of the tanh function:
// element-by-element, set *this = diff * (1.0 - value^2).
void DiffTanh(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff);
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
* positive semi-definite (check_thresh controls how stringent the check is;
* set it to 2 to ensure it won't ever complain, but it will zero out negative
* dimensions in your matrix).
*/
void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
Real check_thresh = 0.001);
friend Real kaldi::TraceMatMat<Real>(const MatrixBase<Real> &A,
const MatrixBase<Real> &B, MatrixTransposeType trans); // tr (A B)
// so it can get around const restrictions on the pointer to data_.
friend class SubMatrix<Real>;
/// Add a scalar to each element
void Add(const Real alpha);
/// Add a scalar to each diagonal element.
void AddToDiag(const Real alpha);
/// *this += alpha * a * b^T
template<typename OtherReal>
void AddVecVec(const Real alpha, const VectorBase<OtherReal> &a,
const VectorBase<OtherReal> &b);
/// [each row of *this] += alpha * v
template<typename OtherReal>
void AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v);
/// [each col of *this] += alpha * v
template<typename OtherReal>
void AddVecToCols(const Real alpha, const VectorBase<OtherReal> &v);
/// *this += alpha * M [or M^T]
void AddMat(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transA = kNoTrans);
/// *this = beta * *this + alpha * M M^T, for symmetric matrices. It only
/// updates the lower triangle of *this. It will leave the matrix asymmetric;
/// if you need it symmetric as a regular matrix, do CopyLowerToUpper().
void SymAddMat2(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transA, Real beta);
/// *this = beta * *this + alpha * diag(v) * M [or M^T].
/// The same as adding M but scaling each row M_i by v(i).
void AddDiagVecMat(const Real alpha, const VectorBase<Real> &v,
const MatrixBase<Real> &M, MatrixTransposeType transM,
Real beta = 1.0);
/// *this += alpha * S
template<typename OtherReal>
void AddSp(const Real alpha, const SpMatrix<OtherReal> &S);
void AddMatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// *this = a * b / c (by element; when c = 0, *this = a)
void AddMatMatDivMat(const MatrixBase<Real>& A,
const MatrixBase<Real>& B,
const MatrixBase<Real>& C);
/// A version of AddMatMat specialized for when the second argument
/// contains a lot of zeroes.
void AddMatSmat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// A version of AddMatMat specialized for when the first argument
/// contains a lot of zeroes.
void AddSmatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// this <-- beta*this + alpha*A*B*C.
void AddMatMatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const MatrixBase<Real>& C, MatrixTransposeType transC,
const Real beta);
/// this <-- beta*this + alpha*SpA*B.
// This and the routines below are really
// stubs that need to be made more efficient.
void AddSpMat(const Real alpha,
const SpMatrix<Real>& A,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A);
return AddMatMat(alpha, M, kNoTrans, B, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddTpMat(const Real alpha,
const TpMatrix<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A);
return AddMatMat(alpha, M, transA, B, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddMatSp(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const SpMatrix<Real>& B,
const Real beta) {
Matrix<Real> M(B);
return AddMatMat(alpha, A, transA, M, kNoTrans, beta);
}
/// this <-- beta*this + alpha*A*B*C.
void AddSpMatSp(const Real alpha,
const SpMatrix<Real> &A,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const SpMatrix<Real>& C,
const Real beta) {
Matrix<Real> M(A), N(C);
return AddMatMatMat(alpha, M, kNoTrans, B, transB, N, kNoTrans, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddMatTp(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const TpMatrix<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(B);
return AddMatMat(alpha, A, transA, M, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddTpTp(const Real alpha,
const TpMatrix<Real>& A, MatrixTransposeType transA,
const TpMatrix<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A), N(B);
return AddMatMat(alpha, M, transA, N, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
// Unlike the stub routines above, this one is implemented efficiently.
void AddSpSp(const Real alpha,
const SpMatrix<Real>& A, const SpMatrix<Real>& B,
const Real beta);
/// Copy lower triangle to upper triangle (symmetrize)
void CopyLowerToUpper();
/// Copy upper triangle to lower triangle (symmetrize)
void CopyUpperToLower();
/// This function orthogonalizes the rows of a matrix using the Gram-Schmidt
/// process. It is only applicable if NumRows() <= NumCols(). It will use
/// random number generation to fill in rows with something nonzero, in cases
/// where the original matrix was of deficient row rank.
void OrthogonalizeRows();
/// stream read.
/// Use instead of stream>>*this, if you want to add to existing contents.
// Will throw exception on failure.
void Read(std::istream & in, bool binary, bool add = false);
/// write to stream.
void Write(std::ostream & out, bool binary) const;
// Below are internal methods for Svd; the user does not need to know about them.
#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
// protected:
// Should be protected but used directly in testing routine.
// destroys *this!
void LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt);
#else
protected:
// destroys *this!
bool JamaSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *V);
#endif
protected:
/// Initializer, callable only from child.
explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) :
data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
/// Initializer, callable only from child.
/// Empty initializer, for un-initialized matrix.
explicit MatrixBase(): data_(NULL) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Make sure pointers to MatrixBase cannot be deleted.
~MatrixBase() { }
/// A workaround that allows SubMatrix to get a pointer to non-const data
/// for const Matrix. Unfortunately C++ does not allow us to declare a
/// "public const" inheritance or anything like that, so it would require
/// a lot of work to make the SubMatrix class totally const-correct--
/// we would have to override many of the Matrix functions.
inline Real* Data_workaround() const {
return data_;
}
/// data memory area
Real* data_;
/// These attributes store the real matrix size as it is stored in memory,
/// including memory alignment.
MatrixIndexT num_cols_; ///< Number of columns
MatrixIndexT num_rows_; ///< Number of rows
/** True number of columns for the internal matrix. This number may differ
* from num_cols_ as memory alignment might be used. */
MatrixIndexT stride_;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
};
/// A class for storing matrices.
template<typename Real>
class Matrix : public MatrixBase<Real> {
public:
/// Empty constructor.
Matrix();
/// Basic constructor. Sets to zero by default.
/// if set_zero == false, memory contents are undefined.
Matrix(const MatrixIndexT r, const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero):
MatrixBase<Real>() { Resize(r, c, resize_type); }
/// Copy constructor from CUDA matrix
/// This is defined in ../cudamatrix/cu-matrix.h
template<typename OtherReal>
explicit Matrix(const CuMatrixBase<OtherReal> &cu,
MatrixTransposeType trans = kNoTrans);
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(Matrix<Real> *other);
/// Defined in ../cudamatrix/cu-matrix.cc
void Swap(CuMatrix<Real> *mat);
/// Constructor from any MatrixBase. Can also copy with transpose.
/// Allocates new memory.
explicit Matrix(const MatrixBase<Real> & M,
MatrixTransposeType trans = kNoTrans);
/// Same as above, but need to avoid default copy constructor.
Matrix(const Matrix<Real> & M); // (cannot make explicit)
/// Copy constructor: as above, but from another type.
template<typename OtherReal>
explicit Matrix(const MatrixBase<OtherReal> & M,
MatrixTransposeType trans = kNoTrans);
/// Copy constructor taking SpMatrix...
/// It is symmetric, so no option for transpose, and NumRows == NumCols
template<typename OtherReal>
explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
Resize(M.NumRows(), M.NumRows(), kUndefined);
this->CopyFromSp(M);
}
/// Constructor from CompressedMatrix
explicit Matrix(const CompressedMatrix &C);
/// Copy constructor taking TpMatrix...
template <typename OtherReal>
explicit Matrix(const TpMatrix<OtherReal> & M,
MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
if (trans == kNoTrans) {
Resize(M.NumRows(), M.NumCols(), kUndefined);
this->CopyFromTp(M);
} else {
Resize(M.NumCols(), M.NumRows(), kUndefined);
this->CopyFromTp(M, kTrans);
}
}
/// read from stream.
// Unlike the one in the base class, this allows resizing.
void Read(std::istream & in, bool binary, bool add = false);
/// Remove a specified row.
void RemoveRow(MatrixIndexT i);
/// Transpose the matrix. Works for non-square
/// matrices as well as square ones.
void Transpose();
/// Destructor to free matrices.
~Matrix() { Destroy(); }
/// Sets matrix to a specified size (zero is OK as long as both r and c are
/// zero). The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(const MatrixIndexT r,
const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero);
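/// A usage sketch for Resize (values are illustrative): Matrix<float> M;
/// M.Resize(3, 4); gives a 3x4 zero matrix; a later
/// M.Resize(6, 4, kCopyData); keeps the old 3x4 contents in the top rows
/// and zeroes the three new ones.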
/// Assignment operator that takes MatrixBase.
Matrix<Real> &operator = (const MatrixBase<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
MatrixBase<Real>::CopyFromMat(other);
return *this;
}
/// Assignment operator. Needed for inclusion in std::vector.
Matrix<Real> &operator = (const Matrix<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
MatrixBase<Real>::CopyFromMat(other);
return *this;
}
private:
/// Deallocates memory and sets to empty matrix (dimension 0, 0).
void Destroy();
/// Init assumes the current class contents are invalid (i.e. junk or have
/// already been freed), and it sets the matrix to newly allocated memory with
/// the specified number of rows and columns. r == c == 0 is acceptable. The data
/// memory contents will be undefined.
void Init(const MatrixIndexT r,
const MatrixIndexT c);
};
/// @} end "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
/// A structure containing the HTK header.
/// [TODO: change the style of the variables to Kaldi-compliant]
struct HtkHeader {
/// Number of samples.
int32 mNSamples;
/// Sample period.
int32 mSamplePeriod;
/// Sample size
int16 mSampleSize;
/// Sample kind.
uint16 mSampleKind;
};
// Read HTK formatted features from file into matrix.
template<typename Real>
bool ReadHtk(std::istream &is, Matrix<Real> *M, HtkHeader *header_ptr);
// Write (HTK format) features to file from matrix.
template<typename Real>
bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr);
// Write (CMUSphinx format) features to file from matrix.
template<typename Real>
bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M);
/// @} end of "addtogroup matrix_funcs_io"
/**
Sub-matrix representation.
Can work with sub-parts of a matrix using this class.
Note that SubMatrix is not very const-correct-- it allows you to
change the contents of a const Matrix. Be careful!
*/
template<typename Real>
class SubMatrix : public MatrixBase<Real> {
public:
// Initialize a SubMatrix from part of a matrix; this is
// a bit like A(b:c, d:e) in Matlab.
// This initializer is against the proper semantics of "const", since
// SubMatrix can change its contents. It would be hard to implement
// a "const-safe" version of this class.
SubMatrix(const MatrixBase<Real>& T,
const MatrixIndexT ro, // row offset, 0 <= ro < NumRows()
const MatrixIndexT r, // number of rows, r > 0
const MatrixIndexT co, // column offset, 0 <= co < NumCols()
const MatrixIndexT c); // number of columns, c > 0
// This initializer is mostly intended for use in CuMatrix and related
// classes. Be careful!
SubMatrix(Real *data,
MatrixIndexT num_rows,
MatrixIndexT num_cols,
MatrixIndexT stride);
~SubMatrix<Real>() {}
/// This type of constructor is needed for Range() to work [in Matrix base
/// class]. Cannot make it explicit.
SubMatrix<Real> (const SubMatrix &other):
MatrixBase<Real> (other.data_, other.num_cols_, other.num_rows_,
other.stride_) {}
private:
/// Disallow assignment.
SubMatrix<Real> &operator = (const SubMatrix<Real> &other);
};
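// A hedged usage sketch (not in the original header): taking a view of rows
// 1..2 and columns 0..3 of a matrix, like A(2:3, 1:4) in Matlab. Assumes the
// usual Kaldi headers.
//
//   Matrix<float> a(5, 6);
//   a.SetRandn();
//   SubMatrix<float> block(a, 1, 2, 0, 4);  // row offset 1, 2 rows,
//                                           // col offset 0, 4 cols
//   block.Scale(2.0);  // writes through to the underlying data of 'a'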
/// @} End of "addtogroup matrix_funcs_io".
/// \addtogroup matrix_funcs_scalar
/// @{
// Some scalar-valued functions; several of these are traces of matrix products.
template<typename Real>
bool ApproxEqual(const MatrixBase<Real> &A,
const MatrixBase<Real> &B, Real tol = 0.01) {
return A.ApproxEqual(B, tol);
}
template<typename Real>
inline void AssertEqual(MatrixBase<Real> &A, MatrixBase<Real> &B,
float tol = 0.01) {
KALDI_ASSERT(A.ApproxEqual(B, tol));
}
/// Returns trace of matrix.
template <typename Real>
double TraceMat(const MatrixBase<Real> &A) { return A.Trace(); }
/// Returns tr(A B C)
template <typename Real>
Real TraceMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
const MatrixBase<Real> &B, MatrixTransposeType transB,
const MatrixBase<Real> &C, MatrixTransposeType transC);
/// Returns tr(A B C D)
template <typename Real>
Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
const MatrixBase<Real> &B, MatrixTransposeType transB,
const MatrixBase<Real> &C, MatrixTransposeType transC,
const MatrixBase<Real> &D, MatrixTransposeType transD);
/// @} end "addtogroup matrix_funcs_scalar"
/// \addtogroup matrix_funcs_misc
/// @{
/// Function to ensure that SVD is sorted. This function is made as generic as
/// possible, to be applicable to other types of problems. s->Dim() should be
/// the same as U->NumCols(), and we sort s from greatest to least absolute
/// value (if sort_on_absolute_value == true) or greatest to least value
/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
/// exists, around in the same way. Note: the "absolute value" part won't matter
/// if this is an actual SVD, since singular values are non-negative.
template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real>* Vt = NULL,
bool sort_on_absolute_value = true);
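// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: sort the singular values of an SVD in decreasing order,
// permuting U and Vt consistently.
//
//   Matrix<float> m(4, 4), u(4, 4), vt(4, 4);
//   Vector<float> s(4);
//   m.SetRandn();
//   m.Svd(&s, &u, &vt);    // m = u * diag(s) * vt
//   SortSvd(&s, &u, &vt);  // now s(0) >= s(1) >= ...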
/// Creates the eigenvalue matrix D that is part of the decomposition used in Matrix::Eig.
/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding
/// 2x2 block [lambda, mu; -mu, lambda].
/// This function will throw if any complex eigenvalues are not in complex conjugate
/// pairs (or the members of such pairs are not consecutively numbered).
template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag,
MatrixBase<Real> *D);
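// A hedged illustration (not in the original header): for eigenvalues
// 3 and 1 +- 2i (lambda = 1, mu = 2), D would be the 3x3 block-diagonal matrix
//   [ 3  0  0 ]
//   [ 0  1  2 ]
//   [ 0 -2  1 ]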
/// The following function is used in Matrix::Power, and separately tested, so we
/// declare it here mainly for the testing code to see. It takes a complex value to
/// a power using a method that will work for noninteger powers (but will fail if the
/// complex value is real and negative).
template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
/// @} end of addtogroup matrix_funcs_misc
/// \addtogroup matrix_funcs_io
/// @{
template<typename Real>
std::ostream & operator << (std::ostream & Out, const MatrixBase<Real> & M);
template<typename Real>
std::istream & operator >> (std::istream & In, MatrixBase<Real> & M);
// The Matrix read allows resizing, so we override the MatrixBase one.
template<typename Real>
std::istream & operator >> (std::istream & In, Matrix<Real> & M);
template<typename Real>
bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
}
/// @} end of \addtogroup matrix_funcs_io
} // namespace kaldi
// we need to include the implementation and some
// template specializations.
#include "matrix/kaldi-matrix-inl.h"
#endif // KALDI_MATRIX_KALDI_MATRIX_H_


@ -1,58 +0,0 @@
// matrix/kaldi-vector-inl.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This is an internal header file, included by other library headers.
// You should not attempt to use it directly.
#ifndef KALDI_MATRIX_KALDI_VECTOR_INL_H_
#define KALDI_MATRIX_KALDI_VECTOR_INL_H_ 1
namespace kaldi {
template<typename Real>
std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
}
template<typename Real>
std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
}
template<typename Real>
std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
}
template<>
template<>
void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
template<>
template<>
void VectorBase<double>::AddVec<double>(const double alpha,
const VectorBase<double> &rv);
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_VECTOR_INL_H_

Diff not shown because of its large size.

Просмотреть файл

@ -1,570 +0,0 @@
// matrix/kaldi-vector.h
// Copyright 2009-2012 Ondrej Glembek; Microsoft Corporation; Lukas Burget;
// Saarland University (Author: Arnab Ghoshal);
// Ariya Rastrow; Petr Schwarz; Yanmin Qian;
// Karel Vesely; Go Vivace Inc.; Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_VECTOR_H_
#define KALDI_MATRIX_KALDI_VECTOR_H_ 1
#include "matrix/matrix-common.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
/// Provides a vector abstraction class.
/// This class provides a way to work with vectors in kaldi.
/// It encapsulates basic operations and memory optimizations.
template<typename Real>
class VectorBase {
public:
/// Set vector to all zeros.
void SetZero();
/// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-06) const; // replace magic number
/// Set all members of a vector to a specified value.
void Set(Real f);
/// Set vector to random normally-distributed noise.
void SetRandn();
/// This function returns a random index into this vector,
/// chosen with probability proportional to the corresponding
/// element. Requires that this->Min() >= 0 and this->Sum() > 0.
MatrixIndexT RandCategorical() const;
/// Returns the dimension of the vector.
inline MatrixIndexT Dim() const { return dim_; }
/// Returns the size in memory of the vector, in bytes.
inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
/// Returns a pointer to the start of the vector's data.
inline Real* Data() { return data_; }
/// Returns a pointer to the start of the vector's data (const).
inline const Real* Data() const { return data_; }
/// Indexing operator (const).
inline Real operator() (MatrixIndexT i) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/// Indexing operator (non-const).
inline Real & operator() (MatrixIndexT i) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/** @brief Returns a sub-vector of a vector (a range of elements).
* @param o [in] Origin, 0 <= o < Dim()
* @param l [in] Length, 0 <= l <= Dim() - o
* @return A SubVector object that aliases the data of the Vector object.
* See @c SubVector class for details */
SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
return SubVector<Real>(*this, o, l);
}
/** @brief Returns a const sub-vector of a vector (a range of elements).
* @param o [in] Origin, 0 <= o < Dim()
* @param l [in] Length, 0 <= l <= Dim() - o
* @return A SubVector object that aliases the data of the Vector object.
* See @c SubVector class for details */
const SubVector<Real> Range(const MatrixIndexT o,
const MatrixIndexT l) const {
return SubVector<Real>(*this, o, l);
}
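// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers:
//
//   Vector<float> v(10);
//   v.SetRandn();
//   SubVector<float> mid = v.Range(2, 5);  // elements v(2) .. v(6)
//   mid.SetZero();                         // zeroes that slice of 'v'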
/// Copy data from another vector (must match own size).
void CopyFromVec(const VectorBase<Real> &v);
/// Copy data from a SpMatrix or TpMatrix (must match own size).
template<typename OtherReal>
void CopyFromPacked(const PackedMatrix<OtherReal> &M);
/// Copy data from another vector of different type (double vs. float)
template<typename OtherReal>
void CopyFromVec(const VectorBase<OtherReal> &v);
/// Copy from CuVector. This is defined in ../cudamatrix/cu-vector.h
template<typename OtherReal>
void CopyFromVec(const CuVectorBase<OtherReal> &v);
/// Apply natural log to all elements. Throws if any element of
/// the vector is negative (but doesn't complain about zero; the
/// log will be -infinity).
void ApplyLog();
/// Apply natural log to another vector and put result in *this.
void ApplyLogAndCopy(const VectorBase<Real> &v);
/// Apply exponential to each value in vector.
void ApplyExp();
/// Take absolute value of each of the elements
void Abs();
/// Applies floor to all elements. Returns number of elements floored.
MatrixIndexT ApplyFloor(Real floor_val);
/// Applies ceiling to all elements. Returns number of elements changed.
MatrixIndexT ApplyCeiling(Real ceil_val);
/// Applies floor to all elements. Returns number of elements floored.
MatrixIndexT ApplyFloor(const VectorBase<Real> &floor_vec);
/// Apply soft-max to vector and return normalizer (log sum of exponentials).
/// This is the same as: \f$ x(i) = exp(x(i)) / \sum_j exp(x(j)) \f$
Real ApplySoftMax();
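// A hedged numeric sketch (not in the original header): for x = [0, ln 3],
// ApplySoftMax() leaves x = [0.25, 0.75] and returns log(1 + 3) = log 4,
// the log of the sum of exponentials of the original elements.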
/// Sets each element of *this to the tanh of the corresponding element of "src".
void Tanh(const VectorBase<Real> &src);
/// Sets each element of *this to the sigmoid function of the corresponding
/// element of "src".
void Sigmoid(const VectorBase<Real> &src);
/// Take all elements of vector to a power.
void ApplyPow(Real power);
/// Compute the p-th norm of the vector.
Real Norm(Real p) const;
/// Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
bool ApproxEqual(const VectorBase<Real> &other, float tol = 0.01) const;
/// Invert all elements.
void InvertElements();
/// Add vector : *this = *this + alpha * v (with casting between floats and
/// doubles).
template<typename OtherReal>
void AddVec(const Real alpha, const VectorBase<OtherReal> &v);
/// Add vector : *this = *this + alpha * v^2 [element-wise squaring].
void AddVec2(const Real alpha, const VectorBase<Real> &v);
/// Add vector : *this = *this + alpha * v^2 [element-wise squaring],
/// with casting between floats and doubles.
template<typename OtherReal>
void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
/// Add matrix times vector : this <-- beta*this + alpha*M*v.
/// Calls BLAS GEMV.
void AddMatVec(const Real alpha, const MatrixBase<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
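// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: y = 1.0 * M * x + 0.0 * y, i.e. a plain matrix-vector
// product via BLAS GEMV.
//
//   Matrix<float> m(3, 4);
//   Vector<float> x(4), y(3);
//   m.SetRandn(); x.SetRandn();
//   y.AddMatVec(1.0, m, kNoTrans, x, 0.0);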
/// This is as AddMatVec, except optimized for where v contains a lot
/// of zeros.
void AddMatSvec(const Real alpha, const MatrixBase<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// Add symmetric positive definite matrix times vector:
/// this <-- beta*this + alpha*M*v. Calls BLAS SPMV.
void AddSpVec(const Real alpha, const SpMatrix<Real> &M,
const VectorBase<Real> &v, const Real beta); // **beta previously defaulted to 0.0**
/// Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
/// Works even if v == *this.
void AddTpVec(const Real alpha, const TpMatrix<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// Set each element to y = (x == orig ? changed : x).
void ReplaceValue(Real orig, Real changed);
/// Multiply element-by-element by another vector.
void MulElements(const VectorBase<Real> &v);
/// Multiply element-by-element by another vector of different type.
template<typename OtherReal>
void MulElements(const VectorBase<OtherReal> &v);
/// Divide element-by-element by a vector.
void DivElements(const VectorBase<Real> &v);
/// Divide element-by-element by a vector of different type.
template<typename OtherReal>
void DivElements(const VectorBase<OtherReal> &v);
/// Add a constant to each element of a vector.
void Add(Real c);
/// Add element-by-element product of vectors:
/// this <-- alpha * v .* r + beta * *this.
void AddVecVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta);
/// Add element-by-element quotient of two vectors.
/// this <---- alpha*v/r + beta*this
void AddVecDivVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta);
/// Multiplies all elements by this constant.
void Scale(Real alpha);
/// Multiplies this vector by lower-triangular matrix: *this <-- *this * M.
void MulTp(const TpMatrix<Real> &M, const MatrixTransposeType trans);
/// Performs a row stack of the matrix M
void CopyRowsFromMat(const MatrixBase<Real> &M);
template<typename OtherReal>
void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
/// The following is implemented in ../cudamatrix/cu-matrix.cc
void CopyRowsFromMat(const CuMatrixBase<Real> &M);
/// Performs a column stack of the matrix M
void CopyColsFromMat(const MatrixBase<Real> &M);
/// Extracts a row of the matrix M. Could also do this with
/// this->Copy(M[row]).
void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
/// Extracts a row of the matrix M with type conversion.
template<typename OtherReal>
void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
/// Extracts a row of the symmetric matrix S.
template<typename OtherReal>
void CopyRowFromSp(const SpMatrix<OtherReal> &S, MatrixIndexT row);
/// Extracts a column of the matrix M.
template<typename OtherReal>
void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
/// Extracts the diagonal of the matrix M.
void CopyDiagFromMat(const MatrixBase<Real> &M);
/// Extracts the diagonal of a packed matrix M; works for Sp or Tp.
void CopyDiagFromPacked(const PackedMatrix<Real> &M);
/// Extracts the diagonal of a symmetric matrix.
inline void CopyDiagFromSp(const SpMatrix<Real> &M) { CopyDiagFromPacked(M); }
/// Extracts the diagonal of a triangular matrix.
inline void CopyDiagFromTp(const TpMatrix<Real> &M) { CopyDiagFromPacked(M); }
/// Returns the maximum value of any element.
Real Max() const;
/// Returns the maximum value of any element, and the associated index.
Real Max(MatrixIndexT *index) const;
/// Returns the minimum value of any element.
Real Min() const;
/// Returns the minimum value of any element, and the associated index.
Real Min(MatrixIndexT *index) const;
/// Returns sum of the elements
Real Sum() const;
/// Returns sum of the logs of the elements. More efficient than
/// just taking log of each. Will return NaN if any elements are
/// negative.
Real SumLog() const;
/// Does *this = alpha * (sum of rows of M) + beta * *this.
void AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
/// Does *this = alpha * (sum of columns of M) + beta * *this.
void AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
/// Add the diagonal of a matrix times itself:
/// *this = diag(M M^T) + beta * *this (if trans == kNoTrans), or
/// *this = diag(M^T M) + beta * *this (if trans == kTrans).
void AddDiagMat2(Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType trans = kNoTrans, Real beta = 1.0);
/// Add the diagonal of a matrix product: *this = diag(M N), assuming the
/// "trans" arguments are both kNoTrans; for transpose arguments, it behaves
/// as you would expect.
void AddDiagMatMat(Real alpha, const MatrixBase<Real> &M, MatrixTransposeType transM,
const MatrixBase<Real> &N, MatrixTransposeType transN,
Real beta = 1.0);
/// Returns log(sum(exp())) without exp overflow
/// If prune > 0.0, ignores terms less than the max - prune.
/// [Note: in future, if prune = 0.0, it will take the max.
/// For now, use -1 if you don't want it to prune.]
Real LogSumExp(Real prune = -1.0) const;
/// Reads from C++ stream (option to add to existing contents).
/// Throws exception on failure
void Read(std::istream & in, bool binary, bool add = false);
/// Writes to C++ stream (option to write in binary).
void Write(std::ostream &Out, bool binary) const;
friend class VectorBase<double>;
friend class VectorBase<float>;
friend class CuVectorBase<Real>;
friend class CuVector<Real>;
protected:
/// Destructor; does not deallocate memory, this is handled by child classes.
/// This destructor is protected so this object can only be
/// deleted via a child.
~VectorBase() {}
/// Empty initializer, corresponds to vector of zero size.
explicit VectorBase(): data_(NULL), dim_(0) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Took this out since it is not currently used, and it is possible to create
// objects where the allocated memory is not the same size as dim_ : Arnab
// /// Initializer from a pointer and a size; keeps the pointer internally
// /// (ownership or non-ownership depends on the child class).
// explicit VectorBase(Real* data, MatrixIndexT dim)
// : data_(data), dim_(dim) {}
// Arnab : made this protected since it is unsafe too.
/// Load data into the vector: sz must match own size.
void CopyFromPtr(const Real* Data, MatrixIndexT sz);
/// data memory area
Real* data_;
/// dimension of vector
MatrixIndexT dim_;
KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
}; // class VectorBase
/** @brief A class representing a vector.
*
* This class provides a way to work with vectors in kaldi.
* It encapsulates basic operations and memory optimizations. */
template<typename Real>
class Vector: public VectorBase<Real> {
public:
/// Constructor that takes no arguments. Initializes to empty.
Vector(): VectorBase<Real>() {}
/// Constructor with specific size. Sets to all-zero by default;
/// if resize_type == kUndefined, memory contents are undefined.
explicit Vector(const MatrixIndexT s,
MatrixResizeType resize_type = kSetZero)
: VectorBase<Real>() { Resize(s, resize_type); }
/// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h
template<typename OtherReal>
explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
/// Copy-constructor from base-class, needed to copy from SubVector.
explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
/// Type conversion constructor.
template<typename OtherReal>
explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
// Took this out since it is unsafe : Arnab
// /// Constructor from a pointer and a size; copies the data to a location
// /// it owns.
// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
// Resize(s);
// CopyFromPtr(Data, s);
// }
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(Vector<Real> *other);
/// Destructor. Deallocates memory.
~Vector() { Destroy(); }
/// Read function using C++ streams. Can also add to existing contents
/// of vector.
void Read(std::istream & in, bool binary, bool add = false);
/// Set vector to a specified size (can be zero).
/// The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
/// Remove one element and shifts later elements down.
void RemoveElement(MatrixIndexT i);
/// Assignment operator. Needed for inclusion in std::vector.
Vector<Real> &operator = (const Vector<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
/// Assignment operator that takes VectorBase.
Vector<Real> &operator = (const VectorBase<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
/// has already been freed), and it sets the vector to newly allocated memory
/// with the specified dimension. dim == 0 is acceptable. The memory contents
/// pointed to by data_ will be undefined.
void Init(const MatrixIndexT dim);
/// Destroy function, called internally.
void Destroy();
};
/// Represents a non-allocating general vector which can be defined
/// as a sub-vector of higher-level vector [or as the row of a matrix].
template<typename Real>
class SubVector : public VectorBase<Real> {
public:
/// Constructor from a Vector or SubVector.
/// SubVectors are not const-safe and it's very hard to make them
/// so for now we just give up. This function contains const_cast.
SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
const MatrixIndexT length) : VectorBase<Real>() {
// following assert equiv to origin>=0 && length>=0 &&
// origin+length <= t.Dim()
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+
static_cast<UnsignedMatrixIndexT>(length) <=
static_cast<UnsignedMatrixIndexT>(t.Dim()));
VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
VectorBase<Real>::dim_ = length;
}
/// This constructor initializes the vector to point at the contents
/// of this packed matrix (SpMatrix or TpMatrix).
SubVector(const PackedMatrix<Real> &M) {
VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
}
/// Copy constructor
SubVector(const SubVector &other) : VectorBase<Real> () {
// this copy constructor needed for Range() to work in base class.
VectorBase<Real>::data_ = other.data_;
VectorBase<Real>::dim_ = other.dim_;
}
/// Constructor from a pointer to memory and a length. Keeps a pointer
/// to the data but does not take ownership (will never delete).
SubVector(Real *data, MatrixIndexT length) : VectorBase<Real> () {
VectorBase<Real>::data_ = data;
VectorBase<Real>::dim_ = length;
}
/// This operation does not preserve const-ness, so be careful.
SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
VectorBase<Real>::dim_ = matrix.NumCols();
}
~SubVector() {} ///< Destructor (does nothing; no pointers are owned here).
private:
/// Disallow assignment operator.
SubVector & operator = (const SubVector &other);
};
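// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: wrapping raw memory and a matrix row without copying.
//
//   float raw[4] = {1, 2, 3, 4};
//   SubVector<float> view(raw, 4);  // non-owning view of 'raw'
//   Matrix<float> m(3, 4);
//   SubVector<float> row1(m, 1);    // aliases row 1 of 'm'
//   row1.Set(0.5);                  // writes into 'm'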
/// @} end of "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
/// Output to a C++ stream. Non-binary by default (use Write for
/// binary output).
template<typename Real>
std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template<typename Real>
std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template<typename Real>
std::istream & operator >> (std::istream & in, Vector<Real> & v);
/// @} end of \addtogroup matrix_funcs_io
/// \addtogroup matrix_funcs_scalar
/// @{
template<typename Real>
bool ApproxEqual(const VectorBase<Real> &a,
const VectorBase<Real> &b, Real tol = 0.01) {
return a.ApproxEqual(b, tol);
}
template<typename Real>
inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
float tol = 0.01) {
KALDI_ASSERT(a.ApproxEqual(b, tol));
}
/// Returns dot product between v1 and v2.
template<typename Real>
Real VecVec(const VectorBase<Real> &v1, const VectorBase<Real> &v2);
template<typename Real, typename OtherReal>
Real VecVec(const VectorBase<Real> &v1, const VectorBase<OtherReal> &v2);
/// Returns \f$ v_1^T M v_2 \f$ .
/// Not as efficient as it could be where v1 == v2.
template<typename Real>
Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
const VectorBase<Real> &v2);
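// A hedged usage sketch (not in the original header), assuming the usual
// Kaldi headers: the scalar v1^T M v2.
//
//   Vector<float> v1(3), v2(4);
//   Matrix<float> m(3, 4);
//   v1.SetRandn(); v2.SetRandn(); m.SetRandn();
//   float s = VecMatVec(v1, m, v2);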
/// @} End of "addtogroup matrix_funcs_scalar"
} // namespace kaldi
// we need to include the implementation
#include "matrix/kaldi-vector-inl.h"
#endif // KALDI_MATRIX_KALDI_VECTOR_H_


@ -1,100 +0,0 @@
// matrix/matrix-common.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_MATRIX_COMMON_H_
#define KALDI_MATRIX_MATRIX_COMMON_H_
// This file contains some #includes, forward declarations
// and typedefs that are needed by all the main header
// files in this directory.
#include "base/kaldi-common.h"
#include "matrix/kaldi-blas.h"
namespace kaldi {
typedef enum {
kTrans = CblasTrans,
kNoTrans = CblasNoTrans
} MatrixTransposeType;
typedef enum {
kSetZero,
kUndefined,
kCopyData
} MatrixResizeType;
typedef enum {
kTakeLower,
kTakeUpper,
kTakeMean,
kTakeMeanAndCheck
} SpCopyType;
template<typename Real> class VectorBase;
template<typename Real> class Vector;
template<typename Real> class SubVector;
template<typename Real> class MatrixBase;
template<typename Real> class SubMatrix;
template<typename Real> class Matrix;
template<typename Real> class SpMatrix;
template<typename Real> class TpMatrix;
template<typename Real> class PackedMatrix;
// these are classes that won't be defined in this
// directory; they're mostly needed for friend declarations.
template<typename Real> class CuMatrixBase;
template<typename Real> class CuSubMatrix;
template<typename Real> class CuMatrix;
template<typename Real> class CuVectorBase;
template<typename Real> class CuSubVector;
template<typename Real> class CuVector;
template<typename Real> class CuPackedMatrix;
template<typename Real> class CuSpMatrix;
template<typename Real> class CuTpMatrix;
class CompressedMatrix;
/// This class provides a way for switching between double and float types.
template<typename T> class OtherReal { }; // useful in reading+writing routines
// to switch double and float.
/// A specialized class for switching from float to double.
template<> class OtherReal<float> {
public:
typedef double Real;
};
/// A specialized class for switching from double to float.
template<> class OtherReal<double> {
public:
typedef float Real;
};
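// A hedged usage sketch (not in the original header): OtherReal lets a
// template obtain "the other" floating-point type, e.g. to declare a
// double-precision copy inside a float routine.
//
//   template<typename Real>
//   void Example(const VectorBase<Real> &v) {
//     typedef typename OtherReal<Real>::Real OtherType;  // float <-> double
//     Vector<OtherType> copy(v.Dim());
//     copy.CopyFromVec(v);  // CopyFromVec supports cross-type copies
//   }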
typedef int32 MatrixIndexT;
typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;
// If you want to use size_t for the index type, do as follows instead:
//typedef size_t MatrixIndexT;
//typedef ssize_t SignedMatrixIndexT;
//typedef size_t UnsignedMatrixIndexT;
}
#endif // KALDI_MATRIX_MATRIX_COMMON_H_


@ -1,56 +0,0 @@
// matrix/matrix-functions-inl.h
// Copyright 2009-2011 Microsoft Corporation
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from Henrique (Rico) Malvar's book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
#define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
namespace kaldi {
//! ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
Real *b_re, Real *b_im) {
Real tmp_re = (*b_re * a_re) - (*b_im * a_im);
*b_im = *b_re * a_im + *b_im * a_re;
*b_re = tmp_re;
}
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
const Real &b_re, const Real &b_im,
Real *c_re, Real *c_im) {
*c_re += b_re*a_re - b_im*a_im;
*c_im += b_re*a_im + b_im*a_re;
}
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) {
*a_re = std::cos(x);
*a_im = std::sin(x);
}
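// A hedged numeric sketch (not in the original source): rotating the complex
// number 1 + 0i by 90 degrees using ComplexImExp and ComplexMul.
//
//   float a_re, a_im, b_re = 1.0f, b_im = 0.0f;
//   ComplexImExp(static_cast<float>(M_PI / 2), &a_re, &a_im);  // cos + i sin
//   ComplexMul(a_re, a_im, &b_re, &b_im);  // now b is approx 0 + 1i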
} // end namespace kaldi
#endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_


@ -1,982 +0,0 @@
// matrix/matrix-functions.cc
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky
// Yanmin Qian; Saarland University; Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from Henrique (Rico) Malvar's book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#include "matrix/matrix-functions.h"
#include "matrix/sp-matrix.h"
namespace kaldi {
template<typename Real> void ComplexFt (const VectorBase<Real> &in,
VectorBase<Real> *out, bool forward) {
int exp_sign = (forward ? -1 : 1);
KALDI_ASSERT(out != NULL);
KALDI_ASSERT(in.Dim() == out->Dim());
KALDI_ASSERT(in.Dim() % 2 == 0);
int twoN = in.Dim(), N = twoN / 2;
const Real *data_in = in.Data();
Real *data_out = out->Data();
Real exp1N_re, exp1N_im; // forward -> exp(-2pi / N), backward -> exp(2pi / N).
Real fraction = exp_sign * M_2PI / static_cast<Real>(N); // forward -> -2pi/N, backward -> 2pi/N
ComplexImExp(fraction, &exp1N_re, &exp1N_im);
Real expm_re = 1.0, expm_im = 0.0; // forward -> exp(-2pi m / N).
for (int two_m = 0; two_m < twoN; two_m+=2) { // For each output component.
Real expmn_re = 1.0, expmn_im = 0.0; // forward -> exp(-2pi m n / N).
Real sum_re = 0.0, sum_im = 0.0; // complex output for index m (the sum expression)
for (int two_n = 0; two_n < twoN; two_n+=2) {
ComplexAddProduct(data_in[two_n], data_in[two_n+1],
expmn_re, expmn_im,
&sum_re, &sum_im);
ComplexMul(expm_re, expm_im, &expmn_re, &expmn_im);
}
data_out[two_m] = sum_re;
data_out[two_m + 1] = sum_im;
if (two_m % 10 == 0) { // occasionally renew "expm" from scratch to avoid
// loss of precision.
int nextm = 1 + two_m/2;
Real fraction_mult = fraction * nextm;
ComplexImExp(fraction_mult, &expm_re, &expm_im);
} else {
ComplexMul(exp1N_re, exp1N_im, &expm_re, &expm_im);
}
}
}
template
void ComplexFt (const VectorBase<float> &in,
VectorBase<float> *out, bool forward);
template
void ComplexFt (const VectorBase<double> &in,
VectorBase<double> *out, bool forward);
#define KALDI_COMPLEXFFT_BLOCKSIZE 8192
// This #define affects how we recurse in ComplexFftRecursive.
// We assume that memory-caching happens on a scale at
// least as small as this.
//! ComplexFftRecursive is a recursive function that computes the
//! complex FFT of size N. The "nffts" arguments specifies how many
//! separate FFTs to compute in parallel (we assume the data for
//! each one is consecutive in memory). The "forward" argument
//! specifies whether to do the FFT (true) or IFFT (false), although
//! note that we do not include the factor of 1/N (the user should
//! do this if required). The iterators factor_begin and factor_end
//! point to the beginning and end (i.e. one past the last element)
//! of an array of small factors of N (typically prime factors).
//! See the comments below this code for the detailed equations
//! of the recursion.
template<typename Real>
void ComplexFftRecursive (Real *data, int nffts, int N,
const int *factor_begin,
const int *factor_end, bool forward,
Vector<Real> *tmp_vec) {
if (factor_begin == factor_end) {
KALDI_ASSERT(N == 1);
return;
}
{ // an optimization: compute in smaller blocks.
// this block of code could be removed and it would still work.
MatrixIndexT size_perblock = N * 2 * sizeof(Real);
if (nffts > 1 && size_perblock*nffts > KALDI_COMPLEXFFT_BLOCKSIZE) { // can break it up...
// Break up into multiple blocks. This is an optimization. We make
// no progress on the FFT when we do this.
int block_skip = KALDI_COMPLEXFFT_BLOCKSIZE / size_perblock; // n blocks per call
if (block_skip == 0) block_skip = 1;
if (block_skip < nffts) {
int blocks_left = nffts;
while (blocks_left > 0) {
int skip_now = std::min(blocks_left, block_skip);
ComplexFftRecursive(data, skip_now, N, factor_begin, factor_end, forward, tmp_vec);
blocks_left -= skip_now;
data += skip_now * N*2;
}
return;
} // else do the actual algorithm.
} // else do the actual algorithm.
}
int P = *factor_begin;
KALDI_ASSERT(P > 1);
int Q = N / P;
if (P > 1 && Q > 1) { // Do the rearrangement. C.f. eq. (8) below. Transform
// (a) to (b).
Real *data_thisblock = data;
if (tmp_vec->Dim() < (MatrixIndexT)N) tmp_vec->Resize(N);
Real *data_tmp = tmp_vec->Data();
for (int thisfft = 0; thisfft < nffts; thisfft++, data_thisblock+=N*2) {
for (int offset = 0; offset < 2; offset++) { // 0 == real, 1 == im.
for (int p = 0; p < P; p++) {
for (int q = 0; q < Q; q++) {
int aidx = q*P + p, bidx = p*Q + q;
data_tmp[bidx] = data_thisblock[2*aidx+offset];
}
}
for (int n = 0;n < P*Q;n++) data_thisblock[2*n+offset] = data_tmp[n];
}
}
}
{ // Recurse.
ComplexFftRecursive(data, nffts*P, Q, factor_begin+1, factor_end, forward, tmp_vec);
}
int exp_sign = (forward ? -1 : 1);
Real rootN_re, rootN_im; // Nth root of unity.
ComplexImExp(static_cast<Real>(exp_sign * M_2PI / N), &rootN_re, &rootN_im);
Real rootP_re, rootP_im; // Pth root of unity.
ComplexImExp(static_cast<Real>(exp_sign * M_2PI / P), &rootP_re, &rootP_im);
{ // Do the multiplication
// could avoid a bunch of complex multiplies by moving the loop over data_thisblock
// inside.
if (tmp_vec->Dim() < (MatrixIndexT)(P*2)) tmp_vec->Resize(P*2);
Real *temp_a = tmp_vec->Data();
Real *data_thisblock = data, *data_end = data+(N*2*nffts);
for (; data_thisblock != data_end; data_thisblock += N*2) { // for each separate fft.
Real qd_re = 1.0, qd_im = 0.0; // 1^(q'/N)
for (int qd = 0; qd < Q; qd++) {
Real pdQ_qd_re = qd_re, pdQ_qd_im = qd_im; // 1^((p'Q+q') / N) == 1^((p'/P) + (q'/N))
// Initialize to q'/N, corresponding to p' == 0.
for (int pd = 0; pd < P; pd++) { // pd == p'
{ // This is the p = 0 case of the loop below [an optimization].
temp_a[pd*2] = data_thisblock[qd*2];
temp_a[pd*2 + 1] = data_thisblock[qd*2 + 1];
}
{ // This is the p = 1 case of the loop below [an optimization]
// **** MOST OF THE TIME (>60% I think) gets spent here. ***
ComplexAddProduct(pdQ_qd_re, pdQ_qd_im,
data_thisblock[(qd+Q)*2], data_thisblock[(qd+Q)*2 + 1],
&(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
}
if (P > 2) {
Real p_pdQ_qd_re = pdQ_qd_re, p_pdQ_qd_im = pdQ_qd_im; // 1^(p(p'Q+q')/N)
for (int p = 2; p < P; p++) {
ComplexMul(pdQ_qd_re, pdQ_qd_im, &p_pdQ_qd_re, &p_pdQ_qd_im); // p_pdQ_qd *= pdQ_qd.
int data_idx = p*Q + qd;
ComplexAddProduct(p_pdQ_qd_re, p_pdQ_qd_im,
data_thisblock[data_idx*2], data_thisblock[data_idx*2 + 1],
&(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
}
}
if (pd != P-1)
ComplexMul(rootP_re, rootP_im, &pdQ_qd_re, &pdQ_qd_im); // pdQ_qd *= (rootP == 1^{1/P})
// (using 1/P == Q/N)
}
for (int pd = 0; pd < P; pd++) {
data_thisblock[(pd*Q + qd)*2] = temp_a[pd*2];
data_thisblock[(pd*Q + qd)*2 + 1] = temp_a[pd*2 + 1];
}
ComplexMul(rootN_re, rootN_im, &qd_re, &qd_im); // qd *= rootN.
}
}
}
}
/* Equations for ComplexFftRecursive.
We consider here one of the "nffts" separate ffts; it's just a question of
doing them all in parallel. We also write all equations in terms of
complex math (the conversion to real arithmetic is not hard, and anyway
takes place inside function calls).
Let the input (i.e. "data" at start) be a_n, n = 0..N-1, and
the output (Fourier transform) be d_k, k = 0..N-1. We use these letters because
there will be two intermediate variables b and c.
We want to compute:
d_k = \sum_n a_n 1^(kn/N) (1)
where we use 1^x as shorthand for exp(-2pi x) for the forward algorithm
and exp(2pi x) for the backward one.
We factorize N = P Q (P small, Q usually large).
With p = 0..P-1 and q = 0..Q-1, and also p'=0..P-1 and q'=0..Q-1, we let:
k == p'Q + q' (2)
n == qP + p (3)
That is, we let p, q, p', q' range over these indices and observe that this way we
can cover all n, k. Expanding (1) using (2) and (3), we can write:
d_k = \sum_{p, q} a_n 1^((p'Q+q')(qP+p)/N)
= \sum_{p, q} a_n 1^(p'pQ/N) 1^(q'qP/N) 1^(q'p/N) (4)
using 1^(PQ/N) = 1 to get rid of the terms with PQ in them. Rearranging (4),
d_k = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_n (5)
The point here is to separate the index q. Now we can expand out the remaining
instances of k and n using (2) and (3):
d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_(qP+p) (6)
The expression \sum_q varies with the indices p and q'. Let us define
C_{p, q'} = \sum_q 1^(q'qP/N) a_(qP+p) (7)
Here, C_{p, q'}, viewed as a sequence in q', is just the DFT of the points
a_(qP+p) for q = 0..Q-1. These points are not consecutive in memory though,
they jump by P each time. Let us define b as a rearranged version of a,
so that
b_(pQ+q) = a_(qP+p) (8)
How to do this rearrangement? For simplicity we do it with a temporary array
rather than in place (see the pseudocode below).
We can rearrange (7) to be written in terms of the b's, using (8), so that
C_{p, q'} = \sum_q 1^(q'q (P/N)) b_(pQ+q) (9)
Here, the sequence of C_{p, q'} over q'=0..Q-1, is just the DFT of the sequence
of b_(pQ) .. b_((p+1)Q - 1). Let's arrange the C_{p, q'} in a single array in
memory in the same way as the b's, i.e. we define
c_(pQ+q') == C_{p, q'}. (10)
Note that we could have written (10) with q in place of q', as there is only
one index of type q present, but q' is just a more natural variable name to use
since we use q' elsewhere to subscript c and C.
Rewriting (9), we have:
c_(pQ+q') = \sum_q 1^(q'q (P/N)) b_(pQ+q) (11)
which is the DFT computed by the recursive call to this function [after computing
the b's by rearranging the a's]. From the c's we want to compute the d's.
Taking (6), substituting in the sum (7), and using (10) to write it as an array,
we have:
d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q') (12)
This sum is independent for different values of q'. Note that d overwrites c
in memory. We compute this in a direct way, using a little array of size P to
store the computed d values for one value of q' (we reuse the array for each value
of q').
So the overall picture is this:
We get a call to compute DFT on size N.
- If N == 1 we return (nothing to do).
- We factor N = P Q (typically, P is small).
- Using (8), we rearrange the data in memory so that we have b not a in memory
(this is the block "do the rearrangement").
The pseudocode for this is as follows. For simplicity we use a temporary array.
for p = 0..P-1
for q = 0..Q-1
bidx = pQ + q
aidx = qP + p
tmp[bidx] = data[aidx].
end
end
data <-- tmp
The reason this accomplishes (8) is that we want pQ+q and qP+p to be swapped
over for each p, q. (An in-place variant would guard each swap with a test
such as "if m > n" to ensure the swapping happens only once; otherwise it
would happen twice, since pQ+q and qP+p both range over the entire set of
numbers 0..N-1.)
- We do the DFT on the smaller block size to compute c from b (this is eq. (11)).
Note that this is actually multiple DFTs, one for each value of p, but this
goes to the "nffts" argument of the function call, which we have ignored up to now.
- We compute eq. (12) via a loop, as follows
allocate temporary array e of size P.
For q' = 0..Q-1:
for p' = 0..P-1:
set sum to zero [this will go in e[p']]
for p = 0..P-1:
sum += 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q')
end
e[p'] = sum
end
for p' = 0..P-1:
d_(p'Q+q') = e[p']
end
end
delete temporary array e
*/
// This is the outer-layer calling code for ComplexFftRecursive.
// It factorizes the dimension and then calls the FFT routine.
template<typename Real> void ComplexFft(VectorBase<Real> *v, bool forward, Vector<Real> *tmp_in) {
KALDI_ASSERT(v != NULL);
if (v->Dim()<=1) return;
KALDI_ASSERT(v->Dim() % 2 == 0); // complex input.
int N = v->Dim() / 2;
std::vector<int> factors;
Factorize(N, &factors);
int *factor_beg = NULL;
if (factors.size() > 0)
factor_beg = &(factors[0]);
Vector<Real> tmp; // allocated in ComplexFftRecursive.
ComplexFftRecursive(v->Data(), 1, N, factor_beg, factor_beg+factors.size(), forward, (tmp_in?tmp_in:&tmp));
}
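// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers: a forward complex FFT of 4 complex points, stored as
// 8 interleaved reals (re, im, re, im, ...).
//
//   Vector<float> v(8);
//   v.SetRandn();
//   ComplexFft(&v, true);   // forward transform, in place
//   ComplexFft(&v, false);  // inverse, but scaled by N = 4 ...
//   v.Scale(1.0 / 4);       // ... so divide by N to recover the input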
//! Inefficient version of Fourier transform, for testing purposes.
template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward) {
KALDI_ASSERT(v != NULL);
MatrixIndexT N = v->Dim();
KALDI_ASSERT(N%2 == 0);
if (N == 0) return;
Vector<Real> vtmp(N*2); // store as complex.
if (forward) {
for (MatrixIndexT i = 0; i < N; i++) vtmp(i*2) = (*v)(i);
ComplexFft(&vtmp, forward); // this is already tested so we can use this.
v->CopyFromVec( vtmp.Range(0, N) );
(*v)(1) = vtmp(N); // Copy the N/2'th fourier component, which is real,
// to the imaginary part of the 1st complex output.
} else {
// reverse the transformation above to get the complex spectrum.
vtmp(0) = (*v)(0); // copy F_0 which is real
vtmp(N) = (*v)(1); // copy F_{N/2} which is real
for (MatrixIndexT i = 1; i < N/2; i++) {
// Copy i'th fourier component to the i'th position.
vtmp(2*i) = (*v)(2*i);
vtmp(2*i+1) = (*v)(2*i+1);
// Copy i'th to N-i'th, conjugated.
vtmp(2*(N-i)) = (*v)(2*i);
vtmp(2*(N-i)+1) = -(*v)(2*i+1);
}
ComplexFft(&vtmp, forward); // actually backward since forward == false
// Copy back real part. Complex part should be zero.
for (MatrixIndexT i = 0; i < N; i++)
(*v)(i) = vtmp(i*2);
}
}
template void RealFftInefficient (VectorBase<float> *v, bool forward);
template void RealFftInefficient (VectorBase<double> *v, bool forward);
template
void ComplexFft(VectorBase<float> *v, bool forward, Vector<float> *tmp_in);
template
void ComplexFft(VectorBase<double> *v, bool forward, Vector<double> *tmp_in);
// See the long comment below for the math behind this.
template<typename Real> void RealFft (VectorBase<Real> *v, bool forward) {
KALDI_ASSERT(v != NULL);
MatrixIndexT N = v->Dim(), N2 = N/2;
KALDI_ASSERT(N%2 == 0);
if (N == 0) return;
if (forward) ComplexFft(v, true);
Real *data = v->Data();
Real rootN_re, rootN_im; // exp(-2pi/N), forward; exp(2pi/N), backward
int forward_sign = forward ? -1 : 1;
ComplexImExp(static_cast<Real>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
Real kN_re = -forward_sign, kN_im = 0.0; // exp(-2pik/N), forward; exp(2pik/N), backward
// kN starts out as 1.0 for forward algorithm but -1.0 for backward.
for (MatrixIndexT k = 1; 2*k <= N2; k++) {
ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);
Real Ck_re, Ck_im, Dk_re, Dk_im;
// C_k = 1/2 (B_k + B_{N/2 - k}^*) :
Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
// re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
// im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
// A_k = C_k + 1^(k/N) D_k:
data[2*k] = Ck_re; // A_k <-- C_k
data[2*k+1] = Ck_im;
// now A_k += D_k 1^(k/N)
ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));
MatrixIndexT kdash = N2 - k;
if (kdash != k) {
// Next we handle the index k' = N/2 - k. This is necessary
// to do now, to avoid invalidating data that we will later need.
// The quantities C_{k'} and D_{k'} are just the conjugates of C_k
// and D_k, so the equations are simple modifications of the above,
// replacing Ck_im and Dk_im with their negatives.
data[2*kdash] = Ck_re; // A_k' <-- C_k'
data[2*kdash+1] = -Ck_im;
// now A_k' += D_k' 1^(k'/N)
// We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
// so it's the same as 1^(k/N) but with the real part negated.
ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
}
}
{ // Now handle k = 0.
// In simple terms: after the complex fft, data[0] becomes the sum of real
// parts input[0], input[2]... and data[1] becomes the sum of imaginary
// pats input[1], input[3]...
// "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
// and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
Real zeroth = data[0] + data[1],
n2th = data[0] - data[1];
data[0] = zeroth;
data[1] = n2th;
if (!forward) {
data[0] /= 2;
data[1] /= 2;
}
}
if (!forward) {
ComplexFft(v, false);
v->Scale(2.0); // This is so we get a factor of N increase, rather than N/2 which we would
// otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
// It's for consistency with our normal FFT conventions.
}
}
template void RealFft (VectorBase<float> *v, bool forward);
template void RealFft (VectorBase<double> *v, bool forward);
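// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers. After a forward RealFft of N real points, data[0] holds the
// real coefficient A_0, data[1] holds the (real) coefficient A_{N/2}, and
// the remaining pairs hold A_1 .. A_{N/2-1} as (re, im).
//
//   Vector<float> v(8);
//   v.SetRandn();
//   RealFft(&v, true);   // forward, in place
//   RealFft(&v, false);  // inverse; scaled by N relative to the input
//   v.Scale(1.0 / 8);    // recover the original signal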
/* Notes for real FFTs.
We are using the same convention as above, 1^x to mean exp(-2\pi x) for the forward transform.
Actually, in a slight abuse of notation, we use this meaning for 1^x in both the forward and
backward cases because it's more convenient in this section.
Suppose we have real data a[0...N-1], with N even, and want to compute its Fourier transform.
We can make do with the first N/2 points of the transform, since the remaining ones are complex
conjugates of the first. We want to compute:
for k = 0...N/2-1,
A_k = \sum_{n = 0}^{N-1} a_n 1^(kn/N) (1)
We treat a[0..N-1] as a complex sequence of length N/2, i.e. a sequence b[0..N/2 - 1].
Viewed as sequences of length N/2, we have:
b = c + i d,
where c = a_0, a_2 ... and d = a_1, a_3 ...
We can recover the length-N/2 Fourier transforms of c and d by doing FT on b and
then doing the equations below. Derivation is marked by (*) in a comment below (search
for it). Let B, C, D be the FTs.
We have
C_k = 1/2 (B_k + B_{N/2 - k}^*) (z0)
D_k =-1/2i (B_k - B_{N/2 - k}^*) (z1)
so: re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})) (z2)
im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k})) (z3)
To recover the FT A from C and D, we write, rearranging (1):
A_k = \sum_{n = 0, 2, ..., N-2} a_n 1^(kn/N)
+\sum_{n = 1, 3, ..., N-1} a_n 1^(kn/N)
= \sum_{n = 0, 1, ..., N/2-1} a_{2n} 1^(2kn/N) + a_{2n+1} 1^(2kn/N) 1^(k/N)
= \sum_{n = 0, 1, ..., N/2-1} c_n 1^(2kn/N) + d_n 1^(2kn/N) 1^(k/N)
A_k = C_k + 1^(k/N) D_k (a0)
This equation is valid for k = 0...N/2-1, which is the range of the sequences B_k and
C_k. We don't use it for k = 0, which is a special case considered below. For
1 < k < N/2, it's convenient to consider the pair k, k', where k' = N/2 - k.
Remember that C_k' = C_k^* and D_k' = D_k^* [where * is conjugation]. Also,
1^(N/2 / N) = -1. So we have:
A_k' = C_k^* - 1^(k/N) D_k^* (a0b)
We do (a0) and (a0b) together.
By symmetry this gives us the Fourier components for N/2+1, ... N, if we want
them. However, it doesn't give us the value for exactly k = N/2. For k = 0 and k = N/2, it
is easiest to argue directly about the meaning of the A_k, B_k and C_k in terms of
sums of points.
A_0 and A_{N/2} are both real, with A_0=\sum_n a_n, and A_{N/2} an alternating sum
A_{N/2} = a_0 - a_1 + a_2 ...
It's easy to show that
A_0 = C_0 + D_0 (a1)
A_{N/2} = C_0 - D_0. (a2)
Since C_0 and D_0 are both real, C_0 is the real coefficient of B_0 and D_0 is the
imaginary coefficient.
*REVERSING THE PROCESS*
Next we want to reverse this process. We just need to work out C_k and D_k from the
sequence A_k. Then we do the inverse complex fft and we get back where we started.
For 0 and N/2, working from (a1) and (a2) above, we can see that:
C_0 = 1/2 (A_0 + A_{N/2}) (y0)
D_0 = 1/2 (A_0 - A_{N/2}) (y1)
and we use
B_0 = C_0 + i D_0
to get the 0th complex coefficient of B. This is exactly the same as the forward process
except with an extra factor of 1/2.
Consider equations (a0) and (a0b). We want to work out C_k and D_k from A_k and A_k'. Remember
k' = N/2 - k.
Write down
A_k = C_k + 1^(k/N) D_k (copying a0)
A_k'^* = C_k - 1^(k/N) D_k (conjugate of a0b)
So
C_k = 0.5 (A_k + A_k'^*) (p0)
D_k = 1^(-k/N) . 0.5 (A_k - A_k'^*) (p1)
Next, we want to compute B_k and B_k' from C_k and D_k. C.f. (z0)..(z3), and remember
that k' = N/2-k. We can see
that
B_k = C_k + i D_k (p2)
B_k' = C_k - i D_k (p3)
We would like to make the equations (p0) ... (p3) look like the forward equations (z0), (z1),
(a0) and (a0b) so we can reuse the code. Define E_k = -i 1^(k/N) D_k. Then write down (p0)..(p3).
We have
C_k = 0.5 (A_k + A_k'^*) (p0')
E_k = -0.5 i (A_k - A_k'^*) (p1')
B_k = C_k - 1^(-k/N) E_k (p2')
B_k' = C_k + 1^(-k/N) E_k (p3')
So these are exactly the same as (z0), (z1), (a0), (a0b) except replacing 1^(k/N) with
-1^(-k/N) . Remember that we defined 1^x above to be exp(-2pi x), so the signs here
might be opposite to what you see in the code.
MODIFICATION: we need to take care of a factor of two. The complex FFT we implemented
does not divide by N in the reverse case. So upon inversion we get larger by N/2.
However, this is not consistent with normal FFT conventions where you get a factor of N.
For this reason we multiply by two after the process described above.
*/
/*
(*) [this token is referred to in a comment above].
Notes for separating 2 real transforms from one complex one. Note that the
letters here (A, B, C and N) are all distinct from the same letters used in the
place where this comment is used.
Suppose we
have two sequences a_n and b_n, n = 0..N-1. We combine them into a complex
number,
c_n = a_n + i b_n.
Then we take the fourier transform to get
C_k = \sum_{n = 0}^{N-1} c_n 1^(kn/N) .
Then we use symmetry. Define A_k and B_k as the DFTs of a and b.
We use A_k = A_{N-k}^*, and B_k = B_{N-k}^*, since a and b are real. Using
C_k = A_k + i B_k,
C_{N-k} = A_k^* + i B_k^*
= A_k^* - (i B_k)^*
So:
A_k = 1/2 (C_k + C_{N-k}^*)
i B_k = 1/2 (C_k - C_{N-k}^*)
-> B_k =-1/2i (C_k - C_{N-k}^*)
-> re(B_k) = 1/2 (im(C_k) + im(C_{N-k}))
im(B_k) =-1/2 (re(C_k) - re(C_{N-k}))
*/
template<typename Real> void ComputeDctMatrix(Matrix<Real> *M) {
//KALDI_ASSERT(M->NumRows() == M->NumCols());
MatrixIndexT K = M->NumRows();
MatrixIndexT N = M->NumCols();
KALDI_ASSERT(K > 0);
KALDI_ASSERT(N > 0);
Real normalizer = std::sqrt(1.0 / static_cast<Real>(N)); // normalizer for
// X_0.
for (MatrixIndexT j = 0; j < N; j++) (*M)(0, j) = normalizer;
normalizer = std::sqrt(2.0 / static_cast<Real>(N)); // normalizer for other
// elements.
for (MatrixIndexT k = 1; k < K; k++)
for (MatrixIndexT n = 0; n < N; n++)
(*M)(k, n) = normalizer
* std::cos( static_cast<double>(M_PI)/N * (n + 0.5) * k );
}
template void ComputeDctMatrix(Matrix<float> *M);
template void ComputeDctMatrix(Matrix<double> *M);
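// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers: build an orthonormal DCT-II matrix and apply it to a
// feature vector, as done when computing cepstra from filterbank energies.
//
//   Matrix<float> dct(13, 23);  // keep 13 coefficients of a 23-dim input
//   ComputeDctMatrix(&dct);
//   Vector<float> fbank(23), ceps(13);
//   fbank.SetRandn();
//   ceps.AddMatVec(1.0, dct, kNoTrans, fbank, 0.0);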
template<typename Real>
void MatrixExponential<Real>::Clear() {
N_ = 0;
P_.Resize(0, 0);
B_.clear();
powers_.clear();
}
template<typename Real>
void MatrixExponential<Real>::Compute(const MatrixBase<Real> &M,
MatrixBase<Real> *X) {
// does *X = exp(M)
KALDI_ASSERT(M.NumRows() == M.NumCols());
Clear();
N_ = ComputeN(M);
MatrixIndexT dim = M.NumRows();
P_.Resize(dim, dim);
P_.CopyFromMat(M);
P_.Scale(std::pow(static_cast<Real>(0.5),
static_cast<Real>(N_)));
// would need to keep this code in sync with ComputeN().
B_.resize(N_+1);
B_[0].Resize(dim, dim);
ComputeTaylor(P_, &(B_[0])); // set B_[0] = exp(P_) - I (series starts at n = 1)
for (MatrixIndexT i = 1; i <= N_; i++) {
// implement the recursion B_[k] = 2 B_[k-1] + B_[k-1]^2.
B_[i].Resize(dim, dim); // zeros it.
B_[i].AddMat(2.0, B_[i-1], kNoTrans);
B_[i].AddMatMat(1.0, B_[i-1], kNoTrans, B_[i-1], kNoTrans, 1.0);
}
KALDI_ASSERT(X->NumRows() == dim && X->NumCols() == dim);
(*X).CopyFromMat(B_[N_]); // last one plus the unit matrix is the answer.
// add in the unit matrix.
for (MatrixIndexT i = 0; i < dim; i++)
(*X)(i, i) += 1.0;
}
template<typename Real>
void MatrixExponential<Real>::Compute(const SpMatrix<Real> &M,
SpMatrix<Real> *X) {
Matrix<Real> Mfull(M), Xfull(M.NumRows(), M.NumCols());
Compute(Mfull, &Xfull);
X->CopyFromMat(Xfull);
}
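// A hedged usage sketch (not in the original source), assuming the usual
// Kaldi headers: compute X = exp(M) by the scaling-and-squaring Taylor
// method implemented above.
//
//   Matrix<double> m(3, 3), x(3, 3);
//   m.SetRandn();
//   MatrixExponential<double> mexp;
//   mexp.Compute(m, &x);  // x = exp(m)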
template<typename Real>
MatrixIndexT MatrixExponential<Real>::ComputeN(const MatrixBase<Real> &M) {
// Computes the power of two we want to use. Aim to get
// AScaled.FrobeniusNorm() < 1/10.
Real norm = M.FrobeniusNorm();
Real max_norm = 0.1;
if (norm > 1000) {
KALDI_WARN << "Trying to compute exponent of very high-norm matrix: norm = "
<< norm;
}
MatrixIndexT N = 0;
while (norm > max_norm) { norm *= 0.5; N++; }
return N;
}
template<typename Real>
void MatrixExponential<Real>::ComputeTaylor(const MatrixBase<Real> &P, MatrixBase<Real> *B0) {
KALDI_ASSERT(P.FrobeniusNorm() < 1.001); // should actually be << 1
// for this to work fast enough.
KALDI_ASSERT(P.NumRows() == P.NumCols());
MatrixIndexT dim = P.NumRows();
KALDI_ASSERT(B0->NumRows() == dim && B0->NumCols() == dim);
B0->SetZero();
MatrixIndexT n = 1, n_factorial = 1, max_iter = 10000;
Matrix<Real> Pn(P), // Pn = P^n
B0cur(dim, dim),
tmp(dim, dim); // use B0cur to test whether B0 changed.
std::vector<Matrix<Real>* > powers_tmp; // list of stored powers of P, starting
// from 2 and up to 1 before the last one we used.
while (n < max_iter) { // have an arbitrary very large limit on #iters
B0cur.AddMat(1.0 / n_factorial, Pn);
if (B0cur.Equal(*B0)) // was no change [already very small]
break;
B0->CopyFromMat(B0cur); // Keep B0 in sync with B0cur.
tmp.AddMatMat(1.0, P, kNoTrans, Pn, kNoTrans, 0.0); // tmp = P * P^n
n++;
n_factorial *= n;
Pn.CopyFromMat(tmp); // copy back to P^n
powers_tmp.push_back(new Matrix<Real>(tmp));
}
if (n == max_iter)
KALDI_WARN << "Reached maximum iteration computing Taylor expansion of matrix [serious problem]";
powers_.resize(powers_tmp.size());
for (MatrixIndexT i = 0;
i < static_cast<MatrixIndexT>(powers_tmp.size()); i++) {
powers_[i].Swap(powers_tmp[i]);
delete powers_tmp[i];
}
}
template<typename Real>
void MatrixExponential<Real>::Backprop(const MatrixBase<Real> &hX,
MatrixBase<Real> *hM) const {
MatrixIndexT dim = P_.NumRows();
KALDI_ASSERT(hX.NumRows() == dim && hX.NumCols() == dim
&& hM->NumRows() == dim && hM->NumCols() == dim);
Matrix<Real> dB(hX);
// dB represents the gradient df/dB_[i] for the current
// value of i, which decreases from N_ to zero (currently it's N_)
for (MatrixIndexT i = N_-1;
i != (static_cast<MatrixIndexT>(0)-static_cast<MatrixIndexT>(1)); // i >= 0
i--) {
// Propagate back from df/dB_[i+1] to df/dB_[i].
// Using B_[i+1] = 2 B_[i] + B_[i]*B_[i],
// df/dB_[i] = 2*(df/dB_[i+1]) + B_[i]^T (df/dB_[i+1])
// + (df/dB_[i+1])B_[i]^T
// note, here we use the perhaps slightly wrong convention that
// df/dB_[i] is not transposed, i.e. its i, j'th element is the derivative
// of f w.r.t. the i, j'th element of B_[i].
Matrix<Real> prev_dB(dB);
prev_dB.Scale(2.0); // the term 2*(df/dB_[i+1])
// add in the term B_[i]^T (df/dB_[i+1])
prev_dB.AddMatMat(1.0, B_[i], kTrans, dB, kNoTrans, 1.0);
// add in the term (df/dB_[i+1]) B_[i]^T
prev_dB.AddMatMat(1.0, dB, kNoTrans, B_[i], kTrans, 1.0);
dB.CopyFromMat(prev_dB);
}
// currently dB is the gradient df/dB_[0] (where B_[0] = exp(P_) - I).
// We backprop this through the Taylor series and get df/dP_.
BackpropTaylor(dB, hM); // at this point, hM is temporarily used to store
// df/dP_.
hM->Scale(std::pow(static_cast<Real>(0.5),
static_cast<Real>(N_))); // Since P_ = M * std::pow(0.5, N_).
}
template<typename Real>
void MatrixExponential<Real>::Backprop(const SpMatrix<Real> &hX,
SpMatrix<Real> *hM) const {
Matrix<Real> hXfull(hX), hMfull(hX.NumRows(), hX.NumCols());
Backprop(hXfull, &hMfull);
hM->CopyFromMat(hMfull);
}
template<typename Real>
void MatrixExponential<Real>::BackpropTaylor(const MatrixBase<Real> &hB0,
MatrixBase<Real> *hP) const {
// Backprop through the Taylor-series computation.
// the original computation was:
// X = \sum_{i = 1}^n (1/i!) P^i
// Also you can see that X is actually the exponential minus I, since we start
// the series from 1; this doesn't affect the derivatives.
// The variable \hat{B}_0 (hB0) represents df/dX, where f is a scalar function.
// Note that there is no transpose in our notation for derivatives: hB0(i, j) is
// df/dB0(i, j).
// Imagine that f is \tr(hB0^T B0) (since this varies linearly with B0 in the same
// way that the real f does). We want d/dP \tr(hB0^T B0); call this hP (for \hat{P}).
// hP = d/dP \tr( \sum_{i = 1}^n (1/i!) hB0^T P^i )
// Taking each individual term P in this expression and treating the others as constants,
// and noting that whenever we have something like tr(A B), then B^T is the derivative
// of this expression w.r.t. A (in our notation),
// hP = hB0 + (1/2!) (hB0 P^T + P^T hB0) + (1/3!) (hB0 P^T P^T + P^T hB0 P^T + P^T P^T hB0)
// + (1/4!) .... (1)
// We can compute this with the following recursion by which we get each of the terms
// in this series in turn:
// hP_1 <-- hB0
// hP <-- hP_1
// for n = 2 .. infinity: # we only carry this on for as many terms as we used
// # in the original expansion
// hP_n <-- (1/n) hP_{n-1} P^T + (1/n!) (P^T)^(n-1) hP_{n-1}
// hP <-- hP + hP_n
const Matrix<Real> &P(P_);
MatrixIndexT dim = P.NumRows();
KALDI_ASSERT(P.NumCols() == dim && hB0.NumRows() == dim && hB0.NumCols() == dim
&& hP->NumRows() == dim && hP->NumCols() == dim);
hP->SetZero();
Matrix<Real> hPn1(hB0),
hPn(dim, dim);
hP->AddMat(1.0, hPn1); // first term in (1): df/dP += hB0
MatrixIndexT n = 2, nfact = 2;
// Now do n = 2 in comment above (this is special case,
// since we did not store the 1st power in powers_).
hPn.AddMatMat(1.0/n, hPn1, kNoTrans, P, kTrans, 0.0); // hP_n <-- (1/n) hP_{n-1} P^T
hPn.AddMatMat(1.0/nfact, P, kTrans, hB0, kNoTrans, 1.0); // hP_n += (1/n!) P^T^(n-1) hB0
hP->AddMat(1.0, hPn); // add in second term in (1)
hPn.Swap(&hPn1);
for (MatrixIndexT i = 0;
i < static_cast<MatrixIndexT>(powers_.size()); i++) {
n++;
nfact *= n;
// i corresponds to (n-1)-2, and powers_[i] contains the n-1'th power of P.
// next line: hP_n <-- (1/n) hP_{n-1} P^T
hPn.AddMatMat(1.0/n, hPn1, kNoTrans, P, kTrans, 0.0);
// next line: hP_n += (1/n!) P^T^(n-1) hB0
hPn.AddMatMat(1.0/nfact, powers_[i], kTrans, hB0, kNoTrans, 1.0);
hP->AddMat(1.0, hPn); // add in n'th term in (1)
hPn.Swap(&hPn1);
}
}
template class MatrixExponential<float>;
template class MatrixExponential<double>;
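// Editorial sketch (not part of the original file): a finite-difference check
// of Backprop() for f(M) = tr(hX^T exp(M)). The function name is illustrative,
// but only calls defined in this library are used.
void CheckMatrixExponentialBackprop() {
MatrixIndexT dim = 3;
Matrix<double> M(dim, dim), X(dim, dim), hX(dim, dim), hM(dim, dim);
M.SetRandn();
M.Scale(0.1); // keep the norm small.
hX.SetRandn(); // an arbitrary output gradient df/dX.
MatrixExponential<double> mexp;
mexp.Compute(M, &X); // X = exp(M).
mexp.Backprop(hX, &hM); // hM = df/dM.
double delta = 1.0e-04;
Matrix<double> M2(M), X2(dim, dim);
M2(0, 1) += delta; // perturb one element of M.
mexp.Compute(M2, &X2);
double f2 = TraceMatMat(hX, X2, kTrans), // tr(hX X2^T) = tr(hX^T X2).
f1 = TraceMatMat(hX, X, kTrans);
// The finite difference (f2 - f1) / delta should be close to hM(0, 1).
KALDI_ASSERT(std::abs((f2 - f1) / delta - hM(0, 1)) < 1.0e-02);
}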
template<typename Real>
void ComputePca(const MatrixBase<Real> &X,
MatrixBase<Real> *U,
MatrixBase<Real> *A,
bool print_eigs,
bool exact) {
// Note that some of these matrices may be transposed w.r.t. the
// way it's most natural to describe them in math... it's the rows
// of X and U that correspond to the (data-points, basis elements).
MatrixIndexT N = X.NumRows(), D = X.NumCols();
// N = #points, D = feature dim.
KALDI_ASSERT(U != NULL && U->NumCols() == D);
MatrixIndexT G = U->NumRows(); // # of retained basis elements.
KALDI_ASSERT(A == NULL || (A->NumRows() == N && A->NumCols() == G));
KALDI_ASSERT(G <= N && G <= D);
if (D < N) { // Do conventional PCA.
SpMatrix<Real> Msp(D); // Matrix of outer products.
Msp.AddMat2(1.0, X, kTrans, 0.0); // M <-- X^T X
Matrix<Real> Utmp;
Vector<Real> l;
if (exact) {
Utmp.Resize(D, D);
l.Resize(D);
//Matrix<Real> M(Msp);
//M.DestructiveSvd(&l, &Utmp, NULL);
Msp.Eig(&l, &Utmp);
} else {
Utmp.Resize(D, G);
l.Resize(G);
Msp.TopEigs(&l, &Utmp);
}
SortSvd(&l, &Utmp);
for (MatrixIndexT g = 0; g < G; g++)
U->Row(g).CopyColFromMat(Utmp, g);
if (print_eigs)
KALDI_LOG << (exact ? "" : "Retained ")
<< "PCA eigenvalues are " << l;
if (A != NULL)
A->AddMatMat(1.0, X, kNoTrans, *U, kTrans, 0.0);
} else { // Do inner-product PCA.
SpMatrix<Real> Nsp(N); // Matrix of inner products.
Nsp.AddMat2(1.0, X, kNoTrans, 0.0); // M <-- X X^T
Matrix<Real> Vtmp;
Vector<Real> l;
if (exact) {
Vtmp.Resize(N, N);
l.Resize(N);
Matrix<Real> Nmat(Nsp);
Nmat.DestructiveSvd(&l, &Vtmp, NULL);
} else {
Vtmp.Resize(N, G);
l.Resize(G);
Nsp.TopEigs(&l, &Vtmp);
}
MatrixIndexT num_zeroed = 0;
for (MatrixIndexT g = 0; g < G; g++) {
if (l(g) < 0.0) {
KALDI_WARN << "In PCA, setting element " << l(g) << " to zero.";
l(g) = 0.0;
num_zeroed++;
}
}
SortSvd(&l, &Vtmp); // Make sure zero elements are last; this
// is necessary for OrthogonalizeRows() to work properly later.
Vtmp.Transpose(); // So the eigenvectors are the rows.
for (MatrixIndexT g = 0; g < G; g++) {
Real sqrtlg = sqrt(l(g));
if (l(g) != 0.0) {
U->Row(g).AddMatVec(1.0 / sqrtlg, X, kTrans, Vtmp.Row(g), 0.0);
} else {
U->Row(g).SetZero();
(*U)(g, g) = 1.0; // arbitrary direction. Will later orthogonalize.
}
if (A != NULL)
for (MatrixIndexT n = 0; n < N; n++)
(*A)(n, g) = sqrtlg * Vtmp(g, n);
}
// Now orthogonalize. This is mainly useful in
// case there were zero eigenvalues, but we do it
// for all of them.
U->OrthogonalizeRows();
if (print_eigs)
KALDI_LOG << "(inner-product) PCA eigenvalues are " << l;
}
}
template
void ComputePca(const MatrixBase<float> &X,
MatrixBase<float> *U,
MatrixBase<float> *A,
bool print_eigs,
bool exact);
template
void ComputePca(const MatrixBase<double> &X,
MatrixBase<double> *U,
MatrixBase<double> *A,
bool print_eigs,
bool exact);
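// Editorial usage sketch for ComputePca() (the function name below is
// illustrative): keep the top 5 directions of 100 20-dimensional points via
// the inexact (Lanczos) path, and get the per-point coefficients.
void ExampleComputePca() {
Matrix<double> X(100, 20);
X.SetRandn();
Matrix<double> U(5, 20), // one retained basis element per row.
A(100, 5); // coefficients of each point in that basis.
ComputePca(X, &U, &A, true, false); // print_eigs = true, exact = false.
// Rows of U are orthonormal, and A * U approximately reconstructs X.
}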
// Added by Dan, Feb. 13 2012.
// This function does: *plus += max(0, alpha * a b^T),
// *minus += max(0, -(alpha * a b^T)), elementwise.
template<typename Real>
void AddOuterProductPlusMinus(Real alpha,
const VectorBase<Real> &a,
const VectorBase<Real> &b,
MatrixBase<Real> *plus,
MatrixBase<Real> *minus) {
KALDI_ASSERT(a.Dim() == plus->NumRows() && b.Dim() == plus->NumCols()
&& a.Dim() == minus->NumRows() && b.Dim() == minus->NumCols());
int32 nrows = a.Dim(), ncols = b.Dim(), pskip = plus->Stride() - ncols,
mskip = minus->Stride() - ncols;
const Real *adata = a.Data(), *bdata = b.Data();
Real *plusdata = plus->Data(), *minusdata = minus->Data();
for (int32 i = 0; i < nrows; i++) {
const Real *btmp = bdata;
Real multiple = alpha * *adata;
if (multiple > 0.0) {
for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
if (*btmp > 0.0) *plusdata += multiple * *btmp;
else *minusdata -= multiple * *btmp;
}
} else {
for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
if (*btmp < 0.0) *plusdata += multiple * *btmp;
else *minusdata -= multiple * *btmp;
}
}
plusdata += pskip;
minusdata += mskip;
adata++;
}
}
// Instantiate template
template
void AddOuterProductPlusMinus<float>(float alpha,
const VectorBase<float> &a,
const VectorBase<float> &b,
MatrixBase<float> *plus,
MatrixBase<float> *minus);
template
void AddOuterProductPlusMinus<double>(double alpha,
const VectorBase<double> &a,
const VectorBase<double> &b,
MatrixBase<double> *plus,
MatrixBase<double> *minus);
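// Editorial sketch checking the contract above (function name illustrative):
// after the call, plus and minus are elementwise non-negative and
// plus - minus == alpha * a b^T.
void ExampleAddOuterProductPlusMinus() {
Vector<double> a(4), b(5);
a.SetRandn();
b.SetRandn();
Matrix<double> plus(4, 5), minus(4, 5), check(4, 5);
AddOuterProductPlusMinus(2.0, a, b, &plus, &minus);
check.AddVecVec(2.0, a, b); // check = 2.0 * a b^T.
check.AddMat(-1.0, plus); // subtract the positive part...
check.AddMat(1.0, minus); // ...add back the negative part: now ~0.
KALDI_ASSERT(check.FrobeniusNorm() < 1.0e-10);
}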
} // end namespace kaldi


@@ -1,230 +0,0 @@
// matrix/matrix-functions.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky;
// Yanmin Qian; 1991 Henrique (Rico) Malvar (*)
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from his book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_H_
#define KALDI_MATRIX_MATRIX_FUNCTIONS_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_funcs_misc
/// @{
/** The function ComplexFft does an Fft on the vector argument v.
v is a vector of even dimension, interpreted for both input
and output as a vector of complex numbers i.e.
\f[ v = ( re_0, im_0, re_1, im_1, ... ) \f]
The dimension of v must be a power of 2.
If "forward == true" this routine does the Discrete Fourier Transform
(DFT), i.e.:
\f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] \exp( -2 \pi i m n / N ) \f]
If "backward" it does the Inverse Discrete Fourier Transform (IDFT)
*WITHOUT THE FACTOR 1/N*,
i.e.:
\f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] \exp( 2 \pi i m n / N ) \f]
[note the sign difference on the 2 pi for the backward one.]
Note that this is the definition of the FT given in most texts, but
it differs from the Numerical Recipes version in which the forward
and backward algorithms are flipped.
Note that you would have to multiply by 1/N after the IDFT to get
back to where you started from. We don't do this because
in some contexts, the transform is made symmetric by multiplying
by sqrt(N) in both passes. The user can do this by themselves.
*/
template<typename Real> void ComplexFft (VectorBase<Real> *v, bool forward, Vector<Real> *tmp_work = NULL);
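/// Editorial usage sketch (not part of the original header): round-trip
/// four complex points through the transform.
///   Vector<double> v(8);      // (re, im) interleaved; N = 4 is a power of 2.
///   v.SetRandn();
///   ComplexFft(&v, true);     // forward DFT, in place.
///   ComplexFft(&v, false);    // inverse DFT, without the 1/N factor.
///   v.Scale(1.0 / 4);         // divide by N to recover the original data.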
/// ComplexFt is the same as ComplexFft but it implements the Fourier
/// transform in an inefficient way. It is mainly included for testing purposes.
/// See comment for ComplexFft to describe the input and outputs and what it does.
template<typename Real> void ComplexFt (const VectorBase<Real> &in,
VectorBase<Real> *out, bool forward);
/// RealFft is a fourier transform of real inputs. Internally it uses
/// ComplexFft. The input dimension N must be even. If forward == true,
/// it transforms from a sequence of N real points to its complex fourier
/// transform; otherwise it goes in the reverse direction. If you call it
/// in the forward and then reverse direction and multiply by 1.0/N, you
/// will get back the original data.
/// The interpretation of the complex-FFT data is as follows: the array
/// is a sequence of complex numbers C_n of length N/2 with (real, im) format,
/// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...].
template<typename Real> void RealFft (VectorBase<Real> *v, bool forward);
/// RealFftInefficient has the same input and output format as RealFft above,
/// but it is an inefficient implementation included for testing purposes.
template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward);
/// ComputeDctMatrix computes a matrix corresponding to the DCT, such that
/// M * v equals the DCT of vector v. M must be square at input.
/// This is the type = III DCT with normalization, corresponding to the
/// following equations, where x is the signal and X is the DCT:
/// X_0 = sqrt(1/N) \sum_{n = 0}^{N-1} x_n
/// X_k = sqrt(2/N) \sum_{n = 0}^{N-1} x_n cos( \pi/N (n + 1/2) k ), for k > 0
/// This matrix's transpose is its own inverse, so transposing this
/// matrix will give the inverse DCT.
/// Caution: the type III DCT is generally known as the "inverse DCT" (with the
/// type II being the actual DCT), so this function is somewhat mis-named. It
/// was probably done this way for HTK compatibility. We don't change it
/// because it was this way from the start and changing it would affect the
/// feature generation.
template<typename Real> void ComputeDctMatrix(Matrix<Real> *M);
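/// Editorial usage sketch: apply the DCT matrix to a signal and invert with
/// the transpose, relying on the orthonormality noted above.
///   MatrixIndexT N = 13;
///   Matrix<double> M(N, N);   // must be square at input.
///   ComputeDctMatrix(&M);
///   Vector<double> x(N), y(N), x2(N);
///   x.SetRandn();
///   y.AddMatVec(1.0, M, kNoTrans, x, 0.0);  // y = DCT of x.
///   x2.AddMatVec(1.0, M, kTrans, y, 0.0);   // x2 == x, up to rounding.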
/// ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
Real *b_re, Real *b_im);
/// ComplexAddProduct implements, inline, the complex operation c += (a * b).
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
const Real &b_re, const Real &b_im,
Real *c_re, Real *c_im);
/// ComplexImExp implements a <-- exp(i x), inline.
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im);
// This class allows you to compute the matrix exponential function
// B = I + A + 1/2! A^2 + 1/3! A^3 + ...
// This method is most accurate where the result is of the same order of
// magnitude as the unit matrix (it will typically not work well when
// the answer has almost-zero eigenvalues or is close to zero).
// It also provides a function that allows you to back-propagate the
// derivative of a scalar function through this calculation.
template<typename Real>
class MatrixExponential {
public:
MatrixExponential() { }
void Compute(const MatrixBase<Real> &M, MatrixBase<Real> *X); // does *X = exp(M)
// Version for symmetric matrices (it just copies to full matrix).
void Compute(const SpMatrix<Real> &M, SpMatrix<Real> *X); // does *X = exp(M)
void Backprop(const MatrixBase<Real> &hX, MatrixBase<Real> *hM) const; // Propagates
// the gradient of a scalar function f backwards through this operation, i.e.:
// if the parameter hX represents df/dX (with no transpose, so element i, j of hX
// is the derivative of f w.r.t. X(i, j)), it sets hM to df/dM, again with no
// transpose (of course, only the part thereof that comes through the effect of
// M on X). This applies to the values of M and X used in the most recent
// call to Compute().
// Version for symmetric matrices (it just copies to full matrix).
void Backprop(const SpMatrix<Real> &hX, SpMatrix<Real> *hM) const;
private:
void Clear();
static MatrixIndexT ComputeN(const MatrixBase<Real> &M);
// This is intended for matrices P with small norms: compute B_0 = exp(P) - I.
// Keeps adding terms in the Taylor series till there is no further
// change in the result. Stores some of the powers of P in powers_.
void ComputeTaylor(const MatrixBase<Real> &P, MatrixBase<Real> *B0);
// Backprop through the Taylor-series computation above.
// note: hX is \hat{X} in the math; hM is \hat{M} in the math.
void BackpropTaylor(const MatrixBase<Real> &hX,
MatrixBase<Real> *hM) const;
Matrix<Real> P_; // Equals M * 2^(-N_)
std::vector<Matrix<Real> > B_; // B_[0] = exp(P_) - I,
// B_[k] = 2 B_[k-1] + B_[k-1]^2 [k > 0],
// ( = exp(P_)^(2^k) - I )
// goes from 0..N_ [size N_+1].
std::vector<Matrix<Real> > powers_; // powers (>1) of P_ stored here,
// up to all but the last one used in the Taylor expansion (this is the
// last one we need in the backprop). The index is the power minus 2.
MatrixIndexT N_; // Power N_ >= 0 such that P_ = M * 2^(-N_);
// we choose it so that P_ has a sufficiently small norm
// that the Taylor series will converge fast.
};
/**
ComputePCA does a PCA computation, using either outer products
or inner products, whichever is more efficient. Let D be
the dimension of the data points, N be the number of data
points, and G be the PCA dimension we want to retain. We assume
G <= N and G <= D.
@param X [in] An N x D matrix. Each row of X is a point x_i.
@param U [out] A G x D matrix. Each row of U is a basis element u_i.
@param A [out] An N x G matrix, or NULL. Each row of A is a set of coefficients
in the basis for a point x_i, so A(i, g) is the coefficient of u_g
in x_i.
@param print_eigs [in] If true, prints out diagnostic information about the
eigenvalues.
@param exact [in] If true, does the exact computation; if false, does
a much faster (but almost exact) computation based on the Lanczos
method.
*/
template<typename Real>
void ComputePca(const MatrixBase<Real> &X,
MatrixBase<Real> *U,
MatrixBase<Real> *A,
bool print_eigs = false,
bool exact = true);
// This function does: *plus += max(0, alpha * a b^T),
// *minus += max(0, -(alpha * a b^T)), elementwise.
template<typename Real>
void AddOuterProductPlusMinus(Real alpha,
const VectorBase<Real> &a,
const VectorBase<Real> &b,
MatrixBase<Real> *plus,
MatrixBase<Real> *minus);
template<typename Real1, typename Real2>
inline void AssertSameDim(const MatrixBase<Real1> &mat1, const MatrixBase<Real2> &mat2) {
KALDI_ASSERT(mat1.NumRows() == mat2.NumRows()
&& mat1.NumCols() == mat2.NumCols());
}
/// @} end of "addtogroup matrix_funcs_misc"
} // end namespace kaldi
#include "matrix/matrix-functions-inl.h"
#endif

The diff for this file is not shown because of its large size.


@@ -1,37 +0,0 @@
// matrix/matrix-lib.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// Include everything from this directory.
// These files include other stuff that we need.
#ifndef KALDI_MATRIX_MATRIX_LIB_H_
#define KALDI_MATRIX_MATRIX_LIB_H_
#include "matrix/cblas-wrappers.h"
#include "base/kaldi-common.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/tp-matrix.h"
#include "matrix/matrix-functions.h"
#include "matrix/srfft.h"
#include "matrix/compressed-matrix.h"
#include "matrix/optimization.h"
#endif


@@ -1,425 +0,0 @@
// matrix/optimization.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "matrix/optimization.h"
namespace kaldi {
// Below, N&W refers to Nocedal and Wright, "Numerical Optimization", 2nd Ed.
template<typename Real>
OptimizeLbfgs<Real>::OptimizeLbfgs(const VectorBase<Real> &x,
const LbfgsOptions &opts):
opts_(opts), k_(0), computation_state_(kBeforeStep), H_was_set_(false) {
KALDI_ASSERT(opts.m > 0); // number of vectors to store.
MatrixIndexT dim = x.Dim();
KALDI_ASSERT(dim > 0);
x_ = x; // this is the value of x_k
new_x_ = x; // this is where we'll evaluate the function next.
deriv_.Resize(dim);
temp_.Resize(dim);
data_.Resize(2 * opts.m, dim);
rho_.Resize(opts.m);
// Just set f_ to some invalid value, as we haven't yet set it.
f_ = (opts.minimize ? 1 : -1 ) * std::numeric_limits<Real>::infinity();
best_f_ = f_;
best_x_ = x_;
}
template<typename Real>
Real OptimizeLbfgs<Real>::RecentStepLength() const {
size_t n = step_lengths_.size();
if (n == 0) return std::numeric_limits<Real>::infinity();
else {
if (n >= 2 && step_lengths_[n-1] == 0.0 && step_lengths_[n-2] == 0.0)
return 0.0; // two zeros in a row means repeated restarts, which is
// a loop. Short-circuit this by returning zero.
Real avg = 0.0;
for (size_t i = 0; i < n; i++)
avg += step_lengths_[i] / n;
return avg;
}
}
template<typename Real>
void OptimizeLbfgs<Real>::ComputeHifNeeded(const VectorBase<Real> &gradient) {
if (k_ == 0) {
if (H_.Dim() == 0) {
// H was never set up. Set it up for the first time.
Real learning_rate;
if (opts_.first_step_length > 0.0) { // this takes
// precedence over first_step_learning_rate, if set.
// We are setting up H for the first time.
Real gradient_length = gradient.Norm(2.0);
learning_rate = (gradient_length > 0.0 ?
opts_.first_step_length / gradient_length :
1.0);
} else if (opts_.first_step_impr > 0.0) {
Real gradient_length = gradient.Norm(2.0);
learning_rate = (gradient_length > 0.0 ?
opts_.first_step_impr / (gradient_length * gradient_length) :
1.0);
} else {
learning_rate = opts_.first_step_learning_rate;
}
H_.Resize(x_.Dim());
KALDI_ASSERT(learning_rate > 0.0);
H_.Set(opts_.minimize ? learning_rate : -learning_rate);
}
} else { // k_ > 0
if (!H_was_set_) { // The user never specified an approximate
// diagonal inverse Hessian.
// Set it using formula 7.20: H_k^{(0)} = \gamma_k I, where
// \gamma_k = s_{k-1}^T y_{k-1} / y_{k-1}^T y_{k-1}
SubVector<Real> y_km1 = Y(k_-1);
double gamma_k = VecVec(S(k_-1), y_km1) / VecVec(y_km1, y_km1);
if (KALDI_ISNAN(gamma_k) || KALDI_ISINF(gamma_k)) {
KALDI_WARN << "NaN encountered in L-BFGS (already converged?)";
gamma_k = (opts_.minimize ? 1.0 : -1.0);
}
H_.Set(gamma_k);
}
}
}
// This represents the first 2 lines of Algorithm 7.5 (N&W), which
// in fact is mostly a call to Algorithm 7.4.
// Note: this is valid whether we are minimizing or maximizing.
template<typename Real>
void OptimizeLbfgs<Real>::ComputeNewDirection(Real function_value,
const VectorBase<Real> &gradient) {
KALDI_ASSERT(computation_state_ == kBeforeStep);
SignedMatrixIndexT m = M(), k = k_;
ComputeHifNeeded(gradient);
// The rest of this is computing p_k <-- - H_k \nabla f_k using Algorithm
// 7.4 of N&W.
Vector<Real> &q(deriv_), &r(new_x_); // Use deriv_ as a temporary place to put
// q, and new_x_ as a temporary place to put r.
// The if-statement below is just to get rid of spurious warnings from
// valgrind about memcpy source and destination overlap, since sometimes q and
// gradient are the same variable.
if (&q != &gradient)
q.CopyFromVec(gradient); // q <-- \nabla f_k.
Vector<Real> alpha(m);
// for i = k - 1, k - 2, ... k - m
for (SignedMatrixIndexT i = k - 1;
i >= std::max(k - m, static_cast<SignedMatrixIndexT>(0));
i--) {
alpha(i % m) = rho_(i % m) * VecVec(S(i), q); // \alpha_i <-- \rho_i s_i^T q.
q.AddVec(-alpha(i % m), Y(i)); // q <-- q - \alpha_i y_i
}
r.SetZero();
r.AddVecVec(1.0, H_, q, 0.0); // r <-- H_k^{(0)} q.
// for k = k - m, k - m + 1, ... , k - 1
for (SignedMatrixIndexT i = std::max(k - m, static_cast<SignedMatrixIndexT>(0));
i < k;
i++) {
Real beta = rho_(i % m) * VecVec(Y(i), r); // \beta <-- \rho_i y_i^T r
r.AddVec(alpha(i % m) - beta, S(i)); // r <-- r + s_i (\alpha_i - \beta)
}
{ // TEST. Note, -r will be the direction.
Real dot = VecVec(gradient, r);
if ((opts_.minimize && dot < 0) || (!opts_.minimize && dot > 0))
KALDI_WARN << "Step direction has the wrong sign! Routine will fail.";
}
// Now we're out of Alg. 7.4 and back into Alg. 7.5.
// Alg. 7.4 returned r (using new_x_ as the location), and with \alpha_k = 1
// as the initial guess, we're setting x_{k+1} = x_k + \alpha_k p_k, with
// p_k = -r [hence the statement new_x_.Scale(-1.0)].
// This is the first place we'll get the user to evaluate the function;
// any backtracking (or acceptance of that step) occurs inside StepSizeIteration.
// We're still within iteration k; we haven't yet finalized the step size.
new_x_.Scale(-1.0);
new_x_.AddVec(1.0, x_);
if (&deriv_ != &gradient)
deriv_.CopyFromVec(gradient);
f_ = function_value;
d_ = opts_.d;
num_wolfe_i_failures_ = 0;
num_wolfe_ii_failures_ = 0;
last_failure_type_ = kNone;
computation_state_ = kWithinStep;
}
template<typename Real>
bool OptimizeLbfgs<Real>::AcceptStep(Real function_value,
const VectorBase<Real> &gradient) {
// Save s_k = x_{k+1} - x_{k}, and y_k = \nabla f_{k+1} - \nabla f_k.
SubVector<Real> s = S(k_), y = Y(k_);
s.CopyFromVec(new_x_);
s.AddVec(-1.0, x_); // s = new_x_ - x_.
y.CopyFromVec(gradient);
y.AddVec(-1.0, deriv_); // y = gradient - deriv_.
// Warning: there is a division in the next line. This could
// generate inf or nan, but this wouldn't necessarily be an error
// at this point because for zero step size or derivative we should
// terminate the iterations. But this is up to the calling code.
Real prod = VecVec(y, s);
rho_(k_ % opts_.m) = 1.0 / prod;
Real len = s.Norm(2.0);
if ((opts_.minimize && prod <= 1.0e-20) || (!opts_.minimize && prod >= -1.0e-20)
|| len == 0.0)
return false; // This will force restart.
KALDI_VLOG(3) << "Accepted step; length was " << len
<< ", prod was " << prod;
RecordStepLength(len);
// store x_{k+1} and the function value f_{k+1}.
x_.CopyFromVec(new_x_);
f_ = function_value;
k_++;
return true; // We successfully accepted the step.
}
template<typename Real>
void OptimizeLbfgs<Real>::RecordStepLength(Real s) {
step_lengths_.push_back(s);
if (step_lengths_.size() > static_cast<size_t>(opts_.avg_step_length))
step_lengths_.erase(step_lengths_.begin(), step_lengths_.begin() + 1);
}
template<typename Real>
void OptimizeLbfgs<Real>::Restart(const VectorBase<Real> &x,
Real f,
const VectorBase<Real> &gradient) {
// Note: we will consider restarting (the transition of x_ -> x)
// as a step, even if it has zero step size. This is necessary in
// order for convergence to be detected.
{
Vector<Real> &diff(temp_);
diff.CopyFromVec(x);
diff.AddVec(-1.0, x_);
RecordStepLength(diff.Norm(2.0));
}
k_ = 0; // Restart the iterations! [But note that the Hessian,
// whatever it was, stays as before.]
if (&x_ != &x)
x_.CopyFromVec(x);
new_x_.CopyFromVec(x);
f_ = f;
computation_state_ = kBeforeStep;
ComputeNewDirection(f, gradient);
}
template<typename Real>
void OptimizeLbfgs<Real>::StepSizeIteration(Real function_value,
const VectorBase<Real> &gradient) {
KALDI_VLOG(3) << "In step size iteration, function value changed "
<< f_ << " to " << function_value;
// We're in some part of the backtracking, and the user is providing
// the objective function value and gradient.
// We're checking two conditions: Wolfe i) [the Armijo rule] and
// Wolfe ii).
// The Armijo rule (when minimizing) is:
// f(x_k + \alpha_k p_k) <= f(x_k) + c_1 \alpha_k p_k^T \nabla f(x_k), where
// \nabla means the derivative.
// Below, "temp" is the RHS of this equation, where (\alpha_k p_k) equals
// (new_x_ - x_); we don't store \alpha or p_k separately, they are implicit
// as the difference new_x_ - x_.
// Below, pf is \alpha_k p_k^T \nabla f(x_k).
Real pf = VecVec(new_x_, deriv_) - VecVec(x_, deriv_);
Real temp = f_ + opts_.c1 * pf;
bool wolfe_i_ok;
if (opts_.minimize) wolfe_i_ok = (function_value <= temp);
else wolfe_i_ok = (function_value >= temp);
// Wolfe condition ii) can be written as:
// p_k^T \nabla f(x_k + \alpha_k p_k) >= c_2 p_k^T \nabla f(x_k)
// p2f equals \alpha_k p_k^T \nabla f(x_k + \alpha_k p_k), where
// (\alpha_k p_k^T) is (new_x_ - x_).
// Note that in our version of Wolfe condition (ii) we have an extra
// factor alpha, which doesn't affect anything.
Real p2f = VecVec(new_x_, gradient) - VecVec(x_, gradient);
//eps = (sizeof(Real) == 4 ? 1.0e-05 : 1.0e-10) *
//(std::abs(p2f) + std::abs(pf));
bool wolfe_ii_ok;
if (opts_.minimize) wolfe_ii_ok = (p2f >= opts_.c2 * pf);
else wolfe_ii_ok = (p2f <= opts_.c2 * pf);
enum { kDecrease, kNoChange } d_action; // What to do with d_: leave it alone,
// or take the square root.
enum { kAccept, kDecreaseStep, kIncreaseStep, kRestart } iteration_action;
// What we'll do in the overall iteration: accept this value, DecreaseStep
// (reduce the step size), IncreaseStep (increase the step size), or kRestart
// (set k back to zero). Generally when we can't get both conditions to be
// true with a reasonable period of time, it makes sense to restart, because
// probably we've almost converged and got into numerical issues; from here
// we'll just produce NaNs. Restarting is a safe thing to do and the outer
// code will quickly detect convergence.
d_action = kNoChange; // the default.
if (wolfe_i_ok && wolfe_ii_ok) {
iteration_action = kAccept;
d_action = kNoChange; // actually doesn't matter, it'll get reset.
} else if (!wolfe_i_ok) {
// If wolfe i) [the Armijo rule] failed then we went too far (or are
// meeting numerical problems).
if (last_failure_type_ == kWolfeII) { // Last time we failed it was Wolfe ii).
// When we switch between them we decrease d.
d_action = kDecrease;
}
iteration_action = kDecreaseStep;
last_failure_type_ = kWolfeI;
num_wolfe_i_failures_++;
} else if (!wolfe_ii_ok) {
// Curvature condition failed -> we did not go far enough.
if (last_failure_type_ == kWolfeI) // switching between wolfe i and ii failures->
d_action = kDecrease; // decrease value of d.
iteration_action = kIncreaseStep;
last_failure_type_ = kWolfeII;
num_wolfe_ii_failures_++;
}
// Test whether we've been switching too many times between Wolfe i) and ii)
// failures, or overall have an excessive number of failures. We just give up
// and restart L-BFGS. Probably we've almost converged.
if (num_wolfe_i_failures_ + num_wolfe_ii_failures_ >
opts_.max_line_search_iters) {
KALDI_VLOG(2) << "Too many steps in line search -> restarting.";
iteration_action = kRestart;
}
if (d_action == kDecrease)
d_ = std::sqrt(d_);
KALDI_VLOG(3) << "d = " << d_ << ", iter = " << k_ << ", action = "
<< (iteration_action == kAccept ? "accept" :
(iteration_action == kDecreaseStep ? "decrease" :
(iteration_action == kIncreaseStep ? "increase" :
"reject")));
// Note: even if iteration_action != Restart at this point,
// some code below may set it to Restart.
if (iteration_action == kAccept) {
if (AcceptStep(function_value, gradient)) { // If we did
// not detect a problem while accepting the step..
computation_state_ = kBeforeStep;
ComputeNewDirection(function_value, gradient);
} else {
KALDI_VLOG(2) << "Restarting L-BFGS computation; problem found while "
<< "accepting step.";
iteration_action = kRestart; // We'll have to restart now.
}
}
if (iteration_action == kDecreaseStep || iteration_action == kIncreaseStep) {
Real scale = (iteration_action == kDecreaseStep ? 1.0 / d_ : d_);
temp_.CopyFromVec(new_x_);
new_x_.Scale(scale);
new_x_.AddVec(1.0 - scale, x_);
if (new_x_.ApproxEqual(temp_, 0.0)) {
// Value of new_x_ did not change at all --> we must restart.
KALDI_VLOG(3) << "Value of x did not change, when taking step; "
<< "will restart computation.";
iteration_action = kRestart;
}
if (new_x_.ApproxEqual(temp_, 1.0e-08) &&
std::abs(f_ - function_value) < 1.0e-08 *
std::abs(f_) && iteration_action == kDecreaseStep) {
// This is common and due to roundoff.
KALDI_VLOG(3) << "We appear to be backtracking while we are extremely "
<< "close to the old value; restarting.";
iteration_action = kRestart;
}
if (iteration_action == kDecreaseStep) {
num_wolfe_i_failures_++;
last_failure_type_ = kWolfeI;
} else {
num_wolfe_ii_failures_++;
last_failure_type_ = kWolfeII;
}
}
if (iteration_action == kRestart) {
// We want to restart the computation. If the objf at new_x_ is
// better than it was at x_, we'll start at new_x_, else at x_.
bool use_newx;
if (opts_.minimize) use_newx = (function_value < f_);
else use_newx = (function_value > f_);
KALDI_VLOG(3) << "Restarting computation.";
if (use_newx) Restart(new_x_, function_value, gradient);
else Restart(x_, f_, deriv_);
}
}
template<typename Real>
void OptimizeLbfgs<Real>::DoStep(Real function_value,
const VectorBase<Real> &gradient) {
if (opts_.minimize ? function_value < best_f_ : function_value > best_f_) {
best_f_ = function_value;
best_x_.CopyFromVec(new_x_);
}
if (computation_state_ == kBeforeStep)
ComputeNewDirection(function_value, gradient);
else // kWithinStep{1,2,3}
StepSizeIteration(function_value, gradient);
}
template<typename Real>
void OptimizeLbfgs<Real>::DoStep(Real function_value,
const VectorBase<Real> &gradient,
const VectorBase<Real> &diag_approx_2nd_deriv) {
if (opts_.minimize ? function_value < best_f_ : function_value > best_f_) {
best_f_ = function_value;
best_x_.CopyFromVec(new_x_);
}
if (opts_.minimize) {
KALDI_ASSERT(diag_approx_2nd_deriv.Min() > 0.0);
} else {
KALDI_ASSERT(diag_approx_2nd_deriv.Max() < 0.0);
}
H_was_set_ = true;
H_.CopyFromVec(diag_approx_2nd_deriv);
H_.InvertElements();
DoStep(function_value, gradient);
}
template<typename Real>
const VectorBase<Real>&
OptimizeLbfgs<Real>::GetValue(Real *objf_value) const {
if (objf_value != NULL) *objf_value = best_f_;
return best_x_;
}
// Instantiate the class for float and double.
template
class OptimizeLbfgs<float>;
template
class OptimizeLbfgs<double>;
} // end namespace kaldi


@@ -1,219 +0,0 @@
// matrix/optimization.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_OPTIMIZATION_H_
#define KALDI_MATRIX_OPTIMIZATION_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_optimization
/// @{
/**
This is an implementation of L-BFGS. It pushes responsibility for
determining when to stop onto the user. There is no call-back here:
everything is done via calls to the class itself (see the example in
matrix-lib-test.cc, and the brief sketch after the LbfgsOptions struct
below). This does not implement constrained L-BFGS, but it will
handle constrained problems correctly as long as the function approaches
+infinity (or -infinity for maximization problems) when it gets close to the
bound of the constraint. In these types of problems, you just let the
function value be +infinity for minimization problems, or -infinity for
maximization problems, outside these bounds.
*/
struct LbfgsOptions {
bool minimize; // if true, we're minimizing, else maximizing.
int m; // m is the number of stored vectors L-BFGS keeps.
float first_step_learning_rate; // The very first step of L-BFGS is
// like gradient descent. If you want to configure the size of that step,
// you can do it using this variable.
float first_step_length; // If this variable is >0.0, it overrides
// first_step_learning_rate; on the first step we choose an approximate
// Hessian that is the multiple of the identity that would generate this
// step-length, or 1.0 if the gradient is zero.
float first_step_impr; // If this variable is >0.0, it overrides
// first_step_learning_rate; on the first step we choose an approximate
// Hessian that is the multiple of the identity that would generate this
// amount of objective function improvement (assuming the "real" objf
// was linear).
float c1; // A constant in Armijo rule = Wolfe condition i)
float c2; // A constant in Wolfe condition ii)
float d; // An amount > 1.0 (default 2.0) that we initially multiply or
// divide the step length by, in the line search.
int max_line_search_iters; // after this many iters we restart L-BFGS.
int avg_step_length; // number of iters to avg step length over, in
// RecentStepLength().
LbfgsOptions (bool minimize = true):
minimize(minimize),
m(10),
first_step_learning_rate(1.0),
first_step_length(0.0),
first_step_impr(0.0),
c1(1.0e-04),
c2(0.9),
d(2.0),
max_line_search_iters(50),
avg_step_length(4) { }
};
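// Editorial sketch of the calling protocol (the full example lives in
// matrix-lib-test.cc; the code below is illustrative), minimizing
// f(x) = 0.5 x^T x, whose gradient at x is x itself:
//   Vector<double> x(10);
//   x.SetRandn();
//   LbfgsOptions opts(true);  // minimize.
//   OptimizeLbfgs<double> lbfgs(x, opts);
//   for (int32 iter = 0; iter < 100; iter++) {
//     const VectorBase<double> &cur = lbfgs.GetProposedValue();
//     double f = 0.5 * VecVec(cur, cur);
//     Vector<double> grad(cur);  // gradient of f at cur.
//     lbfgs.DoStep(f, grad);
//     if (lbfgs.RecentStepLength() < 1.0e-06) break;  // converged.
//   }
//   double best_f;
//   Vector<double> best_x(lbfgs.GetValue(&best_f));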
template<typename Real>
class OptimizeLbfgs {
public:
/// Initializer takes the starting value of x.
OptimizeLbfgs(const VectorBase<Real> &x,
const LbfgsOptions &opts);
/// This returns the value of the variable x that has the best objective
/// function so far, and the corresponding objective function value if
/// requested. This would typically be called only at the end.
const VectorBase<Real>& GetValue(Real *objf_value = NULL) const;
/// This returns the value at which the function wants us
/// to compute the objective function and gradient.
const VectorBase<Real>& GetProposedValue() const { return new_x_; }
/// Returns the average magnitude of the last n steps (but not
/// more than the number we have stored). Before we have taken
/// any steps, returns +infinity. Note: if the most recent
/// step length was 0, it returns 0, regardless of the other
/// step lengths. This makes it suitable as a convergence test
/// (else we'd generate NaN's).
Real RecentStepLength() const;
/// The user calls this function to provide the class with the
/// function and gradient info at the point GetProposedValue().
/// If this point is outside the constraints you can set function_value
/// to {+infinity,-infinity} for {minimization,maximization} problems.
/// In this case the gradient, and also the second derivative (if you call
/// the second overloaded version of this function) will be ignored.
void DoStep(Real function_value,
const VectorBase<Real> &gradient);
/// The user can call this version of DoStep() if it is desired to set some
/// kind of approximate Hessian on this iteration. Note: it is a prerequisite
/// that diag_approx_2nd_deriv must be strictly positive (minimizing), or
/// negative (maximizing).
void DoStep(Real function_value,
const VectorBase<Real> &gradient,
const VectorBase<Real> &diag_approx_2nd_deriv);
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(OptimizeLbfgs);
// The following variable says what stage of the computation we're at.
// Refer to Algorithm 7.5 (L-BFGS) of Nocedal & Wright, "Numerical
// Optimization", 2nd edition.
// kBeforeStep means we're about to do
// "compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4).
// kWithinStep means we're at some point within line search; note
// that line search is iterative so we can stay in this state more
// than one time on each iteration.
enum ComputationState {
kBeforeStep,
kWithinStep, // This means we're within the step-size computation, and
// have not yet done the 1st function evaluation.
};
inline MatrixIndexT Dim() { return x_.Dim(); }
inline MatrixIndexT M() { return opts_.m; }
SubVector<Real> Y(MatrixIndexT i) {
return SubVector<Real>(data_, (i % M()) * 2); // vector y_i
}
SubVector<Real> S(MatrixIndexT i) {
return SubVector<Real>(data_, (i % M()) * 2 + 1); // vector s_i
}
// The following are subroutines within DoStep():
bool AcceptStep(Real function_value,
const VectorBase<Real> &gradient);
void Restart(const VectorBase<Real> &x,
Real function_value,
const VectorBase<Real> &gradient);
void ComputeNewDirection(Real function_value,
const VectorBase<Real> &gradient);
void ComputeHifNeeded(const VectorBase<Real> &gradient);
void StepSizeIteration(Real function_value,
const VectorBase<Real> &gradient);
void RecordStepLength(Real s);
LbfgsOptions opts_;
SignedMatrixIndexT k_; // Iteration number, starts from zero. Gets set back to zero
// when we restart.
ComputationState computation_state_;
bool H_was_set_; // True if the user specified H_; if false,
// we'll use a heuristic to estimate it.
Vector<Real> x_; // current x.
Vector<Real> new_x_; // the x proposed in the line search.
Vector<Real> best_x_; // the x with the best objective function so far
// (either the same as x_ or something in the current line search.)
Vector<Real> deriv_; // The most recently evaluated derivative-- at x_k.
Vector<Real> temp_;
Real f_; // The function evaluated at x_k.
Real best_f_; // the best objective function so far.
Real d_; // a number d > 1.0; during an iteration we may decrease this, when
// we switch between Wolfe i) and Wolfe ii) failures.
int num_wolfe_i_failures_; // the num times we decreased step size.
int num_wolfe_ii_failures_; // the num times we increased step size.
enum { kWolfeI, kWolfeII, kNone } last_failure_type_; // last type of step-search
// failure on this iter.
Vector<Real> H_; // Current inverse-Hessian estimate. May be computed by this class itself,
// or provided by the user via the second form of DoStep().
Matrix<Real> data_; // dimension (m*2) x dim. Even rows store
// gradients y_i, odd rows store steps s_i.
Vector<Real> rho_; // dimension m; rho_(i % m) = 1/(y_i^T s_i), Eq. 7.17 of N&W.
std::vector<Real> step_lengths_; // The step sizes we took on the last
// (up to m) iterations; these are not stored in a rotating buffer but
// are shifted by one each time (this is more convenient when we
// restart, as we keep this info past restarting).
};
/// @}
} // end namespace kaldi
#endif


@@ -1,438 +0,0 @@
// matrix/packed-matrix.cc
// Copyright 2009-2012 Microsoft Corporation Saarland University
// Johns Hopkins University (Author: Daniel Povey);
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
/**
* @file packed-matrix.cc
*
* Implementation of specialized PackedMatrix template methods
*/
#include "matrix/cblas-wrappers.h"
#include "matrix/packed-matrix.h"
#include "matrix/kaldi-vector.h"
namespace kaldi {
template<typename Real>
void PackedMatrix<Real>::Scale(Real alpha) {
size_t nr = num_rows_,
sz = (nr * (nr + 1)) / 2;
cblas_Xscal(sz, alpha, data_, 1);
}
template<typename Real>
void PackedMatrix<Real>::AddPacked(const Real alpha, const PackedMatrix<Real> &rMa) {
KALDI_ASSERT(num_rows_ == rMa.NumRows());
size_t nr = num_rows_,
sz = (nr * (nr + 1)) / 2;
cblas_Xaxpy(sz, alpha, rMa.Data(), 1, data_, 1);
}
template<typename Real>
void PackedMatrix<Real>::SetRandn() {
Real *data = data_;
size_t dim = num_rows_, size = ((dim*(dim+1))/2);
for (size_t i = 0; i < size; i++)
data[i] = RandGauss();
}
template<typename Real>
inline void PackedMatrix<Real>::Init(MatrixIndexT r) {
if (r == 0) {
num_rows_ = 0;
data_ = 0;
return;
}
size_t size = ((static_cast<size_t>(r) * static_cast<size_t>(r + 1)) / 2);
if (static_cast<size_t>(static_cast<MatrixIndexT>(size)) != size) {
KALDI_WARN << "Allocating packed matrix whose full dimension does not fit "
<< "in MatrixIndexT: not all code is tested for this case.";
}
void *data; // aligned memory block
void *temp;
if ((data = KALDI_MEMALIGN(16, size * sizeof(Real), &temp)) != NULL) {
this->data_ = static_cast<Real *> (data);
this->num_rows_ = r;
} else {
throw std::bad_alloc();
}
}
template<typename Real>
void PackedMatrix<Real>::Swap(PackedMatrix<Real> *other) {
std::swap(data_, other->data_);
std::swap(num_rows_, other->num_rows_);
}
template<typename Real>
void PackedMatrix<Real>::Swap(Matrix<Real> *other) {
std::swap(data_, other->data_);
std::swap(num_rows_, other->num_rows_);
}
template<typename Real>
void PackedMatrix<Real>::Resize(MatrixIndexT r, MatrixResizeType resize_type) {
// the next block uses recursion to handle what we have to do if
// resize_type == kCopyData.
if (resize_type == kCopyData) {
if (this->data_ == NULL || r == 0) resize_type = kSetZero; // nothing to copy.
else if (this->num_rows_ == r) { return; } // nothing to do.
else {
// set tmp to a packed matrix of the desired size.
PackedMatrix<Real> tmp(r, kUndefined);
size_t r_min = std::min(r, num_rows_);
size_t mem_size_min = sizeof(Real) * (r_min*(r_min+1))/2,
mem_size_full = sizeof(Real) * (r*(r+1))/2;
// Copy the contents to tmp.
memcpy(tmp.data_, data_, mem_size_min);
char *ptr = static_cast<char*>(static_cast<void*>(tmp.data_));
// Set the rest of the contents of tmp to zero.
memset(static_cast<void*>(ptr + mem_size_min), 0, mem_size_full-mem_size_min);
tmp.Swap(this);
return;
}
}
if (data_ != NULL) Destroy();
Init(r);
if (resize_type == kSetZero) SetZero();
}
template<typename Real>
void PackedMatrix<Real>::AddToDiag(Real r) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr += r;
ptr += i;
}
}
template<typename Real>
void PackedMatrix<Real>::ScaleDiag(Real alpha) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr *= alpha;
ptr += i;
}
}
template<typename Real>
void PackedMatrix<Real>::SetDiag(Real alpha) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr = alpha;
ptr += i;
}
}
template<typename Real>
template<typename OtherReal>
void PackedMatrix<Real>::CopyFromPacked(const PackedMatrix<OtherReal> &orig) {
KALDI_ASSERT(NumRows() == orig.NumRows());
if (sizeof(Real) == sizeof(OtherReal)) {
memcpy(data_, orig.Data(), SizeInBytes());
} else {
Real *dst = data_;
const OtherReal *src = orig.Data();
size_t nr = NumRows(),
size = (nr * (nr + 1)) / 2;
for (size_t i = 0; i < size; i++, dst++, src++)
*dst = *src;
}
}
// template instantiations.
template
void PackedMatrix<float>::CopyFromPacked(const PackedMatrix<double> &orig);
template
void PackedMatrix<double>::CopyFromPacked(const PackedMatrix<float> &orig);
template
void PackedMatrix<double>::CopyFromPacked(const PackedMatrix<double> &orig);
template
void PackedMatrix<float>::CopyFromPacked(const PackedMatrix<float> &orig);
template<typename Real>
template<typename OtherReal>
void PackedMatrix<Real>::CopyFromVec(const SubVector<OtherReal> &vec) {
MatrixIndexT size = (NumRows()*(NumRows()+1)) / 2;
KALDI_ASSERT(vec.Dim() == size);
if (sizeof(Real) == sizeof(OtherReal)) {
memcpy(data_, vec.Data(), size * sizeof(Real));
} else {
Real *dst = data_;
const OtherReal *src = vec.Data();
for (MatrixIndexT i = 0; i < size; i++, dst++, src++)
*dst = *src;
}
}
// template instantiations.
template
void PackedMatrix<float>::CopyFromVec(const SubVector<double> &orig);
template
void PackedMatrix<double>::CopyFromVec(const SubVector<float> &orig);
template
void PackedMatrix<double>::CopyFromVec(const SubVector<double> &orig);
template
void PackedMatrix<float>::CopyFromVec(const SubVector<float> &orig);
template<typename Real>
void PackedMatrix<Real>::SetZero() {
memset(data_, 0, SizeInBytes());
}
template<typename Real>
void PackedMatrix<Real>::SetUnit() {
memset(data_, 0, SizeInBytes());
for (MatrixIndexT row = 0;row < num_rows_;row++)
(*this)(row, row) = 1.0;
}
template<typename Real>
Real PackedMatrix<Real>::Trace() const {
Real ans = 0.0;
for (MatrixIndexT row = 0;row < num_rows_;row++)
ans += (*this)(row, row);
return ans;
}
template<typename Real>
void PackedMatrix<Real>::Destroy() {
// we need to free the data block if it was defined
if (data_ != NULL) KALDI_MEMALIGN_FREE(data_);
data_ = NULL;
num_rows_ = 0;
}
template<typename Real>
void PackedMatrix<Real>::Write(std::ostream &os, bool binary) const {
if (!os.good()) {
KALDI_ERR << "Failed to write vector to stream: stream not good";
}
int32 size = this->NumRows(); // make the size 32-bit on disk.
KALDI_ASSERT(this->NumRows() == (MatrixIndexT) size);
MatrixIndexT num_elems = ((size+1)*(MatrixIndexT)size)/2;
if(binary) {
std::string my_token = (sizeof(Real) == 4 ? "FP" : "DP");
WriteToken(os, binary, my_token);
WriteBasicType(os, binary, size);
// We don't use the built-in Kaldi write routines for the floats, as they are
// not efficient enough.
os.write((const char*) data_, sizeof(Real) * num_elems);
}
else {
if(size == 0)
os<<"[ ]\n";
else {
os<<"[\n";
MatrixIndexT i = 0;
for (int32 j = 0; j < size; j++) {
for (int32 k = 0; k < j + 1; k++) {
WriteBasicType(os, binary, data_[i++]);
}
os << ( (j==size-1)? "]\n" : "\n");
}
KALDI_ASSERT(i == num_elems);
}
}
if (os.fail()) {
KALDI_ERR << "Failed to write packed matrix to stream";
}
}
template<typename Real>
void PackedMatrix<Real>::Read(std::istream& is, bool binary, bool add) {
if (add) {
PackedMatrix<Real> tmp;
tmp.Read(is, binary, false); // read without adding.
if (this->NumRows() == 0) this->Resize(tmp.NumRows());
else {
if (this->NumRows() != tmp.NumRows()) {
if (tmp.NumRows() == 0) return; // do nothing in this case.
else KALDI_ERR << "PackedMatrix::Read, size mismatch " << this->NumRows()
<< " vs. " << tmp.NumRows();
}
}
this->AddPacked(1.0, tmp);
return;
} // now assume add == false.
std::ostringstream specific_error;
MatrixIndexT pos_at_start = is.tellg();
int peekval = Peek(is, binary);
const char *my_token = (sizeof(Real) == 4 ? "FP" : "DP");
const char *new_format_token = "[";
bool is_new_format = false;//added by hxu
char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
int32 size;
MatrixIndexT num_elems;
if (peekval == other_token_start) { // need to instantiate the other type to read it.
typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa.
PackedMatrix<OtherType> other(this->NumRows());
other.Read(is, binary, false); // add is false at this point.
this->Resize(other.NumRows());
this->CopyFromPacked(other);
return;
}
std::string token;
ReadToken(is, binary, &token);
if (token != my_token) {
if(token != new_format_token) {
specific_error << ": Expected token " << my_token << ", got " << token;
goto bad;
}
//new format it is
is_new_format = true;
}
if(!is_new_format) {
ReadBasicType(is, binary, &size); // throws on error.
if ((MatrixIndexT)size != this->NumRows()) {
KALDI_ASSERT(size>=0);
this->Resize(size);
}
num_elems = ((size+1)*(MatrixIndexT)size)/2;
if (!binary) {
for (MatrixIndexT i = 0; i < num_elems; i++) {
ReadBasicType(is, false, data_+i); // will throw on error.
}
} else {
if (num_elems)
is.read(reinterpret_cast<char*>(data_), sizeof(Real)*num_elems);
}
if (is.fail()) goto bad;
return;
}
else {
std::vector<Real> data;
while(1) {
int32 num_lines = 0;
int i = is.peek();
if (i == -1) { specific_error << "Got EOF while reading matrix data"; goto bad; }
else if (static_cast<char>(i) == ']') { // Finished reading matrix.
is.get(); // eat the "]".
i = is.peek();
if (static_cast<char>(i) == '\r') {
is.get();
is.get(); // get \r\n (must eat what we wrote)
} // we just consumed the "\r\n" pair that was written after the "]".
else if (static_cast<char>(i) == '\n') { is.get(); } // get \n (must eat what we wrote)
if (is.fail()) {
KALDI_WARN << "After end of matrix data, read error.";
// we got the data we needed, so just warn for this error.
}
//now process the data:
num_lines = int32(sqrt(data.size()*2));
KALDI_ASSERT(data.size() == num_lines*(num_lines+1)/2);
this->Resize(num_lines);
for(int32 i = 0; i < data.size(); i++) {
data_[i] = data[i];
}
return;
//std::cout<<"here!!!!!hxu!!!!!"<<std::endl;
}
else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
Real r;
is >> r;
if (is.fail()) {
specific_error << "Stream failure/EOF while reading matrix data.";
goto bad;
}
data.push_back(r);
}
else if (isspace(i)) {
is.get(); // eat the space and do nothing.
} else { // NaN or inf or error.
std::string str;
is >> str;
if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
!KALDI_STRCASECMP(str.c_str(), "infinity")) {
data.push_back(std::numeric_limits<Real>::infinity());
KALDI_WARN << "Reading infinite value into matrix.";
} else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
data.push_back(std::numeric_limits<Real>::quiet_NaN());
KALDI_WARN << "Reading NaN value into matrix.";
} else {
specific_error << "Expecting numeric matrix data, got " << str;
goto bad;
}
}
}
}
bad:
KALDI_ERR << "Failed to read packed matrix from stream. " << specific_error
<< " File position at start is "
<< pos_at_start << ", currently " << is.tellg();
}
// Instantiate PackedMatrix for float and double.
template
class PackedMatrix<float>;
template
class PackedMatrix<double>;
} // namespace kaldi


@@ -1,197 +0,0 @@
// matrix/packed-matrix.h
// Copyright 2009-2013 Ondrej Glembek; Lukas Burget; Microsoft Corporation;
// Saarland University; Yanmin Qian;
// Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_PACKED_MATRIX_H_
#define KALDI_MATRIX_PACKED_MATRIX_H_
#include "matrix/matrix-common.h"
#include <algorithm>
namespace kaldi {
/// \addtogroup matrix_funcs_io
// we need to declare the friend << operator here
template<typename Real>
std::ostream & operator <<(std::ostream & out, const PackedMatrix<Real>& M);
/// \addtogroup matrix_group
/// @{
/// @brief Packed matrix: base class for triangular and symmetric matrices.
template<typename Real> class PackedMatrix {
friend class CuPackedMatrix<Real>;
public:
PackedMatrix() : data_(NULL), num_rows_(0) {}
explicit PackedMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero):
data_(NULL) { Resize(r, resize_type); }
explicit PackedMatrix(const PackedMatrix<Real> &orig) : data_(NULL) {
Resize(orig.num_rows_, kUndefined);
CopyFromPacked(orig);
}
template<typename OtherReal>
explicit PackedMatrix(const PackedMatrix<OtherReal> &orig) : data_(NULL) {
Resize(orig.NumRows(), kUndefined);
CopyFromPacked(orig);
}
void SetZero(); ///< Set to zero.
void SetUnit(); ///< Set to the unit matrix.
void SetRandn(); ///< Set to random values drawn from a normal distribution.
Real Trace() const;
// Needed for inclusion in std::vector
PackedMatrix<Real> & operator =(const PackedMatrix<Real> &other) {
Resize(other.NumRows());
CopyFromPacked(other);
return *this;
}
~PackedMatrix() {
Destroy();
}
/// Set packed matrix to a specified size (can be zero).
/// The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero);
void AddToDiag(const Real r); // Adds r to the diagonal.
void ScaleDiag(const Real alpha); // Scales diagonal by alpha.
void SetDiag(const Real alpha); // Sets diagonal to this value.
template<typename OtherReal>
void CopyFromPacked(const PackedMatrix<OtherReal> &orig);
/// CopyFromVec just interprets the vector as having the same layout
/// as the packed matrix. Must have the same dimension, i.e.
/// orig.Dim() == (NumRows()*(NumRows()+1)) / 2;
template<typename OtherReal>
void CopyFromVec(const SubVector<OtherReal> &orig);
Real* Data() { return data_; }
const Real* Data() const { return data_; }
inline MatrixIndexT NumRows() const { return num_rows_; }
inline MatrixIndexT NumCols() const { return num_rows_; }
size_t SizeInBytes() const {
size_t nr = static_cast<size_t>(num_rows_);
return ((nr * (nr+1)) / 2) * sizeof(Real);
}
//MatrixIndexT Stride() const { return stride_; }
// This code is duplicated in child classes to avoid extra levels of calls.
Real operator() (MatrixIndexT r, MatrixIndexT c) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_rows_)
&& c <= r);
return *(data_ + (r * (r + 1)) / 2 + c);
}
// This code is duplicated in child classes to avoid extra levels of calls.
Real &operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_rows_)
&& c <= r);
return *(data_ + (r * (r + 1)) / 2 + c);
}
Real Max() const {
KALDI_ASSERT(num_rows_ > 0);
return * (std::max_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) ));
}
Real Min() const {
KALDI_ASSERT(num_rows_ > 0);
return * (std::min_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) ));
}
void Scale(Real c);
friend std::ostream & operator << <> (std::ostream & out,
const PackedMatrix<Real> &m);
// Use Read() with add == true instead of "stream >> *this" if you want to add
// to the existing contents. Will throw an exception on failure.
void Read(std::istream &in, bool binary, bool add = false);
void Write(std::ostream &out, bool binary) const;
void Destroy();
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(PackedMatrix<Real> *other);
void Swap(Matrix<Real> *other);
protected:
// Will only be called from this class or derived classes.
void AddPacked(const Real alpha, const PackedMatrix<Real>& M);
Real *data_;
MatrixIndexT num_rows_;
//MatrixIndexT stride_;
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
/// have already been freed), and it sets the matrix to newly allocated memory
/// with the specified dimension. dim == 0 is acceptable. The memory contents
/// pointed to by data_ will be undefined.
void Init(MatrixIndexT dim);
};
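// Editorial note on the layout assumed by operator() above: the lower
// triangle is stored row by row, so for a 3 x 3 matrix data_ holds
// [(0,0), (1,0), (1,1), (2,0), (2,1), (2,2)] and element (r, c) with
// c <= r lives at index r*(r+1)/2 + c. For instance, using the SpMatrix
// subclass (declared in sp-matrix.h):
//   SpMatrix<double> S(3);
//   S(2, 1) = 5.0;  // stored at index 2*3/2 + 1 = 4.
//   KALDI_ASSERT(S.Data()[4] == 5.0);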
/// @} end "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
template<typename Real>
std::ostream & operator << (std::ostream & os, const PackedMatrix<Real>& M) {
M.Write(os, false);
return os;
}
template<typename Real>
std::istream & operator >> (std::istream &is, PackedMatrix<Real> &M) {
M.Read(is, false);
return is;
}
/// @}
} // namespace kaldi
#endif

Some files were not shown because too many files changed in this diff.