Mirror of https://github.com/mozilla/kaldi.git
sandbox/dan2: merging changes from trunk; some further small code-level optimizations to the determinization code (which, I just realized, were done in sandbox/dan2; I'll now merge those back to trunk).
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3087 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Parent
4da87782bc
Commit
0bc71728a2
@@ -0,0 +1,77 @@
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list
train_nj=16

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=23

#Official DEV data files
dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
dev10h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
dev10h_nj=32


#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

use_ffv=true
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
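These per-language conf files are plain shell fragments: each one sources the shared defaults (conf/common.limitedLP or conf/common.fullLP) and then overrides corpus paths, job counts, and model sizes. A minimal sketch of how a run script can consume one — the conf/lang.conf name and the sanity checks below are illustrative, not part of this commit:

. conf/lang.conf || exit 1;   # one of the per-language files above
# Fail early if the corpus list is missing ($train_data_list is set by the conf file).
[ -f "$train_data_list" ] || { echo "missing $train_data_list" >&2; exit 1; }
echo "Training with $train_nj parallel jobs on data from $train_data_dir"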
@@ -0,0 +1,97 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
train_nj=32

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Haitian_Babel201/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Haitian_Babel201/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@@ -0,0 +1,97 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
train_nj=32

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Haitian_Babel201/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Haitian_Babel201/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@@ -0,0 +1,76 @@
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.LimitedLP.list
train_nj=16

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.sub-train.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@ -85,8 +85,8 @@ numGaussSGMM=80000
|
|||
# Lexicon and Language Model parameters
|
||||
oovSymbol="<unk>"
|
||||
lexiconFlags="--oov <unk>"
|
||||
use_pitch=true
|
||||
use_ffv=true
|
||||
use_pitch=false
|
||||
use_ffv=false
|
||||
# Scoring protocols (dummy GLM file to appease the scoring script)
|
||||
#glmFile=./conf/glm
|
||||
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
|
||||
|
|
|
@@ -0,0 +1,97 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Zulu_Babel206/train.FullLP.list
train_nj=32

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@@ -37,6 +37,28 @@ devtrain_rttm_file=
devtrain_kwlist_file=
devtrain_nj=64

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
@@ -74,8 +96,8 @@ numGaussSGMM=18000
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

use_pitch=true
use_ffv=true
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
|
@ -0,0 +1,108 @@
|
|||
# include common settings for limitedLP systems.
|
||||
. conf/common.limitedLP || exit 1;
|
||||
|
||||
#speech corpora files location
|
||||
train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
|
||||
train_data_list=/export/babel/data/splits/Zulu_Babel206/train.LimitedLP.list
|
||||
train_nj=16
|
||||
|
||||
#RADICAL DEV data files
|
||||
dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
|
||||
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
|
||||
dev2h_data_cmudb=
|
||||
dev2h_stm_file=
|
||||
dev2h_ecf_file=
|
||||
dev2h_rttm_file=
|
||||
dev2h_kwlist_file=
|
||||
dev2h_subset_ecf=true
|
||||
dev2h_nj=20
|
||||
|
||||
#Official DEV data files
|
||||
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
|
||||
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
|
||||
dev10h_data_cmudb=
|
||||
dev10h_stm_file=
|
||||
dev10h_ecf_file=
|
||||
dev10h_rttm_file=
|
||||
dev10h_kwlist_file=
|
||||
dev10h_nj=32
|
||||
|
||||
#RADICAL EVAL data files (difference between TRAIN-FULL TRAIN-LIMITED)
|
||||
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
|
||||
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
|
||||
devtrain_data_cmudb=
|
||||
devtrain_stm_file=
|
||||
devtrain_ecf_file=
|
||||
devtrain_rttm_file=
|
||||
devtrain_kwlist_file=
|
||||
devtrain_nj=64
|
||||
|
||||
#RADICAL DEV data files
|
||||
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
|
||||
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
|
||||
dev10h_sph_data_cmudb=
|
||||
dev10h_sph_stm_file=
|
||||
dev10h_sph_ecf_file=
|
||||
dev10h_sph_rttm_file=
|
||||
dev10h_sph_kwlist_file=
|
||||
dev10h_sph_subset_ecf=true
|
||||
dev10h_sph_nj=32
|
||||
|
||||
#RADICAL DEV data files
|
||||
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
|
||||
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
|
||||
dev10h_wav_data_cmudb=
|
||||
dev10h_wav_stm_file=
|
||||
dev10h_wav_ecf_file=
|
||||
dev10h_wav_rttm_file=
|
||||
dev10h_wav_kwlist_file=
|
||||
dev10h_wav_subset_ecf=true
|
||||
dev10h_wav_nj=13
|
||||
|
||||
#Official EVAL period evaluation data files
|
||||
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
|
||||
eval_data_list=
|
||||
eval_ecf_file=
|
||||
eval_kwlist_file=
|
||||
eval_data_cmudb=
|
||||
eval_nj=64
|
||||
|
||||
#Official (POST-)EVAL evaluation data portion
|
||||
evalpart1_data_dir=
|
||||
evalpart1_data_list=
|
||||
evalpart1_data_cmudb=
|
||||
evalpart1_stm_file=
|
||||
evalpart1_ecf_file=
|
||||
evalpart1_rttm_file=
|
||||
evalpart1_kwlist_file=
|
||||
evalpart1_nj=21
|
||||
|
||||
# Acoustic model parameters
|
||||
numLeavesTri1=1000
|
||||
numGaussTri1=10000
|
||||
numLeavesTri2=2500
|
||||
numGaussTri2=36000
|
||||
numLeavesTri3=2500
|
||||
numGaussTri3=36000
|
||||
numLeavesMLLT=2500
|
||||
numGaussMLLT=36000
|
||||
numLeavesSAT=2500
|
||||
numGaussSAT=36000
|
||||
numGaussUBM=750
|
||||
numLeavesSGMM=5000
|
||||
numGaussSGMM=18000
|
||||
|
||||
# Lexicon and Language Model parameters
|
||||
oovSymbol="<unk>"
|
||||
lexiconFlags="--oov <unk>"
|
||||
|
||||
use_pitch=true
|
||||
use_ffv=true
|
||||
# Scoring protocols (dummy GLM file to appease the scoring script)
|
||||
#glmFile=./conf/glm
|
||||
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
|
||||
|
||||
#keyword search settings
|
||||
duptime=0.5
|
||||
case_insensitive=true
|
||||
|
|
@@ -15,7 +15,7 @@ dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
dev2h_nj=18

#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
@@ -27,6 +27,16 @@ dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL EVAL data files (difference between TRAIN-FULL and TRAIN-LIMITED)
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.train.list
devtrain_data_cmudb=
devtrain_stm_file=
devtrain_ecf_file=
devtrain_rttm_file=
devtrain_kwlist_file=
devtrain_nj=64

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
@@ -17,9 +17,10 @@ sub KeywordSort {
}

my $Usage = <<EOU;
This script reads an alignment.csv file and computes the oracle ATWV based on the
oracle threshold. The duration of the search collection is supposed to be provided.
In the Babel case, the duration should be half of the total audio duration.
This script reads an alignment.csv file and computes the ATWV, OTWV, MTWV by
sweeping the threshold. The duration of the search collection is supposed to be
provided. In the Babel case, the duration should be half of the total audio
duration.

The alignment.csv file is supposed to have the following fields for each line:
language,file,channel,termid,term,ref_bt,ref_et,sys_bt,sys_et,sys_score,
@@ -42,7 +43,7 @@ GetOptions(

@ARGV == 1 || die $Usage;

# Workout the input/output source.
# Works out the input/output source.
my $alignment_in = shift @ARGV;

# Hash alignment file. For each instance we store a 3-dimension vector:
@@ -98,40 +99,66 @@ while (<A>) {
}
close(A);

# Work out the oracle ATWV by sweeping the threshold.
# Works out the oracle ATWV by sweeping the threshold.
my $atwv = 0.0;
my $oracle_atwv = 0.0;
my $otwv = 0.0;
my %mtwv_sweep;
foreach my $kwid (keys %keywords) {
  # Sort the instances by confidence score.
  my @instances = sort KeywordSort @{$alignment{$kwid}};
  my $local_oracle_atwv = 0.0;
  my $max_local_oracle_atwv = 0.0;
  my $local_otwv = 0.0;
  my $max_local_otwv = 0.0;
  my $local_atwv = 0.0;
  foreach my $instance (@instances) {
    my @ins = @{$instance};
    # Oracle ATWV.
    my $gain = 1.0 / $Ntrue{$kwid};
    my $cost = $beta / ($duration - $Ntrue{$kwid});
    # ATWV.
    if ($ins[1] == 1) {
      $local_oracle_atwv += 1.0 / $Ntrue{$kwid};
      $local_otwv += $gain;
    } else {
      $local_oracle_atwv -= $beta / ($duration - $Ntrue{$kwid});
      $local_otwv -= $cost;
    }
    if ($local_oracle_atwv > $max_local_oracle_atwv) {
      $max_local_oracle_atwv = $local_oracle_atwv;
    if ($local_otwv > $max_local_otwv) {
      $max_local_otwv = $local_otwv;
    }

    # Original ATWV.
    # OTWV.
    if ($ins[2] == 1) {
      $local_atwv -= $beta / ($duration - $Ntrue{$kwid});
      $local_atwv -= $cost;
    } elsif ($ins[2] == 2) {
      $local_atwv += 1.0 / $Ntrue{$kwid};
      $local_atwv += $gain;
    }

    # MTWV.
    for (my $threshold = 0.000; $threshold <= $ins[0]; $threshold += 0.001) {
      if ($ins[1] == 1) {
        $mtwv_sweep{$threshold} += $gain;
      } else {
        $mtwv_sweep{$threshold} -= $cost;
      }
    }
  }
  $atwv += $local_atwv;
  $oracle_atwv += $max_local_oracle_atwv;
  $otwv += $max_local_otwv;
}

# Works out the MTWV.
my $mtwv = 0.0;
my $mtwv_threshold = 0.0;
for my $threshold (keys %mtwv_sweep) {
  if ($mtwv_sweep{$threshold} > $mtwv) {
    $mtwv = $mtwv_sweep{$threshold};
    $mtwv_threshold = $threshold;
  }
}

$atwv /= scalar(keys %keywords);
$atwv = sprintf("%.4f", $atwv);
$oracle_atwv /= scalar(keys %keywords);
$oracle_atwv = sprintf("%.4f", $oracle_atwv);
print "Original ATWV = $atwv\n";
print "Oracle ATWV = $oracle_atwv\n";
$otwv /= scalar(keys %keywords);
$otwv = sprintf("%.4f", $otwv);
$mtwv /= scalar(keys %keywords);
$mtwv = sprintf("%.4f", $mtwv);
print "ATWV = $atwv\n";
print "OTWV = $otwv\n";
print "MTWV = $mtwv, THRESHOLD = $mtwv_threshold\n";
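For reference, the $gain and $cost variables introduced in this hunk implement the standard term-weighted value (TWV) bookkeeping. With K keywords, T the duration of the search collection, and N_true(kw) the number of reference occurrences of keyword kw (notation ours, not the script's):

\mathrm{TWV} = \frac{1}{K}\sum_{kw}\left(\frac{N_{\mathrm{hit}}(kw)}{N_{\mathrm{true}}(kw)} - \beta\,\frac{N_{\mathrm{FA}}(kw)}{T - N_{\mathrm{true}}(kw)}\right) = 1 - \overline{P}_{\mathrm{miss}} - \beta\,\overline{P}_{\mathrm{FA}}

so each hit contributes gain = 1/N_true(kw) and each false alarm costs beta/(T - N_true(kw)). ATWV scores the system's actual decisions, OTWV uses the best per-keyword threshold, and MTWV the single best global threshold, which the 0.001-step sweep above approximates.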
@@ -41,9 +41,10 @@ nnet_8m_6l/decode_eval_iter270/cer_10:%CER 25.72 [ 1945 / 7562, 405 ins, 533 del
nnet_8m_6l/decode_eval_iter280/cer_10:%CER 27.43 [ 2074 / 7562, 424 ins, 605 del, 1045 sub ]
nnet_8m_6l/decode_eval_iter290/cer_10:%CER 26.37 [ 1994 / 7562, 410 ins, 572 del, 1012 sub ]

nnet_8m_6l/decode_eval/cer_10:%CER 25.55 [ 1932 / 7562, 405 ins, 549 del, 978 sub ] # 6 layers neural network
nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758 sub ] # 6 layers neural network (nnet2 script, 1024 neurons)
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # 4 layers neural network
nnet_8m_6l/decode_eval/cer_10:%CER 25.55 [ 1932 / 7562, 405 ins, 549 del, 978 sub ] # 6 hidden layers neural network
nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758 sub ] # 6 hidden layers neural network (nnet2 script, 1024 neurons)
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # 3 hidden layers neural network
nnet_tanh_3l/decode_eval/cer_10:%CER 22.11 [ 1672 / 7562, 391 ins, 489 del, 792 sub ] # 3 hidden layers neural network (nnet2 script, 1024 neurons)

tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # 6 layers DNN - pretrained RBM, cross entropy trained DNN
tri5a_pretrain-dbn_dnn_smbr/decode_it1/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ] # sMBR trained DNN
@@ -97,6 +98,7 @@ nnet_8m_6l/decode_eval_closelm_iter290/cer_10:%CER 20.40 [ 1543 / 7562, 323 ins,
nnet_8m_6l/decode_eval_closelm/cer_10:%CER 20.68 [ 1564 / 7562, 351 ins, 483 del, 730 sub ]
nnet_tanh_6l/decode_eval_closelm/cer_10:%CER 17.10 [ 1293 / 7562, 337 ins, 448 del, 508 sub ]
nnet_4m_3l/decode_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 335 ins, 439 del, 523 sub ]
nnet_tanh_3l/decode_eval_closelm/cer_10:%CER 17.22 [ 1302 / 7562, 349 ins, 434 del, 519 sub ]

tri5a_pretrain-dbn_dnn/decode_closelm/cer_10:%CER 16.54 [ 1251 / 7562, 346 ins, 413 del, 492 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1/cer_10:%CER 15.31 [ 1158 / 7562, 280 ins, 410 del, 468 sub ]
@@ -130,6 +132,7 @@ exp/sgmm_5a_mmi_b0.1/decode_wide_eval_4/cer_10:%CER 23.17 [ 1752 / 7562, 373 ins
exp/nnet_8m_6l/decode_wide_eval/cer_10:%CER 24.13 [ 1825 / 7562, 384 ins, 535 del, 906 sub ]
exp/nnet_tanh_6l/decode_wide_eval/cer_10:%CER 21.22 [ 1605 / 7562, 365 ins, 485 del, 755 sub ]
exp/nnet_4m_3l/decode_wide_eval/cer_10:%CER 22.16 [ 1676 / 7562, 365 ins, 505 del, 806 sub ]
exp/nnet_tanh_3l/decode_wide_eval/cer_10:%CER 21.95 [ 1660 / 7562, 382 ins, 488 del, 790 sub ]
exp/tri5a_pretrain-dbn_dnn/decode_dnnwide/cer_10:%CER 20.47 [ 1548 / 7562, 383 ins, 467 del, 698 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it1_dnnwide/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide/cer_10:%CER 18.73 [ 1416 / 7562, 310 ins, 446 del, 660 sub ]
@@ -157,6 +160,7 @@ exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_4/cer_10:%CER 19.27 [ 1457 / 7562,
exp/nnet_8m_6l/decode_wide_eval_closelm/cer_10:%CER 17.87 [ 1351 / 7562, 343 ins, 453 del, 555 sub ]
exp/nnet_tanh_6l/decode_wide_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 336 ins, 452 del, 509 sub ]
exp/nnet_4m_3l/decode_wide_eval_closelm/cer_10:%CER 17.02 [ 1287 / 7562, 330 ins, 436 del, 521 sub ]
exp/nnet_tanh_3l/decode_wide_eval_closelm/cer_10:%CER 17.31 [ 1309 / 7562, 348 ins, 441 del, 520 sub ]
exp/tri5a_pretrain-dbn_dnn/decode_closelm_dnnwide/cer_10:%CER 16.42 [ 1242 / 7562, 337 ins, 414 del, 491 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1_dnnwide/cer_10:%CER 15.26 [ 1154 / 7562, 279 ins, 409 del, 466 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide/cer_10:%CER 15.31 [ 1158 / 7562, 279 ins, 408 del, 471 sub ]
@@ -12,6 +12,7 @@

ulimit -u 10000

# 6 hidden layers DNN
(
steps/nnet2/train_tanh.sh \
  --mix-up 8000 \
@@ -36,3 +37,28 @@ local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_6l/decode_wid

)

# 3 hidden layers DNN
(
steps/nnet2/train_tanh.sh \
  --mix-up 8000 \
  --initial-learning-rate 0.01 --final-learning-rate 0.001 \
  --num-hidden-layers 3 --hidden-layer-dim 1024 \
  --cmd "$decode_cmd" \
  data/train data/lang exp/tri5a_ali_dt100k exp/nnet_tanh_3l || exit 1

steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_tanh_3l/decode_eval &
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_tanh_3l/decode_eval_closelm &

steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_tanh_3l/decode_wide_eval &
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_tanh_3l/decode_wide_eval_closelm &
wait


local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_tanh_3l/decode_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_3l/decode_eval_closelm

local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_tanh_3l/decode_wide_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_3l/decode_wide_eval_closelm

)
@@ -1,9 +1,8 @@
#!/bin/bash
#
# Copyright 2012 Vassil Panayotov
# modified from a file that was:
# Copyright 2010-2011 Microsoft Corporation

# Copyright 2010-2011 Microsoft Corporation
# Copyright 2012 Vassil Panayotov
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -54,11 +53,10 @@ cat $RMROOT/rm1/etc/rm1_test.fileids | \
# this is needed, because the original "al_sents.snr" file is not available
# (and because CMU's train utterances have tags like '<sil>' added)
cat $RMROOT/rm1/etc/rm1_train.transcription |\
 sed -e 's/\(.*\)\(([a-z][a-z][0-9]\+)\)/\1\U\2/' |\
 sed -e 's:</\?si\?l\?>::g' -e 's:([0-9])::g' |\
 sed -e 's:\([ ][ ]\+\): :g' -e 's:^[ ]\+::g' |\
 cat $RMROOT/rm1/etc/rm1_test.transcription - \
 > al_sents.snr
 tr '[a-z]' '[A-Z]' |\
 sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's: +: :g' -e 's:^ +::' |\
 cat $RMROOT/rm1/etc/rm1_test.transcription - \
 > al_sents.snr

# training set
../../local/make_trans.pl trn train.flist al_sents.snr train_trans.txt train.scp
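As a quick sanity check of the rewritten normalization, here is a made-up transcription line pushed through the new pipeline (the sentence is illustrative; the commands are the ones added above):

echo "what is <sil> the ship's fuel(2) level (ae0305)" |\
 tr '[a-z]' '[A-Z]' |\
 sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's: +: :g' -e 's:^ +::'
# prints: WHAT IS THE SHIP'S FUEL LEVEL (AE0305)

Everything is uppercased first (so the later patterns only need to match capitals), silence tags and single-digit repetition markers like (2) are blanked out, whitespace is squeezed, and the utterance-ID-style tag survives as (AE0305).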
@@ -4,59 +4,80 @@
# Switchboard portion of eval2000, excluding CallHome, which is
# substantially easier.

# These results are slightly out of date: since then I changed
# the LDA+MLLT to use 7, not 9 frames of context, and also increased
# the learning rate for the "indirect" fMMI.

for x in exp/{mono,tri,sgmm,nnet}*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null
for x in exp/{mono,tri,sgmm,nnet}*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null


# These results are still partial.

exp/tri1/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 61.5 26.8 11.7 3.2 41.7 70.2 |
exp/tri1/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 61.1 27.3 11.6 3.5 42.3 70.3 |
exp/tri2/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 62.8 26.1 11.0 3.2 40.3 70.1 |
exp/tri2/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 62.7 26.9 10.3 3.7 40.9 70.5 |
exp/tri3a/decode_eval2000_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 69.7 21.6 8.6 3.2 33.5 68.0 |
exp/tri3a/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 69.3 22.0 8.7 3.4 34.1 67.3 |
exp/tri3b/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.3 18.0 7.6 2.9 28.6 65.6 |
exp/tri3b/decode_eval2000_sw1_fsh_tgpr_newcode/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.3 18.0 7.6 2.9 28.6 65.6 |
exp/tri3b/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.0 18.7 7.3 3.0 29.0 66.5 |
exp/tri3b/decode_eval2000_sw1_tg_newcode/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.0 18.7 7.3 3.0 29.0 66.5 |
exp/tri4a/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 78.4 15.2 6.3 2.6 24.1 61.4 |
exp/tri4a/decode_eval2000_sw1_fsh_tgpr.si/score_11/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 71.8 20.7 7.5 3.6 31.8 67.4 |
exp/tri4a/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 78.1 15.6 6.3 2.7 24.6 61.7 |
exp/tri4a/decode_eval2000_sw1_tg.si/score_11/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 71.3 21.2 7.5 3.8 32.5 67.7 |
exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_16/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 80.4 13.3 6.2 2.1 21.7 60.0 |
exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 73.7 18.9 7.4 3.0 29.3 65.9 |
exp/tri4b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 80.1 14.1 5.8 2.5 22.4 60.8 |
exp/tri4b/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 73.6 19.3 7.1 3.6 30.0 66.2 |
%WER 37.2 | 1831 21395 | 65.9 24.1 10.0 3.1 37.2 67.8 | exp/tri1/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 37.4 | 1831 21395 | 65.9 24.1 10.0 3.4 37.4 67.9 | exp/tri1/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 36.8 | 1831 21395 | 66.5 23.8 9.7 3.3 36.8 68.3 | exp/tri2/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 37.0 | 1831 21395 | 66.6 24.1 9.2 3.7 37.0 68.4 | exp/tri2/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 29.1 | 1831 21395 | 74.0 18.3 7.7 3.1 29.1 65.4 | exp/tri3b/decode_eval2000_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 29.7 | 1831 21395 | 73.6 18.6 7.7 3.3 29.7 65.3 | exp/tri3b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 24.3 | 1831 21395 | 77.9 15.0 7.1 2.3 24.3 61.5 | exp/tri4a/decode_eval2000_sw1_fsh_tgpr/score_17/eval2000.ctm.swbd.filt.sys
%WER 32.6 | 1831 21395 | 71.2 21.4 7.4 3.8 32.6 66.9 | exp/tri4a/decode_eval2000_sw1_fsh_tgpr.si/score_12/eval2000.ctm.swbd.filt.sys
%WER 25.0 | 1831 21395 | 77.7 15.6 6.6 2.8 25.0 62.4 | exp/tri4a/decode_eval2000_sw1_tg/score_15/eval2000.ctm.swbd.filt.sys
%WER 33.2 | 1831 21395 | 70.8 21.7 7.5 4.0 33.2 67.1 | exp/tri4a/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys
%WER 23.5 | 1831 21395 | 79.0 14.7 6.3 2.5 23.5 61.3 | exp/tri4a_fmmi_b0.1/decode_eval2000_it4_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 23.7 | 1831 21395 | 78.7 14.8 6.5 2.4 23.7 62.0 | exp/tri4a_fmmi_b0.1/decode_eval2000_it4_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 22.1 | 1831 21395 | 80.0 13.3 6.7 2.1 22.1 60.1 | exp/tri4a_fmmi_b0.1/decode_eval2000_it5_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 22.6 | 1831 21395 | 79.5 13.9 6.6 2.2 22.6 60.6 | exp/tri4a_fmmi_b0.1/decode_eval2000_it5_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 21.9 | 1831 21395 | 80.6 13.8 5.6 2.5 21.9 59.3 | exp/tri4a_fmmi_b0.1/decode_eval2000_it6_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 22.2 | 1831 21395 | 80.2 13.9 5.9 2.4 22.2 60.2 | exp/tri4a_fmmi_b0.1/decode_eval2000_it6_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 21.5 | 1831 21395 | 80.9 13.5 5.6 2.5 21.5 59.2 | exp/tri4a_fmmi_b0.1/decode_eval2000_it7_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 21.8 | 1831 21395 | 80.7 13.7 5.5 2.5 21.8 59.7 | exp/tri4a_fmmi_b0.1/decode_eval2000_it7_sw1_tg/score_11/eval2000.ctm.swbd.filt.sys
%WER 21.3 | 1831 21395 | 81.2 13.3 5.5 2.6 21.3 59.1 | exp/tri4a_fmmi_b0.1/decode_eval2000_it8_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 21.7 | 1831 21395 | 80.7 13.4 5.9 2.4 21.7 59.6 | exp/tri4a_fmmi_b0.1/decode_eval2000_it8_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 23.0 | 1831 21395 | 79.2 14.2 6.5 2.2 23.0 60.5 | exp/tri4a_mmi_b0.1/decode_eval2000_1.mdl_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 23.5 | 1831 21395 | 79.0 14.8 6.2 2.5 23.5 60.8 | exp/tri4a_mmi_b0.1/decode_eval2000_1.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 22.3 | 1831 21395 | 79.8 13.7 6.5 2.1 22.3 59.4 | exp/tri4a_mmi_b0.1/decode_eval2000_2.mdl_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 22.8 | 1831 21395 | 79.5 14.3 6.2 2.3 22.8 60.0 | exp/tri4a_mmi_b0.1/decode_eval2000_2.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 22.0 | 1831 21395 | 80.4 13.8 5.8 2.3 22.0 59.3 | exp/tri4a_mmi_b0.1/decode_eval2000_3.mdl_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 22.4 | 1831 21395 | 79.9 13.9 6.2 2.3 22.4 59.4 | exp/tri4a_mmi_b0.1/decode_eval2000_3.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 21.7 | 1831 21395 | 80.6 13.6 5.8 2.3 21.7 59.0 | exp/tri4a_mmi_b0.1/decode_eval2000_4.mdl_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 22.1 | 1831 21395 | 80.3 13.9 5.8 2.5 22.1 59.4 | exp/tri4a_mmi_b0.1/decode_eval2000_4.mdl_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 21.8 | 1831 21395 | 80.5 13.7 5.8 2.3 21.8 59.3 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 29.3 | 1831 21395 | 74.1 18.8 7.0 3.4 29.3 64.8 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 22.4 | 1831 21395 | 80.0 13.9 6.1 2.4 22.4 60.0 | exp/tri4b/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 30.3 | 1831 21395 | 73.1 19.7 7.1 3.4 30.3 64.7 | exp/tri4b/decode_eval2000_sw1_tg.si/score_14/eval2000.ctm.swbd.filt.sys
%WER 20.7 | 1831 21395 | 81.3 12.8 6.0 2.0 20.7 59.3 | exp/tri4b_fmmi_b0.1/decode_eval2000_it4_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 21.4 | 1831 21395 | 81.0 13.2 5.8 2.4 21.4 59.3 | exp/tri4b_fmmi_b0.1/decode_eval2000_it4_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 19.6 | 1831 21395 | 82.2 12.0 5.8 1.9 19.6 57.2 | exp/tri4b_fmmi_b0.1/decode_eval2000_it5_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 20.2 | 1831 21395 | 81.9 12.5 5.6 2.1 20.2 56.9 | exp/tri4b_fmmi_b0.1/decode_eval2000_it5_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.4 | 1831 21395 | 82.7 12.0 5.3 2.2 19.4 56.9 | exp/tri4b_fmmi_b0.1/decode_eval2000_it6_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.9 | 1831 21395 | 82.1 12.2 5.6 2.0 19.9 57.0 | exp/tri4b_fmmi_b0.1/decode_eval2000_it6_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.3 | 1831 21395 | 82.9 12.0 5.2 2.1 19.3 56.4 | exp/tri4b_fmmi_b0.1/decode_eval2000_it7_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.8 | 1831 21395 | 82.4 12.3 5.3 2.2 19.8 56.6 | exp/tri4b_fmmi_b0.1/decode_eval2000_it7_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.3 | 1831 21395 | 82.9 11.9 5.2 2.2 19.3 56.5 | exp/tri4b_fmmi_b0.1/decode_eval2000_it8_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.7 | 1831 21395 | 82.5 12.3 5.2 2.2 19.7 56.7 | exp/tri4b_fmmi_b0.1/decode_eval2000_it8_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 20.2 | 1831 21395 | 81.6 12.4 5.9 1.9 20.2 57.6 | exp/tri4b_mmi_b0.1/decode_eval2000_1.mdl_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 20.7 | 1831 21395 | 81.4 12.8 5.7 2.1 20.7 57.9 | exp/tri4b_mmi_b0.1/decode_eval2000_1.mdl_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 19.7 | 1831 21395 | 82.2 12.1 5.7 1.9 19.7 57.3 | exp/tri4b_mmi_b0.1/decode_eval2000_2.mdl_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 20.3 | 1831 21395 | 81.9 12.6 5.5 2.2 20.3 57.8 | exp/tri4b_mmi_b0.1/decode_eval2000_2.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.5 12.0 5.5 2.0 19.5 56.1 | exp/tri4b_mmi_b0.1/decode_eval2000_3.mdl_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 20.0 | 1831 21395 | 82.0 12.4 5.5 2.1 20.0 56.8 | exp/tri4b_mmi_b0.1/decode_eval2000_3.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.7 12.5 4.8 2.3 19.5 56.4 | exp/tri4b_mmi_b0.1/decode_eval2000_4.mdl_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 19.9 | 1831 21395 | 82.3 12.5 5.2 2.2 19.9 56.7 | exp/tri4b_mmi_b0.1/decode_eval2000_4.mdl_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 23.4 | 1831 21395 | 79.2 13.6 7.3 2.6 23.4 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 30.4 | 1831 21395 | 73.0 18.7 8.3 3.4 30.4 68.1 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 23.6 | 1831 21395 | 78.9 13.6 7.5 2.5 23.6 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 31.0 | 1831 21395 | 72.7 19.0 8.3 3.7 31.0 68.5 | exp/tri4c_reseg/decode_eval2000_sw1_tg.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 21.1 | 1831 21395 | 81.2 12.8 6.0 2.3 21.1 59.7 | exp/sgmm2_5a/decode_eval2000_sw1_fsh_tgpr/score_10/eval2000.ctm.swbd.filt.sys
%WER 21.3 | 1831 21395 | 80.9 13.0 6.2 2.2 21.3 59.5 | exp/sgmm2_5a/decode_eval2000_sw1_tg/score_11/eval2000.ctm.swbd.filt.sys
%WER 19.7 | 1831 21395 | 82.4 12.0 5.6 2.2 19.7 57.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it1/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.1 | 1831 21395 | 82.8 11.5 5.7 1.9 19.1 56.7 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it2/score_11/eval2000.ctm.swbd.filt.sys
%WER 19.0 | 1831 21395 | 83.2 11.5 5.3 2.1 19.0 56.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it3/score_10/eval2000.ctm.swbd.filt.sys
%WER 18.9 | 1831 21395 | 83.3 11.6 5.1 2.2 18.9 56.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it4/score_10/eval2000.ctm.swbd.filt.sys
%WER 20.4 | 1831 21395 | 81.9 12.4 5.7 2.3 20.4 57.8 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.8 | 1831 21395 | 82.5 12.1 5.4 2.3 19.8 57.3 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.8 12.0 5.3 2.3 19.5 56.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.9 12.0 5.1 2.4 19.5 56.5 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it4/score_10/eval2000.ctm.swbd.filt.sys


# some more recent results (Sep 25 2013), from tri4b and tri4c_reseg, to
# see the effect of resegmentation. Note: we're only looking at the "swbd" results here,
# the callhome results or total results are terrible because of huge insertions, because
# it seems that only some segments of the audio files are in the stm. I'm not sure
# where to get the start and end points in the files that they intended us to
# decode.
%WER 22.2 | 1831 21395 | 80.3 13.8 5.9 2.5 22.2 60.1 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 29.3 | 1831 21395 | 73.5 18.7 7.8 2.9 29.3 65.0 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_17/eval2000.ctm.swbd.filt.sys
%WER 22.5 | 1831 21395 | 79.8 13.8 6.4 2.3 22.5 60.3 | exp/tri4b/decode_eval2000_sw1_tg/score_17/eval2000.ctm.swbd.filt.sys
%WER 30.5 | 1831 21395 | 73.1 19.8 7.1 3.6 30.5 65.8 | exp/tri4b/decode_eval2000_sw1_tg.si/score_14/eval2000.ctm.swbd.filt.sys

%WER 22.9 | 1831 21395 | 79.7 13.4 6.9 2.6 22.9 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 29.6 | 1831 21395 | 73.8 18.2 8.1 3.4 29.6 66.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 23.5 | 1831 21395 | 79.1 13.8 7.1 2.6 23.5 63.6 | exp/tri4c_reseg/decode_eval2000_sw1_tg/score_15/eval2000.ctm.swbd.filt.sys
%WER 30.9 | 1831 21395 | 73.1 19.0 7.9 4.0 30.9 67.6 | exp/tri4c_reseg/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys
# so the resegmented one is about 0.3 to 1.0 worse, but the #sub is actually down; it's due to more deletions
# and insertions. This is kind of what we'd expect, since the reference segmentation is a kind of "oracle".

# below are some results where I kept the segments that the segmentation
# regarded as noise (e.g. cough, etc.). Results after adaptation almost identical, but
# 0.1% better with the switchboard-only LM.

%WER 22.9 | 1831 21395 | 79.7 13.4 6.9 2.6 22.9 62.6 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_fsh_tgpr/score_14/eval2000_with_noise.ctm.swbd.filt.sys
%WER 29.7 | 1831 21395 | 73.6 18.3 8.0 3.4 29.7 67.0 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_fsh_tgpr.si/score_13/eval2000_with_noise.ctm.swbd.filt.sys
%WER 23.6 | 1831 21395 | 79.1 14.0 6.9 2.8 23.6 64.2 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_tg/score_14/eval2000_with_noise.ctm.swbd.filt.sys
%WER 30.8 | 1831 21395 | 72.9 19.0 8.1 3.7 30.8 67.5 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_tg.si/score_13/eval2000_with_noise.ctm.swbd.filt.sys
%WER 29.10 [ 14382 / 49427, 1963 ins, 3394 del, 9025 sub ] exp/tri4b/decode_train_dev_sw1_fsh_tgpr/wer_15
%WER 37.81 [ 18686 / 49427, 2078 ins, 4625 del, 11983 sub ] exp/tri4b/decode_train_dev_sw1_fsh_tgpr.si/wer_15
%WER 29.53 [ 14598 / 49427, 1885 ins, 3538 del, 9175 sub ] exp/tri4b/decode_train_dev_sw1_tg/wer_16
%WER 38.42 [ 18990 / 49427, 2154 ins, 4461 del, 12375 sub ] exp/tri4b/decode_train_dev_sw1_tg.si/wer_15
@@ -0,0 +1,2 @@
beam=13.0 # beam for decoding. Was 13.0 in the scripts.
latbeam=8.0 # this has most effect on size of the lattices.
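These two values form a decode config that other scripts pick up via --config; the make_denlats_nnet.sh calls just below pass it explicitly (assuming this hunk is the conf/decode_dnn.config they reference):

steps/make_denlats_nnet.sh --nj 10 --sub-split 100 --cmd "$decode_cmd" \
  --config conf/decode_dnn.config --acwt $acwt \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_denlats_all || exit 1;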
@@ -116,7 +116,7 @@ acwt=0.08333
{
steps/align_nnet.sh --nj 250 --cmd "$train_cmd" \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_ali_all || exit 1;
steps/make_denlats_nnet.sh --nj 250 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
steps/make_denlats_nnet.sh --nj 10 --sub-split 100 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_denlats_all || exit 1;
}
# Now we re-train the hybrid by single iteration of sMBR
@@ -156,7 +156,7 @@ acwt=0.08333
{
steps/align_nnet.sh --nj 250 --cmd "$train_cmd" \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_ali_all || exit 1;
steps/make_denlats_nnet.sh --nj 250 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
steps/make_denlats_nnet.sh --nj 10 --sub-split 100 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_denlats_all || exit 1;
}
# Now we re-train the hybrid by several iterations of sMBR
@@ -13,8 +13,10 @@

. cmd.sh
. path.sh
set -e # exit on error

local/swbd1_data_prep.sh /export/corpora3/LDC/LDC97S62
local/swbd1_data_prep.sh /home/dpovey/data/LDC97S62
# local/swbd1_data_prep.sh /home/dpovey/data/LDC97S62
# local/swbd1_data_prep.sh /data/corpora0/LDC97S62
# local/swbd1_data_prep.sh /mnt/matylda2/data/SWITCHBOARD_1R2
# local/swbd1_data_prep.sh /exports/work/inf_hcrc_cstr_general/corpora/switchboard/switchboard1
@@ -29,9 +31,12 @@ utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang

# If you have the Fisher data, you can set this "fisher_opt" variable.
fisher_opt="--fisher /export/corpora3/LDC/LDC2004T19/fe_03_p1_tran/"
#fisher_opt="--fisher /home/dpovey/data/LDC2004T19/fe_03_p1_tran/"
#fisher_opt="--fisher /data/corpora0/LDC2004T19/fe_03_p1_tran/"
# edinburgh:
# fisher_opt="--fisher /exports/work/inf_hcrc_cstr_general/corpora/fisher/transcripts"
# brno:
# fisher_opt="--fisher /mnt/matylda2/data/FISHER/fe_03_p1_tran" # BUT
local/swbd1_train_lms.sh $fisher_opt \
  data/local/train/text data/local/dict/lexicon.txt data/local/lm
# We don't really need all these options for SRILM, since the LM training script
@@ -48,7 +53,7 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
# For some funny reason we are still using IRSTLM for doing LM pruning :)
export PATH=$PATH:../../../tools/irstlm/bin/
prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o3g.kn.gz /dev/stdout \
  | gzip -c > data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
  | gzip -c > data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz || exit 1
LM=data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
  data/lang $LM data/local/dict/lexicon.txt data/lang_sw1_fsh_tgpr
@@ -61,23 +66,24 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
# local/eval2000_data_prep.sh /data/corpora0/LDC2002S09/hub5e_00 /data/corpora0/LDC2002T43
# local/eval2000_data_prep.sh /mnt/matylda2/data/HUB5_2000/ /mnt/matylda2/data/HUB5_2000/2000_hub5_eng_eval_tr
# local/eval2000_data_prep.sh /exports/work/inf_hcrc_cstr_general/corpora/switchboard/hub5/2000 /exports/work/inf_hcrc_cstr_general/corpora/switchboard/hub5/2000/transcr
# local/eval2000_data_prep.sh /home/dpovey/data/LDC2002S09/hub5e_00 /home/dpovey/data/LDC2002T43
local/eval2000_data_prep.sh /export/corpora2/LDC/LDC2002S09/hub5e_00 /export/corpora2/LDC/LDC2002T43

# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc

steps/make_mfcc.sh --compress true --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train $mfccdir || exit 1;
steps/make_mfcc.sh --compress true --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train $mfccdir
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train $mfccdir

# Remove the small number of utterances that couldn't be extracted for some
# reason (e.g. too short; no such file).
utils/fix_data_dir.sh data/train || exit 1;
utils/fix_data_dir.sh data/train

# Create MFCCs for the eval set
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/eval2000 exp/make_mfcc/eval2000 $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/eval2000 exp/make_mfcc/eval2000 $mfccdir || exit 1;
utils/fix_data_dir.sh data/eval2000 || exit 1 # remove segments with problems
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/eval2000 exp/make_mfcc/eval2000 $mfccdir
steps/compute_cmvn_stats.sh data/eval2000 exp/make_mfcc/eval2000 $mfccdir
utils/fix_data_dir.sh data/eval2000 # remove segments with problems

# Use the first 4k sentences as dev set. Note: when we trained the LM, we used
# the 1st 10k sentences as dev set, so the 1st 4k won't have been used in the
@@ -114,13 +120,13 @@ local/remove_dup_utts.sh 300 data/train_nodev data/train_nodup # 286hr

## Starting basic training on MFCC features
steps/train_mono.sh --nj 10 --cmd "$train_cmd" \
  data/train_10k_nodup data/lang exp/mono || exit 1;
  data/train_10k_nodup data/lang exp/mono

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/mono exp/mono_ali || exit 1;
  data/train_30k_nodup data/lang exp/mono exp/mono_ali

steps/train_deltas.sh --cmd "$train_cmd" \
  3200 30000 data/train_30k_nodup data/lang exp/mono_ali exp/tri1 || exit 1;
  3200 30000 data/train_30k_nodup data/lang exp/mono_ali exp/tri1

for lm_suffix in tg fsh_tgpr; do
  (
@@ -133,10 +139,10 @@ for lm_suffix in tg fsh_tgpr; do
done

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/tri1 exp/tri1_ali || exit 1;
  data/train_30k_nodup data/lang exp/tri1 exp/tri1_ali

steps/train_deltas.sh --cmd "$train_cmd" \
  3200 30000 data/train_30k_nodup data/lang exp/tri1_ali exp/tri2 || exit 1;
  3200 30000 data/train_30k_nodup data/lang exp/tri1_ali exp/tri2


for lm_suffix in tg fsh_tgpr; do
@@ -156,11 +162,11 @@ done
# From now, we start building a bigger system (on train_100k_nodup, which has
# 110hrs of data). We start with the LDA+MLLT system
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri2 exp/tri2_ali_100k_nodup || exit 1;
  data/train_100k_nodup data/lang exp/tri2 exp/tri2_ali_100k_nodup

# Train tri3b, which is LDA+MLLT, on 100k_nodup data.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  5500 90000 data/train_100k_nodup data/lang exp/tri2_ali_100k_nodup exp/tri3b || exit 1;
  5500 90000 data/train_100k_nodup data/lang exp/tri2_ali_100k_nodup exp/tri3b

for lm_suffix in tg fsh_tgpr; do
  (
@@ -174,12 +180,12 @@ done

# Train tri4a, which is LDA+MLLT+SAT, on 100k_nodup data.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri3b exp/tri3b_ali_100k_nodup || exit 1;
  data/train_100k_nodup data/lang exp/tri3b exp/tri3b_ali_100k_nodup


steps/train_sat.sh --cmd "$train_cmd" \
  5500 90000 data/train_100k_nodup data/lang exp/tri3b_ali_100k_nodup \
  exp/tri4a || exit 1;
  exp/tri4a

for lm_suffix in tg fsh_tgpr; do
  (
@@ -198,11 +204,11 @@ done
# 286 hours)
# Train tri4b, which is LDA+MLLT+SAT, on train_nodup data.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_nodup data/lang exp/tri3b exp/tri3b_ali_nodup || exit 1;
  data/train_nodup data/lang exp/tri3b exp/tri3b_ali_nodup


steps/train_sat.sh --cmd "$train_cmd" \
  11500 200000 data/train_nodup data/lang exp/tri3b_ali_nodup exp/tri4b || exit 1;
  11500 200000 data/train_nodup data/lang exp/tri3b_ali_nodup exp/tri4b

for lm_suffix in tg fsh_tgpr; do
  (
@@ -215,7 +221,9 @@ for lm_suffix in tg fsh_tgpr; do
      $graph_dir data/train_dev exp/tri4b/decode_train_dev_sw1_${lm_suffix}
  ) &
done

wait
steps/lmrescore.sh --mode 3 --cmd "$mkgraph_cmd" data/lang_sw1_fsh_tgpr data/lang_sw1_fsh_tg data/eval2000 \
  exp/tri4b/decode_eval2000_sw1_fsh_tgpr exp/tri4b/decode_eval2000_sw1_fsh_tg.3 || exit 1


# MMI training starting from the LDA+MLLT+SAT systems on both the
@@ -229,11 +237,11 @@ steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
steps/make_denlats.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
  --transform-dir exp/tri4a_ali_100k_nodup \
  data/train_100k_nodup data/lang exp/tri4a exp/tri4a_denlats_100k_nodup \
  || exit 1;


steps/make_denlats.sh --nj 100 --cmd "$decode_cmd" --config conf/decode.config \
  --transform-dir exp/tri4b_ali_nodup \
  data/train_nodup data/lang exp/tri4b exp/tri4b_denlats_nodup || exit 1;
  data/train_nodup data/lang exp/tri4b exp/tri4b_denlats_nodup

# 4 iterations of MMI seems to work well overall. The number of iterations is
# used as an explicit argument even though train_mmi.sh will use 4 iterations by
@@ -241,11 +249,11 @@ steps/make_denlats.sh --nj 100 --cmd "$decode_cmd" --config conf/decode.config \
num_mmi_iters=4
steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 --num-iters $num_mmi_iters \
  data/train_100k_nodup data/lang exp/tri4a_{ali,denlats}_100k_nodup \
  exp/tri4a_mmi_b0.1 || exit 1;
  exp/tri4a_mmi_b0.1

steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 --num-iters $num_mmi_iters \
  data/train_nodup data/lang exp/tri4b_{ali,denlats}_nodup \
  exp/tri4b_mmi_b0.1 || exit 1;
  exp/tri4b_mmi_b0.1

for iter in 1 2 3 4; do
  for lm_suffix in tg fsh_tgpr; do
@@ -283,11 +291,11 @@ steps/train_diag_ubm.sh --silence-weight 0.5 --nj 100 --cmd "$train_cmd" \

steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup exp/tri4a_dubm \
  exp/tri4a_denlats_100k_nodup exp/tri4a_fmmi_b0.1 || exit 1;
  exp/tri4a_denlats_100k_nodup exp/tri4a_fmmi_b0.1

steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
  data/train_nodup data/lang exp/tri4b_ali_nodup exp/tri4b_dubm \
  exp/tri4b_denlats_nodup exp/tri4b_fmmi_b0.1 || exit 1;
  exp/tri4b_denlats_nodup exp/tri4b_fmmi_b0.1

for iter in 4 5 6 7 8; do
  for lm_suffix in tg fsh_tgpr; do
@@ -1,28 +1,76 @@
for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done

# Use caution when comparing these results with other published results.
Training Set   : Timit training set (4620 sentences)
Test Set       : Timit test set (1680 sentences)
Training Set   : 3696 sentences
Dev Set        : 400 sentences
Test Set       : 192 sentences
Language Model : Bigram phoneme language model which is extracted from training set

# monophone, deltas.
%PER 28.94 [ 18201 / 62901, 1598 ins, 5644 del, 10959 sub ] exp/mono/decode_bg_test/wer_4
---------------------------------Dev Set------------------------------------------
%WER 33.53 [ 5048 / 15057, 397 ins, 1674 del, 2977 sub ] exp/mono/decode_dev/wer_3
--------------------------------Test Set------------------------------------------
%WER 34.77 [ 2509 / 7215, 193 ins, 826 del, 1490 sub ] exp/mono/decode_test/wer_3
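# (How to read these lines -- a gloss added here, not part of the original
# RESULTS file: %WER = 100 * (ins + del + sub) / N, e.g. for the monophone
# dev result above, 100 * (397 + 1674 + 2977) / 15057 = 33.53.)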

# tri1 : first triphone system (delta+delta-delta features)
%PER 22.60 [ 14215 / 62901, 1796 ins, 3466 del, 8953 sub ] exp/tri1/decode_bg_test/wer_8
---------------------------------Dev Set------------------------------------------
%WER 29.26 [ 4405 / 15057, 541 ins, 1281 del, 2583 sub ] exp/tri1/decode_dev/wer_6
--------------------------------Test Set------------------------------------------
%WER 30.53 [ 2203 / 7215, 259 ins, 654 del, 1290 sub ] exp/tri1/decode_test/wer_6

#tri2 : an LDA+MLLT system
---------------------------------Dev Set------------------------------------------
%WER 26.38 [ 3972 / 15057, 421 ins, 1269 del, 2282 sub ] exp/tri2/decode_dev/wer_7
--------------------------------Test Set------------------------------------------
%WER 28.41 [ 2050 / 7215, 220 ins, 664 del, 1166 sub ] exp/tri2/decode_test/wer_7

#tri2 : an LDA+MLLT system.
%PER 20.36 [ 12807 / 62901, 1872 ins, 2914 del, 8021 sub ] exp/tri2/decode_bg_test/wer_7

#tri3 : Speaker Adaptive Training (SAT) system
%PER 18.27 [ 11489 / 62901, 1681 ins, 2810 del, 6998 sub ] exp/tri3/decode_bg_test/wer_6
---------------------------------Dev Set------------------------------------------
%WER 23.36 [ 3517 / 15057, 464 ins, 1001 del, 2052 sub ] exp/tri3/decode_dev/wer_4
%WER 26.53 [ 3995 / 15057, 394 ins, 1289 del, 2312 sub ] exp/tri3/decode_dev.si/wer_7
--------------------------------Test Set------------------------------------------
%WER 24.96 [ 1801 / 7215, 245 ins, 529 del, 1027 sub ] exp/tri3/decode_test/wer_4
%WER 27.96 [ 2017 / 7215, 214 ins, 650 del, 1153 sub ] exp/tri3/decode_test.si/wer_7

#SGMM2 Training
%PER 16.17 [ 10171 / 62901, 1309 ins, 2708 del, 6154 sub ] exp/sgmm2_4/decode_bg_test/wer_6

# SGMM2 + MMI Training
%PER 16.14 [ 10154 / 62901, 1845 ins, 2074 del, 6235 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it1/wer_6
%PER 16.58 [ 10430 / 62901, 2032 ins, 2031 del, 6367 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it2/wer_7
%PER 16.80 [ 10570 / 62901, 2071 ins, 2096 del, 6403 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it3/wer_8
%PER 17.02 [ 10706 / 62901, 2154 ins, 2048 del, 6504 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it4/wer_8
#SGMM2 Training :
---------------------------------Dev Set------------------------------------------
%WER 20.66 [ 3111 / 15057, 347 ins, 1022 del, 1742 sub ] exp/sgmm2_4/decode_dev/wer_5
--------------------------------Test Set------------------------------------------
%WER 22.88 [ 1651 / 7215, 189 ins, 519 del, 943 sub ] exp/sgmm2_4/decode_test/wer_5

# SGMM2 + MMI Training :
---------------------------------Dev Set------------------------------------------
%WER 20.48 [ 3084 / 15057, 450 ins, 849 del, 1785 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it1/wer_5
%WER 20.20 [ 3042 / 15057, 508 ins, 740 del, 1794 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it2/wer_5
%WER 20.36 [ 3065 / 15057, 548 ins, 711 del, 1806 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it3/wer_5
%WER 20.40 [ 3071 / 15057, 506 ins, 762 del, 1803 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it4/wer_6
--------------------------------Test Set------------------------------------------
%WER 22.66 [ 1635 / 7215, 250 ins, 420 del, 965 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it1/wer_5
%WER 22.44 [ 1619 / 7215, 282 ins, 384 del, 953 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it2/wer_5
%WER 22.62 [ 1632 / 7215, 298 ins, 369 del, 965 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it3/wer_5
%WER 22.48 [ 1622 / 7215, 277 ins, 386 del, 959 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it4/wer_6

# Hybrid System :
---------------------------------Dev Set------------------------------------------
%WER 22.77 [ 3429 / 15057, 411 ins, 1057 del, 1961 sub ] exp/tri4_nnet/decode_dev/wer_3
--------------------------------Test Set------------------------------------------
%WER 24.84 [ 1792 / 7215, 197 ins, 579 del, 1016 sub ] exp/tri4_nnet/decode_test/wer_2

# Combination :
---------------------------------Dev Set------------------------------------------
%WER 20.26 [ 3051 / 15057, 371 ins, 937 del, 1743 sub ] exp/combine_2/decode_dev_it1/wer_4
%WER 19.91 [ 2998 / 15057, 397 ins, 870 del, 1731 sub ] exp/combine_2/decode_dev_it2/wer_4
%WER 19.75 [ 2974 / 15057, 422 ins, 825 del, 1727 sub ] exp/combine_2/decode_dev_it3/wer_4
%WER 19.79 [ 2980 / 15057, 373 ins, 886 del, 1721 sub ] exp/combine_2/decode_dev_it4/wer_5
--------------------------------Test Set------------------------------------------
%WER 21.90 [ 1580 / 7215, 191 ins, 474 del, 915 sub ] exp/combine_2/decode_test_it1/wer_4
%WER 21.73 [ 1568 / 7215, 218 ins, 442 del, 908 sub ] exp/combine_2/decode_test_it2/wer_4
%WER 21.62 [ 1560 / 7215, 223 ins, 423 del, 914 sub ] exp/combine_2/decode_test_it3/wer_4
%WER 21.68 [ 1564 / 7215, 197 ins, 476 del, 891 sub ] exp/combine_2/decode_test_it4/wer_5
@@ -1,62 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This program takes as its standard input an .ndx file from the WSJ corpus that looks
# like this:
#;; File: tr_s_wv1.ndx, updated 04/26/94
#;;
#;; Index for WSJ0 SI-short Sennheiser training data
#;; Data is read WSJ sentences, Sennheiser mic.
#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts
#;; per speaker TI) = 7236 utts
#;;
#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1

# and as command-line arguments it takes the names of the WSJ disk locations, e.g.:
# /mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 ... etc.
# It outputs a list of absolute pathnames (it does this by replacing e.g. 11_1_1 with
# /mnt/matylda2/data/WSJ0/11-1.1).
# It also does a slight fix because one of the WSJ disks (WSJ1/13-16.1) was distributed with
# uppercase rather than lower case filenames.

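# (Hypothetical invocation, for illustration only -- this script's filename is
# not shown in this diff excerpt:)
#   perl this_script.pl /mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 \
#     < tr_s_wv1.ndx > train.flist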
foreach $fn (@ARGV) {
  $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n";
  $disk_id=$1;
  $disk_id =~ tr/-\./__/; # replace - and . with _ so 11-10.1 becomes 11_10_1
  $fn =~ s:/$::; # Remove final slash, just in case it is present.
  $disk2fn{$disk_id} = $fn;
}

while(<STDIN>){
  if(m/^;/){ next; } # Comment. Ignore it.
  else {
    m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_";
    $disk=$1;
    if(!defined $disk2fn{$disk}) {
      die "Disk id $disk not found";
    }
    $filename = $2; # as a subdirectory of the distributed disk.
    if($disk eq "13_16_1" && `hostname` =~ m/fit.vutbr.cz/) {
      # The disk 13-16.1 has been uppercased for some reason, on the
      # BUT system. This is a fix specifically for that case.
      $filename =~ tr/a-z/A-Z/; # This disk contains all uppercase filenames. Why?
    }
    print "$disk2fn{$disk}/$filename\n";
  }
}
@@ -1,36 +0,0 @@
#!/bin/bash

. cmd.sh

mfccdir=mfcc

# Make "per-utterance" versions of the test sets where the speaker
# information corresponds to utterances -- to demonstrate adaptation on
# short utterances, particularly for basis fMLLR
for x in "test" ; do
  y=${x}_utt
  rm -r data/$y
  cp -r data/$x data/$y
  cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
  cp data/$y/utt2spk data/$y/spk2utt;
  steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;
done
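# (Illustration, with a made-up utterance id: a speaker-level utt2spk entry
# such as "fadg0_si1279 fadg0" becomes "fadg0_si1279 fadg0_si1279" in the
# per-utterance copy, i.e. every utterance is treated as its own speaker.)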

# basis fMLLR experiments.
# First a baseline: decode per-utterance with normal fMLLR.
steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3b/graph_bg data/test_utt exp/tri3b/decode_bg_test_utt || exit 1;

# get the fMLLR basis.
steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train data/lang exp/tri3b

# decoding tri3b with basis fMLLR
steps/decode_basis_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3b/graph_bg data/test exp/tri3b/decode_bg_test_basis || exit 1;

# The same, per-utterance.
steps/decode_basis_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3b/graph_bg data/test_utt exp/tri3b/decode_bg_test_basis_utt || exit 1;

@@ -1,41 +0,0 @@
# prepare reverse lexicon and language model for backwards decoding
utils/prepare_lang.sh --reverse true data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp.reverse data/lang.reverse || exit 1;
utils/reverse_lm.sh data/local/nist_lm/lm_bg_5k.arpa.gz data/lang.reverse data/lang_test_bg_5k.reverse || exit 1;
utils/reverse_lm_test.sh data/lang_test_bg_5k data/lang_test_bg_5k.reverse || exit 1;

# normal forward decoding
utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 8 --cmd "$decode_cmd" \
  exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_10 || exit 1;

# backward decoding
utils/mkgraph.sh --reverse data/lang_test_bg_5k.reverse exp/tri2a exp/tri2a/graph_bg5k_r
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 8 --cmd "$decode_cmd" \
  exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_reverse10 || exit 1;

# pingpong decoding
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 8 --cmd "$decode_cmd" \
  --first_pass exp/tri2a/decode_eval92_bg5k_10 exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_pingpong10 || exit 1;
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 8 --cmd "$decode_cmd" \
  --first_pass exp/tri2a/decode_eval92_bg5k_reverse10 exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_pongping10 || exit 1;

# same for bigger language models (on a machine with 8GB RAM, you can run the whole decoding in 3-4 min without SGE)
utils/prepare_lang.sh --reverse true data/local/dict_larger "<SPOKEN_NOISE>" data/local/lang_larger.reverse data/lang_bd.reverse || exit;
utils/reverse_lm.sh --lexicon data/local/dict_larger/lexicon.txt data/local/local_lm/3gram-mincount/lm_pr6.0.gz data/lang_bd.reverse data/lang_test_bd_tgpr.reverse || exit 1;
utils/reverse_lm_test.sh data/lang_test_bd_tgpr data/lang_test_bd_tgpr.reverse || exit 1;

utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri2a exp/tri2a/graph_bd_tgpr
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 4 --cmd run.pl \
  exp/tri2a/graph_bd_tgpr data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_10 || exit 1;

utils/mkgraph.sh --reverse data/lang_test_bd_tgpr.reverse exp/tri2a exp/tri2a/graph_bd_tgpr_r
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 4 --cmd run.pl \
  exp/tri2a/graph_bd_tgpr_r data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_reverse10 || exit 1;

steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 4 --cmd run.pl \
  --first_pass exp/tri2a/decode_eval92_bdtgpr4_10 exp/tri2a/graph_bd_tgpr_r data/test_eval92 \
  exp/tri2a/decode_eval92_bdtgpr4_pingpong10 || exit 1;

steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 4 --cmd run.pl \
  --first_pass exp/tri2a/decode_eval92_bdtgpr4_reverse10 exp/tri2a/graph_bd_tgpr data/test_eval92 \
  exp/tri2a/decode_eval92_bdtgpr4_pongping10 || exit 1;

@@ -1,96 +0,0 @@
#!/bin/bash

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

. ./path.sh ## Source the tools/utils (import the queue.pl)

###
### Now we can train the Deep Neural Network in a hybrid setup
###
### The fMLLR features are
###  - spliced,
###  - decorrelated by LDA,
###  - rescaled by CMVN over the dataset
###

#( # Train the MLP
dir=exp/tri4a_dnn
$cuda_cmd $dir/_train_nnet.log \
  steps/train_nnet.sh --hid-layers 4 --hid-dim 1200 \
  --apply-cmvn false --splice-lr 4 --feat-type lda --lda-dim 300 \
  --learn-rate 0.008 --bunch-size 256 \
  data-fmllr/train data-fmllr/test_test_sup data/lang exp/tri3b exp/tri3b_ali_test $dir || exit 1;
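# (Reading the options above -- a gloss inferred from the flags, not stated in
# the script: --splice-lr 4 splices 4 frames on each side of the current frame,
# i.e. 9 frames in total, and the LDA then projects the spliced vector down to
# --lda-dim 300 dimensions before the network sees it.)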

# we can use the graph from the baseline system, tri4a.
# decode. Note: the dev93 results are not valid as testing results because
# the fMLLR was from the training transcripts.
steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --acwt 0.10 \
  exp/tri3b/graph_bg data-fmllr/test exp/tri3b_dnn/decode_bg_test

# decode with big dictionary.
utils/mkgraph.sh data/lang_test_bg exp/tri3b_dnn exp/tri3b_dnn/graph_bg || exit 1;

steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --acwt 0.10 \
  exp/tri3b_dnn/graph_bg data-fmllr/test exp/tri3b_dnn/decode_bg_test
#)

# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done



#from here


#false && \
(

###
### First we need to generate the alignments,
###
### these are used as DNN training targets,
### also the fMLLR transforms are needed
###

# We don't really need the alignment directory, as tri4a was trained
# on si284 and already contains alignments.
#steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
#  data/train_si284 data/lang exp/tri4a exp/tri4a_ali_si284 || exit 1

steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/test data/lang exp/tri3b exp/tri3b_ali_test || exit 1 #dev

###
### As next step we store the fMLLR features, so we can train on them easily
###

gmmdir=exp/tri3b

# dev93 (using alignments)
dir=data-fmllr/test_test_sup
# generate the features
steps/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
  --transform-dir exp/tri3b_ali_test \
  $dir data/test $gmmdir $dir/_log $dir/_data || exit 1

# train si284
# generate the features
dir=data-fmllr/train
steps/make_fmllr_feats.sh --nj 20 --cmd "$train_cmd" \
  --transform-dir exp/tri3b \
  $dir data/train $gmmdir $dir/_log $dir/_data || exit 1

# eval92
dir=data-fmllr/test
steps/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \
  --transform-dir exp/tri3b/decode_bg_test \
  $dir data/test $gmmdir $dir/_log $dir/_data || exit 1

dir=data-fmllr/test
steps/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
  --transform-dir exp/tri3b/decode_bg_test \
  $dir data/test $gmmdir $dir/_log $dir/_data || exit 1
)
@@ -1,57 +0,0 @@
#!/bin/bash

. ./cmd.sh

# Train and test MMI (and boosted MMI) on the tri2b system.
steps/make_denlats.sh --sub-split 20 --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b exp/tri2b_denlats_train || exit 1;

# train the basic MMI system.
steps/train_mmi.sh --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train \
  exp/tri2b_denlats_train exp/tri2b_mmi || exit 1;

for iter in 1 2 3 4; do
  steps/decode_si.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_mmi/decode_bg_test_it$iter &
done

# MMI with 0.1 boosting factor.
steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
  data/train data/lang exp/tri2b_ali_train exp/tri2b_denlats_train \
  exp/tri2b_mmi_b0.1 || exit 1;
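# (Background note added here, not in the original script: boosted MMI raises
# the weight of denominator-lattice paths in proportion to how many errors
# they contain, via a factor of the form exp(-boost * accuracy); see Povey
# et al., "Boosted MMI for model and feature-space discriminative training",
# ICASSP 2008.)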

for iter in 1 2 3 4; do
  steps/decode_si.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_mmi_b0.1/decode_bg_test_it$iter &
done

# Train a UBM with 400 components, for fMMI.
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 30 --cmd "$train_cmd" \
  400 data/train data/lang exp/tri2b_ali_train exp/dubm2b

steps/train_mmi_fmmi.sh --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train exp/dubm2b exp/tri2b_denlats_train \
  exp/tri2b_fmmi_b0.1

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_fmmi_b0.1/decode_bg_test_it$iter &
done

steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train exp/dubm2b exp/tri2b_denlats_train \
  exp/tri2b_fmmi_b0.1_lr0.005 || exit 1;

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_fmmi_b0.1_lr0.005/decode_bg_test_it$iter &
done

steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train exp/dubm2b exp/tri2b_denlats_train \
  exp/tri2b_fmmi_indirect_b0.1
for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_fmmi_indirect_b0.1/decode_bg_test_it$iter &
done
@@ -1,41 +0,0 @@
#!/bin/bash
. ./cmd.sh
[ -f path.sh ] && . ./path.sh

steps/make_denlats.sh --nj 30 --sub-split 30 --cmd "$train_cmd" \
  --transform-dir exp/tri3b_ali_train \
  data/train data/lang exp/tri3b exp/tri3b_denlats_train || exit 1;

steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
  data/train data/lang exp/tri3b_ali_train exp/tri3b_denlats_train \
  exp/tri3b_mmi_b0.1 || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_bg_test \
  exp/tri3b/graph_tgpr data/test exp/tri3b_mmi_b0.1/decode_bg_test

# first, train a UBM for the fMMI experiments.
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 30 --cmd "$train_cmd" \
  600 data/train data/lang exp/tri3b_ali_train exp/dubm3b

# Next, fMMI+MMI.
steps/train_mmi_fmmi.sh \
  --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali_train exp/dubm3b exp/tri3b_denlats_train exp/tri3b_fmmi_a || exit 1;

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3b/decode_bg_test exp/tri3b/graph_bg data/test \
    exp/tri3b_fmmi_a/decode_bg_test_it$iter
done

# fMMI + MMI with the indirect differential.
steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b_ali_train exp/dubm3b exp/tri3b_denlats_train \
  exp/tri3b_fmmi_indirect || exit 1;

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3b/decode_bg_test exp/tri3b/graph_bg data/test \
    exp/tri3b_fmmi_indirect/decode_bg_test_it$iter
done

@@ -1,35 +0,0 @@
#!/bin/bash

. ./cmd.sh

( # I'm using basically the same setup as for Switchboard 100 hours,
  # but slightly fewer parameters (8M -> 7M) as we have slightly less
  # data (81 hours).
  steps/train_nnet_cpu.sh \
    --mix-up 8000 \
    --initial-learning-rate 0.01 --final-learning-rate 0.001 \
    --num-jobs-nnet 16 --num-hidden-layers 4 \
    --num-parameters 7000000 \
    --cmd "$decode_cmd" \
    data/train_si284 data/lang exp/tri4b_ali_si284 exp/nnet5c1 || exit 1

  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 10 \
    --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
    exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93

  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
    --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
    exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93
)

(
  steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.001 \
    --minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
    data/train data/lang exp/tri5c1_nnet exp/tri5c1_nnet exp/tri5c1_denlats exp/tri5c1_mmi_a

  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
    --transform-dir exp/tri3b/decode \
    exp/tri3b/graph data/test exp/tri5c1_mmi_a/decode
)&

@@ -1,42 +0,0 @@
#!/bin/bash

for test in dev93 eval92; do

  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_bd_tgpr data/lang_test_bd_fg \
    data/test_${test} exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 || exit 1;

  # Note: for N-best-list generation, choosing the acoustic scale (12) that gave
  # the best WER on this test set. Ideally we should do this on a dev set.

  # This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM.
  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm30_0.25 \
    || exit 1;
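  # (Gloss added for clarity, assuming the usual convention of this script:
  # the leading numeric argument is the interpolation weight lambda, so the
  # rescoring scores each N-best hypothesis with
  #   P(w|h) = lambda * P_RNNLM(w|h) + (1 - lambda) * P_4gram(w|h),
  # here with lambda = 0.25.)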

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm100_0.5 \
    || exit 1;

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm200_0.5 \
    || exit 1;

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.5 \
    || exit 1;

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.75 \
    || exit 1;
done
@@ -1,64 +0,0 @@
#!/bin/bash

. cmd.sh

# This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM.
steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm30_0.25 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm100_0.5 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm200_0.5 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000

dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75_N1000
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000 $dir
steps/rnnlmrescore.sh \
  --stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
  0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg $dir

dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir
steps/rnnlmrescore.sh \
  --stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg $dir

dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.25
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir
steps/rnnlmrescore.sh \
  --stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.25 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg $dir

steps/rnnlmrescore.sh \
  --N 10 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N10 \
  || exit 1;
@@ -1,113 +0,0 @@
#!/bin/bash

# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.

. cmd.sh

# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for
# training, but this shouldn't have much effect.

(
  steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
    data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1;

  steps/train_ubm.sh --cmd "$train_cmd" \
    400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1;

  steps/train_sgmm.sh --cmd "$train_cmd" \
    3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \
    exp/ubm5b/final.ubm exp/sgmm5a || exit 1;

  (
    utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr
    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
      exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93
  ) &

  steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \
    --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1;
  steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \
    data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84

  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
    data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1

  for iter in 1 2 3 4; do
    steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
      exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter &
  done

  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
    --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9

  for iter in 1 2 3 4; do
    steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
      exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter &
  done

) &


(
  # The next commands are the same thing on all the si284 data.

  # SGMM system on the si284 data [sgmm5b]
  steps/train_ubm.sh --cmd "$train_cmd" \
    600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1;

  steps/train_sgmm.sh --cmd "$train_cmd" \
    5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
    exp/ubm5b/final.ubm exp/sgmm5b || exit 1;

  (
    utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr
    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
      exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93
    steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
      exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92

    utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1;
    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
      exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93
    steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
      exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92
  ) &

  steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \
    --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284

  steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
    data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284

  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \
    data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1

  for iter in 1 2 3 4; do
    for test in dev93 eval92; do
      steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
        --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \
        exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter &

      steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
        --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \
        exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter &
    done
  done
) &


# Train quinphone SGMM system.

steps/train_sgmm.sh --cmd "$train_cmd" \
  --context-opts "--context-width=5 --central-position=2" \
  5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
  exp/ubm5b/final.ubm exp/sgmm5c || exit 1;
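# (For comparison -- an assumption based on standard Kaldi defaults rather
# than anything shown here: an ordinary triphone system corresponds to
# --context-width=3 --central-position=1, so the options above widen the
# phonetic context window from 3 phones to 5.)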

# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93.
steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
  data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93

@@ -1,74 +0,0 @@
#!/bin/bash

# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
[ -f path.sh ] && . ./path.sh


# Note: you might want to try to give the option --spk-dep-weights=false to train_sgmm2.sh;
# this takes out the "symmetric SGMM" part which is not always helpful.

# SGMM system on train data [sgmm4a]. Note: the system we aligned from used the train data
# for training, but this shouldn't have much effect.

steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b exp/tri3b_ali_train || exit 1;

steps/train_ubm.sh --cmd "$train_cmd" \
  400 data/train data/lang exp/tri3b_ali_train exp/ubm4a || exit 1;

steps/train_sgmm2.sh --cmd "$train_cmd" \
  7000 9000 data/train data/lang exp/tri3b_ali_train \
  exp/ubm4a/final.ubm exp/sgmm2_4a || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4a exp/sgmm2_4a/graph_bg
steps/decode_sgmm2.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_bg_test \
  exp/sgmm2_4a/graph_bg data/test exp/sgmm2_4a/decode_bg_test

steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri3b_ali_train \
  --use-graphs true --use-gselect true data/train data/lang exp/sgmm2_4a exp/sgmm2_4a_ali_train || exit 1;
steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train \
  data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train

steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train --boost 0.1 \
  data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train exp/sgmm2_4a_mmi_b0.1

for iter in 1 2 3 4; do
  for test in "test"; do # dev93
    steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri3b/decode_bg_${test} data/lang_test_bg data/${test} exp/sgmm2_4a/decode_bg_${test} exp/sgmm2_4a_mmi_b0.1/decode_bg_${test}_it$iter
  done
done

# steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train --boost 0.1 \
#   --update-opts "--cov-min-value=0.9" data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train exp/sgmm2_4a_mmi_b0.1_m0.9

steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train --boost 0.1 \
  --zero-if-disjoint true data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train exp/sgmm2_4a_mmi_b0.1_z

for iter in 1 2 3 4; do
  for test in "test"; do # dev93
    steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri3b/decode_bg_${test} data/lang_test_bg data/${test} exp/sgmm2_4a/decode_bg_${test} \
      exp/sgmm2_4a_mmi_b0.1_z/decode_bg_${test}_it$iter
  done
done

# Examples of combining some of the best decodings: SGMM+MMI with
# MMI+fMMI on a conventional system.

local/score_combine.sh data/test \
  data/lang_test_bg \
  exp/tri3b_fmmi_a/decode_bg_test_it1 \
  exp/sgmm2_4a_mmi_b0.1/decode_bg_test_it1 \
  exp/combine_tri3b_fmmi_a_sgmm2_4a_mmi_b0.1/decode_bg_test_it1_1


# Checking MBR decode of baseline:
cp -r -T exp/sgmm2_4a_mmi_b0.1/decode_bg_test_it3{,.mbr}
local/score_mbr.sh data/test data/lang_test_bg exp/sgmm2_4a_mmi_b0.1/decode_bg_test_it3.mbr
@@ -1,61 +0,0 @@
#!/bin/bash

# Script for minimum Bayes risk decoding.

[ -f ./path.sh ] && . ./path.sh;

# begin configuration section.
cmd=run.pl
min_lmwt=1
max_lmwt=10
# end configuration section.

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: local/score_mbr.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
  exit 1;
fi

data=$1
lang_or_graph=$2
dir=$3

symtab=$lang_or_graph/words.txt

for f in $symtab $dir/lat.1.gz $data/text; do
  [ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1;
done

mkdir -p $dir/scoring/log

phonemap="conf/phones.60-48-39.map"

cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt

# We submit the jobs separately, not as an array, because it's hard
# to get the inverse of the LM scales.
rm $dir/.error 2>/dev/null
for inv_acwt in `seq $min_lmwt $max_lmwt`; do
  acwt=`perl -e "print (1.0/$inv_acwt);"`
  $cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \
    lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \
    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \
    || touch $dir/.error &
done
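# (Worked example of the acoustic-scale arithmetic above, added as a gloss:
# with min_lmwt=1 and max_lmwt=10, the loop decodes at acwt = 1.0, 0.5,
# 0.333..., down to 0.1 -- i.e. acwt is simply 1/inv_acwt, so an LM weight
# of 10 corresponds to an acoustic scale of 0.1.)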
wait;
[ -f $dir/.error ] && echo "score_mbr.sh: error getting MBR output.";

$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
  cat $dir/scoring/LMWT.tra \| \
  utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
  local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 \| \
  compute-wer --text --mode=present \
  ark:$dir/scoring/test_filt.txt ark,p:- ">" $dir/wer_LMWT || exit 1;
@@ -1,97 +1,109 @@
#!/bin/bash

# Copyright 2013 (Author: Bagher BabaAli)
# Copyright 2013 (Authors: Bagher BabaAli, Daniel Povey, Arnab Ghoshal)
# Apache 2.0.

if [ $# -ne 1 ]; then
  echo "Argument should be the Timit directory, see ../run.sh for example."
  exit 1;
fi

dir=`pwd`/data/local/data
mkdir -p $dir
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
conf=`pwd`/conf

. ./path.sh # Needed for KALDI_ROOT

export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
  echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
  exit 1;
fi

cd $dir
[ -f $conf/test_spk.list ] || error_exit "$PROG: Eval-set speaker list not found.";
[ -f $conf/dev_spk.list ] || error_exit "$PROG: dev-set speaker list not found.";

# Make directory of links to the TIMIT disk. This relies on the command
# line arguments being absolute pathnames.
rm -r links/ 2>/dev/null
mkdir links/

ln -s $* links

# Do some basic checks that we have what we expected.
if [ ! -d $*/TRAIN -o ! -d $*/TEST ]; then
# First check if the train & test directories exist (these can either be upper-
# or lower-cased)
if [ ! -d $*/TRAIN -o ! -d $*/TEST ] && [ ! -d $*/train -o ! -d $*/test ]; then
  echo "timit_data_prep.sh: Spot check of command line argument failed"
  echo "Command line argument must be absolute pathname to TIMIT directory"
  echo "with name like /export/corpora5/LDC/LDC93S1/timit/TIMIT"
  exit 1;
fi

# This version for TRAIN
# Now check what case the directory structure is
uppercased=false
train_dir=train
test_dir=test
if [ -d $*/TRAIN ]; then
  [ -d $*/train -o -d $*/test ] \
    && echo "Error: Found both upper- & lower-cased directories" && exit 1;
  uppercased=true
  train_dir=TRAIN
  test_dir=TEST
fi

TrainDir=$*/TRAIN
find -L $TrainDir \( -iname '*.WAV' -o -iname '*.wav' \) > train.flist
nl=`cat train.flist | wc -l`
[ "$nl" -eq 4620 ] || echo "Warning: expected 4620 lines in train.flist, got $nl"
tmpdir=$(mktemp -d);
trap 'rm -rf "$tmpdir"' EXIT

# Now for the TEST.
# Get the list of speakers. The list of speakers in the 24-speaker core test
# set and the 50-speaker development set must be supplied to the script. All
# speakers in the 'train' directory are used for training.
if $uppercased; then
  tr '[:lower:]' '[:upper:]' < $conf/dev_spk.list > $tmpdir/dev_spk
  tr '[:lower:]' '[:upper:]' < $conf/test_spk.list > $tmpdir/test_spk
  ls -d "$*"/TRAIN/DR*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
else
  tr '[:upper:]' '[:lower:]' < $conf/dev_spk.list > $tmpdir/dev_spk
  tr '[:upper:]' '[:lower:]' < $conf/test_spk.list > $tmpdir/test_spk
  ls -d "$*"/train/dr*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
fi

TestDir=$*/TEST
find -L $TestDir \( -iname '*.WAV' -o -iname '*.wav' \) > test.flist
cd $dir
for x in train dev test; do
  # First, find the list of audio files (use only si & sx utterances).
  # Note: train & test sets are under different directories, but doing find on
  # both and grepping for the speakers will work correctly.

  nl=`cat test.flist | wc -l`
  [ "$nl" -eq 1680 ] || echo "Warning: expected 1680 lines in test.flist, got $nl"
  find $*/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.WAV' \
    | grep -f $tmpdir/${x}_spk > ${x}_sph.flist

  sed -e 's:.*/\(.*\)/\(.*\).WAV$:\1_\2:i' ${x}_sph.flist \
    > $tmpdir/${x}_sph.uttids
  paste $tmpdir/${x}_sph.uttids ${x}_sph.flist \
    | sort -k1,1 > ${x}_sph.scp

  # Finding the transcript files:
  find -L $TrainDir \( -iname '*.PHN' -o -iname '*.phn' \) > train_phn.flist
  find -L $TestDir \( -iname '*.PHN' -o -iname '*.phn' \) > test_phn.flist
  cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids

# Convert the transcripts into our format (no normalization yet)
for x in train test; do
  $local/timit_flist2scp.pl $x.flist | sort > ${x}_sph.scp
  cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids
  cat ${x}.uttids | $local/timit_find_transcripts.pl ${x}_phn.flist > ${x}_phn.trans
done
  # Now, convert the transcripts into our format (no normalization yet).
  # Get the transcripts: each line of the output contains an utterance
  # ID followed by the transcript.
  find $*/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.PHN' \
    | grep -f $tmpdir/${x}_spk > $tmpdir/${x}_phn.flist
  sed -e 's:.*/\(.*\)/\(.*\).PHN$:\1_\2:i' $tmpdir/${x}_phn.flist \
    > $tmpdir/${x}_phn.uttids
  while read line; do
    [ -f $line ] || error_exit "Cannot find transcription file '$line'";
    cut -f3 -d' ' "$line" | tr '\n' ' ' | sed -e 's: *$:\n:'
  done < $tmpdir/${x}_phn.flist > $tmpdir/${x}_phn.trans
  paste $tmpdir/${x}_phn.uttids $tmpdir/${x}_phn.trans \
    | sort -k1,1 > ${x}.trans

# Do normalization steps.
cat train_phn.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 48 | sort > train.txt || exit 1;
  cat ${x}.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 39 | sort > $x.txt || exit 1;

for x in test; do
  cat ${x}_phn.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 39 | sort > $x.txt || exit 1;
done

# Create scp's with wav's.
for x in train test; do
  awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp > ${x}_wav.scp
done

# Make the utt2spk and spk2utt files.
for x in train test; do
  cut -f1 -d'_' $x.uttids | paste -d' ' $x.uttids - > $x.utt2spk
  cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;

  cut -f1 -d'_' $x.uttids | paste -d' ' $x.uttids - > $x.utt2spk
  cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;

  cat $x.spk2utt | awk '{print $1}' | perl -ane 'chop; m:^.:; print "$_ $&\n";' > $x.spk2gender
done

# Make the spk2gender files.
for x in train test; do
  cat $x.spk2utt | awk '{print $1}' | perl -ane 'chop; m:^.:; print "$_ $&\n";' > $x.spk2gender
done
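# (Illustration of the spk2gender line above, relying on TIMIT's convention
# that a speaker id starts with the gender letter -- the id itself is just an
# example: for speaker "fadg0" the perl one-liner emits "fadg0 f".)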


echo "Data preparation succeeded"
@@ -1,60 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This program takes on its standard input a list of utterance
# id's, one for each line (e.g. 4k0c030a is an utterance id).
# It extracts from the phn files the transcripts for a given
# dataset (represented by a file list).
#

@ARGV == 1 || die "timit_find_transcripts.pl phn_trans_flist < utterance_ids > transcripts";
$phn_flist = shift @ARGV;

open(L, "<$phn_flist") || die "Opening file list of phn files: $phn_flist\n";
while(<L>){
  chop;
  m:^\S+/(\w+)/(\w+)\.[pP][hH][nN]$: || die "Bad line in phn file list: $_";
  $spk = $1 . "_" . $2;
  $spk2phn{$spk} = $_;
}

%utt2trans = ( );
while(<STDIN>){
  chop;
  $uttid = $_;
  $uttid =~ m:(\w+)_(\w+): || die "Bad utterance id $_";
  $phnfile = $spk2phn{$uttid};
  defined $phnfile || die "No phn file for utterance $uttid\n";
  open(F, "<$phnfile") || die "Error opening phn file $phnfile\n";
  @trans = ();
  while(<F>) {
    $_ =~ m:\d+\s\d+\s(.+)$: || die "Bad line $_ in phn file $phnfile (line $.)\n";
    push (@trans, $1);
  }
  $utt2trans{$uttid} = join(" ", @trans);

  if(!defined $utt2trans{$uttid}) {
    print STDERR "No transcript for utterance $uttid (current phn file is $phnfile)\n";
  } else {
    print "$uttid $utt2trans{$uttid}\n";
  }
  close(F);
}
@@ -1,30 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# takes in a file list with lines like
# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# and outputs an scp in kaldi format with lines like
# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# (the first thing is the utterance-id, which is the same as the basename of the file.)

while(<>){
  m:^\S+/(\w+)/(\w+)\.[wW][aA][vV]$: || die "Bad line $_";
  $id = $1 . "_" . $2;
  print "$id $_";
}
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright 2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
# Copyright 2013 (Author: Daniel Povey)
# Apache 2.0

# This script takes data prepared in a corpus-dependent way
@@ -9,14 +9,14 @@

. ./path.sh || exit 1;

echo "Preparing train and test data"
echo "Preparing train, dev and test data"
srcdir=data/local/data
lmdir=data/local/nist_lm
tmpdir=data/local/lm_tmp
lexicon=data/local/dict/lexicon.txt
mkdir -p $tmpdir

for x in train test; do
for x in train dev test; do
  mkdir -p data/$x
  cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
  cp $srcdir/$x.txt data/$x/text || exit 1;
@@ -25,7 +25,6 @@ for x in train test; do
  utils/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1;
done


# Next, for each type of language model, create the corresponding FST
# and the corresponding lang_test_* directory.

@@ -53,10 +52,10 @@ for lm_suffix in bg; do
  # Because of the <s> fiasco for these particular LMs, the first number is not
  # as close to zero as it could be.

  # Everything below is only for diagnostics.
  # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
  # this might cause determinization failure of CLG.
  # #0 is treated as an empty word.
  mkdir -p $tmpdir/g
  awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
    < "$lexicon" >$tmpdir/g/select_empty.fst.txt
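  # (A sketch of how this diagnostic typically continues in Kaldi format_data
  # scripts -- an assumption, since the rest of the hunk is not shown here:
  # compile the acceptor above, compose it with G, and fail if the result is
  # cyclic.)
  # fstcompile --isymbols=data/lang_test_${lm_suffix}/words.txt \
  #   --osymbols=data/lang_test_${lm_suffix}/words.txt \
  #   $tmpdir/g/select_empty.fst.txt | \
  #   fstarcsort --sort_type=olabel | \
  #   fstcompose - data/lang_test_${lm_suffix}/G.fst > $tmpdir/g/empty_words.fst
  # fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && \
  #   echo "Language model has cycles with empty words" && exit 1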
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright 2013 (Author: Bagher BabaAli)
# Copyright 2013 (Authors: Daniel Povey, Bagher BabaAli)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
|
|||
$p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
|
||||
>> $dir/extra_questions.txt || exit 1;
|
||||
|
||||
|
||||
# (2) Create the phone bigram LM
|
||||
#(
|
||||
[ -z "$IRSTLM" ] && \
|
||||
echo "LM building won't work without setting the IRSTLM env variable" && exit 1;
|
||||
! which build-lm.sh 2>/dev/null && \
|
||||
|
@ -76,8 +74,4 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
|
|||
compile-lm $tmpdir/lm_phone_bg.ilm.gz --text yes /dev/stdout | \
|
||||
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
|
||||
|
||||
#) >& data/prepare_lm.log
|
||||
|
||||
|
||||
echo "Dictionary preparation succeeded"
|
||||
|
||||
echo "Dictionary & language model preparation succeeded"
|
|
@@ -5,13 +5,27 @@
. ./cmd.sh
[ -f path.sh ] && . ./path.sh

# Acoustic model parameters
numLeavesTri1=2500
numGaussTri1=15000
numLeavesMLLT=2500
numGaussMLLT=15000
numLeavesSAT=2500
numGaussSAT=15000
numGaussUBM=400
numLeavesSGMM=7000
numGaussSGMM=9000

decode_nj=20
train_nj=30

echo ============================================================================
echo " Data & Lexicon & Language Preparation "
echo ============================================================================

timit=/export/corpora5/LDC/LDC93S1/timit/TIMIT

local/timit_data_prep.sh $timit || exit 1;
local/timit_data_prep.sh $timit || exit 1;

local/timit_prepare_dict.sh || exit 1;

@@ -21,119 +35,178 @@ utils/prepare_lang.sh --position-dependent-phones false --num-sil-states 3 \
local/timit_format_data.sh || exit 1;

echo ============================================================================
echo " MFCC Feature Extraction & CMVN for Training and Test set "
echo " MFCC Feature Extraction & CMVN for Training and Test set "
echo ============================================================================

# Now make MFCC features.
mfccdir=mfcc
for x in test train; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 \
    data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
use_pitch=false
use_ffv=false

for x in train dev test; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done

vecho ============================================================================
echo ============================================================================
echo " MonoPhone Training & Decoding "
echo ============================================================================

steps/train_mono.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/mono || exit 1;
steps/train_mono.sh --nj "$train_nj" --cmd "$train_cmd" data/train data/lang exp/mono || exit 1;

utils/mkgraph.sh --mono data/lang_test_bg exp/mono exp/mono/graph_bg || exit 1;
utils/mkgraph.sh --mono data/lang_test_bg exp/mono exp/mono/graph || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" \
  exp/mono/graph_bg data/test exp/mono/decode_bg_test || exit 1;
steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/dev exp/mono/decode_dev || exit 1;

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/test exp/mono/decode_test || exit 1;

echo ============================================================================
echo " tri1 : Deltas + Delta-Deltas Training & Decoding "
echo ============================================================================

steps/align_si.sh --boost-silence 1.25 --nj 30 --cmd "$train_cmd" \
steps/align_si.sh --boost-silence 1.25 --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/mono exp/mono_ali || exit 1;

# Train tri1, which is deltas + delta-deltas, on train data.
steps/train_deltas.sh --cmd "$train_cmd" \
  2500 15000 data/train data/lang exp/mono_ali exp/tri1 || exit 1;
  $numLeavesTri1 $numGaussTri1 data/train data/lang exp/mono_ali exp/tri1 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/tri1 exp/tri1/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/tri1 exp/tri1/graph || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri1/graph_bg data/test exp/tri1/decode_bg_test || exit 1;
steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/dev exp/tri1/decode_dev || exit 1;

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/test exp/tri1/decode_test || exit 1;

echo ============================================================================
echo " tri2 : LDA + MLLT Training & Decoding "
echo ============================================================================

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali_train || exit 1;
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali || exit 1;

steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 15000 data/train data/lang exp/tri1_ali_train exp/tri2 || exit 1;
  $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri1_ali exp/tri2 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/tri2 exp/tri2/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/tri2 exp/tri2/graph || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri2/graph_bg data/test exp/tri2/decode_bg_test || exit 1;
steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1;

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/test exp/tri2/decode_test || exit 1;

echo ============================================================================
echo " tri3 : LDA + MLLT + SAT Training & Decoding "
echo ============================================================================

# Align tri2 system with train data.
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali_train || exit 1;
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali || exit 1;

# From tri2 system, train tri3 which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" \
  2500 15000 data/train data/lang exp/tri2_ali_train exp/tri3 || exit 1;
  $numLeavesSAT $numGaussSAT data/train data/lang exp/tri2_ali exp/tri3 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/tri3 exp/tri3/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/tri3 exp/tri3/graph || exit 1;

steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3/graph_bg data/test exp/tri3/decode_bg_test || exit 1;
steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/dev exp/tri3/decode_dev || exit 1;

steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/test exp/tri3/decode_test || exit 1;

echo ============================================================================
echo " SGMM2 Training & Decoding "
echo ============================================================================

steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri3 exp/tri3_ali_train || exit 1;
steps/align_fmllr.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri3 exp/tri3_ali || exit 1;

steps/train_ubm.sh --cmd "$train_cmd" \
  400 data/train data/lang exp/tri3_ali_train exp/ubm4 || exit 1;
  $numGaussUBM data/train data/lang exp/tri3_ali exp/ubm4 || exit 1;

steps/train_sgmm2.sh --cmd "$train_cmd" 7000 9000 \
  data/train data/lang exp/tri3_ali_train exp/ubm4/final.ubm exp/sgmm2_4 || exit 1;
steps/train_sgmm2.sh --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \
  data/train data/lang exp/tri3_ali exp/ubm4/final.ubm exp/sgmm2_4 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4 exp/sgmm2_4/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4 exp/sgmm2_4/graph || exit 1;

steps/decode_sgmm2.sh --nj 30 --cmd "$decode_cmd"\
  --transform-dir exp/tri3/decode_bg_test exp/sgmm2_4/graph_bg data/test \
  exp/sgmm2_4/decode_bg_test || exit 1;
steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd"\
  --transform-dir exp/tri3/decode_dev exp/sgmm2_4/graph data/dev \
  exp/sgmm2_4/decode_dev || exit 1;

steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd"\
  --transform-dir exp/tri3/decode_test exp/sgmm2_4/graph data/test \
  exp/sgmm2_4/decode_test || exit 1;

echo ============================================================================
echo " MMI + SGMM2 Training & Decoding "
echo ============================================================================

steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" \
  --transform-dir exp/tri3_ali_train --use-graphs true --use-gselect true data/train \
  data/lang exp/sgmm2_4 exp/sgmm2_4_ali_train || exit 1;
steps/align_sgmm2.sh --nj "$train_nj" --cmd "$train_cmd" \
  --transform-dir exp/tri3_ali --use-graphs true --use-gselect true data/train \
  data/lang exp/sgmm2_4 exp/sgmm2_4_ali || exit 1;

steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd"\
  --transform-dir exp/tri3_ali_train data/train data/lang exp/sgmm2_4_ali_train \
  exp/sgmm2_4_denlats_train || exit 1;
steps/make_denlats_sgmm2.sh --nj "$train_nj" --sub-split "$train_nj" --cmd "$decode_cmd"\
  --transform-dir exp/tri3_ali data/train data/lang exp/sgmm2_4_ali \
  exp/sgmm2_4_denlats || exit 1;

steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" \
  --transform-dir exp/tri3_ali_train --boost 0.1 --zero-if-disjoint true \
  data/train data/lang exp/sgmm2_4_ali_train exp/sgmm2_4_denlats_train \
  exp/sgmm2_4_mmi_b0.1_z || exit 1;
  --transform-dir exp/tri3_ali --boost 0.1 --zero-if-disjoint true \
  data/train data/lang exp/sgmm2_4_ali exp/sgmm2_4_denlats \
  exp/sgmm2_4_mmi_b0.1 || exit 1;

for iter in 1 2 3 4; do
  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3/decode_bg_test data/lang_test_bg data/test \
    exp/sgmm2_4/decode_bg_test exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it$iter || exit 1;
    --transform-dir exp/tri3/decode_dev data/lang_test_bg data/dev \
    exp/sgmm2_4/decode_dev exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter || exit 1;

  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3/decode_test data/lang_test_bg data/test \
    exp/sgmm2_4/decode_test exp/sgmm2_4_mmi_b0.1/decode_test_it$iter || exit 1;
done

echo ============================================================================
echo " DNN Hybrid Training & Decoding "
echo ============================================================================

# DNN hybrid system training parameters
dnn_mem_reqs="mem_free=1.0G,ram_free=0.2G"
dnn_extra_opts="--num_epochs 20 --num-epochs-extra 10 --add-layers-period 1 --shrink-interval 3"

steps/train_nnet_cpu.sh --mix-up 5000 --initial-learning-rate 0.015 \
  --final-learning-rate 0.002 --num-hidden-layers 2 --num-parameters 1500000 \
  --num-jobs-nnet "$train_nj" --cmd "$train_cmd" "${dnn_train_extra_opts[@]}" \
  data/train data/lang exp/tri3_ali exp/tri4_nnet || exit 1;

decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=0.7G")
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
  --transform-dir exp/tri3/decode_dev exp/tri3/graph data/dev \
  exp/tri4_nnet/decode_dev | tee exp/tri4_nnet/decode_dev/decode.log

steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
  --transform-dir exp/tri3/decode_test exp/tri3/graph data/test \
  exp/tri4_nnet/decode_test | tee exp/tri4_nnet/decode_test/decode.log

echo ============================================================================
echo " System Combination (DNN+SGMM) "
echo ============================================================================

for iter in 1 2 3 4; do
  local/score_combine.sh --cmd "$decode_cmd" \
    data/dev data/lang_test_bg exp/tri4_nnet/decode_dev \
    exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter exp/combine_2/decode_dev_it$iter

  local/score_combine.sh --cmd "$decode_cmd" \
    data/test data/lang_test_bg exp/tri4_nnet/decode_test \
    exp/sgmm2_4_mmi_b0.1/decode_test_it$iter exp/combine_2/decode_test_it$iter
done


echo ============================================================================
echo " Getting Results [see RESULTS file] "
echo ============================================================================

@@ -142,8 +215,8 @@ for x in exp/*/decode*; do
  [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh
done

exit 0;



echo ============================================================================
echo "Finished successfully on" `date`
echo ============================================================================

exit 0
@@ -13,6 +13,7 @@ stage=0
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40

align_to_lats=false # optionally produce alignment in lattice format
lats_decode_opts="--acoustic-scale=0.1 --beam=20 --latbeam=10"
lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1"

@@ -48,22 +49,19 @@ sdata=$data/split$nj

cp $srcdir/{tree,final.mdl} $dir || exit 1;

#Get the files we will need
# Select default locations to model files
nnet=$srcdir/final.nnet;
[ ! -s "$nnet" ] && echo "Missing nnet '$nnet'" && exit 1;

class_frame_counts=$srcdir/ali_train_pdf.counts
[ ! -s "$class_frame_counts" ] && echo "Missing class_frame_counts '$class_frame_counts'" && exit 1;

feature_transform=$srcdir/final.feature_transform
[ ! -s $feature_transform ] && echo "Missing feature_transform '$feature_transform'" && exit 1

model=$dir/final.mdl
[ ! -s "$model" ] && echo "Missing transition-model '$model'" && exit 1;

###
### Prepare feature pipeline (same as for decoding)
###
# Check that files exist
for f in $sdata/1/feats.scp $sdata/1/text $lang/L.fst $nnet $model $feature_transform $class_frame_counts; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done


# PREPARE FEATURE EXTRACTION PIPELINE
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn

@@ -77,15 +75,11 @@ if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Finally add feature_transform and the MLP
feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |"
###
###
###

echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"


echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"
# Map oovs in reference transcription
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
# We could just use align-mapped in the next line, but it's less efficient as it compiles the
@@ -87,8 +87,8 @@ cp $cmvndir/cmvn_$name.scp $data/cmvn.scp || exit 1;
nc=`cat $data/cmvn.scp | wc -l`
nu=`cat $data/spk2utt | wc -l`
if [ $nc -ne $nu ]; then
  echo "Error: it seems not all of the speakers got cmvn stats ($nc != $nu);"
  exit 1;
  echo "$0: warning: it seems not all of the speakers got cmvn stats ($nc != $nu);"
  [ $nc -eq 0 ] && exit 1;
fi

echo "Succeeded creating CMVN stats for $name"
@@ -3,25 +3,29 @@
# Copyright 2012-2013 Karel Vesely, Daniel Povey
# Apache 2.0

# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
# Begin configuration section.
nnet= # non-default location of DNN (optional)
feature_transform= # non-default location of feature_transform (optional)
model= # non-default location of transition model (optional)
class_frame_counts= # non-default location of PDF counts (optional)
srcdir= # non-default location of DNN-dir (decouples model dir from decode dir)

stage=0 # stage=1 skips lattice generation
nj=4
cmd=run.pl
max_active=7000 # maximum of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
beam=13.0 # GMM:13.0
latbeam=8.0 # GMM:6.0
acwt=0.10 # GMM:0.0833, note: only really affects pruning (scoring is on lattices).
scoring_opts="--min-lmwt 4 --max-lmwt 15"

acwt=0.10 # note: only really affects pruning (scoring is on lattices).
beam=13.0
latbeam=8.0
max_active=7000 # limit of active tokens
max_mem=50000000 # approx. limit to memory consumption during minimization in bytes

skip_scoring=false
use_gpu_id=-1 # disable gpu
parallel_opts="-pe smp 2" # use 2 CPUs (1 DNN-forward, 1 decoder)
srcdir= # optionally select dir with DNN model
scoring_opts="--min-lmwt 4 --max-lmwt 15"

num_threads=1 # if >1, will use latgen-faster-parallel
parallel_opts="-pe smp $((num_threads+1))" # use 2 CPUs (1 DNN-forward, 1 decoder)
use_gpu_id=-1 # -1 disable gpu
# End configuration section.

echo "$0 $@" # Print the command line for logging

@@ -32,7 +36,7 @@ echo "$0 $@" # Print the command line for logging
if [ $# != 3 ]; then
  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
  echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
  echo " where the DNN + transition model is."
  echo " where the DNN and transition model is."
  echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
  echo ""
  echo "This script works on plain or modified features (CMN,delta+delta-delta),"

@@ -44,13 +48,13 @@ if [ $# != 3 ]; then
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo ""
  echo " --nnet <nnet> # which nnet to use (opt.)"
  echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
  echo " --class-frame-counts <file> # file with frame counts (used to compute priors) (opt.)"
  echo " --model <model> # which transition model to use (opt.)"
  echo " --nnet <nnet> # non-default location of DNN (opt.)"
  echo " --srcdir <dir> # non-default dir with DNN/models, can be different"
  echo " # from parent dir of <decode-dir>' (opt.)"
  echo ""
  echo " --acwt <float> # select acoustic scale for decoding"
  echo " --scoring-opts <opts> # options forwarded to local/score.sh"
  echo " --num-threads <N> # N>1: run multi-threaded decoder"
  exit 1;
fi

@@ -58,43 +62,31 @@ fi
graphdir=$1
data=$2
dir=$3
[ -z $srcdir ] && srcdir=`dirname $dir`; # Or back-off to: model directory one level up from decoding directory.
[ -z $srcdir ] && srcdir=`dirname $dir`; # Default model directory one level up from decoding directory.
sdata=$data/split$nj;

mkdir -p $dir/log

[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
  nnet=$srcdir/final.nnet;
fi
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
# Select default locations to model files (if not already set externally)
if [ -z "$nnet" ]; then nnet=$srcdir/final.nnet; fi
if [ -z "$model" ]; then model=$srcdir/final.mdl; fi
if [ -z "$feature_transform" ]; then feature_transform=$srcdir/final.feature_transform; fi
if [ -z "$class_frame_counts" ]; then class_frame_counts=$srcdir/ali_train_pdf.counts; fi

if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
  model=$srcdir/final.mdl;
fi

# find the feature_transform to use
if [ -z "$feature_transform" ]; then
  feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

# check that files exist
for f in $sdata/1/feats.scp $nnet_i $nnet $model $graphdir/HCLG.fst; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
# Check that files exist
for f in $sdata/1/feats.scp $nnet $model $feature_transform $class_frame_counts $graphdir/HCLG.fst; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done

# PREPARE THE LOG-POSTERIOR COMPUTATION PIPELINE
if [ -z "$class_frame_counts" ]; then
  class_frame_counts=$srcdir/ali_train_pdf.counts
else
  echo "Overriding class_frame_counts by $class_frame_counts"
fi
# Possibly use multi-threaded decoder
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"


# PREPARE FEATURE EXTRACTION PIPELINE
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn

@@ -109,13 +101,12 @@ if [ -f $srcdir/delta_order ]; then
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi


# Run the decoding in the queue
if [ $stage -le 0 ]; then
  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet "$feats" ark:- \| \
    latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    latgen-faster-mapped$thread_string --max-active=$max_active --max-mem=$max_mem --beam=$beam \
    --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi

@@ -79,28 +79,21 @@ else
fi



#Get the files we will need
cp $srcdir/{tree,final.mdl} $dir

# Select default locations to model files
[ -z "$nnet" ] && nnet=$srcdir/final.nnet;
[ ! -f "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;

class_frame_counts=$srcdir/ali_train_pdf.counts
[ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1;

feature_transform=$srcdir/final.feature_transform
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

model=$dir/final.mdl
[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1;

###
### Prepare feature pipeline (same as for decoding)
###
# Check that files exist
for f in $sdata/1/feats.scp $nnet $model $feature_transform $class_frame_counts; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done


# PREPARE FEATURE EXTRACTION PIPELINE
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn

@@ -114,28 +107,23 @@ if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Finally add feature_transform and the MLP
feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |"
###
###
###



###
### We will produce lattices, where the correct path is not necessarily present
###

#1) We don't use reference path here...

echo "Generating the denlats"
#2) Generate the denominator lattices
if [ $sub_split -eq 1 ]; then
echo "$0: generating denlats from data '$data', putting lattices in '$dir'"
#1) Generate the denominator lattices
if [ $sub_split -eq 1 ]; then
  # Prepare 'scp' for storing lattices separately and gzipped
  for n in `seq $nj`; do
    [ ! -d $dir/lat$n ] && mkdir $dir/lat$n;
    cat $sdata/$n/feats.scp | awk '{ print $1" | gzip -c >'$dir'/lat'$n'/"$1".gz"; }'
  done >$dir/lat.store_separately_as_gz.scp
  # Generate the lattices
  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
    latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
    --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
    $dir/dengraph/HCLG.fst "$feats" "ark,scp:$dir/lat.JOB.ark,$dir/lat.JOB.scp" || exit 1;
    $dir/dengraph/HCLG.fst "$feats" "scp:$dir/lat.store_separately_as_gz.scp" || exit 1;
else
  for n in `seq $nj`; do
    if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then

@@ -146,32 +134,25 @@ else
      split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
    fi
    mkdir -p $dir/log/$n
    mkdir -p $dir/part
    feats_subset=$(echo $feats | sed s:JOB/:$n/split$sub_split/JOB/:g)
    # Prepare 'scp' for storing lattices separately and gzipped
    for k in `seq $sub_split`; do
      [ ! -d $dir/lat$n/$k ] && mkdir -p $dir/lat$n/$k;
      cat $sdata2/$k/feats.scp | awk '{ print $1" | gzip -c >'$dir'/lat'$n'/'$k'/"$1".gz"; }'
    done >$dir/lat.$n.store_separately_as_gz.scp
    # Generate lattices
    $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
      latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
      --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
      $dir/dengraph/HCLG.fst "$feats_subset" "ark,scp:$dir/lat.$n.JOB.ark,$dir/lat.$n.JOB.scp" || exit 1;
    echo Merging lists for data subset $n
    for k in `seq $sub_split`; do
      cat $dir/lat.$n.$k.scp
    done > $dir/lat.$n.all.scp
    echo Merge the ark $n
    lattice-copy scp:$dir/lat.$n.all.scp ark,scp:$dir/lat.$n.ark,$dir/lat.$n.scp || exit 1;
    #remove the data
    rm $dir/lat.$n.*.ark $dir/lat.$n.*.scp $dir/lat.$n.all.scp
      $dir/dengraph/HCLG.fst "$feats_subset" scp:$dir/lat.$n.store_separately_as_gz.scp || exit 1;
    touch $dir/.done.$n
    fi
  done
fi



#3) Merge the SCPs to create full list of lattices (will use random access)
echo Merging to single list $dir/lat.scp
for ((n=1; n<=nj; n++)); do
  cat $dir/lat.$n.scp
done > $dir/lat.scp

#2) Generate 'scp' for reading the lattices
for n in `seq $nj`; do
  find $dir/lat${n} -name "*.gz" | awk -v FS="/" '{ print gensub(".gz","","",$NF)" gunzip -c "$0" |"; }'
done >$dir/lat.scp

echo "$0: done generating denominator lattices."

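As an aside, a hedged sketch of the "store separately and gzipped" trick above (the utterance-id and directory are invented for illustration): the write-side scp entry is treated by the Kaldi wspecifier as an output pipe, and the read-side scp rebuilt by the find/awk step reads it back through gunzip:

    # one write-side line of lat.store_separately_as_gz.scp
    utt_001 | gzip -c >exp/mono_denlats/lat1/utt_001.gz
    # the matching read-side line of lat.scp
    utt_001 gunzip -c exp/mono_denlats/lat1/utt_001.gz |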
@@ -51,7 +51,7 @@ realign_iters=""
beam=10 # for realignment.
retry_beam=40
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
nnet_config_opts=
splice_width=4 # meaning +- 4 frames on each side for second LDA

@@ -110,8 +110,9 @@ if [ $# != 4 ]; then
  echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
  echo " # as well as speed; may interact with batch size; if you increase"
  echo " # this, you may want to decrease the batch size."
  echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
  echo " # use multiple threads."
  echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
  echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
  echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
  echo " # should not get too large, e.g. >2k)."
@@ -35,6 +35,7 @@ for ((n=1; n<$#; n++)); do
  if [ "${!n}" == "--config" ]; then
    n_plus1=$((n+1))
    config=${!n_plus1}
    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
    . $config # source the config file.
  fi
done
@@ -86,7 +86,8 @@ mkdir -p $dir $tmpdir $dir/phones

[ -f path.sh ] && . ./path.sh

! utils/validate_dict_dir.pl $srcdir && echo "*Error validating directory $srcdir*" && exit 1;
! utils/validate_dict_dir.pl $srcdir && \
  echo "*Error validating directory $srcdir*" && exit 1;

if [[ ! -f $srcdir/lexicon.txt ]]; then
  echo "**Creating $dir/lexicon.txt from $dir/lexiconp.txt"

@@ -97,8 +98,12 @@ if [[ ! -f $srcdir/lexiconp.txt ]]; then
  perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $srcdir/lexiconp.txt || exit 1;
fi

! utils/validate_dict_dir.pl $srcdir >&/dev/null && \
  echo "Validation failed (second time)" && exit 1;
if ! utils/validate_dict_dir.pl $srcdir >&/dev/null; then
  utils/validate_dict_dir.pl $srcdir # show the output.
  echo "Validation failed (second time)"
  exit 1;
fi


if $position_dependent_phones; then
  # Create $tmpdir/lexicon.original from $srcdir/lexicon.txt by
@@ -163,8 +163,8 @@ print Q "EOF\n"; # without having to escape things like "|" and quote characters
print Q ") >$logfile\n";
print Q "time1=\`date +\"%s\"\`\n";
print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
print Q "time2=\`date +\"%s\"\`\n";
print Q "ret=\$?\n";
print Q "time2=\`date +\"%s\"\`\n";
print Q "echo '#' Accounting: time=\$((\$time2-\$time1)) threads=$nof_threads >>$logfile\n";
print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;

@@ -232,7 +232,7 @@ if (! $sync) { # We're not submitting with -sync y, so we
  # Check that the job exists in SGE. Job can be killed if duration
  # exceeds some hard limit, or in case of a machine shutdown.
  if(($check_sge_job_ctr++ % 10) == 0) { # Don't run qstat too often, avoid stress on SGE.
    if ( -f $f ) { next; }; #syncfile appeared, ok
    if ( -f $f ) { next; }; #syncfile appeared: OK.
    $ret = system("qstat -j $sge_job_id >/dev/null 2>/dev/null");
    if($ret != 0) {
      # Don't consider immediately missing job as error, first wait some

@@ -245,7 +245,7 @@ if (! $sync) { # We're not submitting with -sync y, so we
      if ( -f $f ) { next; }; #syncfile appeared, ok
      #Otherwise it is an error
      if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
      print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile\n";
      print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile, syncfile is $f, return status of qstat was $ret\n";
      print STDERR " Possible reasons: a) Exceeded time limit? -> Use more jobs! b) Shutdown/Frozen machine? -> Run again!\n";
      exit(1);
    }
@@ -195,7 +195,7 @@ if ( (-f "$dict/lexicon.txt") && (-f "$dict/lexiconp.txt")) {

# Checking extra_questions.txt -------------------------------
print "Checking $dict/extra_questions.txt ...\n";
if(-s "$dict/extra_questions.txt") {
if (-s "$dict/extra_questions.txt") {
  if(!open(EX, "<$dict/extra_questions.txt")) {$exit = 1; print "--> ERROR: fail to open $dict/extra_questions.txt\n";}
  $idx = 1;
  $success = 1;

@@ -213,6 +213,8 @@ if(-s "$dict/extra_questions.txt") {
  }
  close(EX);
  $success == 0 || print "--> $dict/extra_questions.txt is OK\n";
} else {print "--> $dict/extra_phones.txt is empty\n";}
} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";}

if($exit == 1) { print " [Error detected ]\n"; exit 1;}

exit 0;
@@ -141,6 +141,6 @@ preprocessor variables, setting compile options, linking with libraries, and so
\section build_setup_platforms Which platforms has Kaldi been compiled on?

We have compiled Kaldi on Windows, Cygwin, various flavors of Linux (including
Ubuntu and CentOS), and Darwin.
Ubuntu, CentOS and SUSE), and Darwin.

*/
@@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {

      SlidingWindowCmn(opts, feat, &cmvn_feat);

      feat_writer.Write(utt, feat);
      feat_writer.Write(utt, cmvn_feat);
      num_done++;
    }
@@ -47,7 +47,7 @@ template<class IntType> class LatticeStringRepository {
    inline bool operator == (const Entry &other) const {
      return (parent == other.parent && i == other.i);
    }
    Entry(const Entry *parent, IntType i): parent(parent), i(i) {}
    Entry() { }
    Entry(const Entry &e): parent(e.parent), i(e.i) {}
  };
  // Note: all Entry* pointers returned in function calls are
@@ -59,16 +59,22 @@ template<class IntType> class LatticeStringRepository {
  // Returns string of "parent" with i appended. Pointer
  // owned by repository
  const Entry *Successor(const Entry *parent, IntType i) {
    Entry entry(parent, i);
    typename SetType::iterator iter = set_.find(&entry);
    if(iter == set_.end()) { // no such entry already...
      Entry *entry_ptr = new Entry(entry);
      set_.insert(entry_ptr);
      return entry_ptr;
    } else {
      return *iter;
    new_entry_->parent = parent;
    new_entry_->i = i;

    std::pair<typename SetType::iterator, bool> pr = set_.insert(new_entry_);
    if (pr.second) { // Was successfully inserted (was not there). We need to
                     // replace the pre-allocated element we just handed to the
                     // set (which now owns it) with a freshly allocated one.
      const Entry *ans = new_entry_;
      new_entry_ = new Entry();
      return ans;
    } else { // Was not inserted because an equivalent Entry already
             // existed.
      return *pr.first;
    }
  }

  const Entry *Concatenate (const Entry *a, const Entry *b) {
    if (a == NULL) return b;
    else if (b == NULL) return a;
@@ -94,15 +100,22 @@ template<class IntType> class LatticeStringRepository {
  // a common prefix with a.
  void ReduceToCommonPrefix(const Entry *a,
                            vector<IntType> *b) {
    vector<IntType> a_vec;
    ConvertToVector(a, &a_vec);
    if (b->size() > a_vec.size())
      b->resize(a_vec.size());
    size_t b_sz = 0, max_sz = std::min(a_vec.size(), b->size());
    while (b_sz < max_sz && (*b)[b_sz] == a_vec[b_sz])
      b_sz++;
    if (b_sz != b->size())
      b->resize(b_sz);
    size_t a_size = Size(a), b_size = b->size();
    while (a_size > b_size) {
      a = a->parent;
      a_size--;
    }
    if (b_size > a_size)
      b_size = a_size;
    typename vector<IntType>::iterator b_begin = b->begin();
    while (a_size != 0) {
      if (a->i != *(b_begin + a_size - 1))
        b_size = a_size - 1;
      a = a->parent;
      a_size--;
    }
    if (b_size != b->size())
      b->resize(b_size);
  }

  // removes the first n elements of a.
@@ -156,8 +169,8 @@ template<class IntType> class LatticeStringRepository {
    return e;
  }

  LatticeStringRepository() { }

  LatticeStringRepository() { new_entry_ = new Entry; }

  void Destroy() {
    for (typename SetType::iterator iter = set_.begin();
         iter != set_.end();
@@ -165,6 +178,10 @@ template<class IntType> class LatticeStringRepository {
      delete *iter;
    SetType tmp;
    tmp.swap(set_);
    if (new_entry_) {
      delete new_entry_;
      new_entry_ = NULL;
    }
  }

  // Rebuild will rebuild this object, guaranteeing only

@@ -220,6 +237,8 @@ template<class IntType> class LatticeStringRepository {
  }

  DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository);
  Entry *new_entry_; // We always have a pre-allocated Entry ready to use,
                     // to avoid unnecessary news and deletes.
  SetType set_;

};
@@ -310,7 +310,7 @@ inline int Compare (const LatticeWeightTpl<FloatType> &w1,

template<class FloatType>
inline LatticeWeightTpl<FloatType> Plus(const LatticeWeightTpl<FloatType> &w1,
                                        const LatticeWeightTpl<FloatType> &w2) {
                                        const LatticeWeightTpl<FloatType> &w2) {
  return (Compare(w1, w2) >= 0 ? w1 : w2);
}

@@ -123,7 +123,7 @@ template<class Arc> VectorFst<Arc>* RandPairFst(RandFstOptions opts = RandFstOpt
    all_states.push_back(this_state);
  }
  // Set final states.
  for (size_t j = 0;j < (size_t)opts.n_final;j++) {
  for (size_t j = 0; j < (size_t)opts.n_final;j++) {
    StateId id = all_states[rand() % opts.n_states];
    Weight weight (opts.weight_multiplier*(rand() % 5), opts.weight_multiplier*(rand() % 5));
    fst->SetFinal(id, weight);
@@ -54,10 +54,10 @@ void DiagGmmNormal::CopyFromDiagGmm(const DiagGmm &diaggmm) {
  means_.MulElements(vars_);
}

void DiagGmmNormal::CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags) {
void DiagGmmNormal::CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags) const {
  KALDI_ASSERT((static_cast<int32>(diaggmm->Dim()) == means_.NumCols())
               && (static_cast<int32>(diaggmm->weights_.Dim()) == weights_.Dim()));

               && (static_cast<int32>(diaggmm->weights_.Dim()) == weights_.Dim()));

  DiagGmmNormal oldg(*diaggmm);

  if (flags & kGmmWeights)
@@ -58,7 +58,7 @@ class DiagGmmNormal {
  void CopyFromDiagGmm(const DiagGmm &diaggmm);

  /// Copies to DiagGmm the requested parameters
  void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags = kGmmAll);
  void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags = kGmmAll) const;

  int32 NumGauss() { return weights_.Dim(); }
  int32 Dim() { return means_.NumCols(); }
@@ -821,6 +821,10 @@ BaseFloat DiagGmm::GaussianSelectionPreselect(
  return tot_loglike;
}

void DiagGmm::CopyFromNormal(const DiagGmmNormal &diag_gmm_normal) {
  diag_gmm_normal.CopyToDiagGmm(this);
}

void DiagGmm::Generate(VectorBase<BaseFloat> *output) {
  KALDI_ASSERT(static_cast<int32>(output->Dim()) == Dim());
  BaseFloat tot = weights_.Sum();
@@ -51,6 +51,9 @@ class DiagGmm {
    CopyFromDiagGmm(gmm);
  }

  /// Copies from DiagGmmNormal; does not resize.
  void CopyFromNormal(const DiagGmmNormal &diag_gmm_normal);

  DiagGmm(int32 nMix, int32 dim): valid_gconsts_(false) { Resize(nMix, dim); }

  /// Constructor that allows us to merge GMMs with weights. Weights must sum
@@ -26,7 +26,7 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
  gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
  gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
  gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
  gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost
  gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats

OBJFILES =
@@ -0,0 +1,180 @@
// gmmbin/gmm-global-init-from-feats.cc

// Copyright 2013 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/model-common.h"
#include "gmm/full-gmm.h"
#include "gmm/diag-gmm.h"
#include "gmm/mle-full-gmm.h"

namespace kaldi {

// We initialize the GMM parameters by setting the variance to the global
// variance of the features, and the means to distinct randomly chosen frames.
void InitGmmFromRandomFrames(const Matrix<BaseFloat> &feats, DiagGmm *gmm) {
  int32 num_gauss = gmm->NumGauss(), num_frames = feats.NumRows(),
      dim = feats.NumCols();
  KALDI_ASSERT(num_frames >= 10 * num_gauss && "Too few frames to train on");
  Vector<double> mean(dim), var(dim);
  for (int32 i = 0; i < num_frames; i++) {
    mean.AddVec(1.0 / num_frames, feats.Row(i));
    var.AddVec2(1.0 / num_frames, feats.Row(i));
  }
  var.AddVec2(-1.0, mean);
  if (var.Max() <= 0.0)
    KALDI_ERR << "Features do not have positive variance " << var;

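  // At this point mean holds E[x] and var holds E[x^2] - (E[x])^2, i.e. the
  // (biased) global variance of the stored frames, per dimension; each
  // Gaussian below is seeded with this shared variance.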
  DiagGmmNormal gmm_normal(*gmm);

  std::set<int32> used_frames;
  for (int32 g = 0; g < num_gauss; g++) {
    int32 random_frame = RandInt(0, num_frames - 1);
    while (used_frames.count(random_frame) != 0)
      random_frame = RandInt(0, num_frames - 1);
    used_frames.insert(random_frame);
    gmm_normal.weights_(g) = 1.0 / num_gauss;
    gmm_normal.means_.Row(g).CopyFromVec(feats.Row(random_frame));
    gmm_normal.vars_.Row(g).CopyFromVec(var);
  }
  gmm->CopyFromNormal(gmm_normal);
  gmm->ComputeGconsts();
}

void TrainOneIter(const Matrix<BaseFloat> &feats,
                  const MleDiagGmmOptions &gmm_opts,
                  int32 iter,
                  DiagGmm *gmm) {
  AccumDiagGmm gmm_acc(*gmm, kGmmAll);

  double tot_like = 0.0;

  for (int32 t = 0; t < feats.NumRows(); t++)
    tot_like += gmm_acc.AccumulateFromDiag(*gmm, feats.Row(t), 1.0);

  KALDI_LOG << "Likelihood per frame on iteration " << iter
            << " was " << (tot_like / feats.NumRows()) << " over "
            << feats.NumRows() << " frames.";

  BaseFloat objf_change, count;
  MleDiagGmmUpdate(gmm_opts, gmm_acc, kGmmAll, gmm, &objf_change, &count);

  KALDI_LOG << "Objective-function change on iteration " << iter << " was "
            << (objf_change / count) << " over " << count << " frames.";
}

} // namespace kaldi

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;

    const char *usage =
        "This program initializes a single diagonal GMM and does multiple iterations of\n"
        "training from features stored in memory.\n"
        "Usage: gmm-global-init-from-feats [options] <feature-rspecifier> <model-out>\n"
        "e.g.: gmm-global-init-from-feats scp:train.scp 1.mdl\n";

    ParseOptions po(usage);
    MleDiagGmmOptions gmm_opts;

    bool binary = true;
    int32 num_gauss = 100;
    int32 num_iters = 50;
    int32 num_frames = 200000;
    int32 srand_seed = 0;

    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("num-gauss", &num_gauss, "Number of Gaussians in the model");
    po.Register("num-iters", &num_iters, "Number of iterations of training");
    po.Register("num-frames", &num_frames, "Number of feature vectors to store in "
                "memory and train on (randomly chosen from the input features)");
    po.Register("srand", &srand_seed, "Seed for random number generator ");

    gmm_opts.Register(&po);

    po.Read(argc, argv);

    srand(srand_seed);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string feature_rspecifier = po.GetArg(1),
        model_wxfilename = po.GetArg(2);

    Matrix<BaseFloat> feats;

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);

    KALDI_ASSERT(num_frames > 0);

    int64 num_read = 0, dim = 0;

    KALDI_LOG << "Reading features (will keep " << num_frames << " frames).";

    for (; !feature_reader.Done(); feature_reader.Next()) {
      const Matrix<BaseFloat> &this_feats = feature_reader.Value();
      for (int32 t = 0; t < this_feats.NumRows(); t++) {
        num_read++;
        if (dim == 0) {
          dim = this_feats.NumCols();
          feats.Resize(num_frames, dim);
        } else if (this_feats.NumCols() != dim) {
          KALDI_ERR << "Features have inconsistent dims "
                    << this_feats.NumCols() << " vs. " << dim
                    << " (current utt is) " << feature_reader.Key();
        }
        if (num_read <= num_frames) {
          feats.Row(num_read - 1).CopyFromVec(this_feats.Row(t));
        } else {
          BaseFloat keep_prob = num_frames / static_cast<BaseFloat>(num_read);
          if (WithProb(keep_prob)) { // With probability "keep_prob"
            feats.Row(RandInt(0, num_frames - 1)).CopyFromVec(this_feats.Row(t));
          }
        }
      }
    }

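    // The else-branch above is reservoir sampling: once the num_frames-row
    // buffer is full, the n-th frame is kept with probability num_frames / n
    // and overwrites a uniformly chosen slot, so every frame seen so far
    // remains in the buffer with equal probability.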
    if (num_read < num_frames) {
      KALDI_WARN << "Number of frames read " << num_read << " was less than "
                 << "target number " << num_frames << ", using all we read.";
      feats.Resize(num_read, dim, kCopyData);
    }

    DiagGmm gmm(num_gauss, dim);

    KALDI_LOG << "Initializing GMM means from random frames";
    InitGmmFromRandomFrames(feats, &gmm);

    for (int32 iter = 0; iter < num_iters; iter++)
      TrainOneIter(feats, gmm_opts, iter, &gmm);

    WriteKaldiObject(gmm, model_wxfilename, binary);
    KALDI_LOG << "Wrote model to " << model_wxfilename;
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
@@ -5,11 +5,11 @@ include ../kaldi.mk

EXTRA_CXXFLAGS += -Wno-sign-compare

TESTFILES = kaldi-lattice-test
TESTFILES = kaldi-lattice-test push-lattice-test minimize-lattice-test

OBJFILES = kaldi-lattice.o lattice-functions.o word-align-lattice.o \
           phone-align-lattice.o word-align-lattice-lexicon.o sausages.o \
           kws-functions.o push-lattice.o
           kws-functions.o push-lattice.o minimize-lattice.o

LIBNAME = kaldi-lat
@@ -311,6 +311,7 @@ void ConvertLatticeToPhones(const TransitionModel &trans,
void ConvertCompactLatticeToPhones(const TransitionModel &trans,
                                   CompactLattice *clat) {
  typedef CompactLatticeArc Arc;
  typedef Arc::Weight Weight;
  int32 num_states = clat->NumStates();
  for (int32 state = 0; state < num_states; state++) {
    for (fst::MutableArcIterator<CompactLattice> aiter(clat, state);

@@ -327,6 +328,18 @@ void ConvertCompactLatticeToPhones(const TransitionModel &trans,
      arc.weight.SetString(phone_seq);
      aiter.SetValue(arc);
    } // end looping over arcs
    Weight f = clat->Final(state);
    if (f != Weight::Zero()) {
      std::vector<int32> phone_seq;
      const std::vector<int32> &tid_seq = f.String();
      for (std::vector<int32>::const_iterator iter = tid_seq.begin();
           iter != tid_seq.end(); ++iter) {
        if (trans.IsFinal(*iter)) // note: there is one of these per phone...
          phone_seq.push_back(trans.TransitionIdToPhone(*iter));
      }
      f.SetString(phone_seq);
      clat->SetFinal(state, f);
    }
  } // end looping over states
}

@@ -0,0 +1,72 @@
// lat/minimize-lattice-test.cc

// Copyright 2013 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "lat/kaldi-lattice.h"
#include "lat/minimize-lattice.h"
#include "lat/push-lattice.h"
#include "fstext/rand-fst.h"


namespace kaldi {
using namespace fst;

CompactLattice *RandDeterministicCompactLattice() {
  RandFstOptions opts;
  opts.acyclic = true;
  while (1) {
    Lattice *fst = fst::RandPairFst<LatticeArc>(opts);
    CompactLattice *cfst = new CompactLattice;
    if (!DeterminizeLattice(*fst, cfst)) {
      delete fst;
      delete cfst;
      KALDI_WARN << "Determinization failed, trying again.";
    } else {
      delete fst;
      return cfst;
    }
  }
}

void TestMinimizeCompactLattice() {
  CompactLattice *clat = RandDeterministicCompactLattice();
  CompactLattice clat2(*clat);
  BaseFloat delta = (rand() % 2 == 0 ? 1.0 : 1.0e-05);

  // Minimization will only work well on determinized and pushed lattices.
  PushCompactLatticeStrings(&clat2);
  PushCompactLatticeWeights(&clat2);

  MinimizeCompactLattice(&clat2, delta);
  KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, delta, rand(), 10));

  delete clat;
}


} // end namespace kaldi

int main() {
  using namespace kaldi;
  SetVerboseLevel(4);
  for (int32 i = 0; i < 1000; i++) {
    TestMinimizeCompactLattice();
  }
  KALDI_LOG << "Success.";
}
@ -0,0 +1,283 @@
|
|||
// lat/minimize-lattice.cc
|
||||
|
||||
// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal)
|
||||
// 2012-2013 Johns Hopkins University (Author: Daniel Povey); Chao Weng;
|
||||
// Bagher BabaAli
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "lat/minimize-lattice.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "util/stl-utils.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
/*
|
||||
Process the states in reverse topological order.
|
||||
For each state, compute a hash-value that will be the same for states
|
||||
that can be combined. Then for each pair of states with the
|
||||
same hash value, check that the "to-states" map to the
|
||||
same equivalence class and that the weights are sufficiently similar.
|
||||
*/
|
||||
|
||||
|
||||
using fst::ArcIterator;
|
||||
using fst::MutableArcIterator;
|
||||
using fst::kNoStateId;
|
||||
|
||||
class CompactLatticeMinimizer {
|
||||
public:
|
||||
typedef CompactLattice::StateId StateId;
|
||||
typedef CompactLatticeArc Arc;
|
||||
typedef Arc::Label Label;
|
||||
typedef CompactLatticeWeight Weight;
|
||||
typedef size_t HashType;
|
||||
|
||||
CompactLatticeMinimizer(CompactLattice *clat, float delta = fst::kDelta):
|
||||
clat_(clat), delta_(delta) { }
|
||||
|
||||
bool Minimize() {
|
||||
if (clat_->Properties(fst::kTopSorted, true) == 0) {
|
||||
if (!TopSort(clat_)) {
|
||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
||||
"(probably your lexicon has empty words or your LM has epsilon cycles; this "
|
||||
" is a bad idea.)";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
ComputeStateHashValues();
|
||||
ComputeStateMap();
|
||||
ModifyModel();
|
||||
return true;
|
||||
}
|
||||
|
||||
static HashType ConvertStringToHashValue(const std::vector<int32> &vec) {
|
||||
const HashType prime = 53281;
|
||||
VectorHasher<int32> h;
|
||||
HashType ans = static_cast<HashType>(h(vec));
|
||||
if (ans == 0) ans = prime;
|
||||
// We don't allow a zero answer, as this can cause too many values to be the
|
||||
// same.
|
||||
return ans;
|
||||
}
|
||||
|
||||
static void InitHashValue(const Weight &final_weight, HashType *h) {
|
||||
const HashType prime1 = 33317, prime2 = 607; // it's pretty random.
|
||||
if (final_weight == Weight::Zero()) *h = prime1;
|
||||
else *h = prime2 * ConvertStringToHashValue(final_weight.String());
|
||||
}
|
||||
|
||||
// It's important that this function and UpdateHashValueForFinalProb be
|
||||
// insensitive to the order in which it's called, as the order of the arcs
|
||||
// won't necessarily be the same for different equivalent states.
|
||||
static void UpdateHashValueForTransition(const Weight &weight,
|
||||
Label label,
|
||||
const HashType &next_state_hash,
|
||||
HashType *h) {
|
||||
const HashType prime1 = 1447, prime2 = 51907;
|
||||
if (label == 0) label = prime2; // Zeros will cause problems.
|
||||
*h += prime1 * label *
|
||||
(1 + ConvertStringToHashValue(weight.String()) * next_state_hash);
|
||||
// Above, the "1 +" is to ensure that if somehow we get zeros due to
|
||||
// weird word sequences, they don't propagate.
|
||||
}
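// Note: the order-insensitivity required above holds because each call adds
// an independent term to *h, and addition commutes, so the order in which a
// state's arcs are visited does not affect the final hash value.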
|
||||
|
||||
void ComputeStateHashValues() {
|
||||
// Note: clat_ is topologically sorted, and StateId is
|
||||
// signed. Each state's hash value is only a function of topologically-later
|
||||
// states' hash values.
|
||||
state_hashes_.resize(clat_->NumStates());
|
||||
for (StateId s = clat_->NumStates() - 1; s >= 0; s--) {
|
||||
HashType this_hash;
|
||||
InitHashValue(clat_->Final(s), &this_hash);
|
||||
for (ArcIterator<CompactLattice> aiter(*clat_, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
const Arc &arc = aiter.Value();
|
||||
HashType next_hash;
|
||||
if (arc.nextstate > s) {
|
||||
next_hash = state_hashes_[arc.nextstate];
|
||||
} else {
|
||||
KALDI_ASSERT(s == arc.nextstate &&
|
||||
"Lattice not topologically sorted [code error]");
|
||||
next_hash = 1;
|
||||
KALDI_WARN << "Minimizing lattice with self-loops "
|
||||
"(lattices should not have self-loops)";
|
||||
}
|
||||
UpdateHashValueForTransition(arc.weight, arc.ilabel,
|
||||
next_hash, &this_hash);
|
||||
}
|
||||
state_hashes_[s] = this_hash;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct EquivalenceSorter {
|
||||
// This struct has an operator () which you can interpret as a less-than (<)
|
||||
// operator for arcs. We sort on ilabel; since the lattice is supposed to
|
||||
// be deterministic, this should completely determine the ordering (there
|
||||
// should not be more than one arc with the same ilabel, out of the same
|
||||
// state). For identical ilabels we next sort on the nextstate, simply to
|
||||
// better handle non-deterministic input (we do our best on this, without
|
||||
// guaranteeing full minimization). We could sort on the strings next, but
|
||||
// this would be an unnecessary hassle as we only really need good
|
||||
// performance on deterministic input.
|
||||
bool operator () (const Arc &a, const Arc &b) const {
|
||||
if (a.ilabel < b.ilabel) return true;
|
||||
else if (a.ilabel > b.ilabel) return false;
|
||||
else if (a.nextstate < b.nextstate) return true;
|
||||
else return false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// This function works out whether s and t are equivalent, assuming
|
||||
// we have already partitioned all topologically-later states into
|
||||
// equivalence classes (i.e. set up state_map_).
|
||||
bool Equivalent(StateId s, StateId t) const {
|
||||
if (!ApproxEqual(clat_->Final(s), clat_->Final(t), delta_))
|
||||
return false;
|
||||
if (clat_->NumArcs(s) != clat_->NumArcs(t))
|
||||
return false;
|
||||
std::vector<Arc> s_arcs;
|
||||
std::vector<Arc> t_arcs;
|
||||
for (int32 iter = 0; iter <= 1; iter++) {
|
||||
StateId state = (iter == 0 ? s : t);
|
||||
std::vector<Arc> &arcs = (iter == 0 ? s_arcs : t_arcs);
|
||||
arcs.reserve(clat_->NumArcs(state));
|
||||
for (ArcIterator<CompactLattice> aiter(*clat_, state); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
if (arc.nextstate == state) {
// This is a special case for states that have self-loops. If two
// states have an identical self-loop arc, they may be equivalent, so we
// keep the arc (with nextstate set to kNoStateId) for the comparison.
arc.nextstate = kNoStateId;
} else {
KALDI_ASSERT(arc.nextstate > state);
arc.nextstate = state_map_[arc.nextstate];
}
arcs.push_back(arc);
|
||||
}
|
||||
EquivalenceSorter sorter;
std::sort(arcs.begin(), arcs.end(), sorter);
|
||||
}
|
||||
KALDI_ASSERT(s_arcs.size() == t_arcs.size());
|
||||
for (size_t i = 0; i < s_arcs.size(); i++) {
|
||||
// Note: the nextstates were already mapped to their equivalence classes.
if (s_arcs[i].nextstate != t_arcs[i].nextstate) return false;
|
||||
KALDI_ASSERT(s_arcs[i].ilabel == s_arcs[i].olabel); // CompactLattices are
|
||||
// supposed to be
|
||||
// acceptors.
|
||||
if (s_arcs[i].ilabel != t_arcs[i].ilabel) return false;
|
||||
if (!ApproxEqual(s_arcs[i].weight, t_arcs[i].weight, delta_)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ComputeStateMap() {
|
||||
// We have to compute the state mapping in reverse topological order also,
|
||||
// since the equivalence test relies on later states being already sorted
|
||||
// out into equivalence classes (by state_map_).
|
||||
StateId num_states = clat_->NumStates();
|
||||
unordered_map<HashType, std::vector<StateId> > hash_groups_;
|
||||
|
||||
for (StateId s = 0; s < num_states; s++)
|
||||
hash_groups_[state_hashes_[s]].push_back(s);
|
||||
|
||||
state_map_.resize(num_states);
|
||||
for (StateId s = 0; s < num_states; s++)
|
||||
state_map_[s] = s; // Default mapping.
|
||||
|
||||
|
||||
{ // This block is just diagnostic.
|
||||
typedef unordered_map<HashType, std::vector<StateId> >::const_iterator
|
||||
HashIter;
|
||||
size_t max_size = 0;
|
||||
for (HashIter iter = hash_groups_.begin(); iter != hash_groups_.end();
|
||||
++iter)
|
||||
max_size = std::max(max_size, iter->second.size());
|
||||
if (max_size > 1000) {
|
||||
KALDI_WARN << "Largest equivalence group (using hash) is " << max_size
|
||||
<< ", minimization might be slow.";
|
||||
}
|
||||
}
|
||||
|
||||
for (StateId s = num_states - 1; s >= 0; s--) {
|
||||
HashType hash = state_hashes_[s];
|
||||
const std::vector<StateId> &equivalence_class = hash_groups_[hash];
|
||||
KALDI_ASSERT(!equivalence_class.empty());
|
||||
for (size_t i = 0; i < equivalence_class.size(); i++) {
|
||||
StateId t = equivalence_class[i];
|
||||
// Below, there is no point doing the test if state_map_[t] != t, because
// in that case we will, elsewhere in this loop, be comparing with another
// state that is equivalent to t.
|
||||
if (t > s && state_map_[t] == t && Equivalent(s, t)) {
|
||||
state_map_[s] = t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ModifyModel() {
|
||||
// Modifies the lattice (clat_) according to state_map_.
|
||||
|
||||
StateId num_removed = 0;
|
||||
StateId num_states = clat_->NumStates();
|
||||
for (StateId s = 0; s < num_states; s++)
|
||||
if (state_map_[s] != s)
|
||||
num_removed++;
|
||||
KALDI_VLOG(3) << "Removing " << num_removed << " of "
|
||||
<< num_states << " states.";
|
||||
if (num_removed == 0) return; // Nothing to do.
|
||||
|
||||
clat_->SetStart(state_map_[clat_->Start()]);
|
||||
|
||||
for (StateId s = 0; s < num_states; s++) {
|
||||
if (state_map_[s] != s)
|
||||
continue; // There is no point modifying states we're removing.
|
||||
for (MutableArcIterator<CompactLattice> aiter(clat_, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
StateId mapped_nextstate = state_map_[arc.nextstate];
|
||||
if (mapped_nextstate != arc.nextstate) {
|
||||
arc.nextstate = mapped_nextstate;
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
}
|
||||
fst::Connect(clat_);
|
||||
}
|
||||
private:
|
||||
CompactLattice *clat_;
|
||||
float delta_;
|
||||
std::vector<HashType> state_hashes_;
|
||||
std::vector<StateId> state_map_; // maps each state to itself or to some
|
||||
// equivalent state. Within each equivalence
|
||||
// class, we pick one arbitrarily.
|
||||
};
|
||||
|
||||
bool MinimizeCompactLattice(CompactLattice *clat, float delta) {
|
||||
CompactLatticeMinimizer minimizer(clat, delta);
|
||||
return minimizer.Minimize();
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace kaldi
|
|
@ -0,0 +1,48 @@
|
|||
// lat/minimize-lattice.h
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#ifndef KALDI_LAT_MINIMIZE_LATTICE_H_
|
||||
#define KALDI_LAT_MINIMIZE_LATTICE_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "fstext/fstext-lib.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
||||
/// This function minimizes the compact lattice. It is to be called after
/// determinization (see ../fstext/determinize-lattice-pruned.h) and pushing
/// (see ./push-lattice.h). If the lattice is not determinized and pushed this
/// function will not combine as many states as it could, but it won't crash.
/// Returns true on success, and false if it failed due to topological sorting
/// failing.
bool MinimizeCompactLattice(CompactLattice *clat, float delta = fst::kDelta);
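// Example usage (a minimal sketch; it assumes the pushing functions declared
// in ./push-lattice.h, and that "clat" came out of determinization):
//
// CompactLattice clat;
// // ... obtain clat, e.g. via DeterminizeLatticePruned() ...
// PushCompactLatticeStrings(&clat);
// PushCompactLatticeWeights(&clat);
// if (!MinimizeCompactLattice(&clat))
// KALDI_WARN << "Minimization failed (lattice could not be top-sorted).";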
|
||||
|
||||
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
#endif // KALDI_LAT_MINIMIZE_LATTICE_H_
|
|
@ -0,0 +1,119 @@
|
|||
// lat/push-lattice-test.cc
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "lat/kaldi-lattice.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "fstext/rand-fst.h"
|
||||
|
||||
|
||||
namespace kaldi {
|
||||
using namespace fst;
|
||||
|
||||
CompactLattice *RandCompactLattice() {
|
||||
RandFstOptions opts;
|
||||
opts.acyclic = true;
|
||||
Lattice *fst = fst::RandPairFst<LatticeArc>(opts);
|
||||
CompactLattice *cfst = new CompactLattice;
|
||||
ConvertLattice(*fst, cfst);
|
||||
delete fst;
|
||||
return cfst;
|
||||
}
|
||||
|
||||
void TestPushCompactLatticeStrings() {
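// This test checks two things: that string-pushing preserves equivalence
// (via RandEquivalent), and that at every state other than the start state
// no further pushing is possible, i.e. the strings on the outgoing arcs and
// on the final weight do not all share a common nonempty first symbol.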
|
||||
CompactLattice *clat = RandCompactLattice();
|
||||
CompactLattice clat2(*clat);
|
||||
PushCompactLatticeStrings(&clat2);
|
||||
KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, 0.001, rand(), 10));
|
||||
for (CompactLatticeArc::StateId s = 0; s < clat2.NumStates(); s++) {
|
||||
if (s == 0)
|
||||
continue; // We don't check state zero, as the "leftover string" stays
|
||||
// there.
|
||||
int32 first_label = -1; // Only read after first_label_set is true;
// initialized to silence compiler warnings.
|
||||
bool ok = false;
|
||||
bool first_label_set = false;
|
||||
for (ArcIterator<CompactLattice> aiter(clat2, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
if (aiter.Value().weight.String().size() == 0) {
|
||||
ok = true;
|
||||
} else {
|
||||
int32 this_label = aiter.Value().weight.String().front();
|
||||
if (first_label_set) {
|
||||
if (this_label != first_label) ok = true;
|
||||
} else {
|
||||
first_label = this_label;
|
||||
first_label_set = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (clat2.Final(s) != CompactLatticeWeight::Zero()) {
|
||||
if (clat2.Final(s).String().size() == 0) ok = true;
|
||||
else {
|
||||
int32 this_label = clat2.Final(s).String().front();
|
||||
if (first_label_set && this_label != first_label) ok = true;
|
||||
}
|
||||
}
|
||||
KALDI_ASSERT(ok);
|
||||
}
|
||||
delete clat;
|
||||
}
|
||||
|
||||
void TestPushCompactLatticeWeights() {
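// This test checks that weight-pushing preserves equivalence, and that at
// every state other than the start state, the final weight plus the weights
// on the outgoing arcs "sum to one" (i.e. to LatticeWeight::One()) in the
// lattice semiring; any leftover weight stays on the start state.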
|
||||
CompactLattice *clat = RandCompactLattice();
|
||||
CompactLattice clat2(*clat);
|
||||
PushCompactLatticeWeights(&clat2);
|
||||
KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, 0.001, rand(), 10));
|
||||
for (CompactLatticeArc::StateId s = 0; s < clat2.NumStates(); s++) {
|
||||
if (s == 0)
|
||||
continue; // We don't check state zero, as the "leftover string" stays
|
||||
// there.
|
||||
LatticeWeight sum = clat2.Final(s).Weight();
|
||||
for (ArcIterator<CompactLattice> aiter(clat2, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
sum = Plus(sum, aiter.Value().weight.Weight());
|
||||
}
|
||||
if (!ApproxEqual(sum, LatticeWeight::One())) {
|
||||
{
|
||||
fst::FstPrinter<CompactLatticeArc> printer(clat2, NULL, NULL,
|
||||
NULL, true, true);
|
||||
printer.Print(&std::cerr, "<unknown>");
|
||||
}
|
||||
{
|
||||
fst::FstPrinter<CompactLatticeArc> printer(*clat, NULL, NULL,
|
||||
NULL, true, true);
|
||||
printer.Print(&std::cerr, "<unknown>");
|
||||
}
|
||||
KALDI_ERR << "Bad lattice being pushed.";
|
||||
}
|
||||
}
|
||||
delete clat;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // end namespace kaldi
|
||||
|
||||
int main() {
|
||||
using namespace kaldi;
|
||||
for (int32 i = 0; i < 15; i++) {
|
||||
TestPushCompactLatticeStrings();
|
||||
TestPushCompactLatticeWeights();
|
||||
}
|
||||
KALDI_LOG << "Success.";
|
||||
}
|
|
@ -202,12 +202,75 @@ class CompactLatticePusher {
|
|||
std::vector<int32> shift_vec_;
|
||||
};
|
||||
|
||||
bool PushCompactLattice(CompactLattice *clat) {
|
||||
bool PushCompactLatticeStrings(CompactLattice *clat) {
|
||||
CompactLatticePusher pusher(clat);
|
||||
return pusher.Push();
|
||||
}
|
||||
|
||||
|
||||
bool PushCompactLatticeWeights(CompactLattice *clat) {
|
||||
if (clat->Properties(fst::kTopSorted, true) == 0) {
|
||||
if (!TopSort(clat)) {
|
||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
||||
"(probably your lexicon has empty words or your LM has epsilon cycles; "
"this is a bad idea).";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
typedef CompactLattice::StateId StateId; // Note: this is guaranteed to be
|
||||
// signed.
|
||||
typedef CompactLatticeArc Arc;
|
||||
typedef CompactLatticeWeight Weight;
|
||||
|
||||
StateId num_states = clat->NumStates();
|
||||
if (num_states == 0) {
|
||||
KALDI_WARN << "Pushing weights of empty compact lattice";
|
||||
return true; // this is technically success because an empty
|
||||
// lattice is already pushed.
|
||||
}
|
||||
std::vector<LatticeWeight> weight_to_end(num_states); // Note: LatticeWeight
|
||||
// contains two floats.
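// First pass (in reverse topological order): weight_to_end[s] becomes the
// semiring sum, over all paths from s to a final state, of the path weight
// times the final weight. The second pass below multiplies each arc weight
// by Divide(weight_to_end[nextstate], weight_to_end[s]) and divides each
// final weight by weight_to_end[s], after which the weights leaving each
// state other than the start state sum to One().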
|
||||
for (StateId s = num_states - 1; s >= 0; s--) {
|
||||
LatticeWeight this_weight_to_end = clat->Final(s).Weight();
|
||||
for (fst::ArcIterator<CompactLattice> aiter(*clat, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
const Arc &arc = aiter.Value();
|
||||
KALDI_ASSERT(arc.nextstate > s && "Cyclic lattices not allowed.");
|
||||
this_weight_to_end = Plus(this_weight_to_end,
|
||||
Times(aiter.Value().weight.Weight(),
|
||||
weight_to_end[arc.nextstate]));
|
||||
}
|
||||
if (this_weight_to_end == LatticeWeight::Zero()) {
|
||||
KALDI_WARN << "Lattice has non-coaccessible states.";
|
||||
}
|
||||
weight_to_end[s] = this_weight_to_end;
|
||||
}
|
||||
weight_to_end[0] = LatticeWeight::One(); // We leave the "leftover weight" on
|
||||
// the start state, which won't
|
||||
// necessarily end up summing to one.
|
||||
for (StateId s = 0; s < num_states; s++) {
|
||||
LatticeWeight this_weight_to_end = weight_to_end[s];
|
||||
if (this_weight_to_end == LatticeWeight::Zero())
|
||||
continue;
|
||||
for (fst::MutableArcIterator<CompactLattice> aiter(clat, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
LatticeWeight next_weight_to_end = weight_to_end[arc.nextstate];
|
||||
if (next_weight_to_end != LatticeWeight::Zero()) {
|
||||
arc.weight.SetWeight(Times(arc.weight.Weight(),
|
||||
Divide(next_weight_to_end,
|
||||
this_weight_to_end)));
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
Weight final_weight = clat->Final(s);
|
||||
if (final_weight != Weight::Zero()) {
|
||||
final_weight.SetWeight(Divide(final_weight.Weight(), this_weight_to_end));
|
||||
clat->SetFinal(s, final_weight);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -41,7 +41,16 @@ namespace kaldi {
|
|||
/// It returns true on success, false if it failed due to TopSort failing,
|
||||
/// which should never happen, but we handle it gracefully by just leaving the
|
||||
/// lattice the same.
|
||||
bool PushCompactLattice(CompactLattice *clat);
|
||||
/// This function used to be called just PushCompactLattice.
|
||||
bool PushCompactLatticeStrings(CompactLattice *clat);
|
||||
|
||||
/// This function pushes the weights in the CompactLattice so that all states,
/// except possibly the start state, have Weight components (of type
/// LatticeWeight) that "sum to one" in the LatticeWeight semiring (i.e.
/// interpreting the weights as negated log-probs). It returns true on
/// success, false if it failed due to TopSort failing, which should never
/// happen, but we handle it gracefully by just leaving the lattice the same.
bool PushCompactLatticeWeights(CompactLattice *clat);
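// Example usage (a minimal sketch; the two pushing operations are typically
// applied together, before MinimizeCompactLattice() of ./minimize-lattice.h):
//
// CompactLattice clat;
// // ... obtain clat, e.g. from determinization ...
// if (!PushCompactLatticeStrings(&clat) || !PushCompactLatticeWeights(&clat))
// KALDI_WARN << "Pushing failed (lattice could not be top-sorted).";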
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
|
|
|
@ -16,7 +16,8 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
|
|||
lattice-to-ctm-conf lattice-arcgraph lattice-combine lattice-reverse \
|
||||
lattice-rescore-mapped lattice-depth lattice-align-phones \
|
||||
lattice-to-smbr-post lattice-determinize-pruned-parallel \
|
||||
lattice-add-penalty lattice-align-words-lexicon lattice-push
|
||||
lattice-add-penalty lattice-align-words-lexicon lattice-push \
|
||||
lattice-minimize
|
||||
|
||||
|
||||
OBJFILES =
|
||||
|
|
|
@ -82,8 +82,8 @@ int main(int argc, char *argv[]) {
|
|||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Done " << num_done << " lattices.";
|
||||
KALDI_LOG << "The average density is "
|
||||
<< static_cast<float> ((float)sum_depth / total_t);
|
||||
KALDI_LOG << "Overall density is "
|
||||
<< (static_cast<BaseFloat>(sum_depth) / total_t);
|
||||
if (num_done != 0) return 0;
|
||||
else return 1;
|
||||
} catch (const std::exception &e) {
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "lat/kaldi-lattice.h"
|
||||
#include "fstext/determinize-lattice-pruned.h"
|
||||
#include "lat/lattice-functions.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
#include "thread/kaldi-task-sequence.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
@ -33,11 +35,13 @@ class DeterminizeLatticeTask {
|
|||
std::string key,
|
||||
BaseFloat acoustic_scale,
|
||||
BaseFloat beam,
|
||||
bool minimize,
|
||||
Lattice *lat,
|
||||
CompactLatticeWriter *clat_writer,
|
||||
int32 *num_warn):
|
||||
opts_(opts), key_(key), acoustic_scale_(acoustic_scale), beam_(beam),
|
||||
lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn) { }
|
||||
minimize_(minimize), lat_(lat), clat_writer_(clat_writer),
|
||||
num_warn_(num_warn) { }
|
||||
|
||||
void operator () () {
|
||||
Invert(lat_); // to get word labels on the input side.
|
||||
|
@ -58,6 +62,11 @@ class DeterminizeLatticeTask {
|
|||
}
|
||||
delete lat_; // The lattice is no longer needed, so free it now.
|
||||
lat_ = NULL;
|
||||
if (minimize_) {
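// Pushing the strings and then the weights before minimizing lets
// MinimizeCompactLattice() combine as many states as possible: it expects
// determinized, pushed input (see lat/minimize-lattice.h).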
|
||||
PushCompactLatticeStrings(&det_clat_);
|
||||
PushCompactLatticeWeights(&det_clat_);
|
||||
MinimizeCompactLattice(&det_clat_);
|
||||
}
|
||||
// Invert the original acoustic scaling
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale_),
|
||||
&det_clat_);
|
||||
|
@ -72,6 +81,7 @@ class DeterminizeLatticeTask {
|
|||
std::string key_;
|
||||
BaseFloat acoustic_scale_;
|
||||
BaseFloat beam_;
|
||||
bool minimize_;
|
||||
Lattice *lat_; // The lattice we're working on. Owned locally.
|
||||
CompactLattice det_clat_; // The output of our process. Will be written
|
||||
// to clat_writer_ in the destructor.
|
||||
|
@ -101,6 +111,7 @@ int main(int argc, char *argv[]) {
|
|||
ParseOptions po(usage);
|
||||
BaseFloat acoustic_scale = 1.0;
|
||||
BaseFloat beam = 10.0;
|
||||
bool minimize = false;
|
||||
TaskSequencerConfig sequencer_config; // has --num-threads option
|
||||
fst::DeterminizeLatticePrunedOptions determinize_config; // Options used in DeterminizeLatticePruned--
|
||||
// this options class does not have its own Register function as it's viewed as
|
||||
|
@ -108,8 +119,11 @@ int main(int argc, char *argv[]) {
|
|||
determinize_config.max_mem = 50000000;
|
||||
determinize_config.max_loop = 0; // was 500000;
|
||||
|
||||
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
|
||||
po.Register("acoustic-scale", &acoustic_scale,
|
||||
"Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
|
||||
po.Register("minimize", &minimize,
|
||||
"If true, push and minimize after determinization");
|
||||
determinize_config.Register(&po);
|
||||
sequencer_config.Register(&po);
|
||||
po.Read(argc, argv);
|
||||
|
@ -142,7 +156,7 @@ int main(int argc, char *argv[]) {
|
|||
Lattice *lat = lat_reader.Value().Copy(); // will give ownership to "task"
|
||||
// below
|
||||
DeterminizeLatticeTask *task = new DeterminizeLatticeTask(
|
||||
determinize_config, key, acoustic_scale, beam,
|
||||
determinize_config, key, acoustic_scale, beam, minimize,
|
||||
lat, &compact_lat_writer, &n_warn);
|
||||
sequencer.Run(task);
|
||||
n_done++;
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "lat/kaldi-lattice.h"
|
||||
#include "fstext/determinize-lattice-pruned.h"
|
||||
#include "lat/lattice-functions.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
|
@ -39,14 +41,18 @@ int main(int argc, char *argv[]) {
|
|||
ParseOptions po(usage);
|
||||
BaseFloat acoustic_scale = 1.0;
|
||||
BaseFloat beam = 10.0;
|
||||
bool minimize = false;
|
||||
fst::DeterminizeLatticePrunedOptions opts; // Options used in DeterminizeLatticePruned--
|
||||
// this options class does not have its own Register function as it's viewed as
|
||||
// being more part of "fst world", so we register its elements independently.
|
||||
opts.max_mem = 50000000;
|
||||
opts.max_loop = 0; // was 500000;
|
||||
|
||||
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
|
||||
po.Register("acoustic-scale", &acoustic_scale,
|
||||
"Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
|
||||
po.Register("minimize", &minimize,
|
||||
"If true, push and minimize after determinization");
|
||||
opts.Register(&po);
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -89,6 +95,11 @@ int main(int argc, char *argv[]) {
|
|||
"(partial output will be pruned tighter than the specified beam.)";
|
||||
n_warn++;
|
||||
}
|
||||
if (minimize) {
|
||||
PushCompactLatticeStrings(&det_clat);
|
||||
PushCompactLatticeWeights(&det_clat);
|
||||
MinimizeCompactLattice(&det_clat);
|
||||
}
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &det_clat);
|
||||
compact_lat_writer.Write(key, det_clat);
|
||||
n_done++;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// latbin/lattice-determinize.cc
|
||||
|
||||
// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||
// Copyright 2009-2012 Microsoft Corporation
|
||||
// 2012-2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -23,6 +24,8 @@
|
|||
#include "fstext/fstext-lib.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
#include "lat/lattice-functions.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
@ -111,16 +114,27 @@ int main(int argc, char *argv[]) {
|
|||
int32 max_loop = 500000;
|
||||
BaseFloat delta = fst::kDelta;
|
||||
bool prune = false;
|
||||
bool minimize = false;
|
||||
|
||||
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling]-- also used "
|
||||
"to handle determinization failures, set --prune=false to disable routine pruning");
|
||||
po.Register("acoustic-scale", &acoustic_scale,
|
||||
"Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam,
|
||||
"Pruning beam [applied after acoustic scaling]-- also used "
|
||||
"to handle determinization failures, set --prune=false to "
|
||||
"disable routine pruning");
|
||||
po.Register("delta", &delta, "Tolerance used in determinization");
|
||||
po.Register("prune", &prune, "If true, prune determinized lattices with the --beam option.");
|
||||
po.Register("max-mem", &max_mem, "Maximum approximate memory usage in determinization (real usage might be many times this)");
|
||||
po.Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
|
||||
po.Register("beam-ratio", &beam_ratio, "Ratio by which to decrease beam if we reach the max-arcs.");
|
||||
po.Register("num-loops", &num_loops, "Number of times to decrease beam by beam-ratio if determinization fails.");
|
||||
po.Register("prune", &prune, "If true, prune determinized lattices "
|
||||
"with the --beam option.");
|
||||
po.Register("max-mem", &max_mem, "Maximum approximate memory usage in "
|
||||
"determinization (real usage might be many times this)");
|
||||
po.Register("max-loop", &max_loop, "Option to detect a certain "
|
||||
"type of failure in lattice determinization (not critical)");
|
||||
po.Register("beam-ratio", &beam_ratio, "Ratio by which to "
|
||||
"decrease beam if we reach the max-arcs.");
|
||||
po.Register("num-loops", &num_loops, "Number of times to "
|
||||
"decrease beam by beam-ratio if determinization fails.");
|
||||
po.Register("minimize", &minimize,
|
||||
"If true, push and minimize after determinization");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -158,6 +172,11 @@ int main(int argc, char *argv[]) {
|
|||
if (DeterminizeLatticeWrapper(lat, key, prune,
|
||||
beam, beam_ratio, max_mem, max_loop,
|
||||
delta, num_loops, &clat)) {
|
||||
if (minimize) {
|
||||
PushCompactLatticeStrings(&clat);
|
||||
PushCompactLatticeWeights(&clat);
|
||||
MinimizeCompactLattice(&clat);
|
||||
}
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &clat);
|
||||
compact_lattice_writer.Write(key, clat);
|
||||
n_done++;
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
// latbin/lattice-minimize.cc
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "fstext/fstext-lib.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
#include "lat/push-lattice.h"
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
typedef kaldi::int64 int64;
|
||||
using fst::SymbolTable;
|
||||
using fst::VectorFst;
|
||||
using fst::StdArc;
|
||||
|
||||
const char *usage =
|
||||
"Minimize lattices, in CompactLattice format. Should be applied to\n"
"determinized lattices (e.g. produced with --determinize-lattice=true).\n"
"Note: by default this program pushes the strings and weights prior to\n"
"minimization.\n"
|
||||
"Usage: lattice-minimize [options] lattice-rspecifier lattice-wspecifier\n"
|
||||
" e.g.: lattice-minimize ark:1.lats ark:2.lats\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool push_strings = true;
|
||||
bool push_weights = true;
|
||||
|
||||
po.Register("push-strings", &push_strings, "If true, push the strings in the "
|
||||
"lattice to the start.");
|
||||
po.Register("push-weights", &push_weights, "If true, push the weights in the "
|
||||
"lattice to the start.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string lats_rspecifier = po.GetArg(1),
|
||||
lats_wspecifier = po.GetArg(2);
|
||||
|
||||
|
||||
SequentialCompactLatticeReader clat_reader(lats_rspecifier);
|
||||
CompactLatticeWriter clat_writer(lats_wspecifier);
|
||||
|
||||
int32 n_done = 0, n_err = 0;
|
||||
|
||||
|
||||
for (; !clat_reader.Done(); clat_reader.Next()) {
|
||||
std::string key = clat_reader.Key();
|
||||
CompactLattice clat = clat_reader.Value();
|
||||
KALDI_VLOG(1) << "Processing lattice for utterance " << key;
|
||||
if (push_strings && !PushCompactLatticeStrings(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice strings (bad lattice?), "
|
||||
<< "for key " << key;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (push_weights && !PushCompactLatticeWeights(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice weights (bad lattice?),"
|
||||
<< "for key " << key ;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (!MinimizeCompactLattice(&clat)) {
|
||||
KALDI_WARN << "Failure in minimizing lattice (bad lattice?),"
|
||||
<< "for key " << key ;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (clat.NumStates() == 0) {
|
||||
KALDI_WARN << "Empty lattice for key " << key;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
clat_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
KALDI_LOG << "Minimized " << n_done << " lattices, errors on " << n_err;
|
||||
return (n_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
// latbin/lattice-oracle.cc
|
||||
|
||||
// Copyright 2011 Gilles Boulianne
|
||||
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
//
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -95,10 +96,10 @@ void CreateEditDistance(const fst::StdVectorFst &fst1,
|
|||
typedef StdArc StdArc;
|
||||
typedef StdArc::Weight Weight;
|
||||
typedef StdArc::Label Label;
|
||||
Weight corrCost(0.0);
|
||||
Weight subsCost(1.0);
|
||||
Weight insCost(1.0);
|
||||
Weight delCost(1.0);
|
||||
Weight correct_cost(0.0);
|
||||
Weight substitution_cost(1.0);
|
||||
Weight insertion_cost(1.0);
|
||||
Weight deletion_cost(1.0);
|
||||
|
||||
// create set of output symbols in fst1
|
||||
std::vector<Label> fst1syms, fst2syms;
|
||||
|
@ -108,17 +109,17 @@ void CreateEditDistance(const fst::StdVectorFst &fst1,
|
|||
pfst->AddState();
|
||||
pfst->SetStart(0);
|
||||
for (size_t i = 0; i < fst1syms.size(); i++)
|
||||
pfst->AddArc(0, StdArc(fst1syms[i], 0, delCost, 0)); // deletions
|
||||
pfst->AddArc(0, StdArc(fst1syms[i], 0, deletion_cost, 0)); // deletions
|
||||
|
||||
for (size_t i = 0; i < fst2syms.size(); i++)
|
||||
pfst->AddArc(0, StdArc(0, fst2syms[i], insCost, 0)); // insertions
|
||||
pfst->AddArc(0, StdArc(0, fst2syms[i], insertion_cost, 0)); // insertions
|
||||
|
||||
// stupid implementation O(N^2)
|
||||
for (size_t i = 0; i < fst1syms.size(); i++) {
|
||||
Label label1 = fst1syms[i];
|
||||
for (size_t j = 0; j < fst2syms.size(); j++) {
|
||||
Label label2 = fst2syms[j];
|
||||
Weight cost( label1 == label2 ? corrCost : subsCost);
|
||||
Weight cost( label1 == label2 ? correct_cost : substitution_cost);
|
||||
pfst->AddArc(0, StdArc(label1, label2, cost, 0)); // substitutions
|
||||
}
|
||||
}
|
||||
|
@ -127,30 +128,33 @@ void CreateEditDistance(const fst::StdVectorFst &fst1,
|
|||
}
|
||||
|
||||
void CountErrors(fst::StdVectorFst &fst,
|
||||
unsigned int *corr,
|
||||
unsigned int *subs,
|
||||
unsigned int *ins,
|
||||
unsigned int *del,
|
||||
unsigned int *totwords) {
|
||||
int32 *correct,
|
||||
int32 *substitutions,
|
||||
int32 *insertions,
|
||||
int32 *deletions,
|
||||
int32 *num_words) {
|
||||
typedef fst::StdArc::StateId StateId;
|
||||
typedef fst::StdArc::Weight Weight;
|
||||
*corr = *subs = *ins = *del = *totwords = 0;
|
||||
*correct = *substitutions = *insertions = *deletions = *num_words = 0;
|
||||
|
||||
// go through the first complete path in fst (there should be only one)
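// (In the composed FST the ilabels are hypothesis words and the olabels are
// reference words, so an epsilon ilabel paired with a nonzero olabel is a
// deletion, and a nonzero ilabel paired with an epsilon olabel is an
// insertion; num_words counts reference words.)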
|
||||
StateId src = fst.Start();
|
||||
while (fst.Final(src)== Weight::Zero()) { // while not final
|
||||
for (fst::ArcIterator<fst::StdVectorFst> aiter(fst, src); !aiter.Done(); aiter.Next()) {
|
||||
fst::StdArc arc = aiter.Value();
|
||||
if (arc.ilabel == 0 && arc.olabel == 0) {
|
||||
// don't count these so we may compare number of arcs and number of errors
|
||||
} else if (arc.ilabel == arc.olabel) {
|
||||
(*corr)++; (*totwords)++;
|
||||
} else if (arc.ilabel == 0) {
|
||||
(*ins)++;
|
||||
} else if (arc.olabel == 0) {
|
||||
(*del)++; (*totwords)++;
|
||||
if (arc.ilabel == arc.olabel && arc.ilabel != 0) {
|
||||
(*correct)++;
|
||||
(*num_words)++;
|
||||
} else if (arc.ilabel == 0 && arc.olabel != 0) {
|
||||
(*deletions)++;
|
||||
(*num_words)++;
|
||||
} else if (arc.ilabel != 0 && arc.olabel == 0) {
|
||||
(*insertions)++;
|
||||
} else if (arc.ilabel != 0 && arc.olabel != 0) {
|
||||
(*substitutions)++;
|
||||
(*num_words)++;
|
||||
} else {
|
||||
(*subs)++; (*totwords)++;
|
||||
KALDI_ASSERT(arc.ilabel == 0 && arc.olabel == 0);
|
||||
}
|
||||
src = arc.nextstate;
|
||||
continue; // jump to next state
|
||||
|
@ -175,7 +179,7 @@ bool CheckFst(fst::StdVectorFst &fst, string name, string key) {
|
|||
|
||||
// Guoguo Chen added the implementation for option "write-lattices". This
|
||||
// function does a depth first search on the lattice and remove the arcs that
|
||||
// don't correspond to the oracle path. By "remove" I actually point the next
|
||||
// don't correspond to the oracle path. By "remove" I actually point the next
|
||||
// state of the arc to some state that is not in the lattice and then use the
|
||||
// openfst connect function. This makes things much easier.
|
||||
bool GetOracleLattice(Lattice *oracle_lat,
|
||||
|
@ -229,8 +233,9 @@ int main(int argc, char *argv[]) {
|
|||
const char *usage =
|
||||
"Finds the path having the smallest edit-distance between two lattices.\n"
|
||||
"For efficiency put the smallest lattices first (for example reference strings).\n"
|
||||
"Usage: lattice-oracle [options] test-lattice-rspecifier reference-rspecifier transcriptions-wspecifier\n"
|
||||
" e.g.: lattice-oracle ark:ref.lats ark:1.tra ark:2.tra\n";
|
||||
"Usage: lattice-oracle [options] test-lattice-rspecifier reference-rspecifier "
|
||||
"transcriptions-wspecifier\n"
|
||||
" e.g.: lattice-oracle ark:lat.1 'ark:sym2int.pl -f 2- data/lang/words.txt <data/test/text' ark,t:-\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
|
@ -238,27 +243,28 @@ int main(int argc, char *argv[]) {
|
|||
std::string wild_syms_filename;
|
||||
|
||||
std::string lats_wspecifier;
|
||||
|
||||
po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
|
||||
po.Register("wildcard-symbols-list", &wild_syms_filename, "List of symbols that don't count as errors");
|
||||
po.Register("write-lattices", &lats_wspecifier, "If supplied, write 1-best path as lattices to this wspecifier");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() < 3 || po.NumArgs() > 4) {
|
||||
if (po.NumArgs() != 3) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string lats_rspecifier = po.GetArg(1),
|
||||
reference_rspecifier = po.GetArg(2),
|
||||
transcriptions_wspecifier = po.GetOptArg(3);
|
||||
transcriptions_wspecifier = po.GetArg(3);
|
||||
|
||||
// will read input as lattices
|
||||
SequentialLatticeReader lattice_reader(lats_rspecifier);
|
||||
RandomAccessInt32VectorReader reference_reader(reference_rspecifier);
|
||||
|
||||
Int32VectorWriter transcriptions_writer(transcriptions_wspecifier);
|
||||
|
||||
|
||||
// Guoguo Chen added the implementation for option "write-lattices".
|
||||
CompactLatticeWriter lats_writer(lats_wspecifier);
|
||||
|
||||
|
@ -276,7 +282,8 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
|
||||
int32 n_done = 0, n_fail = 0;
|
||||
unsigned int tot_corr=0, tot_subs=0, tot_ins=0, tot_del=0, tot_words=0;
|
||||
int32 tot_correct = 0, tot_substitutions = 0, tot_insertions = 0,
tot_deletions = 0, tot_words = 0;
|
||||
|
||||
for (; !lattice_reader.Done(); lattice_reader.Next()) {
|
||||
std::string key = lattice_reader.Key();
|
||||
|
@ -284,9 +291,9 @@ int main(int argc, char *argv[]) {
|
|||
cerr << "Lattice "<<key<<" read."<<endl;
|
||||
|
||||
// remove all weights while creating a standard FST
|
||||
VectorFst<StdArc> fst1;
|
||||
ConvertLatticeToUnweightedAcceptor(lat, wild_syms, &fst1);
|
||||
CheckFst(fst1, "fst1_", key);
|
||||
VectorFst<StdArc> lattice_fst;
|
||||
ConvertLatticeToUnweightedAcceptor(lat, wild_syms, &lattice_fst);
|
||||
CheckFst(lattice_fst, "lattice_fst_", key);
|
||||
|
||||
// TODO: map certain symbols (using an FST created with CreateMapFst())
|
||||
|
||||
|
@ -296,44 +303,49 @@ int main(int argc, char *argv[]) {
|
|||
continue;
|
||||
}
|
||||
const std::vector<int32> &reference = reference_reader.Value(key);
|
||||
VectorFst<StdArc> fst2;
|
||||
MakeLinearAcceptor(reference, &fst2);
|
||||
VectorFst<StdArc> reference_fst;
|
||||
MakeLinearAcceptor(reference, &reference_fst);
|
||||
|
||||
CheckFst(fst2, "fst2_", key);
|
||||
CheckFst(reference_fst, "reference_fst_", key);
|
||||
|
||||
// recreate edit distance fst if necessary
|
||||
fst::StdVectorFst editDistanceFst;
|
||||
CreateEditDistance(fst1, fst2, &editDistanceFst);
|
||||
fst::StdVectorFst edit_distance_fst;
|
||||
CreateEditDistance(lattice_fst, reference_fst, &edit_distance_fst);
|
||||
|
||||
// compose with edit distance transducer
|
||||
VectorFst<StdArc> composedFst;
|
||||
fst::Compose(editDistanceFst, fst2, &composedFst);
|
||||
CheckFst(composedFst, "composed_", key);
|
||||
VectorFst<StdArc> edit_ref_fst;
|
||||
fst::Compose(edit_distance_fst, reference_fst, &edit_ref_fst);
|
||||
CheckFst(edit_ref_fst, "composed_", key);
|
||||
|
||||
// make sure composed FST is input sorted
|
||||
fst::ArcSort(&composedFst, fst::StdILabelCompare());
|
||||
fst::ArcSort(&edit_ref_fst, fst::StdILabelCompare());
|
||||
|
||||
// compose with previous result
|
||||
VectorFst<StdArc> resultFst;
|
||||
fst::Compose(fst1, composedFst, &resultFst);
|
||||
CheckFst(resultFst, "result_", key);
|
||||
VectorFst<StdArc> result_fst;
|
||||
fst::Compose(lattice_fst, edit_ref_fst, &result_fst);
|
||||
CheckFst(result_fst, "result_", key);
|
||||
|
||||
// find out best path
|
||||
VectorFst<StdArc> best_path;
|
||||
fst::ShortestPath(resultFst, &best_path);
|
||||
fst::ShortestPath(result_fst, &best_path);
|
||||
CheckFst(best_path, "best_path_", key);
|
||||
|
||||
if (best_path.Start() == fst::kNoStateId) {
|
||||
KALDI_WARN << "Best-path failed for key " << key;
|
||||
n_fail++;
|
||||
} else {
|
||||
|
||||
// count errors
|
||||
unsigned int corr, subs, ins, del, totwords;
|
||||
CountErrors(best_path, &corr, &subs, &ins, &del, &totwords);
|
||||
unsigned int toterrs = subs+ins+del;
|
||||
KALDI_LOG << "%WER "<<(100.*toterrs)/totwords<<" [ "<<toterrs<<" / "<<totwords<<", "<<ins<<" ins, "<<del<<" del, "<<subs<<" sub ]";
|
||||
tot_corr += corr; tot_subs += subs; tot_ins += ins; tot_del += del; tot_words += totwords;
|
||||
int32 correct, substitutions, insertions, deletions, num_words;
|
||||
CountErrors(best_path, &correct, &substitutions, &insertions, &deletions, &num_words);
|
||||
int32 toterrs = substitutions + insertions + deletions;
|
||||
KALDI_LOG << "%WER " << (100.*toterrs) / num_words << " [ " << toterrs
|
||||
<< " / " << num_words << ", " << insertions << " insertions, " << deletions
|
||||
<< " deletions, " << substitutions << " sub ]";
|
||||
tot_correct += correct;
|
||||
tot_substitutions += substitutions;
|
||||
tot_insertions += insertions;
|
||||
tot_deletions += deletions;
|
||||
tot_words += num_words;
|
||||
|
||||
std::vector<int32> oracle_words;
|
||||
std::vector<int32> reference_words;
|
||||
|
@ -354,7 +366,8 @@ int main(int argc, char *argv[]) {
|
|||
for (size_t i = 0; i < reference_words.size(); i++) {
|
||||
std::string s = word_syms->Find(reference_words[i]);
|
||||
if (s == "")
|
||||
KALDI_ERR << "Word-id " << reference_words[i] <<" not in symbol table.";
|
||||
KALDI_ERR << "Word-id " << reference_words[i]
|
||||
<< " not in symbol table.";
|
||||
std::cerr << s << ' ';
|
||||
}
|
||||
std::cerr << '\n';
|
||||
|
@ -367,8 +380,10 @@ int main(int argc, char *argv[]) {
|
|||
if (lats_wspecifier != "") {
|
||||
Lattice oracle_lat = lat;
|
||||
LatticeArc::StateId bad_state = oracle_lat.AddState();
|
||||
if (!GetOracleLattice(&oracle_lat, oracle_words, bad_state, oracle_lat.Start(), 0))
|
||||
KALDI_WARN << "Fail to find the oracle path in the original lattice: " << key;
|
||||
if (!GetOracleLattice(&oracle_lat, oracle_words,
|
||||
bad_state, oracle_lat.Start(), 0))
|
||||
KALDI_WARN << "Failed to find the oracle path in the original "
|
||||
<< "lattice: " << key;
|
||||
CompactLattice oracle_clat;
|
||||
ConvertLattice(oracle_lat, &oracle_clat);
|
||||
lats_writer.Write(key, oracle_clat);
|
||||
|
@ -377,9 +392,13 @@ int main(int argc, char *argv[]) {
|
|||
n_done++;
|
||||
}
|
||||
if (word_syms) delete word_syms;
|
||||
unsigned int tot_errs = tot_subs + tot_del + tot_ins;
|
||||
KALDI_LOG << "Overall %WER "<<(100.*tot_errs)/tot_words<<" [ "<<tot_errs<<" / "<<tot_words<<", "<<tot_ins<<" ins, "<<tot_del<<" del, "<<tot_subs<<" sub ]";
|
||||
KALDI_LOG << "Scored " << n_done << " lattices, "<<n_fail<<" not present in hyp.";
|
||||
int32 tot_errs = tot_substitutions + tot_deletions + tot_insertions;
|
||||
KALDI_LOG << "Overall %WER " << (100.*tot_errs)/tot_words << " [ "
|
||||
<< tot_errs << " / " << tot_words << ", " << tot_insertions
|
||||
<< " insertions, " << tot_deletions << " deletions, "
|
||||
<< tot_substitutions << " substitutions ]";
|
||||
KALDI_LOG << "Scored " << n_done << " lattices, " << n_fail
|
||||
<< " not present in ref.";
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
|
|
|
@ -37,13 +37,22 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
const char *usage =
|
||||
"Push lattices, in CompactLattice format, so that the strings are as\n"
|
||||
"close to the start as possible. Does not affect the weights. This can\n"
|
||||
"be helpful prior to word-alignment.\n"
|
||||
"close to the start as possible, and the lowest cost weight for each\n"
|
||||
"state except the start state is (0, 0). This can be helpful prior to\n"
|
||||
"word-alignment (in this case, only strings need to be pushed)\n"
|
||||
"\n"
|
||||
"Usage: lattice-push [options] lattice-rspecifier lattice-wspecifier\n"
|
||||
" e.g.: lattice-push ark:1.lats ark:2.lats\n";
|
||||
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool push_strings = true;
|
||||
bool push_weights = true;
|
||||
|
||||
po.Register("push-strings", &push_strings, "If true, push the strings in the "
|
||||
"lattice to the start.");
|
||||
po.Register("push-weights", &push_weights, "If true, push the weights in the "
|
||||
"lattice to the start.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -66,18 +75,25 @@ int main(int argc, char *argv[]) {
|
|||
std::string key = clat_reader.Key();
|
||||
CompactLattice clat = clat_reader.Value();
|
||||
KALDI_VLOG(1) << "Processing lattice for utterance " << key;
|
||||
if (!PushCompactLattice(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice (bad lattice?) for key " << key;
|
||||
if (push_strings && !PushCompactLatticeStrings(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice strings (bad lattice?), "
|
||||
<< "for key " << key;
|
||||
n_err++;
|
||||
} else {
|
||||
if (clat.NumStates() == 0) {
|
||||
KALDI_WARN << "Empty lattice for key " << key;
|
||||
n_err++;
|
||||
} else {
|
||||
clat_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (push_weights && !PushCompactLatticeWeights(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice weights (bad lattice?),"
|
||||
<< "for key " << key ;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (clat.NumStates() == 0) {
|
||||
KALDI_WARN << "Empty lattice for key " << key;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
clat_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
KALDI_LOG << "Pushed " << n_done << " lattices, errors on " << n_err;
|
||||
return (n_done != 0 ? 0 : 1);
|
||||
|
|
|
@ -44,10 +44,11 @@ void CompressedMatrix::CopyFromMat(
|
|||
// we need to ensure that the percentile_0 through percentile_100
|
||||
// are in strictly increasing order.
|
||||
float min_value = mat.Min(), max_value = mat.Max();
|
||||
float safety_margin = 0.001 * (fabs(min_value) + fabs(max_value));
|
||||
if (safety_margin == 0.0) safety_margin = 1.0;
|
||||
min_value -= safety_margin;
|
||||
max_value += safety_margin;
|
||||
if (max_value == min_value)
|
||||
max_value = min_value + (1.0 + fabs(min_value)); // ensure it's strictly
|
||||
// greater than min_value,
|
||||
// even if matrix is
|
||||
// constant.
|
||||
|
||||
global_header.min_value = min_value;
|
||||
global_header.range = max_value - min_value;
|
||||
|
@ -125,17 +126,25 @@ void CompressedMatrix::ComputeColHeader(
|
|||
|
||||
if (num_rows >= 5) {
|
||||
int quarter_nr = num_rows/4;
|
||||
// The elements at positions 0, quarter_nr,
|
||||
// std::sort(sdata.begin(), sdata.end());
|
||||
// The elements at positions 0, quarter_nr,
|
||||
// 3*quarter_nr, and num_rows-1 need to be in sorted order.
|
||||
// Note: the + 1's below are not necessary but may speed things
|
||||
// up slightly.
|
||||
std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
|
||||
// Now, sdata.begin() + quarter_nr contains the element that would appear
|
||||
// in sorted order, in that position.
|
||||
std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
|
||||
// Now, sdata.begin() and sdata.begin() + quarter_nr contain the elements
|
||||
// that would appear at those positions in sorted order.
|
||||
std::nth_element(sdata.begin() + quarter_nr + 1,
|
||||
sdata.begin() + (3*quarter_nr) + 1, sdata.end());
|
||||
std::nth_element(sdata.begin() + (3*quarter_nr), sdata.end() - 1,
|
||||
sdata.begin() + (3*quarter_nr), sdata.end());
|
||||
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
|
||||
// 3*quarter_nr, contain the elements that would appear at those positions
|
||||
// in sorted order.
|
||||
std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
|
||||
sdata.end());
|
||||
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
|
||||
// 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear
|
||||
// at those positions in sorted order.
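// Using nth_element this way costs O(num_rows) on average per call, versus
// the O(num_rows * log(num_rows)) of the full std::sort that it replaces
// (commented out above).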
|
||||
|
||||
header->percentile_0 = FloatToUint16(global_header, sdata[0]);
|
||||
header->percentile_25 = std::max<uint16>(
|
||||
|
@ -228,7 +237,7 @@ void CompressedMatrix::CompressColumn(
|
|||
unsigned char *byte_data) {
|
||||
ComputeColHeader(global_header, data, stride,
|
||||
num_rows, header);
|
||||
|
||||
|
||||
float p0 = Uint16ToFloat(global_header, header->percentile_0),
|
||||
p25 = Uint16ToFloat(global_header, header->percentile_25),
|
||||
p75 = Uint16ToFloat(global_header, header->percentile_75),
|
||||
|
@ -248,6 +257,9 @@ void* CompressedMatrix::AllocateData(int32 num_bytes) {
|
|||
return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);
|
||||
}
|
||||
|
||||
#define DEBUG_COMPRESSED_MATRIX 0 // Must be zero for Kaldi to work; use 1 only
|
||||
// for debugging.
|
||||
|
||||
void CompressedMatrix::Write(std::ostream &os, bool binary) const {
|
||||
if (binary) { // Binary-mode write:
|
||||
WriteToken(os, binary, "CM");
|
||||
|
@ -264,12 +276,13 @@ void CompressedMatrix::Write(std::ostream &os, bool binary) const {
|
|||
}
|
||||
} else {
|
||||
// In text mode, just use the same format as a regular matrix.
|
||||
// This is not compressed.
|
||||
#if DEBUG_COMPRESSED_MATRIX == 0
|
||||
Matrix<BaseFloat> temp_mat(this->NumRows(), this->NumCols(),
|
||||
kUndefined);
|
||||
this->CopyToMat(&temp_mat);
|
||||
temp_mat.Write(os, binary);
|
||||
|
||||
/*
|
||||
#else
|
||||
// Text-mode writing. Only really useful for debug, but we'll implement it.
|
||||
if (data_ == NULL) {
|
||||
os << 0.0 << ' ' << 0.0 << ' ' << 0 << ' ' << 0 << '\n';
|
||||
|
@ -288,7 +301,9 @@ void CompressedMatrix::Write(std::ostream &os, bool binary) const {
|
|||
for (int32 j = 0; j < h.num_rows; j++, c++)
|
||||
os << static_cast<int>(*c) << ' ';
|
||||
os << '\n';
|
||||
} */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (os.fail())
|
||||
KALDI_ERR << "Error writing compressed matrix to stream.";
|
||||
|
@ -316,11 +331,12 @@ void CompressedMatrix::Read(std::istream &is, bool binary) {
|
|||
is.read(reinterpret_cast<char*>(data_) + sizeof(GlobalHeader),
|
||||
remaining_size);
|
||||
} else { // Text-mode read.
|
||||
#if DEBUG_COMPRESSED_MATRIX == 0
|
||||
Matrix<BaseFloat> temp;
|
||||
temp.Read(is, binary);
|
||||
this->CopyFromMat(temp);
|
||||
/*
|
||||
// The old reading code...
|
||||
#else
|
||||
// The old reading code...
|
||||
GlobalHeader h;
|
||||
is >> h.min_value >> h.range >> h.num_rows >> h.num_cols;
|
||||
if (is.fail())
|
||||
|
@ -346,7 +362,8 @@ void CompressedMatrix::Read(std::istream &is, bool binary) {
|
|||
assert(i >= 0 && i <= 255);
|
||||
*c = static_cast<unsigned char>(i);
|
||||
}
|
||||
} */
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (is.fail())
|
||||
KALDI_ERR << "Failed to read data.";
|
||||
|
|
|
@ -123,7 +123,6 @@ class CompressedMatrix {
|
|||
uint16 percentile_100;
|
||||
};
|
||||
|
||||
// The following function is called in CopyToMatrix.
|
||||
template<typename Real>
|
||||
static void CompressColumn(const GlobalHeader &global_header,
|
||||
const Real *data, MatrixIndexT stride,
|
||||
|
|
|
@ -1346,7 +1346,7 @@ template<typename Real> static void UnitTestEig() {
|
|||
Pinv.Invert();
|
||||
Matrix<Real> D(dimM, dimM);
|
||||
CreateEigenvalueMatrix(real_eigs, imag_eigs, &D);
|
||||
|
||||
|
||||
// check that M = P D P^{-1}.
|
||||
Matrix<Real> tmp(dimM, dimM);
|
||||
tmp.AddMatMat(1.0, P, kNoTrans, D, kNoTrans, 0.0); // tmp = P * D
|
||||
|
@ -3838,6 +3838,22 @@ template<typename Real> static void UnitTestCompressedMatrix() {
|
|||
Matrix<Real> diff(M2);
|
||||
diff.AddMat(-1.0, M);
|
||||
|
||||
{ // Check that when compressing a matrix that has already been compressed,
|
||||
// and uncompressing, we get the same answer.
|
||||
CompressedMatrix cmat2(M2);
|
||||
Matrix<Real> M3(cmat.NumRows(), cmat.NumCols());
|
||||
cmat2.CopyToMat(&M3);
|
||||
if (!M2.ApproxEqual(M3, 1.0e-05)) {
|
||||
KALDI_LOG << "cmat is: ";
|
||||
cmat.Write(std::cout, false);
|
||||
KALDI_LOG << "cmat2 is: ";
|
||||
cmat2.Write(std::cout, false);
|
||||
KALDI_ERR << "Matrices differ " << M2 << " vs. " << M3 << ", M2 range is "
|
||||
<< M2.Min() << " to " << M2.Max() << ", M3 range is "
|
||||
<< M3.Min() << " to " << M3.Max();
|
||||
}
|
||||
}
|
||||
|
||||
// test CopyRowToVec
|
||||
for (MatrixIndexT i = 0; i < num_rows; i++) {
|
||||
Vector<Real> V(num_cols);
|
||||
|
@ -3891,6 +3907,7 @@ template<typename Real> static void UnitTestCompressedMatrix() {
|
|||
InitKaldiInputStream(ins, &binary_in);
|
||||
cmat2.Read(ins, binary_in);
|
||||
}
|
||||
#if 1
|
||||
{ // check that compressed-matrix can be read as matrix.
|
||||
bool binary_in;
|
||||
std::ifstream ins("tmpf", std::ios_base::in | std::ios_base::binary);
|
||||
|
@ -3900,6 +3917,7 @@ template<typename Real> static void UnitTestCompressedMatrix() {
|
|||
Matrix<Real> mat2(cmat2);
|
||||
AssertEqual(mat1, mat2);
|
||||
}
|
||||
#endif
|
||||
|
||||
Matrix<Real> M3(cmat2.NumRows(), cmat2.NumCols());
|
||||
cmat2.CopyToMat(&M3);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-example.cc
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
// Copyright 2012-2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -34,7 +34,8 @@ void NnetTrainingExample::Write(std::ostream &os, bool binary) const {
|
|||
WriteBasicType(os, binary, labels[i].second);
|
||||
}
|
||||
WriteToken(os, binary, "<InputFrames>");
|
||||
input_frames.Write(os, binary);
|
||||
CompressedMatrix compressed(input_frames);
|
||||
compressed.Write(os, binary); // can be read as regular Matrix.
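// Compressing here greatly shrinks the stored training examples; the
// compression is lossy (per-column quantization to bytes), a trade-off
// accepted in exchange for the reduction in disk usage.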
|
||||
WriteToken(os, binary, "<LeftContext>");
|
||||
WriteBasicType(os, binary, left_context);
|
||||
WriteToken(os, binary, "<SpkInfo>");
|
||||
|
|
|
@ -8,7 +8,6 @@ LDLIBS += $(CUDA_LDLIBS)
|
|||
|
||||
BINFILES = nnet-train-xent-hardlab-perutt \
|
||||
nnet-train-xent-hardlab-frmshuff \
|
||||
nnet-train-xent-hardlab-frmshuff-prior \
|
||||
nnet-train-mse-tgtmat-frmshuff \
|
||||
nnet-train-mmi-sequential \
|
||||
nnet-train-mpe-sequential \
|
||||
|
|
|
@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool binary_write = false;
|
||||
bool binary_write = true;
|
||||
po.Register("binary", &binary_write, "Write output in binary mode");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
|
|
@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
|
|||
int32 use_gpu_id=-2;
|
||||
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
|
||||
#else
|
||||
int32 use_gpu_id=-2;
|
||||
int32 use_gpu_id=0;
|
||||
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
|
||||
#endif
|
||||
|
||||
|
|
|
@@ -1,393 +0,0 @@
// nnetbin/nnet-mpe.cc

// Copyright 2011-2013  Karel Vesely;  Arnab Ghoshal

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "tree/context-dep.h"
#include "hmm/transition-model.h"
#include "fstext/fstext-lib.h"
#include "decoder/faster-decoder.h"
#include "decoder/decodable-matrix.h"
#include "lat/kaldi-lattice.h"
#include "lat/lattice-functions.h"

#include "nnet/nnet-component.h"
#include "nnet/nnet-activation.h"
#include "nnet/nnet-nnet.h"
#include "util/timer.h"
#include "cudamatrix/cu-device.h"


namespace kaldi {
namespace nnet1 {

void LatticeAcousticRescore(const Matrix<BaseFloat> &log_like,
                            const TransitionModel &trans_model,
                            const std::vector<int32> state_times,
                            Lattice *lat) {
  kaldi::uint64 props = lat->Properties(fst::kFstProperties, false);
  if (!(props & fst::kTopSorted))
    KALDI_ERR << "Input lattice must be topologically sorted.";

  KALDI_ASSERT(!state_times.empty());
  std::vector<std::vector<int32> > time_to_state(log_like.NumRows());
  for (size_t i = 0; i < state_times.size(); i++) {
    KALDI_ASSERT(state_times[i] >= 0);
    if (state_times[i] < log_like.NumRows())  // end state may be past this..
      time_to_state[state_times[i]].push_back(i);
    else
      KALDI_ASSERT(state_times[i] == log_like.NumRows()
                   && "There appears to be lattice/feature mismatch.");
  }

  for (int32 t = 0; t < log_like.NumRows(); t++) {
    for (size_t i = 0; i < time_to_state[t].size(); i++) {
      int32 state = time_to_state[t][i];
      for (fst::MutableArcIterator<Lattice> aiter(lat, state); !aiter.Done();
           aiter.Next()) {
        LatticeArc arc = aiter.Value();
        int32 trans_id = arc.ilabel;
        if (trans_id != 0) {  // Non-epsilon input label on arc
          int32 pdf_id = trans_model.TransitionIdToPdf(trans_id);
          arc.weight.SetValue2(-log_like(t, pdf_id) + arc.weight.Value2());
          aiter.SetValue(arc);
        }
      }
    }
  }
}

}  // namespace nnet1
}  // namespace kaldi


int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;
  typedef kaldi::int32 int32;
  try {
    const char *usage =
        "Perform one iteration of Neural Network MPE/sMBR training by stochastic "
        "gradient descent.\n"
        "Usage: nnet-mpe [options] <model-in> <transition-model-in> "
        "<feature-rspecifier> <den-lat-rspecifier> <ali-rspecifier> [<model-out>]\n"
        "e.g.: \n"
        " nnet-mpe nnet.init trans.mdl scp:train.scp scp:denlats.scp ark:train.ali "
        "nnet.iter1\n";

    ParseOptions po(usage);
    bool binary = false,
        crossvalidate = false;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("cross-validate", &crossvalidate,
                "Perform cross-validation (don't backpropagate)");

    BaseFloat learn_rate = 0.00001,
        momentum = 0.0,
        l2_penalty = 0.0,
        l1_penalty = 0.0;

    po.Register("learn-rate", &learn_rate, "Learning rate");
    po.Register("momentum", &momentum, "Momentum");
    po.Register("l2-penalty", &l2_penalty, "L2 penalty (weight decay)");
    po.Register("l1-penalty", &l1_penalty, "L1 penalty (promote sparsity)");

    std::string feature_transform, class_frame_counts, silence_phones_str;
    po.Register("feature-transform", &feature_transform,
                "Feature transform Neural Network");
    po.Register("class-frame-counts", &class_frame_counts,
                "Class frame counts to compute the class priors");
    po.Register("silence-phones", &silence_phones_str, "Colon-separated list "
                "of integer id's of silence phones, e.g. 46:47");

    BaseFloat acoustic_scale = 1.0,
        lm_scale = 1.0,
        old_acoustic_scale = 0.0;
    po.Register("acoustic-scale", &acoustic_scale,
                "Scaling factor for acoustic likelihoods");
    po.Register("lm-scale", &lm_scale,
                "Scaling factor for \"graph costs\" (including LM costs)");
    po.Register("old-acoustic-scale", &old_acoustic_scale,
                "Add in the scores in the input lattices with this scale, rather "
                "than discarding them.");

    bool do_smbr = false;
    po.Register("do-smbr", &do_smbr, "Use state-level accuracies instead of "
                "phone accuracies.");

#if HAVE_CUDA == 1
    kaldi::int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
                "(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#endif

    po.Read(argc, argv);

    if (po.NumArgs() != 6-(crossvalidate?1:0)) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_filename = po.GetArg(1),
        transition_model_filename = po.GetArg(2),
        feature_rspecifier = po.GetArg(3),
        den_lat_rspecifier = po.GetArg(4),
        ref_ali_rspecifier = po.GetArg(5);

    std::string target_model_filename;
    if (!crossvalidate) {
      target_model_filename = po.GetArg(6);
    }

    std::vector<int32> silence_phones;
    if (!kaldi::SplitStringToIntegers(silence_phones_str, ":", false,
                                      &silence_phones))
      KALDI_ERR << "Invalid silence-phones string " << silence_phones_str;
    kaldi::SortAndUniq(&silence_phones);
    if (silence_phones.empty())
      KALDI_LOG << "No silence phones specified.";

    // Select the GPU
#if HAVE_CUDA == 1
    if (use_gpu_id > -2)
      CuDevice::Instantiate().SelectGpuId(use_gpu_id);
#endif

    Nnet nnet_transf;
    if (feature_transform != "") {
      nnet_transf.Read(feature_transform);
    }

    Nnet nnet;
    nnet.Read(model_filename);
    // using activations directly: remove softmax, if present
    if (nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
      KALDI_LOG << "Removing softmax from the nnet " << model_filename;
      nnet.RemoveLayer(nnet.LayerCount()-1);
    } else {
      KALDI_LOG << "The nnet was without softmax " << model_filename;
    }

    nnet.SetLearnRate(learn_rate, NULL);
    nnet.SetMomentum(momentum);
    nnet.SetL2Penalty(l2_penalty);
    nnet.SetL1Penalty(l1_penalty);

    TransitionModel trans_model;
    ReadKaldiObject(transition_model_filename, &trans_model);

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
    RandomAccessLatticeReader den_lat_reader(den_lat_rspecifier);
    RandomAccessInt32VectorReader ref_ali_reader(ref_ali_rspecifier);

    CuMatrix<BaseFloat> feats, feats_transf, nnet_out, nnet_diff;
    Matrix<BaseFloat> nnet_out_h, nnet_diff_h;

    // Read the class-counts, compute priors
    CuVector<BaseFloat> log_priors;
    if (class_frame_counts != "") {
      Vector<BaseFloat> tmp_priors;
      Input in;
      in.OpenTextMode(class_frame_counts);
      tmp_priors.Read(in.Stream(), false);
      in.Close();

      // create inv. priors, or log inv priors
      BaseFloat sum = tmp_priors.Sum();
      tmp_priors.Scale(1.0 / sum);
      tmp_priors.ApplyLog();

      // push priors to GPU
      log_priors.Resize(tmp_priors.Dim());
      log_priors.CopyFromVec(tmp_priors);
    }


    Timer time;
    double time_now = 0;
    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " STARTED";

    int32 num_done = 0, num_no_ref_ali = 0, num_no_den_lat = 0,
        num_other_error = 0;

    kaldi::int64 total_frames = 0;
    double total_frame_acc = 0.0, utt_frame_acc;

    // do per-utterance processing
    for (; !feature_reader.Done(); feature_reader.Next()) {
      std::string utt = feature_reader.Key();
      if (!den_lat_reader.HasKey(utt)) {
        KALDI_WARN << "Utterance " << utt << ": found no lattice.";
        num_no_den_lat++;
        continue;
      }
      if (!ref_ali_reader.HasKey(utt)) {
        KALDI_WARN << "Utterance " << utt << ": found no reference alignment.";
        num_no_ref_ali++;
        continue;
      }

      // 1) get the features, numerator alignment
      const Matrix<BaseFloat> &mat = feature_reader.Value();
      const std::vector<int32> &ref_ali = ref_ali_reader.Value(utt);
      // check for temporal length of numerator alignments
      if (static_cast<MatrixIndexT>(ref_ali.size()) != mat.NumRows()) {
        KALDI_WARN << "Numerator alignment has wrong length "
                   << ref_ali.size() << " vs. " << mat.NumRows();
        num_other_error++;
        continue;
      }

      // 2) get the denominator lattice, preprocess
      Lattice den_lat = den_lat_reader.Value(utt);
      if (old_acoustic_scale != 1.0) {
        fst::ScaleLattice(fst::AcousticLatticeScale(old_acoustic_scale),
                          &den_lat);
      }
      // sort it topologically if not already so
      kaldi::uint64 props = den_lat.Properties(fst::kFstProperties, false);
      if (!(props & fst::kTopSorted)) {
        if (fst::TopSort(&den_lat) == false)
          KALDI_ERR << "Cycles detected in lattice.";
      }
      // get the lattice length and times of states
      vector<int32> state_times;
      int32 max_time = kaldi::LatticeStateTimes(den_lat, &state_times);
      // check for temporal length of denominator lattices
      if (max_time != mat.NumRows()) {
        KALDI_WARN << "Denominator lattice has wrong length " << max_time
                   << " vs. " << mat.NumRows();
        num_other_error++;
        continue;
      }

      // 3) propagate the feature to get the log-posteriors (nnet w/o softmax)
      // push features to GPU
      feats = mat;
      // possibly apply transform
      nnet_transf.Feedforward(feats, &feats_transf);
      // propagate through the nnet (assuming w/o softmax)
      nnet.Propagate(feats_transf, &nnet_out);
      // subtract the log_priors
      if (log_priors.Dim() > 0) {
        nnet_out.AddVecToRows(-1.0, log_priors);
      }
      // transfer it back to the host
      int32 num_frames = nnet_out.NumRows(),
          num_pdfs = nnet_out.NumCols();
      nnet_out_h.Resize(num_frames, num_pdfs, kUndefined);
      nnet_out.CopyToMat(&nnet_out_h);

      // 4) rescore the lattice
      LatticeAcousticRescore(nnet_out_h, trans_model, state_times, &den_lat);
      if (acoustic_scale != 1.0 || lm_scale != 1.0)
        fst::ScaleLattice(fst::LatticeScale(lm_scale, acoustic_scale), &den_lat);

      // 5) get the posteriors
      vector< std::map<int32, char> > arc_accs;
      arc_accs.resize(ref_ali.size());
      kaldi::Posterior post;

      if (do_smbr) {  // use state-level accuracies, i.e. sMBR estimation
        for (size_t i = 0; i < ref_ali.size(); i++) {
          int32 pdf = trans_model.TransitionIdToPdf(ref_ali[i]);
          arc_accs[i][pdf] = 1;
        }
        utt_frame_acc = LatticeForwardBackwardSmbr(den_lat, trans_model,
                                                   arc_accs, silence_phones,
                                                   &post);
      } else {  // use phone-level accuracies, i.e. regular MPE
        for (size_t i = 0; i < ref_ali.size(); i++) {
          int32 phone = trans_model.TransitionIdToPhone(ref_ali[i]);
          arc_accs[i][phone] = 1;
        }
        utt_frame_acc = kaldi::LatticeForwardBackwardMpe(den_lat, trans_model,
                                                         arc_accs, &post,
                                                         silence_phones);
      }

      // 6) convert the Posterior to a matrix
      nnet_diff_h.Resize(num_frames, num_pdfs, kSetZero);
      for (int32 t = 0; t < post.size(); t++) {
        for (int32 arc = 0; arc < post[t].size(); arc++) {
          int32 pdf = trans_model.TransitionIdToPdf(post[t][arc].first);
          nnet_diff_h(t, pdf) -= post[t][arc].second;
        }
      }

      KALDI_VLOG(1) << "Processed lattice for utterance " << num_done + 1
                    << " (" << utt << "): found " << den_lat.NumStates()
                    << " states and " << fst::NumArcs(den_lat) << " arcs.";

      KALDI_VLOG(1) << "Utterance " << utt << ": Average frame accuracy = "
                    << (utt_frame_acc/num_frames) << " over " << num_frames
                    << " frames.";

      // 7) backpropagate through the nnet
      if (!crossvalidate) {
        nnet_diff = nnet_diff_h;
        nnet.Backpropagate(nnet_diff, NULL);
      }

      // increase time counter
      total_frame_acc += utt_frame_acc;
      total_frames += num_frames;
      num_done++;

      if (num_done % 100 == 0) {
        time_now = time.Elapsed();
        KALDI_VLOG(1) << "After " << num_done << " utterances: time elapsed = "
                      << time_now/60 << " min; processed " << total_frames/time_now
                      << " frames per second.";
      }
    }

    if (!crossvalidate) {
      // add the softmax layer back before writing
      KALDI_LOG << "Appending the softmax " << target_model_filename;
      nnet.AppendLayer(new Softmax(nnet.OutputDim(),nnet.OutputDim(),&nnet));
      // store the nnet
      nnet.Write(target_model_filename, binary);
    }

    time_now = time.Elapsed();
    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " FINISHED; "
              << "Time taken = " << time_now/60 << " min; processed "
              << (total_frames/time_now) << " frames per second.";

    KALDI_LOG << "Done " << num_done << " files, "
              << num_no_ref_ali << " with no reference alignments, "
              << num_no_den_lat << " with no lattices, "
              << num_other_error << " with other errors.";

    KALDI_LOG << "Overall average frame-accuracy is "
              << (total_frame_acc/total_frames) << " over " << total_frames
              << " frames.";

#if HAVE_CUDA == 1
    CuDevice::Instantiate().PrintProfile();
#endif

    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
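One detail of the deleted nnet-mpe.cc worth keeping in view: step 6 turns the sparse MPE/sMBR posteriors into the dense error matrix that is backpropagated. A standalone sketch of that conversion, with plain STL types standing in for Kaldi's Posterior and TransitionModel (both stand-ins are assumptions for illustration):

#include <utility>
#include <vector>

// Per frame, a Posterior holds (transition-id, weight) pairs; the gradient
// w.r.t. the pre-softmax activations accumulates the negated weights into
// the corresponding pdf columns, exactly as in step 6 of the deleted tool.
typedef std::vector<std::vector<std::pair<int, float> > > ToyPosterior;

void PosteriorToGradient(const ToyPosterior &post,
                         int num_pdfs,
                         int (*trans_id_to_pdf)(int),  // toy TransitionIdToPdf
                         std::vector<std::vector<float> > *diff) {
  diff->assign(post.size(), std::vector<float>(num_pdfs, 0.0f));
  for (size_t t = 0; t < post.size(); t++)
    for (size_t i = 0; i < post[t].size(); i++)
      (*diff)[t][trans_id_to_pdf(post[t][i].first)] -= post[t][i].second;
}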
@@ -102,7 +102,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts; trn_opts.learn_rate=0.00001;
    trn_opts.Register(&po);

-    bool binary = false;
+    bool binary = true;
    po.Register("binary", &binary, "Write output in binary mode");

    std::string feature_transform;
@@ -134,6 +134,9 @@ int main(int argc, char *argv[]) {
    kaldi::int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
                "(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -100,7 +100,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts; trn_opts.learn_rate=0.00001;
    trn_opts.Register(&po);

-    bool binary = false;
+    bool binary = true;
    po.Register("binary", &binary, "Write output in binary mode");

    std::string feature_transform;
@@ -133,6 +133,9 @@ int main(int argc, char *argv[]) {
    kaldi::int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
                "(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -46,7 +46,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

-    bool binary = false,
+    bool binary = true,
        crossvalidate = false,
        randomize = true;
    po.Register("binary", &binary, "Write output in binary mode");
@@ -64,6 +64,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -1,223 +0,0 @@
// nnetbin/nnet-train-xent-hardlab-frmshuff-prior.cc

// Copyright 2011-2013  Karel Vesely, Brno University of Technology

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet-trnopts.h"
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-loss-prior.h"
#include "nnet/nnet-cache.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "util/timer.h"
#include "cudamatrix/cu-device.h"


int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;
  try {
    const char *usage =
        "Perform one iteration of Neural Network training by stochastic gradient descent.\n"
        "Usage: nnet-train-xent-hardlab-frmshuff-prior [options] <model-in> <feature-rspecifier> <alignments-rspecifier> [<model-out>]\n"
        "e.g.: \n"
        " nnet-train-xent-hardlab-frmshuff-prior nnet.init scp:train.scp ark:train.ali nnet.iter1\n";

    ParseOptions po(usage);

    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

    bool binary = false,
        crossvalidate = false,
        randomize = true;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("cross-validate", &crossvalidate, "Perform cross-validation (don't backpropagate)");
    po.Register("randomize", &randomize, "Perform the frame-level shuffling within the Cache::");

    std::string feature_transform;
    po.Register("feature-transform", &feature_transform, "Feature transform in Nnet format");

    int32 bunchsize=512, cachesize=32768, seed=777;
    po.Register("bunchsize", &bunchsize, "Size of weight update block");
    po.Register("cachesize", &cachesize, "Size of cache for frame level shuffling (max 8388479)");

    std::string prior_rxfile;
    po.Register("prior", &prior_rxfile, "Priors of the training data to scale down gradients of represented PDFs [REQUIRED]");
    BaseFloat prior_softener = 1000;  // i.e. use uniform prior (disable reweighting)
    BaseFloat prior_silence_amount = 1.0;  // i.e. disable silence downscaling (use all the silence data available)
    po.Register("prior-softener", &prior_softener, "Prior softener, scales uniform part added to prior before doing the inverse");
    po.Register("prior-silence-amount", &prior_silence_amount, "Define how much of ``effective silence data'' should be used for training (1.0 will bypass silence scaling)");
    int32 prior_silence_numpdf = 5;
    po.Register("prior-silence-numpdf", &prior_silence_numpdf, "Number of initial PDFs which model the silence");

#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#endif

    po.Read(argc, argv);

    if (po.NumArgs() != 4-(crossvalidate?1:0)) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_filename = po.GetArg(1),
        feature_rspecifier = po.GetArg(2),
        alignments_rspecifier = po.GetArg(3);

    std::string target_model_filename;
    if (!crossvalidate) {
      target_model_filename = po.GetArg(4);
    }

    // set the seed to the pre-defined value
    srand(seed);

    using namespace kaldi;
    using namespace kaldi::nnet1;
    typedef kaldi::int32 int32;

    // Select the GPU
#if HAVE_CUDA==1
    CuDevice::Instantiate().SelectGpuId(use_gpu_id);
#endif

    Nnet nnet_transf;
    if (feature_transform != "") {
      nnet_transf.Read(feature_transform);
    }

    Nnet nnet;
    nnet.Read(model_filename);
    nnet.SetTrainOptions(trn_opts);

    kaldi::int64 total_frames = 0;

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
    RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier);

    Cache cache;
    cachesize = (cachesize/bunchsize)*bunchsize;  // ensure divisibility
    cache.Init(cachesize, bunchsize);

    XentPrior xent;
    if (prior_rxfile != "") {
      xent.ReadPriors(prior_rxfile, prior_softener, prior_silence_amount, prior_silence_numpdf);
    } else {
      KALDI_ERR << "Missing prior file!";
    }

    CuMatrix<BaseFloat> feats, feats_transf, nnet_in, nnet_out, obj_diff;
    std::vector<int32> targets;

    Timer time;
    double time_now = 0;
    double time_next = 0;
    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " STARTED";

    int32 num_done = 0, num_no_alignment = 0, num_other_error = 0, num_cache = 0;
    while (1) {
      // fill the cache
      while (!cache.Full() && !feature_reader.Done()) {
        std::string utt = feature_reader.Key();
        if (!alignments_reader.HasKey(utt)) {
          num_no_alignment++;
        } else {
          // get feature alignment pair
          const Matrix<BaseFloat> &mat = feature_reader.Value();
          const std::vector<int32> &alignment = alignments_reader.Value(utt);
          // check the length of the data
          if ((int32)alignment.size() != mat.NumRows()) {
            KALDI_WARN << "Alignment has wrong length, ali " << (alignment.size()) << " vs. feats " << (mat.NumRows()) << ", " << utt;
            num_other_error++;
          } else {  // length OK
            // push features to GPU
            feats.Resize(mat.NumRows(), mat.NumCols(), kUndefined);
            feats.CopyFromMat(mat);
            // possibly apply transform
            nnet_transf.Feedforward(feats, &feats_transf);
            // add to cache
            cache.AddData(feats_transf, alignment);
            num_done++;
          }
        }
        Timer t_features;
        feature_reader.Next();
        time_next += t_features.Elapsed();

        // report the speed
        if (num_done % 1000 == 0) {
          time_now = time.Elapsed();
          KALDI_VLOG(1) << "After " << num_done << " utterances: time elapsed = "
                        << time_now/60 << " min; processed " << total_frames/time_now
                        << " frames per second.";
        }
      }
      // randomize
      if (!crossvalidate && randomize) {
        cache.Randomize();
      }
      // report
      KALDI_VLOG(1) << "Cache #" << ++num_cache << " "
                    << (cache.Randomized()?"[RND]":"[NO-RND]")
                    << " segments: " << num_done
                    << " frames: " << static_cast<double>(total_frames)/360000 << "h";
      // train with the cache
      while (!cache.Empty()) {
        // get block of feature/target pairs
        cache.GetBunch(&nnet_in, &targets);
        // train
        nnet.Propagate(nnet_in, &nnet_out);
        xent.EvalVec(nnet_out, targets, &obj_diff);
        if (!crossvalidate) {
          nnet.Backpropagate(obj_diff, NULL);
        }
        total_frames += nnet_in.NumRows();
      }

      // stop training when no more data
      if (feature_reader.Done()) break;
    }

    if (!crossvalidate) {
      nnet.Write(target_model_filename, binary);
    }

    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " FINISHED "
              << time.Elapsed()/60 << " min, fps " << total_frames/time.Elapsed()
              << ", feature wait " << time_next << "s";

    KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment
              << " with no alignments, " << num_other_error
              << " with other errors.";

    KALDI_LOG << xent.Report();

#if HAVE_CUDA==1
    CuDevice::Instantiate().PrintProfile();
#endif

    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
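The deleted trainer's outer loop (fill the cache, shuffle at frame level, drain in bunches) is the same scheme the surviving frmshuff tools use. A schematic sketch with toy types in place of the nnet1 Cache machinery (Frame, TrainEpoch and UpdateOnBunch are illustrative names, not Kaldi API):

#include <algorithm>
#include <random>
#include <vector>

struct Frame { std::vector<float> feat; int target; };

void TrainEpoch(std::vector<Frame> &stream, size_t cachesize, size_t bunchsize) {
  cachesize = (cachesize / bunchsize) * bunchsize;  // ensure divisibility
  std::mt19937 rng(777);                            // fixed seed, as in the tool
  size_t pos = 0;
  while (pos < stream.size()) {
    // 1) fill the cache
    size_t end = std::min(pos + cachesize, stream.size());
    std::vector<Frame> cache(stream.begin() + pos, stream.begin() + end);
    pos = end;
    // 2) frame-level shuffling
    std::shuffle(cache.begin(), cache.end(), rng);
    // 3) drain in bunches: propagate / evaluate / backpropagate each block
    for (size_t b = 0; b + bunchsize <= cache.size(); b += bunchsize) {
      // UpdateOnBunch(&cache[b], bunchsize);  // placeholder for the update
    }
  }
}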
@@ -42,7 +42,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

-    bool binary = false,
+    bool binary = true,
        crossvalidate = false,
        randomize = true;
    po.Register("binary", &binary, "Write output in binary mode");
@@ -63,6 +63,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

-    bool binary = false,
+    bool binary = true,
        crossvalidate = false;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("cross-validate", &crossvalidate, "Perform cross-validation (don't backpropagate)");
@@ -52,6 +52,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -1,76 +0,0 @@
// nnetbin/nnet-trim-last-n-layers.cc

// Copyright 2012  Karel Vesely

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "nnet/nnet-nnet.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    using namespace kaldi::nnet1;
    typedef kaldi::int32 int32;

    const char *usage =
        "Trim the ending part of the MLP\n"
        "Usage: nnet-trim-last-n-layers [options] <model-in> <model-out>\n"
        "e.g.:\n"
        " nnet-trim-last-n-layers --binary=false nnet.mdl nnet_txt.mdl\n";


    bool binary_write = false;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");

    int32 trim_num = 0;
    po.Register("n", &trim_num, "Number of transforms to be trimmed (including sigmoid/softmax)");

    po.Read(argc, argv);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_in_filename = po.GetArg(1),
        model_out_filename = po.GetArg(2);

    Nnet nnet;
    {
      bool binary_read;
      Input ki(model_in_filename, &binary_read);
      nnet.Read(ki.Stream(), binary_read);
    }

    {
      Output ko(model_out_filename, binary_write);
      int32 write_num_layers = nnet.LayerCount() - trim_num;
      nnet.WriteFrontLayers(ko.Stream(), binary_write, write_num_layers);
    }

    KALDI_LOG << "Written model to " << model_out_filename;
    return 0;
  } catch(const std::exception& e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
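What the removed tool did is small enough to restate: keep the first LayerCount() - n components and drop the trailing ones (typically the last affine plus sigmoid/softmax). A toy sketch over a generic layer list (illustrative, not the Nnet API):

#include <vector>

// Return the leading layers, trimming the last n; trimming more layers
// than exist keeps nothing rather than underflowing.
template <typename Layer>
std::vector<Layer> TrimLastNLayers(const std::vector<Layer> &layers, int n) {
  int keep = static_cast<int>(layers.size()) - n;
  if (keep < 0) keep = 0;
  return std::vector<Layer>(layers.begin(), layers.begin() + keep);
}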
@@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
        " rbm-convert-to-nnet --binary=false rbm.mdl nnet.mdl\n";


-    bool binary_write = false;
+    bool binary_write = true;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");
@@ -68,6 +68,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -5,7 +5,12 @@ include ../kaldi.mk

# The PA_RingBuffer interface is internal and is not exported in the .so library
# so we have to link against the static one
-EXTRA_LDLIBS = ../../tools/portaudio/install/lib/libportaudio.a
+ifneq "$(wildcard ../../tools/portaudio/install/lib/libportaudio.a)" ""
+EXTRA_LDLIBS = ../../tools/portaudio/install/lib/libportaudio.a
+else
+EXTRA_LDLIBS = ../../tools/portaudio/install/lib64/libportaudio.a
+endif

UNAME=$(shell uname)
ifeq ($(UNAME), Linux)
@@ -178,7 +178,11 @@ void SplitStatsByMap(const BuildTreeStatsType &stats, const EventMap &e, std::ve
    const EventType &evec = iter->first;
    EventAnswerType ans;
    if (!e.Map(evec, &ans))  // this is an error--could not map it.
-      KALDI_ERR << "SplitStatsByMap: could not map event vector " << EventTypeToString(evec);
+      KALDI_ERR << "SplitStatsByMap: could not map event vector " << EventTypeToString(evec)
+                << "; if this error is seen during tree-building, check that "
+                << "--context-width and --central-position match stats, "
+                << "and that phones that are context-independent (CI) during "
+                << "stats accumulation do not share roots with non-CI phones.";
    size = std::max(size, (size_t)(ans+1));
  }
  stats_out->resize(size);