sandbox/online: merging changes from trunk

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4261 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Dan Povey 2014-08-06 01:49:09 +00:00
Parents: 3b2a6582b6 55e226c097
Commit: 0a6f544715
89 changed files: 3120 additions and 719 deletions

View File

@ -17,6 +17,7 @@ bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the
# non-bottleneck system (which has 80000).
bnf_decode_acwt=0.066666
# DNN hybrid system training parameters
dnn_num_hidden_layers=4
dnn_input_dim=4000
@ -48,9 +49,16 @@ if [[ `hostname` == *.tacc.utexas.edu ]] ; then
sgmm_train_extra_opts=( )
sgmm_group_extra_opts=( --num_iters 25 )
sgmm_denlats_extra_opts=( --num-threads 2 )
sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2")
dnn_denlats_extra_opts=( --num-threads 2 )
dnn_parallel_opts="-l gpu=1"
dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
--parallel-opts "-pe smp 16" )
dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 8 --num-threads 1)
dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
dnn_parallel_opts="-l gpu=1"
else
decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=0.7G")
sgmm_train_extra_opts=( --num-iters 25 )

View File

@ -49,14 +49,15 @@ dnn_update_egs_opts=(--weight-threshold 0.7 --splice-width 4 --samples-per-iter
if [[ `hostname` == *.tacc.utexas.edu ]] ; then
decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
sgmm_train_extra_opts=( )
sgmm_train_extra_opts=( --num-iters 25 )
sgmm_group_extra_opts=( )
sgmm_denlats_extra_opts=( --num-threads 1 )
dnn_denlats_extra_opts=( --num-threads 1 )
dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
--parallel-opts "-pe smp 16" )
dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1)
dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1
--parallel-opts "-pe smp 16" )
dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1)

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=24
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list
@ -46,19 +49,20 @@ shadow_data_dir=(
/export/babel/data/102-assamese/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Assamese_Babel102/uem/102-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Assamese_Babel102/dev.list
/export/babel/data/splits/Assamese_Babel102/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Assamese_Babel102/uem/102-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
shadow_nj=32
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
@ -74,7 +78,6 @@ numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
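An aside, not part of the diff: the *_more_kwlists settings above are bash associative arrays mapping an extra KWS task name to its keyword-list XML. A standalone sketch (declare -A added so it runs on its own, reusing the paths from this config) of how the run scripts later in this commit iterate such arrays:
declare -A dev10h_more_kwlists=(
  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
for extraid in "${!dev10h_more_kwlists[@]}"; do
  echo "extra KWS task '$extraid' uses ${dev10h_more_kwlists[$extraid]}"
done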

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=24
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list
@ -46,19 +49,20 @@ shadow_data_dir=(
/export/babel/data/102-assamese/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Assamese_Babel102/uem/102-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Assamese_Babel102/dev.list
/export/babel/data/splits/Assamese_Babel102/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Assamese_Babel102/uem/102-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
)
shadow_nj=32
unsup_data_dir=(/export/babel/data/102-assamese//release-current/conversational/training/
/export/babel/data/102-assamese//release-current/conversational/untranscribed-training/
)
@ -83,7 +87,6 @@ numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=12
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval
eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list
@ -46,15 +49,17 @@ shadow_data_dir=(
/export/babel/data/103-bengali/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Bengali_Babel103/uem/103-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Bengali_Babel103/dev.list
/export/babel/data/splits/Bengali_Babel103/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Bengali_Babel103/uem/103-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
)
shadow_nj=32
@ -73,7 +78,6 @@ numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=12
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval
eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list
@ -46,15 +49,17 @@ shadow_data_dir=(
/export/babel/data/103-bengali/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Bengali_Babel103/uem/103-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Bengali_Babel103/dev.list
/export/babel/data/splits/Bengali_Babel103/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Bengali_Babel103/uem/103-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
)
shadow_nj=32
@ -82,7 +87,6 @@ numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[LimitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=20
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[LimitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list
@ -46,15 +49,17 @@ shadow_data_dir=(
/export/babel/data/201-haitian/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Haitian_Babel201/uem/201-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Haitian_Babel201/dev.list
/export/babel/data/splits/Haitian_Babel201/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Haitian_Babel201/uem/201-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
)
shadow_nj=32

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[LimitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=20
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[LimitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list
@ -46,15 +49,17 @@ shadow_data_dir=(
/export/babel/data/201-haitian/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Haitian_Babel201/uem/201-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Haitian_Babel201/dev.list
/export/babel/data/splits/Haitian_Babel201/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Haitian_Babel201/uem/201-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
)
shadow_nj=32

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval
eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list
@ -46,27 +49,20 @@ shadow_data_dir=(
/export/babel/data/203-lao/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Lao_Babel203/uem/203-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Lao_Babel203/dev.list
/export/babel/data/splits/Lao_Babel203/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Lao_Babel203/uem/203-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
)
shadow_nj=32
unsup_data_dir=(/export/babel/data/203-lao/release-current/conversational/training/
/export/babel/data/203-lao/release-current/conversational/untranscribed-training/
)
unsup_data_list=(
/export/babel/data/splits/Lao_Babel203/train.LimitedLP.untranscribed.list
/export/babel/data/splits/Lao_Babel203/train.untranscribed.list
)
unsup_nj=64
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000

View File

@ -15,7 +15,8 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval
eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list
@ -46,15 +49,17 @@ shadow_data_dir=(
/export/babel/data/203-lao/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Lao_Babel203/uem/203-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Lao_Babel203/dev.list
/export/babel/data/splits/Lao_Babel203/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Lao_Babel203/uem/203-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
)
shadow_nj=32

View File

@ -14,9 +14,12 @@ dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
#dev2h_more_kwlists=(
# [limitedLP]=
# )
dev2h_more_kwlists=(
[bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
[bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
[ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
[ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
@ -33,19 +36,41 @@ dev10h_more_kwlists=(
[bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
[ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
[ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
)
dev10h_nj=32
unsup_data_dir=(/export/babel/data/204-tamil/release-current/conversational/training/
/export/babel/data/204-tamil/release-current/conversational/untranscribed-training/
)
unsup_data_cmudb=/export/babel/data/splits/Tamil_Babel204/uem/db-unsup-jhuseg-v8-utt.dat
unsup_data_list=(
/export/babel/data/splits/Tamil_Babel204/train.LimitedLP.untranscribed.list
/export/babel/data/splits/Tamil_Babel204/train.untranscribed.list
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/
eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list
eval_data_cmudb=/export/babel/data/splits/Tamil_Babel204/uem/db-shadow-jhuseg-v8-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
eval_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
eval_nj=64
#Shadow data files
shadow_data_dir=(
/export/babel/data/204-tamil/release-current/conversational/dev/
/export/babel/data/204-tamil/release-current/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Tamil_Babel204/uem/204-shadow-v0-utt.dat
shadow_data_list=(
/export/babel/data/splits/Tamil_Babel204/dev.list
/export/babel/data/splits/Tamil_Babel204/eval.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
shadow_more_kwlists=(
[bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
[bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
[ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
[ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
)
unsup_nj=64
shadow_nj=64
# Acoustic model parameters
numLeavesTri1=1000

View File

@ -14,9 +14,12 @@ dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
#dev2h_more_kwlists=(
# [limitedLP]=
# )
dev2h_more_kwlists=(
[bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
[bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
[ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
[ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
@ -33,10 +36,41 @@ dev10h_more_kwlists=(
[bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
[ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
[ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/
eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list
eval_data_cmudb=/export/babel/data/splits/Tamil_Babel204/uem/db-shadow-jhuseg-v8-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
eval_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
eval_nj=64
#Shadow data files
shadow_data_dir=(
/export/babel/data/204-tamil/release-current/conversational/dev/
/export/babel/data/204-tamil/release-current/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Tamil_Babel204/uem/204-shadow-v0-utt.dat
shadow_data_list=(
/export/babel/data/splits/Tamil_Babel204/dev.list
/export/babel/data/splits/Tamil_Babel204/eval.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
shadow_more_kwlists=(
[bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
[bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
[ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
[ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
)
shadow_nj=64
unsup_data_dir=(/export/babel/data/204-tamil/release-current/conversational/training/
/export/babel/data/204-tamil/release-current/conversational/untranscribed-training/
)

View File

@ -15,8 +15,9 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
)
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval
eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list
@ -46,15 +49,17 @@ shadow_data_dir=(
/export/babel/data/206-zulu/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Zulu_Babel206/dev.list
/export/babel/data/splits/Zulu_Babel206/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
)
shadow_nj=32

View File

@ -15,8 +15,9 @@ dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
)
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
@ -29,10 +30,12 @@ dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-de
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[limitedLP]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval
eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list
@ -46,18 +49,21 @@ shadow_data_dir=(
/export/babel/data/206-zulu/release-current/conversational/dev
/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
shadow_data_list=(
/export/babel/data/splits/Zulu_Babel206/dev.list
/export/babel/data/splits/Zulu_Babel206/eval.list
)
shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-utt.dat
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
shadow_more_kwlists=(
[FullLPdev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
[llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
)
shadow_nj=32
unsup_data_dir=(/export/babel/data/206-zulu/release-current/conversational/training/
/export/babel/data/206-zulu/release-current/conversational/untranscribed-training/
)

egs/babel/s5b/export_systems.sh (new executable file, 33 lines added)
View File

@ -0,0 +1,33 @@
#!/bin/bash
set -e
set -o pipefail
. ./cmd.sh; . ./path.sh;
#(
#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it*
#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it*
#) &
#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem*
#wait
(
bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it*
#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it*
) &
bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem
wait
wait
exit
bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release
bash make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.uem --master dev10h.uem lang.conf ./release
bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release
bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
bash make_release.sh --dryrun false --dir exp/tri6b_nnet --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
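Side note, not from the commit itself: the filtering above uses the common bash pattern of a backgrounded subshell plus wait, so the BNF and DNN outputs are filtered concurrently. A minimal standalone sketch of that pattern:
( sleep 1; echo "background job (e.g. BNF filtering) done" ) &
echo "foreground job (e.g. DNN filtering) runs meanwhile"
wait   # block until the backgrounded subshell has exited
echo "both finished"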

View File

@ -0,0 +1,125 @@
min_lmwt=5
max_lmwt=25
cer=0
cmd=run.pl
. ./utils/parse_options.sh
min_lmwt_start=$min_lmwt
max_lmwt_start=$max_lmwt
datadir=$1; shift
name=$1; shift
. ./lang.conf
set -e
set -o pipefail
[ ! -d $datadir/compounds/$name ] && echo "Component called $name does not exist" && exit 1
ecf=$datadir/compounds/$name/ecf.xml
cat $ecf | grep -P -o '(?<=audio_filename\=")[^"]*' > $datadir/compounds/$name/files.list
filelist=$datadir/compounds/$name/files.list
[ -f $datadir/compounds/$name/rttm ] && rttm=$datadir/compounds/$name/rttm
[ -f $datadir/compounds/$name/stm ] && stm=$datadir/compounds/$name/stm
if [ -f $ecf ] ; then
duration=`head -1 $ecf |\
grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2.0;}'`
echo "INFO: Using duration $duration seconds (from ECF)."
else
echo "WARNING: Using default duration. ECF wasn't specified?"
duration=9999
fi
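# Illustration, not part of the change: for an ECF header line such as
#   <ecf source_signal_duration="36000.0" language="..." version="...">
# the grep/perl pipeline above pulls out 36000.0 and prints 18000, i.e. the
# script uses half of the duration attribute it finds in the ECF header.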
inputname=`basename $datadir`
outputname=$name
while (( "$#" )); do
resultdir=$1;shift
echo "Processing data directory $resultdir"
[ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1;
targetdir=$resultdir/$outputname
min_existing=
max_existing=
for lmw in `seq $min_lmwt_start $max_lmwt_start`; do
[ -d $resultdir/score_$lmw ] && [ -z $min_existing ] && min_existing=$lmw
[ -d $resultdir/score_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw
done
[ -z $min_existing ] && echo "Data directories to be scored could not be found!" && exit 1
[ -z $max_existing ] && echo "Data directories to be scored could not be found!" && exit 1
min_lmwt=$min_existing
max_lmwt=$max_existing
echo "Found data directories for range LMWT=$min_lmwt:$max_lmwt"
$cmd LMWT=$min_lmwt:$max_lmwt $targetdir/scoring/filter.LMWT.log \
set -e';' set -o pipefail';' \
mkdir -p $targetdir/score_LMWT/';'\
test -f $resultdir/score_LMWT/$inputname.ctm '&&' \
utils/filter_scp.pl $filelist $resultdir/score_LMWT/$inputname.ctm '>' \
$targetdir/score_LMWT/$outputname.ctm || exit 1
if [ ! -z $stm ] && [ -f $stm ] ; then
echo "For scoring CTMs, this STM is used $stm"
local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $datadir/compounds/$name data/lang $targetdir
else
echo "Not running scoring, $datadir/compounds/$name/stm does not exist"
fi
kws_tasks="kws"
for kws in `cat $datadir/extra_kws_tasks`; do
kws_tasks+=" ${kws}_kws"
done
for kws in $kws_tasks ; do
echo "Processing KWS task: $kws"
mkdir -p $targetdir/$kws
filter=$targetdir/$kws/utterances
grep -F -f $filelist $datadir/segments | tee $targetdir/$kws/segments | \
awk '{print $1, $2}' | tee $targetdir/$kws/utter_map |\
awk '{print $1}' > $filter
kwlist=$datadir/$kws/kwlist.xml
echo -e "\tFiltering..."
#$cmd LMWT=$min_lmwt:$max_lmwt $targetdir/$kws/kws_filter.LMWT.log \
# set -e';' set -o pipefail';' \
# mkdir -p $targetdir/${kws}_LMWT';'\
# cat $resultdir/${kws}_LMWT/'result.*' \| grep -F -f $filter \> $targetdir/${kws}_LMWT/result || exit 1
$cmd LMWT=$min_lmwt:$max_lmwt $targetdir/$kws/kws_filter.LMWT.log \
set -e';' set -o pipefail';' \
mkdir -p $targetdir/${kws}_LMWT';'\
cat $resultdir/${kws}_LMWT/'result.*' \| utils/filter_scp.pl -f 1 $filter \> $targetdir/${kws}_LMWT/result || exit -1
echo -e "\tWrite normalized..."
$cmd LMWT=$min_lmwt:$max_lmwt $targetdir/$kws/kws_write_normalized.LMWT.log \
set -e';' set -o pipefail';' \
cat $targetdir/${kws}_LMWT/result \| \
utils/write_kwslist.pl --flen=0.01 --duration=$duration \
--segments=$targetdir/$kws/segments --normalize=true --remove-dup=true\
--map-utter=$targetdir/$kws/utter_map --digits=3 - $targetdir/${kws}_LMWT/kwslist.xml || exit 1
echo -e "\tWrite unnormalized..."
$cmd LMWT=$min_lmwt:$max_lmwt $targetdir/$kws/kws_write_unnormalized.LMWT.log \
set -e';' set -o pipefail';' \
cat $targetdir/${kws}_LMWT/result \| \
utils/write_kwslist.pl --flen=0.01 --duration=$duration \
--segments=$targetdir/$kws/segments --normalize=false --remove-dup=true\
--map-utter=$targetdir/$kws/utter_map - $targetdir/${kws}_LMWT/kwslist.unnormalized.xml || exit 1
if [ ! -z $rttm ] ; then
echo -e "\tScoring..."
$cmd LMWT=$min_lmwt:$max_lmwt $targetdir/$kws/kws_score.LMWT.log \
set -e';' set -o pipefail';' \
local/kws_score.sh --ecf $ecf --rttm $rttm --kwlist $kwlist $datadir $targetdir/${kws}_LMWT || exit 1
else
echo -e "\tNot scoring..."
fi
done
done
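A note on the $cmd LMWT=$min_lmwt:$max_lmwt calls above (sketch only; the directory and log names below are made up): Kaldi's run.pl/queue.pl expand such a range into one job per value, substituting LMWT both in the command and in the log-file name, for example:
mkdir -p exp/demo/log
utils/run.pl LMWT=5:7 exp/demo/log/filter.LMWT.log \
  echo "filtering with language-model weight LMWT"
# runs three jobs and writes exp/demo/log/filter.5.log, filter.6.log, filter.7.log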

View File

@ -0,0 +1,229 @@
if [ -z $1 ] ; then
dir=`pwd`
else
dir=$1
fi
echo $dir
convertsecs() {
h=$(($1/3600))
m=$((($1/60)%60))
s=$(($1%60))
printf "%02d:%02d:%02d\n" $h $m $s
}
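# Worked example, for illustration only: convertsecs 3723 prints "01:02:03"
# (3723 s = 1 h + 2 min + 3 s); process() below uses it to turn the summed
# CPU and wall-clock seconds into readable times.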
function process {
count=1
if [ ! -z $1 ]; then
count=$1
fi
replace=""
for a in `seq 1 $count` ; do
replace+="\t"
done
(
eval `grep "group=all"`
echo -n "threads=$total_threads"
echo -n " cpu_time=$total_cpu_time wall_time=$clock_time"
echo -n " human_cpu_time="`convertsecs $total_cpu_time`
echo -n " human_wall_time="`convertsecs $clock_time`
echo ""
) | sed 's/^/'$replace'/g'
}
function legend {
echo -ne '"'"$@"'" '
}
legend Parameterization dev/train
local/summarize_logs.pl $dir/exp/make_*/*train*/ | process
if [ -d $dir/data/local/extend ] ; then
legend "Extending the lexicon"
local/summarize_logs.pl $dir/data/local/extend/tmp/log | process
fi
legend "Training upto stage tri5"
local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process
legend "SGMM2 stage training"
local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process
legend "SGMM2+bMMI stage training"
local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process
nnet=tri6_nnet
[ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet
legend "DNN stage training GPU"
local/summarize_logs.pl $dir/exp/$nnet/log | process
legend "BNF stage training"
local/summarize_logs.pl $dir/exp_bnf/tri6_bnf/log | process
legend "BNF stage training GPU"
local/summarize_logs.pl $dir/exp_bnf/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
$dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
legend "SEGMENTATION TRAINING: "
local/summarize_logs.pl $dir/exp/tri4_train_seg_ali/log \
$dir/exp/make_plp_pitch/train_seg/ \
$dir/exp/tri4b_seg/log | process
semisup=exp_bnf_semisup2
if [ -d $dir/param_bnf_semisup ] || [ -d $dir/param_bnf_semisup2 ] ; then
[ ! -d $dir/$semisup ] && semisup=exp_bnf_semisup
decode=unsup.seg
legend "BNF_SEMISUP training, segmentation "
local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
$dir/exp/make_seg/$decode/make_plp/ \
$dir/exp/tri4b_seg/decode_${decode}/log \
$dir/exp/make_plp/$decode | process
legend "BNF_SEMISUP training, ecode unsup.seg TRI5 "
local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process
legend "BNF_SEMISUP training, ecode unsup.seg PLP "
local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
legend "BNF_SEMISUP training, ecode unsup.seg DNN "
local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process
legend "BNF_SEMISUP training, data preparation for BNF_SEMISUP "
local/summarize_logs.pl $dir/exp/combine2_post/unsup.seg/log \
$dir/exp/combine2_post/unsup.seg/decode_unsup.seg/log\
$dir/exp/tri6_nnet_ali/log | process
legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF GPU "
local/summarize_logs.pl $dir/$semisup/tri6_bnf/log | process
legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF "
local/summarize_logs.pl $dir/$semisup/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
$dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
fi
if [ -d $dir/exp/tri6_nnet_mpe ] ; then
legend "DNN_MPE stage CPU training"
local/summarize_logs.pl $dir/exp/tri6_nnet_ali/log/ \
$dir/exp/tri6_nnet_denlats/log/* | process
legend "DNN_MPE stage GPU training"
local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/log/ | process
fi
#~decode=dev10h.seg
#~legend "DEV10H.SEG decoding"
#~legend "Segmentation: "
#~local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
#~ $dir/exp/make_seg/$decode/make_plp/ \
#~ $dir/exp/tri4b_seg/decode_${decode}/log \
#~ $dir/exp/make_plp/$decode | process
#~legend "Decode $decode TRI5: "
#~local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process
#~legend "Decode $decode PLP: "
#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
#~legend "Decode $decode DNN: "
#~local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process
#~legend "Decode $decode PLP: "
#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
legend "G2P and confusion matrix: "
local/summarize_logs.pl $dir/exp/conf_matrix/log $dir/exp/g2p/log | process
if [ -d $dir/data/shadow2.uem ]; then
decode=shadow2.uem
else
decode=shadow.uem
fi
legend "Segmentation $decode: provided..."
echo
#--legend "Segmentation: "
#--local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
#-- $dir/exp/make_seg/$decode/make_plp/ \
#-- $dir/exp/tri4b_seg/decode_${decode}/log \
#-- $dir/exp/make_plp/$decode | process
legend "Parametrization: "
local/summarize_logs.pl $dir/exp/make_plp/$decode | process
legend "Decode $decode TRI5: "
local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process
legend "Decode $decode PLP: "
local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
legend "Decode $decode DNN: "
local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process
legend "Decode $decode BNF: "
local/summarize_logs.pl $dir/exp_bnf/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process
if [ -d $dir/$semisup ] ; then
legend "Decode $decode BNF_SEMISUP: "
local/summarize_logs.pl $dir/$semisup/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process
fi
if [ -d $dir/exp/tri6_nnet_mpe ] ; then
legend "Decode $decode DNN_MPE: "
local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/log | process
fi
legend "Indexing $decode PLP: "
local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process
legend "Indexing $decode DNN: "
local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/kws_indices*/log | process
legend "Indexing $decode BNF: "
local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process
if [ -d $dir/$semisup ] ; then
legend "Indexing $decode BNF_SEMISUP: "
local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process
fi
if [ -d $dir/exp/tri6_nnet_mpe ] ; then
legend "Indexing $decode DNN_MPE: "
local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/kws_indices*/log | process
fi
legend "Search $decode PLP: "
local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws \
$dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process
legend "Search $decode DNN: "
local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_kws \
$dir/exp/$nnet/decode_*${decode}*/evalKW_kws_*/log | process
legend "Search $decode BNF: "
local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws \
$dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process
if [ -d $dir/$semisup ] ; then
legend "Search $decode BNF_SEMISUP: "
local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws/ \
$dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws*/log | process
fi
if [ -d $dir/exp/tri6_nnet_mpe ] ; then
legend "Search $decode DNN_MPE: "
local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws \
$dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws*/log | process
fi
legend "Proxies generation: "
local/summarize_logs.pl $dir/data/$decode/evalKW_oov_kws/g2p/log \
$dir/data/$decode/evalKW_oov_kws/tmp/split/log | process
legend "Search $decode PLP: "
local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \
$dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process
legend "Search $decode DNN: "
local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws \
$dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws_*/log | process
legend "Search $decode BNF: "
local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \
$dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process
if [ -d $dir/$semisup ] ; then
legend "Search $decode BNF_SEMISUP: "
local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws/ \
$dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws*/log | process
fi
if [ -d $dir/exp/tri6_nnet_mpe ] ; then
legend "Search $decode DNN_MPE: "
local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws \
$dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws*/log | process
fi

View File

@ -122,6 +122,6 @@ if [ $nlex -ne $nwlist ] ; then
echo "WARNING: Lexicon : $nlex words"
echo "WARNING:Diff example: "
diff <(cut -f 1 $output_lex | sort -u ) \
<(cut -f 1 $output/wordlist.orig.txt | sort -u )
<(cut -f 1 $output/wordlist.orig.txt | sort -u ) || true
fi
exit 0

View File

@ -89,7 +89,15 @@ if [ ! -z "$oov_prob_file" ]; then
lmfile=$destdir/lm_tmp.gz
fi
gunzip -c $lmfile | \
if [[ $lmfile == *.bz2 ]] ; then
decompress="bunzip2 -c $lmfile"
elif [[ $lmfile == *.gz ]] ; then
decompress="gunzip -c $lmfile"
else
decompress="cat $lmfile"
fi
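# Illustration, not part of the change: with lmfile=lm.gz the block above sets
# decompress="gunzip -c lm.gz", with lmfile=lm.bz2 it picks bunzip2, and any
# other name falls through to plain "cat", so the ARPA-to-FST pipeline below
# no longer assumes a gzip-compressed LM.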
$decompress | \
grep -v '<s> <s>' | grep -v '</s> <s>' | grep -v '</s> </s>' | \
arpa2fst - | \
fstprint | \
@ -97,7 +105,7 @@ gunzip -c $lmfile | \
utils/s2eps.pl | \
fstcompile --isymbols=$langdir/words.txt \
--osymbols=$langdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > $destdir/G.fst || exit 1
fstrmepsilon | fstarcsort --sort_type=olabel > $destdir/G.fst || exit 1
fstisstochastic $destdir/G.fst || true;
if $cleanup; then

View File

@ -73,13 +73,10 @@ mkdir -p $out_decode
if [ $stage -lt -1 ]; then
mkdir -p $out_decode/log
if [ ! -f $out_decode/.best_path.done ]; then
$cmd JOB=1:$nj $out_decode/log/best_path.JOB.log \
lattice-best-path --acoustic-scale=0.1 \
"ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |" \
ark:/dev/null "ark:| gzip -c > $out_decode/best_path_ali.JOB.gz" || exit 1
touch $out_decode/.best_path.done
fi
$cmd JOB=1:$nj $out_decode/log/best_path.JOB.log \
lattice-best-path --acoustic-scale=0.1 \
"ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |" \
ark:/dev/null "ark:| gzip -c > $out_decode/best_path_ali.JOB.gz" || exit 1
fi
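# Note, for illustration only: in the lattice-best-path call above the word
# sequence of the best path is discarded (ark:/dev/null) and only the per-frame
# transition-id alignments are kept, gzipped per job as best_path_ali.JOB.gz
# for the steps that follow.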
weights_sum=0.0

View File

@ -14,36 +14,15 @@ fi
check_variables_are_set
if [ ! -f ${dataset_dir}/kws/.done ] ; then
if [ "$dataset_kind" == "shadow" ]; then
# we expect that the ${dev2shadow} as well as ${eval2shadow} already exist
if [ ! -f data/${dev2shadow}/kws/.done ]; then
echo "Error: data/${dev2shadow}/kws/.done does not exist."
echo "Create the directory data/${dev2shadow} first, by calling $0 --dir $dev2shadow --dataonly"
exit 1
fi
if [ ! -f data/${eval2shadow}/kws/.done ]; then
echo "Error: data/${eval2shadow}/kws/.done does not exist."
echo "Create the directory data/${eval2shadow} first, by calling $0 --dir $eval2shadow --dataonly"
exit 1
fi
local/kws_data_prep.sh --case_insensitive $case_insensitive \
"${icu_opt[@]}" \
data/lang ${dataset_dir} ${datadir}/kws || exit 1
utils/fix_data_dir.sh ${dataset_dir}
touch ${dataset_dir}/kws/.done
else # This will work for both supervised and unsupervised dataset kinds
kws_flags=(--use-icu true)
if [ "${dataset_kind}" == "supervised" ] ; then
kws_flags+=(--rttm-file $my_rttm_file )
fi
if $my_subset_ecf ; then
kws_flags+=(--subset-ecf $my_data_list)
fi
local/kws_setup.sh --case_insensitive $case_insensitive \
"${kws_flags[@]}" "${icu_opt[@]}" \
$my_ecf_file $my_kwlist_file data/lang ${dataset_dir} || exit 1
kws_flags=( --use-icu true )
if [ "${dataset_kind}" == "supervised" ] ; then
kws_flags+=(--rttm-file $my_rttm_file )
fi
if $my_subset_ecf ; then
kws_flags+=(--subset-ecf $my_data_list)
fi
local/kws_setup.sh --case_insensitive $case_insensitive \
"${kws_flags[@]}" "${icu_opt[@]}" \
$my_ecf_file $my_kwlist_file data/lang ${dataset_dir} || exit 1
touch ${dataset_dir}/kws/.done
fi

View File

@ -22,13 +22,23 @@ function register_extraid {
}
function setup_oov_search {
local nbest=500
#Basic lexicon
#local phone_beam=-1
#local phone_nbest=-1
#local beam=5
#local nbest=500
#Extended lexicon
local nbest=-1
local beam=-1
local phone_nbest=300
local phone_beam=5
local phone_cutoff=5
local g2p_nbest=10
local g2p_mass=0.95
local beam=5
local phone_beam=4
local phone_nbest=-1
local phone_cutoff=5
local data_dir=$1
local source_dir=$2
@ -37,10 +47,15 @@ function setup_oov_search {
local kwsdatadir=$data_dir/${extraid}_kws
mkdir -p $kwsdatadir
cp $source_dir/kwlist*.xml $kwsdatadir
cp $source_dir/ecf.xml $kwsdatadir
cp $source_dir/utter_* $kwsdatadir
[ -f $source_dir/rttm ] && cp $source_dir/rttm $kwsdatadir
if [ "${dataset_kind}" == "supervised" ] ; then
for file in $source_dir/rttm ; do
cp -f $file $kwsdatadir
done
fi
for file in $source_dir/utter_* $source_dir/kwlist*.xml $source_dir/ecf.xml ; do
cp -f $file $kwsdatadir
done
kwlist=$source_dir/kwlist_outvocab.xml
#Get the KW list
@ -84,55 +99,50 @@ function setup_oov_search {
}
if [ "$dataset_kind" == "shadow" ]; then
true #we do not support multiple kw lists for shadow set system
else # This will work for both supervised and unsupervised dataset kinds
kws_flags=( --use-icu true )
if [ "${dataset_kind}" == "supervised" ] ; then
#The presence of the file had been already verified, so just
#add the correct switches
kws_flags+=(--rttm-file $my_rttm_file )
fi
if $my_subset_ecf ; then
kws_flags+=(--subset-ecf $my_data_list)
fi
if [ ! -f $dataset_dir/.done.kws.oov ] ; then
setup_oov_search $dataset_dir $dataset_dir/kws oov
register_extraid $dataset_dir oov
touch $dataset_dir/.done.kws.oov
fi
if [ ${#my_more_kwlists[@]} -ne 0 ] ; then
touch $dataset_dir/extra_kws_tasks
for extraid in "${!my_more_kwlists[@]}" ; do
#The next line will help us in running only one. We don't really
#know in which directory the KWS setup will reside in, so we will
#place the .done file directly into the data directory
[ -f $dataset_dir/.done.kws.$extraid ] && continue;
kwlist=${my_more_kwlists[$extraid]}
local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \
"${kws_flags[@]}" "${icu_opt[@]}" \
$my_ecf_file $kwlist data/lang ${dataset_dir} || exit 1
#Register the dataset for default running...
#We can do it without any problem here -- the kws_stt_tasks will not
#run it, unless called with --run-extra-tasks true switch
register_extraid $dataset_dir $extraid
touch $dataset_dir/.done.kws.$extraid
done
for extraid in "${!my_more_kwlists[@]}" ; do
#The next line will help us in running only one. We don't really
#know in which directory the KWS setup will reside in, so we will
#place the .done file directly into the data directory
[ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue;
setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov
register_extraid $dataset_dir ${extraid}_oov
touch $dataset_dir/.done.kws.${extraid}_oov
done
fi
kws_flags=( --use-icu true )
if [ "${dataset_kind}" == "supervised" ] ; then
#The presence of the file had been already verified, so just
#add the correct switches
kws_flags+=(--rttm-file $my_rttm_file )
fi
if $my_subset_ecf ; then
kws_flags+=(--subset-ecf $my_data_list)
fi
if [ ! -f $dataset_dir/.done.kws.oov ] ; then
setup_oov_search $dataset_dir $dataset_dir/kws oov || exit 1
register_extraid $dataset_dir oov
touch $dataset_dir/.done.kws.oov
fi
if [ ${#my_more_kwlists[@]} -ne 0 ] ; then
touch $dataset_dir/extra_kws_tasks
for extraid in "${!my_more_kwlists[@]}" ; do
#The next line ensures we set up each task only once. We don't really
#know which directory the KWS setup will reside in, so we
#place the .done file directly into the data directory
[ -f $dataset_dir/.done.kws.$extraid ] && continue;
kwlist=${my_more_kwlists[$extraid]}
local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \
"${kws_flags[@]}" "${icu_opt[@]}" \
$my_ecf_file $kwlist data/lang ${dataset_dir} || exit 1
#Register the dataset for default running...
#We can do it without any problem here -- the kws_stt_tasks will not
#run it, unless called with --run-extra-tasks true switch
register_extraid $dataset_dir $extraid
touch $dataset_dir/.done.kws.$extraid
done
for extraid in "${!my_more_kwlists[@]}" ; do
#The next line ensures we set up each task only once. We don't really
#know which directory the KWS setup will reside in, so we
#place the .done file directly into the data directory
[ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue;
setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov
register_extraid $dataset_dir ${extraid}_oov
touch $dataset_dir/.done.kws.${extraid}_oov
done
fi

Просмотреть файл

@ -1,4 +1,4 @@
#!/usr/bin/perl
#!/usr/bin/env perl
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# Apache 2.0.

Просмотреть файл

@ -0,0 +1,101 @@
#! /usr/bin/env python
import argparse, sys
from argparse import ArgumentParser
import re
def main():
parser = ArgumentParser(description='Convert kaldi data directory to uem dat files',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--verbose', type=int, \
dest='verbose', default=0, \
help='Give higher verbose for more logging')
parser.add_argument('--get-text', action='store_true', \
help='Get text in dat file')
parser.add_argument('--prefix', type=str, \
help='Add db file name as db-<prefix>-{utt/spk}.dat')
parser.add_argument('kaldi_dir', \
help='Kaldi data directory')
parser.add_argument('output_dir', \
help='Directory to store uem dat files')
parser.usage=':'.join(parser.format_usage().split(':')[1:]) \
+ 'e.g. : %(prog)s --prefix 203-lao-v0 data/dev10h.seg CMU_db'
options = parser.parse_args()
if options.get_text:
try:
text_file = open(options.kaldi_dir+'/text', 'r')
except IOError as e:
repr(e)
sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/text'))
sys.exit(1)
try:
segments_file = open(options.kaldi_dir+'/segments', 'r')
except IOError as e:
repr(e)
sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/segments'))
sys.exit(1)
try:
scp_file = open(options.kaldi_dir+'/wav.scp', 'r')
except IOError as e:
repr(e)
sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/wav.scp'))
sys.exit(1)
reco2file_map = {}
for line in scp_file.readlines():
splits = line.strip().split()
m = re.search(r".*/(?P<file_name>[0-9A-Za-z_]*\.(sph|wav)).*", line)
if not m:
sys.stderr.write("%s does not contain a valid speech file (.wav or .sph)\n" % line.strip())
sys.exit(1)
reco2file_map[splits[0]] = m.group('file_name')
# End for
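# Illustrative example only (hypothetical ids and paths): a wav.scp line such as
#   file_01 /some/path/file_01.sph |
# maps the recording id "file_01" to the audio file name "file_01.sph".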
spk2utt_map = {}
if options.prefix == None:
prefix = options.kaldi_dir.split('/')[-1].split('.')[0]
else:
prefix = options.prefix
try:
utt_dat = open(options.output_dir+'/db-'+prefix+'-utt.dat', 'w')
spk_dat = open(options.output_dir+'/db-'+prefix+'-spk.dat', 'w')
except IOError as e:
repr(e)
sys.stderr.write("%s: Could not write dat files in %s\n" % (sys.argv[0], options.output_dir))
sys.exit(1)
for line in segments_file.readlines():
utt_id, file_id, start, end = line.strip().split()
if (options.get_text):
splits = text_file.readline().split()
while splits[0] < utt_id:
splits = text_file.readline().split()
text = ' '.join(splits[1:])
else:
text = ""
utt_dat.write("{UTTID %s} {UTT %s} {SPK %s} {FROM %s} {TO %s} {TEXT %s}\n" % (utt_id, utt_id, file_id, start, end, text))
spk2utt_map.setdefault(file_id, [])
spk2utt_map[file_id].append(utt_id)
for spk, utts in spk2utt_map.items():
try:
spk_dat.write("{SEGS %s} {ADC %s} {CONV %s.wav} {CHANNEL 1} {DUR }\n" % (' '.join(utts), reco2file_map[spk], spk))
except KeyError as e:
repr(e)
sys.stderr.write("%s: Error in getting file for %s\n" % (sys.argv[0], spk))
sys.exit(1)
# End for
segments_file.close()
utt_dat.close()
spk_dat.close()
if __name__ == '__main__':
main()
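# For illustration (hypothetical ids, derived from the write formats above),
# each line of db-<prefix>-utt.dat has the form
#   {UTTID utt_001} {UTT utt_001} {SPK file_01} {FROM 0.00} {TO 2.50} {TEXT some words}
# and each line of db-<prefix>-spk.dat has the form
#   {SEGS utt_001 utt_002} {ADC file_01.sph} {CONV file_01.wav} {CHANNEL 1} {DUR }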

Просмотреть файл

@ -85,6 +85,7 @@ fi
mkdir -p $kwsdatadir
if [ -z $subset_ecf ] ; then
test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml
cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1
else
local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml
@ -107,10 +108,12 @@ if $kwlist_wordlist ; then
echo '</kwlist>'
) > $kwsdatadir/kwlist.xml || exit 1
else
test -f $kwsdatadir/kwlist.xml && rm -f $kwsdatadir/kwlist.xml
cp "$kwlist_file" $kwsdatadir/kwlist.xml || exit 1
fi
if [ ! -z $rttm_file ] ; then
test -f $kwsdatadir/rttm && rm -f $kwsdatadir/rttm
cp "$rttm_file" $kwsdatadir/rttm || exit 1
fi

egs/babel/s5b/local/lonestar.py Executable file
Просмотреть файл

@ -0,0 +1,333 @@
#!/usr/bin/env python
from pylauncher import *
import pylauncher
import sys
import os
import errno
def make_path(path):
try:
os.makedirs(path)
except OSError as exception:
if exception.errno != errno.EEXIST:
raise
elif not os.path.isdir(path):
raise
def tail(n, filename):
import subprocess
p=subprocess.Popen(['tail','-n',str(n),filename], stdout=subprocess.PIPE)
soutput,sinput=p.communicate()
soutput=soutput.split("\n")
return soutput
def KaldiLauncher(lo, **kwargs):
import time;
jobid = JobId()
debug = kwargs.pop("debug","")
qdir= os.path.join(lo.qdir, lo.taskname);
cores = lo.nof_threads;
ce=SSHExecutor(workdir=qdir, debug=debug, force_workdir=True, catch_output=True)
ce.outstring="out."
ce.execstring=lo.taskname + "."
hostpool=HostPool(hostlist=HostListByName(), commandexecutor=ce )
completion=lambda x:FileCompletion( taskid=x, stamproot="done.", stampdir=qdir)
logfiles = list()
commands = list()
for q in xrange(lo.jobstart, lo.jobend+1):
s = "bash " + lo.queue_scriptfile + " " + str(q)
commands.append(s)
logfile = lo.logfile.replace("${PY_LAUNCHER_ID}", str(q))
logfiles.append(logfile)
generator=ListCommandlineGenerator(list=commands, cores=cores)
tasks = TaskGenerator(generator, completion=completion, debug=debug )
job = LauncherJob( hostpool=hostpool, taskgenerator=tasks, debug=debug,**kwargs)
job.run()
#At this point all the .done files should exist and everything should be finalized.
num_failed=0;
time.sleep(1); #Let's wait a while to give the shared fs time to sync
error_pending=True
for logfile in logfiles:
import time
sched_rate=[0, 0.5, 1, 2, 4, 8, 15, 32 ];
for delay in sched_rate:
time.sleep(delay);
if os.path.isfile(logfile):
break;
if not os.path.isfile(logfile):
sys.stderr.write("ERROR: " + "The following file is missing:\n")
sys.stderr.write("ERROR: " + "\t" + logfile + "\n")
sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n");
sys.exit(-1);
error_pending=True;
for delay in sched_rate:
time.sleep(delay);
lines=tail(10, logfile)
with_status=filter(lambda x:re.search(r'with status (\d+)', x), lines)
if len(with_status) == 0:
sys.stderr.write("The last line(s) of the log-file " + logfile + " does not seem"
" to indicate return status as expected\n");
elif len(with_status) > 1:
sys.stderr.write("The last line(s) of the log-file " + logfile + " does seem"
" to indicate multiple return statuses \n");
else:
status_re=re.search(r'with status (\d+)', with_status[0]);
status=status_re.group(1);
if status == '0':
error_pending=False;
break;
sys.stderr.write("INFO: Waiting for status in files, sleeping %d seconds\n" % (delay,))
if error_pending:
num_failed+=1;
if num_failed != 0:
sys.stderr.write(sys.argv[0] + ": " + str(num_failed) + "/" + str(len(logfiles)) + " failed \n");
sys.stderr.write(sys.argv[0] + ": See " + lo.logfile.replace("${PY_LAUNCHER_ID}", "*" ) + " for details\n");
sys.exit(-1);
#Remove service files. Be careful not to remove something that might be needed in problem diagnostics
for i in xrange(len(commands)):
out_file=os.path.join(qdir, ce.outstring+str(i))
#First, let's wait for files that are missing (they might be missing
#just because of slow shared filesystem synchronization)
if not os.path.isfile(out_file):
import time
sched_rate=[0.5, 1, 2, 4, 8 ];
for delay in sched_rate:
time.sleep(delay);
if os.path.isfile(out_file):
break;
if not os.path.isfile(out_file):
sys.stderr.write("ERROR: " + "The following file is missing:\n")
sys.stderr.write("ERROR: " + "\t" + out_file + "\n")
sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n");
sys.exit(-1);
if os.stat(out_file).st_size != 0:
sys.stderr.write("ERROR: " + "The following file has non-zero size:\n")
sys.stderr.write("ERROR: " + "\t" + out_file + "\n")
sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n");
sys.exit(-1);
else:
exec_file=os.path.join(qdir, ce.execstring+str(i))
done_file=os.path.join(qdir, "done."+str(i))
if (not os.path.isfile(exec_file) ) or (not os.path.isfile(done_file)):
sys.stderr.write("ERROR: " + "One of the following files is missing:\n")
sys.stderr.write("ERROR: " + "\t" + exec_file + "\n")
sys.stderr.write("ERROR: " + "\t" + done_file + "\n")
sys.stderr.write("ERROR: " + "\t" + out_file + "\n")
sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n");
sys.exit(-1);
elif os.stat(done_file).st_size != 0:
sys.stderr.write("ERROR: " + "The following file has non-zero size:\n")
sys.stderr.write("ERROR: " + "\t" + done_file + "\n")
sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n");
sys.exit(-1);
else:
os.remove(exec_file)
os.remove(done_file)
os.remove(out_file)
try:
os.rmdir(qdir)
except OSError:
sys.stderr.write("ERROR: " + "Failed to remove the pylauncher task dir " + qdir + "\n");
sys.stderr.write("ERROR: " + "Find out what is wrong and fix it\n")
sys.exit(-1);
#print job.final_report()
class LauncherOpts:
def __init__(self):
self.sync=0
self.nof_threads = 1
self.qsub_opts = None
self.jobname=None
self.jobstart=None
self.jobend=None
pass
def CmdLineParser(argv):
import re;
sync=0
qsub_opts=''
nof_threads=1
while len(argv) >= 2 and argv[0].startswith('-'):
switch = argv.pop(0);
if switch == '-V':
qsub_opts += switch + ' ';
else:
option = argv.pop(0)
if switch == "-sync" and (option in ['Y', 'y']):
sync=1;
qsub_opts += switch + ' ' + option + ' ';
if switch == "-pe":
option2 = argv.pop(0);
qsub_opts += option2 + ' ';
nof_threads = int(option2);
#Now we have to parse the JOB specifier
jobname = ""
jobstart = 0
jobend = 0
if (re.match( r"^[A-Za-z_]\w*=\d+:\d+$", argv[0])):
m=re.match( r"^([A-Za-z_]\w*)=(\d+):(\d+)$", argv[0])
jobname=m.group(1)
jobstart=int(m.group(2))
jobend=int(m.group(3))
argv.pop(0)
elif(re.match( r"^[A-Za-z_]\w*=\d+$", argv[0])):
m=re.match( r"^([A-Za-z_]\w*)=(\d+)$", argv[0])
jobname=m.group(1)
jobstart=int(m.group(2))
jobend=int(m.group(2))
argv.pop(0)
elif re.match("^.+=.*:.*$", argv[0]):
print >> sys.stderr, "warning: suspicious JOB argument " + argv[0];
if jobstart > jobend:
sys.stderr.write("lonestar.py: JOBSTART("+ str(jobstart) + ") must be lower than JOBEND(" + str(jobend) + ")\n")
sys.exit(1)
logfile=argv.pop(0)
opts=LauncherOpts()
opts.sync = sync
opts.nof_threads=nof_threads;
opts.qsub_opts=qsub_opts
opts.varname=jobname
opts.jobstart=jobstart
opts.jobend=jobend
opts.logfile=logfile
opts.cmd = escape_cmd(argv);
return (opts, argv)
def escape_cmd(argv):
cmd =""
for x in argv:
#print x + " -> ",
if re.search("^\S+$", x):
#print " A -> ",
cmd += x + " "
elif '"' in x:
cmd += "'''" + x + "''' "
else:
cmd += "\"" + x + "\" "
#print cmd
return cmd
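# Illustrative example: escape_cmd(["echo", "a b", 'say "hi"']) returns
#   echo "a b" '''say "hi"'''
# i.e. plain tokens are kept as-is, tokens containing whitespace are double-quoted,
# and tokens that already contain double quotes are wrapped in triple single quotes.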
def setup_paths_and_vars(opts):
cwd = os.getcwd()
if opts.varname and (opts.varname not in opts.logfile ) and (opts.jobstart != opts.jobend):
print >>sys.stderr, "lonestar.py: you are trying to run a parallel job" \
"but you are putting the output into just one log file (" + opts.logfile + ")";
sys.exit(1)
if not os.path.isabs(opts.logfile):
opts.logfile = os.path.join(cwd, opts.logfile);
logfile=opts.logfile
dir = os.path.dirname(logfile)
base = os.path.basename(logfile)
qdir = os.path.join(dir, "q");
if re.search("log/*q", qdir, flags=re.IGNORECASE):
qdir = re.sub("log/*q", "/q", qdir, flags=re.IGNORECASE)
queue_logfile= os.path.join(qdir, base)
if opts.varname:
queue_logfile = re.sub("\.?"+opts.varname, "", queue_logfile)
taskname=os.path.basename(queue_logfile)
taskname = taskname.replace(".log", "");
if taskname == "":
print >> sys.stderr, "lonestar.py: you specified the log file name in such form " \
"that leads to an empty task name ("+logfile + ")";
sys.exit(1)
if not os.path.isabs(queue_logfile):
queue_logfile= os.path.join(cwd, queue_logfile)
if opts.varname:
opts.logfile = opts.logfile.replace(opts.varname, "${PY_LAUNCHER_ID}")
opts.cmd = opts.cmd.replace(opts.varname, "${PY_LAUNCHER_ID}");
queue_scriptfile=queue_logfile;
if re.search("\.[a-zA-Z]{1,5}$", queue_scriptfile):
queue_scriptfile = re.sub("\.[a-zA-Z]{1,5}$", ".sh", queue_scriptfile);
if not os.path.isabs(queue_scriptfile):
queue_scriptfile= os.path.join(cwd, queue_scriptfile)
make_path(qdir)
make_path(dir)
opts.qdir = qdir
opts.log_dir = dir
opts.queue_scriptfile = queue_scriptfile
opts.queue_logfile = queue_logfile
opts.taskname = taskname
return opts
def create_scriptfile(scriptname, opts):
import os
logfile = opts.logfile
cmd = opts.cmd
nof_threads=opts.nof_threads;
cwd = os.getcwd()
#print scriptname
f = open(scriptname, "wb")
f.write("#!/bin/bash\n")
f.write("export PY_LAUNCHER_ID=$1; shift;\n")
f.write("cd " + cwd + "\n")
f.write(". ./path.sh\n")
f.write("( echo '#' Running on `hostname`\n")
f.write(" echo '#' Started at `date`\n")
f.write(" echo -n '# '; cat <<EOF\n")
f.write(cmd + "\n")
f.write("EOF\n")
f.write(") > " +logfile + "\n")
f.write("time1=`date +\"%s\"`\n")
f.write("( " + cmd + ") 2>>" + logfile + " >>" + logfile + " \n")
f.write("ret=$?\n")
f.write("time2=`date +\"%s\"`\n")
f.write("echo '#' Accounting time=$(($time2 - $time1)) threads=" + str(nof_threads) + " >> " + logfile + "\n")
f.write("echo '#' Finished at `date` with status $ret >>" + logfile + "\n")
f.write("exit $ret \n")
f.close()
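# Note: the "# Accounting time=<seconds> threads=<n>" line written to the log above
# matches the format parsed by parse_accounting_entry in the accounting summarizer
# further down in this change.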
if __name__ == "__main__":
(opts, cmd) = CmdLineParser(sys.argv[1:]);
setup_paths_and_vars(opts)
create_scriptfile(opts.queue_scriptfile, opts);
#pylauncher.ClassicLauncher(["true && sleep 10s", "false || sleep 1s" ], debug="job+host+task+exec+ssh")
KaldiLauncher(opts, debug="")

egs/babel/s5b/local/map_lang.sh Executable file
Просмотреть файл

@ -0,0 +1,81 @@
#!/bin/bash
VARIABLES=`diff <(compgen -A variable) <(. ./lang.conf.orig; compgen -A variable) | grep '^>'| sed 's/^> *//g'`
. ./conf/common_vars.sh
. ./lang.conf.orig
for variable in $VARIABLES ; do
eval VAL=\$${variable}
if [[ $VAL =~ /export/babel/data/ ]] ; then
eval $variable=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
#declare -x $variable
declare -p $variable
fi
done
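# The three loops below apply the same rewrite to the *_data_list, *_data_dir and
# *_more_kwlists array variables: any value under /export/babel/data/ is remapped
# into /work/02359/jtrmal/ (assumed here to be the local work area on the TACC
# Lonestar cluster).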
for kwlist in $( (compgen -A variable) | grep _data_list ) ; do
declare -p $kwlist
eval KEYS="\${!${kwlist}[@]}"
#declare -p my_more_kwlist_keys
for key in $KEYS # make sure you include the quotes there
do
#echo $key
eval VAL="\${${kwlist}[$key]}"
#echo $my_more_kwlist_val
if [[ $VAL =~ /export/babel/data/ ]] ; then
eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
fi
done
declare -p $kwlist
done
unset VAL
unset KEYS
for kwlist in $( (compgen -A variable) | grep _data_dir ) ; do
declare -p $kwlist
eval KEYS="\${!${kwlist}[@]}"
#declare -p my_more_kwlist_keys
for key in $KEYS # make sure you include the quotes there
do
#echo $key
eval VAL="\${${kwlist}[$key]}"
#echo $my_more_kwlist_val
if [[ $VAL =~ /export/babel/data/ ]] ; then
eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
fi
done
declare -p $kwlist
done
unset VAL
unset KEYS
for kwlist in $( (compgen -A variable) | grep _more_kwlists ) ; do
declare -p $kwlist
eval KEYS="\${!${kwlist}[@]}"
#declare -p my_more_kwlist_keys
for key in $KEYS # make sure you include the quotes there
do
#echo $key
eval VAL="\${${kwlist}[$key]}"
#echo $my_more_kwlist_val
if [[ $VAL =~ /export/babel/data/ ]] ; then
eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
fi
done
declare -p $kwlist
done
unset VAL
unset KEYS
if [ "$babel_type" == "limited" ] ; then
train_nj=32
else
train_nj=64
fi
dev10h_nj=60
unsup_nj=120
shadow_nj=60
shadow2_nj=120
eval_nj=120

Просмотреть файл

@ -96,6 +96,7 @@ use Getopt::Long;
#
########################################################################
print STDERR "$0 " . join(" ", @ARGV) . "\n";
GetOptions("fragmentMarkers=s" => \$fragMarkers,
"oov=s" => \$OOV_symbol,
"vocab=s" => \$vocabFile,
@ -165,7 +166,7 @@ if (-d $TranscriptionDir) {
open (TRANSCRIPT, $inputspec) || die "Unable to open $filename";
while ($line=<TRANSCRIPT>) {
chomp $line;
if ($line =~ m:^\[([0-9]+\.*[0-9]*)\]$:) {
if ($line =~ m:^\s*\[([0-9]+\.*[0-9]*)\]\s*$:) {
$thisTimeMark = $1;
if ($thisTimeMark < $prevTimeMark) {
print STDERR ("$0 ERROR: Found segment with negative duration in $filename\n");
@ -245,6 +246,7 @@ if (-d $TranscriptionDir) {
} else {
# This is a just regular spoken word
if ($vocabFile && (! $inVocab{$w}) && $fragMarkers) {
print "Not in vocab: $w\n";
# $w is a potential OOV token
# Remove fragMarkers to see if $w becomes in-vocabulary
while ($w =~ m:^(\S+[$fragMarkers]|[$fragMarkers]\S+)$:) {

Просмотреть файл

@ -61,7 +61,7 @@ total_time=0
t1=$(date +%s)
if [ $stage -le 0 ] ; then
steps/decode_nolats.sh $decoder_extra_opts --write-words false --write-alignments true \
steps/decode_nolats.sh ${decode_extra_opts+} --write-words false --write-alignments true \
--cmd "$cmd" --nj $nj --beam $beam --max-active $max_active \
$model_dir/phone_graph $datadir $model_dir/decode_${dirid} || exit 1
fi
@ -122,7 +122,7 @@ mkdir -p $output_dir
mkdir -p $temp_dir/log
local/resegment/segmentation.py --verbose 2 $segmentation_opts \
$temp_dir/pred $temp_dir/phone_map.txt 2> $temp_dir/log/resegment.log | \
$temp_dir/pred $temp_dir/phone_map.txt 2>$temp_dir/log/resegment.log | \
sort > $output_dir/segments || exit 1
if [ ! -s $output_dir/segments ] ; then

Просмотреть файл

@ -31,8 +31,6 @@ skip_scoring=false
extra_kws=false
cmd=run.pl
max_states=150000
dev2shadow=
eval2shadow=
wip=0.5 #Word insertion penalty
#End of options
@ -53,16 +51,6 @@ data_dir=$1;
lang_dir=$2;
decode_dir=$3;
type=normal
if [ ! -z ${dev2shadow} ] && [ ! -z ${eval2shadow} ] ; then
type=shadow
elif [ -z ${dev2shadow} ] && [ -z ${eval2shadow} ] ; then
type=normal
else
echo "Switches --dev2shadow and --eval2shadow must be used simultaneously" > /dev/stderr
exit 1
fi
##NB: The first ".done" files are used for backward compatibility only
##NB: should be removed in a near future...
if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
@ -70,11 +58,7 @@ if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
--min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
$data_dir $lang_dir $decode_dir
if [[ "$type" == shadow* ]]; then
local/split_ctms.sh --cmd "$cmd" --cer $cer \
--min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
$data_dir $decode_dir ${dev2shadow} ${eval2shadow}
elif ! $skip_scoring ; then
if ! $skip_scoring ; then
local/score_stm.sh --cmd "$cmd" --cer $cer \
--min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
$data_dir $lang_dir $decode_dir
@ -84,15 +68,9 @@ fi
if ! $skip_kws ; then
if [ ! -f $decode_dir/.kws.done ] && [ ! -f $decode_dir/.done.kws ]; then
if [[ "$type" == shadow* ]]; then
local/shadow_set_kws_search.sh --cmd "$cmd" --max-states ${max_states} \
--min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
$data_dir $lang_dir $decode_dir ${dev2shadow} ${eval2shadow}
else
local/kws_search.sh --cmd "$cmd" --max-states ${max_states} \
--min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} --skip-scoring $skip_scoring\
--indices-dir $decode_dir/kws_indices $lang_dir $data_dir $decode_dir
fi
local/kws_search.sh --cmd "$cmd" --max-states ${max_states} \
--min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} --skip-scoring $skip_scoring\
--indices-dir $decode_dir/kws_indices $lang_dir $data_dir $decode_dir
touch $decode_dir/.done.kws
fi
if $extra_kws && [ -f $data_dir/extra_kws_tasks ]; then

Просмотреть файл

@ -0,0 +1,34 @@
#!/bin/bash
. path.sh
format=pdf # pdf svg
output=
. utils/parse_options.sh
if [ $# != 3 ]; then
echo "usage: $0 [--format pdf|svg] [--output <path-to-output>] <utt-id> <lattice-ark> <word-list>"
echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt"
exit 1;
fi
uttid=$1
lat=$2
words=$3
tmpdir=$(mktemp -d); trap "rm -r $tmpdir" EXIT # cleanup
gunzip -c $lat | lattice-to-fst ark:- ark,scp:$tmpdir/fst.ark,$tmpdir/fst.scp || exit 1
! grep "^$uttid " $tmpdir/fst.scp && echo "ERROR : Missing utterance '$uttid' from gzipped lattice ark '$lat'" && exit 1
fstcopy "scp:grep '^$uttid ' $tmpdir/fst.scp |" "scp:echo $uttid $tmpdir/$uttid.fst |" || exit 1
fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format}
if [ ! -z $output ]; then
cp $tmpdir/$uttid.${format} $output
fi
[ $format == "pdf" ] && evince $tmpdir/$uttid.pdf
[ $format == "svg" ] && eog $tmpdir/$uttid.svg
exit 0
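# Note: this helper assumes graphviz's dot is installed, plus evince (for pdf
# output) or eog (for svg output) to display the rendered lattice.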

Просмотреть файл

@ -0,0 +1,121 @@
#!/usr/bin/perl
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
#scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
sub split_hundreds { # split list of filenames into groups of 100.
my $names = shift @_;
my @A = split(" ", $names);
my @ans = ();
while (@A > 0) {
my $group = "";
for ($x = 0; $x < 100 && @A>0; $x++) {
$fname = pop @A;
$group .= "$fname ";
}
push @ans, $group;
}
return @ans;
}
sub parse_accounting_entry {
$entry= shift @_;
@elems = split " ", $entry;
$time=undef;
$threads=undef;
foreach $elem (@elems) {
if ( $elem=~ m/time=(\d+)/ ) {
$elem =~ s/time=(\d+)/$1/;
$time = $elem;
} elsif ( $elem=~ m/threads=(\d+)/ ) {
$elem =~ s/threads=(\d+)/$1/g;
$threads = $elem;
} else {
die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
}
}
if (defined($time) and defined($threads) ) {
return ($time, $threads);
} else {
die "The accounting entry \"$entry\" did not contain all necessary attributes";
}
}
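# Illustrative example: an accounting entry such as "time=1234 threads=4"
# yields ($time, $threads) = (1234, 4); any other token makes the parser die.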
foreach $dir (@ARGV) {
#$dir = $ARGV[0];
print "$dir\n";
! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ;
$dir =~ s:/$::; # Remove trailing slash.
# Group the files into categories where all have the same base-name.
foreach $f (glob ("$dir/*.log")) {
$f_category = $f;
# do next expression twice; s///g doesn't work as they overlap.
$f_category =~ s:\.\d+\.(?!\d+):.*.:;
#$f_category =~ s:\.\d+\.:.*.:;
$fmap{$f_category} .= " $f";
}
}
foreach $c (sort (keys %fmap) ) {
$n = 0;
foreach $fgroup (split_hundreds($fmap{$c})) {
$n += `grep -w WARNING $fgroup | wc -l`;
}
if ($n != 0) {
print "$n warnings in $c\n"
}
}
foreach $c (sort (keys %fmap)) {
$n = 0;
foreach $fgroup (split_hundreds($fmap{$c})) {
$n += `grep -w ERROR $fgroup | wc -l`;
}
if ($n != 0) {
print "$n errors in $c\n"
}
}
$supertotal_cpu_time=0.0;
$supertotal_clock_time=0.0;
$supertotal_threads=0.0;
foreach $c (sort (keys %fmap)) {
$n = 0;
$total_cpu_time=0.0;
$total_clock_time=0.0;
$total_threads=0.0;
foreach $fgroup (split_hundreds($fmap{$c})) {
$lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`;
#print $lines ."\n";
@entries = split "\n", $lines;
foreach $line (@entries) {
($time, $threads) = parse_accounting_entry($line);
$total_cpu_time += $time * $threads;
$total_threads += $threads;
if ( $time > $total_clock_time ) {
$total_clock_time = $time;
}
}
}
print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
$supertotal_cpu_time += $total_cpu_time;
$supertotal_clock_time += $total_clock_time;
$supertotal_threads += $total_threads;
}
print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";

Просмотреть файл

@ -1,57 +1,39 @@
#!/bin/bash
lp=
lr=
ar=
split=BaEval
team=RADICAL
corpusid=
partition=
scase=BaEval #BaDev|BaEval
sysid=
master=
version=1
relname=
exp=c
sysid=
prim=c
cer=0
dryrun=true
dir="exp/sgmm5_mmi_b0.1/"
extrasys=""
data=data/dev10h.seg
master=dev10h
final=false
dev2shadow=dev10h.uem
eval2shadow=eval.uem
team=RADICAL
#end of configuration
echo $0 " " "$@"
[ -f ./cmd.sh ] && . ./cmd.sh
[ -f ./path.sh ] && . ./path.sh
. ./utils/parse_options.sh
if [ $# -ne 2 ] ; then
echo "Invalid number of parameters!"
echo "Parameters " "$@"
echo "$0 --ar <NTAR|TAR> --lr <BaseLR|BabelLR|OtherLR> --lp <FullLP|LimitedLP> --relname <NAME> [--version <version-nr> ] <config> <output>"
exit 1
fi
[ -z $lp ] && echo "Error -- you must specify --lp <FullLP|LimitedLP>" && exit 1
if [ "$lp" != "FullLP" ] && [ "$lp" != "LimitedLP" ] ; then
echo "Error -- you must specify --lp <FullLP|LimitedLP>" && exit 1
fi
[ -z $lr ] && echo "Error -- you must specify --lr <BaseLR|BabelLR|OtherLR>" && exit 1
if [ "$lr" != "BaseLR" ] && [ "$lr" != "BabelLR" ] && [ "$lr" != "OtherLR" ] ; then
echo "Error -- you must specify --lr <BaseLR|BabelLR|OtherLR>" && exit 1
fi
[ -z $ar ] && echo "Error -- you must specify --ar <NTAR|TAR>" && exit 1
if [ "$ar" != "NTAR" ] && [ "$ar" != "TAR" ] ; then
echo "Error -- you must specify --ar <NTAR|TAR>" && exit 1
fi
[ -z $relname ] && echo "Error -- you must specify name" && exit 1
[ ! -f $1 ] && echo "Configuration $1 does not exist! " && exit 1
. $1
outputdir=$2
set -e
set -o pipefail
function export_file {
set -x
#set -x
source_file=$1
target_file=$2
if [ ! -f $source_file ] ; then
@ -61,12 +43,17 @@ function export_file {
if [ ! -f $target_file ] ; then
if ! $dryrun ; then
ln -s `readlink -f $source_file` $target_file || exit 1
ls -al $target_file
else
echo "$source_file -> $target_file"
fi
else
echo "The file is already there, not doing anything. Either change the version (using --version), or delete that file manually)"
exit 1
fi
fi
#set +x
return 0
}
@ -97,38 +84,227 @@ function export_kws_file {
return 0
}
if [[ "$eval_kwlist_file" == *.kwlist.xml ]] ; then
corpus=`basename $eval_kwlist_file .kwlist.xml`
elif [[ "$eval_kwlist_file" == *.kwlist2.xml ]] ; then
corpus=`basename $eval_kwlist_file .kwlist2.xml`
function find_best_kws_result {
local dir=$1
local mask=$2
local record=`(find $dir -name "sum.txt" -path "$mask" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | column -t | sort -r -n -k 3 | tail -n 1`
echo $record >&2
local file=`echo $record | awk -F ":" '{print $1}'`
#echo $file >&2
local path=`dirname $file`
#echo $path >&2
echo $path
}
function find_best_stt_result {
local dir=$1
local mask=$2
local record=`(find $dir -name "*.ctm.sys" -path "$mask" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1`
echo $record >&2
local file=`echo $record | awk -F ":" '{print $1}'`
#echo $file >&2
local path=`dirname $file`
#echo $path >&2
echo $path
}
function create_sysid {
local best_one=$1
local extrasys=$2
local taskid=`basename $best_one`
local system_path=`dirname $best_one`
if [[ $system_path =~ .*sgmm5.* ]] ; then
sysid=PLP
elif [[ $system_path =~ .*nnet.* ]] ; then
sysid=DNN
elif [[ $system_path =~ .*sgmm7.* ]] ; then
sysid=BNF
else
echo "Unknown system path ($system_path), cannot deduce the systemID" >&2
exit 1
fi
if [ ! -z $extrasys ]; then
sysid="${sysid}-${extrasys}"
fi
local kwsid=${taskid//kws_*/}
kwsid=${kwsid//_/}
if [ -z $kwsid ]; then
echo ${sysid}
else
echo ${sysid}-$kwsid
fi
}
function get_ecf_name {
local best_one=$1
local taskid=`basename $best_one`
local kwstask=${taskid//kws_*/kws}
local kwlist=
#echo $kwstask
if [ -z $kwstask ] ; then
#echo $data/kws/kwlist.xml
kwlist=`readlink -f $data/kws/kwlist.xml`
else
#echo $data/$kwstask/kwlist.xml
kwlist=`readlink -f $data/$kwstask/kwlist.xml`
fi
ecf=`head -n 1 $kwlist | grep -Po "(?<=ecf_filename=\")[^\"]*"`
echo -e "\tFound ECF: $ecf" >&2
echo $ecf
return 0
}
function compose_expid {
local task=$1
local best_one=$2
local extraid=$3
[ ! -z $extraid ] && extraid="-$extraid"
local sysid=`create_sysid $best_one $extrasys`
if [ "$task" == "KWS" ]; then
ext="kwslist.xml"
elif [ "$task" == "STT" ]; then
ext="ctm"
else
echo "Incorrect task ID ($task) given to compose_expid function!" >&2
exit 1
fi
echo "KWS14_${team}_${corpusid}_${partition}_${scase}_${task}_${prim}-${sysid}${extraid}_$version.$ext"
return 0
}
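# Illustrative example (values are hypothetical): with team=RADICAL, prim=c,
# version=1, a PLP system and the "oov" kws task, compose_expid KWS would yield
# something like
#   KWS14_RADICAL_<corpusid>_<partition>_<scase>_KWS_c-PLP-oov_1.kwslist.xml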
function figure_out_scase {
local ecf=`basename $1`
if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then
local basnam=${ecf%%.ecf.xml}
local scase=`echo $basnam | awk -F _ '{print $2}'`
if [ "$scase" = "conv-dev" ]; then
echo "BaDev"
elif [ "$scase" = "conv-eval" ]; then
echo "BaEval"
else
echo "WARNING: The ECF file $ecf is probably not an official file" >&2
echo "BaDev"
return 1
fi
else
echo "WARNING: The ECF file $ecf is probably not an official file" >&2
echo "BaDev"
return 1
fi
return 0
}
function figure_out_partition {
local ecf=`basename $1`
if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then
local basnam=${ecf%%.ecf.xml}
local scase=`echo $basnam | awk -F _ '{print $2}'`
if [ "$scase" = "conv-dev" ]; then
echo "conv-dev"
elif [ "$scase" = "conv-eval" ]; then
echo "conv-eval"
else
echo "WARNING: The ECF file $ecf is probably not an official file" >&2
echo "conv-dev"
return 1
fi
else
echo "WARNING: The ECF file $ecf is probably not an official file" >&2
echo "conv-dev"
return 1
fi
return 0
}
function figure_out_corpusid {
local ecf=`basename $1`
if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then
local basnam=${ecf%%.ecf.xml}
local corpusid=`echo $basnam | awk -F _ '{print $1}'`
else
echo "WARNING: The ECF file $ecf is probably not an official file" >&2
local corpusid=${ecf%%.*}
fi
echo $corpusid
}
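# Illustrative example (hypothetical file name): for an ECF named
#   IARPA-babelXXX_conv-dev.ecf.xml
# figure_out_corpusid returns "IARPA-babelXXX", figure_out_partition returns
# "conv-dev" and figure_out_scase returns "BaDev".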
#data=data/shadow.uem
dirid=`basename $data`
kws_tasks="kws "
[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'`
[ -d $data/compounds ] && compounds=`ls $data/compounds`
if [ -z "$compounds" ] ; then
for kws in $kws_tasks ; do
echo $kws
best_one=`find_best_kws_result "$dir/decode_*${dirid}*/${kws}_*" "*"`
sysid=`create_sysid $best_one $extrasys`
ecf=`get_ecf_name $best_one`
scase=`figure_out_scase $ecf` || break
partition=`figure_out_partition $ecf` || break
corpusid=`figure_out_corpusid $ecf`
echo -e "\tEXPORT as:" `compose_expid KWS $best_one`
done
else
echo "Unknown naming pattern of the kwlist file $eval_kwlist_file"
exit 1
[ -z $master ] && echo "You must choose the master compound (--master <compound>) for compound data set" && exit 1
for kws in $kws_tasks ; do
echo $kws
best_one=`find_best_kws_result "$dir/decode_*${dirid}*/$master/${kws}_*" "*"`
for compound in $compounds ; do
compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"`
echo -e "\tPREPARE EXPORT: $compound_best_one"
sysid=`create_sysid $compound_best_one $extrasys`
#ecf=`get_ecf_name $best_one`
ecf=`readlink -f $data/compounds/$compound/ecf.xml`
scase=`figure_out_scase $ecf`
partition=`figure_out_partition $ecf`
corpusid=`figure_out_corpusid $ecf`
expid=`compose_expid KWS $compound_best_one`
echo -e "\tEXPORT NORMALIZED as: $expid"
expid_unnormalized=`compose_expid KWS $compound_best_one "unnorm"`
echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized"
export_kws_file $compound_best_one/kwslist.xml $compound_best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid
export_kws_file $compound_best_one/kwslist.unnormalized.xml $compound_best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized
done
done
fi
#REMOVE the IARPA- prefix, if present
#corpus=${corpora##IARPA-}
scores=`find -L $dir -name "sum.txt" -path "*${dev2shadow}_${eval2shadow}*" | xargs grep "| Occurrence" | cut -f 1,13 -d '|'| sed 's/:|//g' | column -t | sort -k 2 -n -r `
[ -z "$scores" ] && echo "Nothing to export, exiting..." && exit 1
##EXporting STT -- more straightforward, because there is only one task
if [ -z "$compounds" ] ; then
best_one=`find_best_stt_result "$dir/decode_*${dirid}*/score_*" "*"`
echo -e "\tERROR: I don't know how to do this, yet"
ecf=`get_ecf_name kws`
sysid=`create_sysid $best_one $extrasys`
scase=`figure_out_scase $ecf` || break
partition=`figure_out_partition $ecf`
corpusid=`figure_out_corpusid $ecf`
expid=`compose_expid STT $best_one`
echo -e "\tEXPORT NORMALIZED as: $expid"
export_file $best_one/${dirid}.ctm $outputdir/$expid
else
[ -z $master ] && echo "You must choose the master compound (--master <compound>) for compound data set" && exit 1
best_one=`find_best_stt_result "exp/sgmm5_mmi_b0.1/decode_*${dirid}*/$master/score_*" "*"`
echo "$scores" | head
count=`echo "$scores" | wc -l`
echo "Total result files: $count"
best_score=`echo "$scores" | head -n 1 | cut -f 1 -d ' '`
for compound in $compounds ; do
compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"`
echo -e "\tPREPARE EXPORT: $compound_best_one"
sysid=`create_sysid $compound_best_one $extrasys`
#ecf=`get_ecf_name $best_one`
ecf=`readlink -f $data/compounds/$compound/ecf.xml`
scase=`figure_out_scase $ecf`
partition=`figure_out_partition $ecf`
corpusid=`figure_out_corpusid $ecf`
expid=`compose_expid STT $compound_best_one`
echo -e "\tEXPORT NORMALIZED as: $expid"
lmwt=`echo $best_score | sed 's:.*/kws_\([0-9][0-9]*\)/.*:\1:g'`
echo "Best scoring file: $best_score"
echo $lmwt
base_dir=`echo $best_score | sed "s:\\(.*\\)/${dev2shadow}_${eval2shadow}/.*:\\1:g"`
echo $base_dir
eval_dir=$base_dir/$eval2shadow/kws_$lmwt/
eval_kwlist=$eval_dir/kwslist.xml
eval_fixed_kwlist=$eval_dir/kwslist.fixed.xml
eval_export_kwlist=$outputdir/KWS13_${team}_${corpus}_${split}_KWS_${lp}_${lr}_${ar}_${relname}_${version}.kwslist.xml
echo "export_kws_file $eval_kwlist $eval_fixed_kwlist $eval_kwlist_file $eval_export_kwlist"
export_kws_file $eval_kwlist $eval_fixed_kwlist $eval_kwlist_file $eval_export_kwlist
export_file $compound_best_one/${compound}.ctm $outputdir/$expid
done
fi
echo "Everything looks fine, good luck!"
exit 0

Просмотреть файл

@ -100,7 +100,7 @@ if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
# Extend the original lexicon.
# Will creates the files data/local/extend/{lexiconp.txt,oov2prob}.
mv data/local/lexicon.txt data/local/lexicon_orig.txt
local/extend_lexicon.sh --cmd "$train_cmd" \
local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \
--num-sent-gen $num_sent_gen --num-prons $num_prons \
data/local/lexicon_orig.txt data/local/extend data/dev2h/text
cp data/local/extend/lexiconp.txt data/local/
@ -153,7 +153,7 @@ if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ||\
[ -f data/local/extend/original_oov_rates ] || exit 1;
unk_fraction=`cat data/local/extend/original_oov_rates |\
grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'`
extend_lexicon_param=(--unk-fraction $unk_fraction \
extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \
--oov-prob-file data/local/extend/oov2prob)
fi
local/arpa2G.sh ${extend_lexicon_param[@]} \
@ -177,6 +177,11 @@ if [ ! -f data/train/.plp.done ]; then
touch data/train/.plp.done
fi
echo -------------------------------------------------------------------------
echo "Extended lexicon finished on" `date`. Now run the script run-1-main.sh
echo -------------------------------------------------------------------------
exit 0
mkdir -p exp
if [ ! -f data/train_sub3/.done ]; then
@ -199,6 +204,11 @@ if [ ! -f data/train_sub3/.done ]; then
touch data/train_sub3/.done
fi
echo "------------------------------------------------------------------"
echo "Now run the script run-1-main.sh"
echo "------------------------------------------------------------------"
exit 0
if [ ! -f exp/mono/.done ]; then
echo ---------------------------------------------------------------------
echo "Starting (small) monophone training in exp/mono on" `date`

Просмотреть файл

@ -2,6 +2,7 @@
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
sgmm5_only=false
[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
@ -287,6 +288,11 @@ if [ ! -f exp/sgmm5/.done ]; then
touch exp/sgmm5/.done
fi
if $sgmm5_only ; then
echo "Exiting after stage SGMM5, as requested. "
echo "Everything went fine. Done"
exit 0;
fi
################################################################################
# Ready to start discriminative SGMM training
################################################################################

Просмотреть файл

@ -5,6 +5,7 @@
# This parameter will be used when the training dies at a certain point.
train_stage=-100
dir=exp/tri6_nnet
. ./utils/parse_options.sh
set -e
@ -17,7 +18,7 @@ echo "Waiting till exp/tri5_ali/.done exists...."
while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
echo "...done waiting for exp/tri5_ali/.done"
if [ ! -f exp/tri6_nnet/.done ]; then
if [ ! -f $dir/.done ]; then
steps/nnet2/train_pnorm.sh \
--stage $train_stage --mix-up $dnn_mixup \
--initial-learning-rate $dnn_init_learning_rate \
@ -27,7 +28,7 @@ if [ ! -f exp/tri6_nnet/.done ]; then
--pnorm-output-dim $dnn_output_dim \
--cmd "$train_cmd" \
"${dnn_cpu_parallel_opts[@]}" \
data/train data/lang exp/tri5_ali exp/tri6_nnet || exit 1
data/train data/lang exp/tri5_ali $dir || exit 1
touch exp/tri6_nnet/.done
touch $dir/.done
fi

Просмотреть файл

@ -1,7 +1,12 @@
#!/bin/bash
. conf/common_vars.sh
. ./lang.conf
. conf/common_vars.sh
train_stage=-10
dir=exp/tri6b_nnet
. ./utils/parse_options.sh
set -e
set -o pipefail
@ -12,12 +17,10 @@ dnn_pnorm_input_dim=3000
dnn_pnorm_output_dim=300
dnn_init_learning_rate=0.004
dnn_final_learning_rate=0.001
train_stage=-10
temp_dir=`pwd`/nnet_gpu_egs
ensemble_size=4
initial_beta=0.1
final_beta=5
dir=exp/tri6b_nnet
egs_dir=
# Wait till the main run.sh gets to the stage where's it's

Просмотреть файл

@ -1,4 +1,6 @@
#!/bin/bash
dir=exp/tri6_nnet
train_stage=-10
. conf/common_vars.sh
. ./lang.conf
@ -17,7 +19,7 @@ echo "Waiting till exp/tri5_ali/.done exists...."
while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
echo "...done waiting for exp/tri5_ali/.done"
if [ ! -f exp/tri6_nnet/.done ]; then
if [ ! -f $dir/.done ]; then
steps/nnet2/train_pnorm.sh \
--stage $train_stage --mix-up $dnn_mixup \
--initial-learning-rate $dnn_init_learning_rate \
@ -27,8 +29,8 @@ if [ ! -f exp/tri6_nnet/.done ]; then
--pnorm-output-dim $dnn_output_dim \
--cmd "$train_cmd" \
"${dnn_gpu_parallel_opts[@]}" \
data/train data/lang exp/tri5_ali exp/tri6_nnet || exit 1
data/train data/lang exp/tri5_ali $dir || exit 1
touch exp/tri6_nnet/.done
touch $dir/.done
fi

Просмотреть файл

@ -7,8 +7,6 @@ set -o pipefail
dir=dev10h.pem
dev2shadow=dev10h.uem
eval2shadow=eval.uem
kind=
data_only=false
fast_path=true
@ -19,7 +17,6 @@ max_states=150000
extra_kws=true
vocab_kws=false
wip=0.5
shadow_set_extra_opts=( --wip $wip )
echo "run-4-test.sh $@"
@ -46,8 +43,6 @@ dataset_type=${dir%%.*}
if [ -z ${kind} ] ; then
if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ] ; then
dataset_kind=supervised
elif [ "$dataset_type" == "shadow" ] ; then
dataset_kind=shadow
else
dataset_kind=unsupervised
fi
@ -139,55 +134,33 @@ function check_variables_are_set {
fi
}
if [ "$dataset_kind" == "shadow" ] ; then
# we expect that the ${dev2shadow} as well as ${eval2shadow} already exist
if [ ! -f data/${dev2shadow}/.done ]; then
echo "Error: data/${dev2shadow}/.done does not exist."
echo "Create the directory data/${dev2shadow} first"
echo "e.g. by calling $0 --type $dev2shadow --dataonly"
exit 1
fi
if [ ! -f data/${eval2shadow}/.done ]; then
echo "Error: data/${eval2shadow}/.done does not exist."
echo "Create the directory data/${eval2shadow} first."
echo "e.g. by calling $0 --type $eval2shadow --dataonly"
if [ ! -f data/raw_${dataset_type}_data/.done ]; then
echo ---------------------------------------------------------------------
echo "Subsetting the ${dataset_type} set"
echo ---------------------------------------------------------------------
l1=${#my_data_dir[*]}
l2=${#my_data_list[*]}
if [ "$l1" -ne "$l2" ]; then
echo "Error, the number of source files lists is not the same as the number of source dirs!"
exit 1
fi
local/create_shadow_dataset.sh ${dataset_dir} \
data/${dev2shadow} data/${eval2shadow}
utils/fix_data_dir.sh ${datadir}
nj_max=`cat $dataset_dir/wav.scp | wc -l`
my_nj=64
else
if [ ! -f data/raw_${dataset_type}_data/.done ]; then
echo ---------------------------------------------------------------------
echo "Subsetting the ${dataset_type} set"
echo ---------------------------------------------------------------------
l1=${#my_data_dir[*]}
l2=${#my_data_list[*]}
if [ "$l1" -ne "$l2" ]; then
echo "Error, the number of source files lists is not the same as the number of source dirs!"
exit 1
fi
resource_string=""
if [ "$dataset_kind" == "unsupervised" ]; then
resource_string+=" --ignore-missing-txt true"
fi
for i in `seq 0 $(($l1 - 1))`; do
resource_string+=" ${my_data_dir[$i]} "
resource_string+=" ${my_data_list[$i]} "
done
local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
touch data/raw_${dataset_type}_data/.done
resource_string=""
if [ "$dataset_kind" == "unsupervised" ]; then
resource_string+=" --ignore-missing-txt true"
fi
my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
for i in `seq 0 $(($l1 - 1))`; do
resource_string+=" ${my_data_dir[$i]} "
resource_string+=" ${my_data_list[$i]} "
done
local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
touch data/raw_${dataset_type}_data/.done
fi
my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
if [ "$nj_max" -lt "$my_nj" ] ; then
echo "Number of jobs ($my_nj) is too big!"
echo "The maximum reasonable number of jobs is $nj_max"
@ -234,10 +207,6 @@ if [ ! -f $dataset_dir/.done ] ; then
echo "Valid dataset types are: seg, uem, pem";
exit 1
fi
elif [ "$dataset_kind" == "shadow" ] ; then
#We don't actually have to do anything here
#The shadow dir is already set...
true
else
echo "Unknown kind of the dataset: \"$dataset_kind\"!";
echo "Valid dataset kinds are: supervised, unsupervised, shadow";
@ -303,13 +272,13 @@ if ! $fast_path ; then
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_plp_extra_opts[@]}" \
"${lmwt_plp_extra_opts[@]}" \
${dataset_dir} data/lang ${decode}
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_plp_extra_opts[@]}" \
"${lmwt_plp_extra_opts[@]}" \
${dataset_dir} data/lang ${decode}.si
fi
@ -337,7 +306,7 @@ if [ -f exp/sgmm5/.done ]; then
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_plp_extra_opts[@]}" \
"${lmwt_plp_extra_opts[@]}" \
${dataset_dir} data/lang exp/sgmm5/decode_fmllr_${dataset_id}
fi
fi
@ -371,7 +340,7 @@ if [ -f exp/sgmm5/.done ]; then
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_plp_extra_opts[@]}" \
"${lmwt_plp_extra_opts[@]}" \
${dataset_dir} data/lang $decode
done
fi
@ -397,7 +366,7 @@ if [ -f exp/tri6_nnet/.done ]; then
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_dnn_extra_opts[@]}" \
"${lmwt_dnn_extra_opts[@]}" \
${dataset_dir} data/lang $decode
fi
@ -423,7 +392,7 @@ if [ -f exp/tri6a_nnet/.done ]; then
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_dnn_extra_opts[@]}" \
"${lmwt_dnn_extra_opts[@]}" \
${dataset_dir} data/lang $decode
fi
@ -447,6 +416,31 @@ if [ -f exp/tri6b_nnet/.done ]; then
touch $decode/.done
fi
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${lmwt_dnn_extra_opts[@]}" \
${dataset_dir} data/lang $decode
fi
####################################################################
##
## DNN (ensemble) decoding
##
####################################################################
if [ -f exp/tri6c_nnet/.done ]; then
decode=exp/tri6c_nnet/decode_${dataset_id}
if [ ! -f $decode/.done ]; then
mkdir -p $decode
steps/nnet2/decode.sh \
--minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
--beam $dnn_beam --lat-beam $dnn_lat_beam \
--skip-scoring true "${decode_extra_opts[@]}" \
--transform-dir exp/tri5/decode_${dataset_id} \
exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
touch $decode/.done
fi
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
@ -476,7 +470,7 @@ if [ -f exp/tri6_nnet_mpe/.done ]; then
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_dnn_extra_opts[@]}" \
"${lmwt_dnn_extra_opts[@]}" \
${dataset_dir} data/lang $decode
done
fi
@ -505,7 +499,7 @@ for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
"${shadow_set_extra_opts[@]}" "${lmwt_dnn_extra_opts[@]}" \
"${lmwt_dnn_extra_opts[@]}" \
${dataset_dir} data/lang $decode
fi
done

Просмотреть файл

@ -17,7 +17,7 @@ skip_scoring=false
max_states=150000
wip=0.5
echo "$0 $@"
echo "run-5-test.sh $@"
. utils/parse_options.sh
@ -314,32 +314,34 @@ fi
##
####################################################################
for iter in 1 2 3 4; do
decode=exp/sgmm5/decode_fmllr_${dirid}
if [ ! -f $decode/.done ]; then
for iter in 1 2 3 4; do
# Decode SGMM+MMI (via rescoring).
decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dirid}_it$iter
if [ ! -f $decode/.done ]; then
decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dirid}_it$iter
if [ ! -f $decode/.done ]; then
mkdir -p $decode
steps/decode_sgmm2_rescore.sh --skip-scoring true \
--cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dirid} \
data/lang ${datadir} exp/sgmm5/decode_fmllr_${dirid} $decode | tee ${decode}/decode.log
mkdir -p $decode
steps/decode_sgmm2_rescore.sh --skip-scoring true \
--cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dirid} \
data/lang ${datadir} exp/sgmm5/decode_fmllr_${dirid} $decode | tee ${decode}/decode.log
touch $decode/.done
fi
done
touch $decode/.done
fi
done
#We are done -- all lattices have been generated. We have to
#a)Run MBR decoding
#b)Run KW search
for iter in 1 2 3 4; do
for iter in 1 2 3 4; do
# Decode SGMM+MMI (via rescoring).
decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dirid}_it$iter
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dirid}_it$iter
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --wip $wip \
"${shadow_set_extra_opts[@]}" "${lmwt_plp_extra_opts[@]}" \
${datadir} data/lang $decode
done
"${shadow_set_extra_opts[@]}" "${lmwt_plp_extra_opts[@]}" \
${datadir} data/lang $decode
done
fi
####################################################################
##

Просмотреть файл

@ -16,6 +16,7 @@ set -u #Fail on an undefined variable
skip_kws=true
skip_stt=false
semisupervised=true
unsup_string="_semisup"
bnf_train_stage=-100
bnf_weight_threshold=0.35
ali_dir=exp/tri6_nnet_ali
@ -31,7 +32,6 @@ fi
if $semisupervised ; then
unsup_string="_semi_supervised"
egs_string="--egs-dir exp_bnf${unsup_string}/tri6_bnf/egs"
else
unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training
@ -43,6 +43,22 @@ datadir=data/${dirid}
exp_dir=exp_bnf${unsup_string}
data_bnf_dir=data_bnf${unsup_string}
param_bnf_dir=param_bnf${unsup_string}
if [ -z $ali_dir ] ; then
# If no alignment directory was specified, use exp/tri6_nnet_ali as the
# alignment directory
ali_dir=exp/tri6_nnet_ali
fi
if [ ! -f $ali_dir/.done ]; then
echo "$0: Aligning supervised training data in exp/tri6_nnet_ali"
[ ! -f exp/tri6_nnet/final.mdl ] && echo -e "exp/tri6_nnet/final.mdl not found!\nRun run-6-nnet.sh first!" && exit 1
steps/nnet2/align.sh --cmd "$train_cmd" \
--use-gpu no --transform-dir exp/tri5_ali --nj $train_nj \
data/train data/lang exp/tri6_nnet $ali_dir || exit 1
touch $ali_dir/.done
fi
###############################################################################
#
# Semi-supervised BNF training
@ -50,22 +66,22 @@ param_bnf_dir=param_bnf${unsup_string}
###############################################################################
[ ! -d $datadir ] && echo "Error: $datadir is not available!" && exit 1;
mkdir -p $exp_dir/tri6_bnf
if $semisupervised ; then
echo "$0: Generate examples using unsupervised data in $exp_dir/tri6_nnet"
if [ ! -f $exp_dir/tri6_bnf/egs/.done ]; then
local/nnet2/get_egs_semi_supervised.sh \
"${egs_cpu_opts[@]}" --io-opts "$egs_io_opts" \
--transform-dir-sup exp/tri5_ali \
--transform-dir-unsup exp/tri5/decode_${dirid} \
--weight-threshold $bnf_weight_threshold \
data/train $datadir data/lang \
$ali_dir $decode_dir $exp_dir/tri6_bnf || exit 1;
touch $exp_dir/tri6_bnf/egs/.done
fi
fi
if [ ! -f $exp_dir/tri6_bnf/.done ]; then
if $semisupervised ; then
echo "$0: Generate examples using unsupervised data in $exp_dir/tri6_nnet"
if [ ! -f $exp_dir/tri6_bnf/egs/.done ]; then
local/nnet2/get_egs_semi_supervised.sh \
"${dnn_update_egs_opts[@]}" \
--transform-dir-sup exp/tri5_ali \
--transform-dir-unsup exp/tri5/decode_${dirid} \
--weight-threshold $bnf_weight_threshold \
data/train $datadir data/lang \
$ali_dir $decode_dir $exp_dir/tri6_bnf || exit 1;
touch $exp_dir/tri6_bnf/egs/.done
fi
fi
echo "$0: Train Bottleneck network"
steps/nnet2/train_tanh_bottleneck.sh \
--stage $bnf_train_stage --num-jobs-nnet $bnf_num_jobs \
@ -86,7 +102,7 @@ fi
if [ ! -f $data_bnf_dir/train_bnf/.done ]; then
mkdir -p $data_bnf_dir
# put the archives in ${param_bnf_dir}/.
steps/nnet2/dump_bottleneck_features.sh --nj $train_nj --cmd "$train_cmd" \
steps/nnet/make_bn_feats.sh --nj $train_nj --cmd "$train_cmd" \
--transform-dir exp/tri5 data/train $data_bnf_dir/train_bnf \
$exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf
touch $data_bnf_dir/train_bnf/.done
@ -95,7 +111,7 @@ fi
if [ ! $data_bnf_dir/train/.done -nt $data_bnf_dir/train_bnf/.done ]; then
steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
--nj $train_nj --transform-dir exp/tri5_ali $data_bnf_dir/train_sat data/train \
exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir/
exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir
steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
$data_bnf_dir/train_bnf $data_bnf_dir/train_sat $data_bnf_dir/train \

Просмотреть файл

@ -25,7 +25,7 @@ if [ $babel_type == "full" ] && $semisupervised; then
fi
if $semisupervised ; then
unsup_string="_semi_supervised"
unsup_string="_semisup"
else
unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training
fi
@ -45,8 +45,9 @@ if [ ! $exp_dir/sgmm7/.done -nt $exp_dir/ubm7/.done ]; then
echo ---------------------------------------------------------------------
echo "Starting $exp_dir/sgmm7 on" `date`
echo ---------------------------------------------------------------------
steps/train_sgmm2_group.sh \
--cmd "$train_cmd" "${sgmm_group_extra_opts[@]}"\
#steps/train_sgmm2_group.sh \
steps/train_sgmm2.sh \
--cmd "$train_cmd" "${sgmm_train_extra_opts[@]}"\
$numLeavesSGMM $bnf_num_gauss_sgmm $data_bnf_dir/train data/lang \
$exp_dir/tri6 $exp_dir/ubm7/final.ubm $exp_dir/sgmm7
touch $exp_dir/sgmm7/.done

Просмотреть файл

@ -38,6 +38,8 @@ if [ $# -ne 2 ]; then
exit 1
fi
set -u
unsup_datadir=$1
unsup_postdir=$2
unsup_dirid=`basename $unsup_datadir`
@ -57,12 +59,12 @@ fi
if [ ! -f $ali_dir/.done ]; then
echo "$0: Aligning supervised training data in exp/tri6_nnet_ali"
[ ! -f exp/tri6_nnet/final.mdl ] && echo -e "exp/tri6_nnet/final.mdl not found!\nRun run-6-nnet.sh first!" && exit 1
steps/nnet2/align.sh --cmd "$decode_cmd" \
steps/nnet2/align.sh --cmd "$train_cmd" \
--use-gpu no --transform-dir exp/tri5_ali --nj $train_nj \
data/train data/lang exp/tri6_nnet $ali_dir || exit 1
touch $ali_dir/.done
fi
exit 0
echo "$0: Using supervised data alignments from $ali_dir"
###############################################################################
@ -85,11 +87,10 @@ done
mkdir -p exp/tri6_nnet_semi_supervised
if [ ! -f exp/tri6_nnet_semi_supervised/.egs.done ] ; then
local/nnet2/get_egs_semi_supervised.sh $spk_vecs_opt \
"${egs_gpu_opts[@]}" --io-opts "$egs_io_opts" \
local/nnet2/get_egs_semi_supervised.sh --cmd "$train_cmd" \
"${dnn_update_egs_opts[@]}" \
--transform-dir-sup exp/tri5_ali \
--transform-dir-unsup exp/tri5/decode_${dirid} \
--weight-threshold $weight_threshold \
--transform-dir-unsup exp/tri5/decode_${unsup_dirid} \
data/train $unsup_datadir data/lang \
$ali_dir $unsup_postdir exp/tri6_nnet_semi_supervised || exit 1;

Просмотреть файл

@ -46,13 +46,13 @@ fi
if [ ! -f data_bnf/train_bnf/.done ]; then
mkdir -p data_bnf
# put the archives in plp/.
steps/nnet2/dump_bottleneck_features.sh --nj $train_nj --cmd "$train_cmd" \
steps/nnet/make_bn_feats.sh --nj $train_nj --cmd "$train_cmd" \
--transform-dir exp/tri5 data/train data_bnf/train_bnf exp_bnf/tri6_bnf param_bnf exp_bnf/dump_bnf
touch data_bnf/train_bnf/.done
fi
if [ ! data_bnf/train/.done -nt data_bnf/train_bnf/.done ]; then
steps/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
--nj $train_nj --transform-dir exp/tri5_ali data_bnf/train_sat data/train \
exp/tri5_ali exp_bnf/make_fmllr_feats/log param_bnf/

Просмотреть файл

@ -26,8 +26,8 @@ if [ ! exp_bnf/sgmm7/.done -nt exp_bnf/ubm7/.done ]; then
echo ---------------------------------------------------------------------
echo "Starting exp_bnf/sgmm7 on" `date`
echo ---------------------------------------------------------------------
steps/train_sgmm2_group.sh \
--cmd "$train_cmd" "${sgmm_group_extra_opts[@]}"\
steps/train_sgmm2.sh \
--cmd "$train_cmd" \
$numLeavesSGMM $bnf_num_gauss_sgmm data_bnf/train data/lang \
exp_bnf/tri6 exp_bnf/ubm7/final.ubm exp_bnf/sgmm7
touch exp_bnf/sgmm7/.done

Просмотреть файл

@ -27,7 +27,7 @@ if [ ! exp_bnf/tri6_ali_50/.done -nt exp_bnf/tri6/.done ]; then
echo "Aligning fMLLR system with 50 jobs"
echo ---------------------------------------------------------------------
steps/align_fmllr.sh \
--boost-silence $boost_sil --nj 50 --cmd "$train_cmd" \
--boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \
data_bnf/train_app data/lang exp_bnf/tri6 exp_bnf/tri6_ali_50
touch exp_bnf/tri6_ali_50/.done
fi

Просмотреть файл

@ -33,12 +33,12 @@ if [ $babel_type == "full" ] && $semisupervised; then
fi
if $semisupervised ; then
unsup_string="_semi_supervised"
unsup_string="_semisup"
else
unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training
fi
if ! echo {dev10h,dev2h,eval,unsup}{,.uem,.seg} | grep -w "$type" >/dev/null; then
if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.uem,.seg} | grep -w "$type" >/dev/null; then
# note: echo dev10.uem | grep -w dev10h will produce a match, but this
# doesn't matter because dev10h is also a valid value.
echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
@ -61,7 +61,7 @@ my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1;
if [ ! $data_bnf_dir/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \
[ ! $data_bnf_dir/${dirid}_bnf/.done -nt $exp_dir/tri6_bnf/.done ]; then
# put the archives in $param_bnf_dir/.
steps/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \
steps/nnet/make_bn_feats.sh --nj $my_nj --cmd "$train_cmd" \
--transform-dir exp/tri5/decode_${dirid} data/${dirid} $data_bnf_dir/${dirid}_bnf $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf
touch $data_bnf_dir/${dirid}_bnf/.done
fi
@ -77,10 +77,14 @@ if [ ! $data_bnf_dir/${dirid}/.done -nt $data_bnf_dir/${dirid}_bnf/.done ]; then
steps/compute_cmvn_stats.sh --fake $data_bnf_dir/${dirid} $exp_dir/make_fmllr_feats $param_bnf_dir
rm -r $data_bnf_dir/${dirid}_sat
if ! $skip_kws ; then
cp -r data/${dirid}/kws* $data_bnf_dir/${dirid}/
cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true
fi
touch $data_bnf_dir/${dirid}/.done
fi
if ! $skip_kws ; then
cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true
fi
if $data_only ; then
echo "Exiting, as data-only was requested... "
@ -179,6 +183,8 @@ for iter in 1 2 3 4; do
${datadir} data/lang $decode
done
exit 0
if [ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ] || \
[ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt exp_bnf/tri7_nnet/.done ]; then


@ -22,7 +22,7 @@ if [ $# -ne 0 ]; then
exit 1
fi
if ! echo {dev10h,dev2h,eval,unsup}{,.uem,.seg} | grep -w "$type" >/dev/null; then
if ! echo {shadow,dev10h,dev2h,eval,unsup}{,.uem,.seg,.pem} | grep -w "$type" >/dev/null; then
# note: echo dev10h.uem | grep -w dev10h will produce a match, but this
# doesn't matter because dev10h is also a valid value.
echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
@ -38,17 +38,19 @@ datadir=data_bnf/${dirid}
# Set my_nj; typically 64.
my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1;
test -d param_bnf || mkdir -p param_bnf
mkdir -p param_bnf
if [ ! data_bnf/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \
[ ! data_bnf/${dirid}_bnf/.done -nt exp_bnf/tri6_bnf/.done ]; then
# put the archives in param_bnf/.
steps/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \
local/nnet/make_bn_feats.sh --nj $my_nj --cmd "$train_cmd" \
--transform-dir exp/tri5/decode_${dirid} data/${dirid} data_bnf/${dirid}_bnf exp_bnf/tri6_bnf param_bnf exp_bnf/dump_bnf
touch data_bnf/${dirid}_bnf/.done
fi
if [ ! data_bnf/${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ]; then
steps/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
--nj 16 --transform-dir exp/tri5/decode_${dirid} data_bnf/${dirid}_sat data/${dirid} \
exp/tri5_ali exp_bnf/make_fmllr_feats/log param_bnf/
@ -62,6 +64,9 @@ if [ ! data_bnf/${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ]; then
fi
touch data_bnf/${dirid}/.done
fi
if ! $skip_kws ; then
cp -r data/${dirid}/*kws* data_bnf/${dirid}/ || true
fi
if $data_only ; then
@ -161,6 +166,9 @@ for iter in 1 2 3 4; do
${datadir} data/lang $decode
done
echo "$0: Everything looking good...."
exit 0
if [ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ] || \
[ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt exp_bnf/tri7_nnet/.done ]; then


@ -40,13 +40,13 @@ my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1;
if [ ! data_bnf/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \
[ ! data_bnf/${dirid}_bnf/.done -nt exp_bnf/tri6_bnf/.done ]; then
# put the archives in plp/.
steps/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \
steps/nnet/make_bn_feats.sh --nj $my_nj --cmd "$train_cmd" \
--transform-dir exp/tri5/decode_${dirid} data/${dirid} data_bnf/${dirid}_bnf exp_bnf/tri6_bnf param_bnf exp_bnf/dump_bnf
touch data_bnf/${dirid}_bnf/.done
fi
if [ ! data_bnf/${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ]; then
steps/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
--nj $train_nj --transform-dir exp/tri5/decode_${dirid} data_bnf/${dirid}_sat data/${dirid} \
exp/tri5_ali exp_bnf/make_fmllr_feats/log param_bnf
@ -59,6 +59,7 @@ if [ ! data_bnf/${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ]; then
touch data_bnf/${dirid}/.done
fi
decode=exp_bnf/tri7_nnet/decode_${dirid}
if [ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ] || \
[ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt exp_bnf/tri7_nnet/.done ]; then
@ -70,7 +71,6 @@ if [ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ]
utils/mkgraph.sh \
data/lang exp_bnf/tri6 exp_bnf/tri6/graph |tee exp_bnf/tri6/mkgraph.log
decode=exp_bnf/tri7_nnet/decode_${dirid}
if [ ! -f $decode/.done ]; then
mkdir -p $decode
steps/nnet2/decode.sh \
@ -84,11 +84,11 @@ if [ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ]
touch $decode/.done
fi
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --wip $wip \
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
${datadir} data/lang $decode
fi
local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --wip $wip \
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
${datadir} data/lang $decode
echo "$0: Everything looking good...."
exit 0

egs/babel/s5b/run-all.sh  Executable file (110 added lines)

@ -0,0 +1,110 @@
#!/bin/bash
export NJ=`(. ./lang.conf > /dev/null; echo $train_nj )`
export TYPE=`(. ./lang.conf > /dev/null; echo $babel_type )`
echo $NJ
echo $TYPE
if [ "$TYPE" == "limited" ]; then
T_SHORT="6:0:0"
T_MEDIUM="12:0:0"
T_LONG="24:0:0"
T_EXTREME="48:0:0"
BNF_NJ=$((16 * 4))
DNN_NJ=$((16 * 4))
elif [ "$TYPE" == "full" ]; then
T_SHORT="6:0:0"
T_MEDIUM="24:0:0"
T_LONG="48:0:0"
T_EXTREME="48:0:0"
BNF_NJ=$((16 * 8))
DNN_NJ=$((16 * 8))
else
echo "Unknown BABEL type! Exiting..."
exit 1
fi
export SBATCH_JOBID
function sbatch {
#echo "sbatch " "${@}"
output_name=""
for param in "${@}"; do
if [[ $param =~ ^\./.*sh ]]; then
output_name=`basename $param`
fi
done
if [ ! -z $output_name ]; then
output_name="-o ${output_name}.%j"
fi
#echo "OUTPUT: $output_name"
echo /usr/bin/sbatch --mail-type ALL --mail-user 'jtrmal@gmail.com' $output_name "${@}"
jobid=$(/usr/bin/sbatch --mail-type ALL --mail-user 'jtrmal@gmail.com' $output_name "${@}" | tee /dev/stderr | grep "Submitted batch job" | awk '{print $4}' )
SBATCH_JOBID=$jobid
}
sbatch -p normal -n $NJ -t $T_SHORT ./run-1-main.sh --tri5-only true
TRI5_ID=$SBATCH_JOBID
sbatch -p normal -n $NJ -t $T_LONG --dependency=afterok:$TRI5_ID ./run-1-main.sh
PLP_ID=$SBATCH_JOBID
sbatch -p normal -n $NJ -t $T_SHORT --dependency=afterok:$TRI5_ID ./run-2-segmentation.sh
SEG_ID=$SBATCH_JOBID
if [ "$TYPE" == "limited" ]; then
sbatch -p gpu -n $DNN_NJ -t $T_MEDIUM --dependency=afterok:$TRI5_ID ./run-2a-nnet-ensemble-gpu.sh --dir exp/tri6_nnet/
else
sbatch -p gpu -n $DNN_NJ -t $T_MEDIUM --dependency=afterok:$TRI5_ID ./run-2a-nnet-gpu.sh
DNN_ID=$SBATCH_JOBID
sbatch -p gpu -n $DNN_NJ -t $T_MEDIUM --dependency=afterok:$DNN_ID ./run-2a-nnet-mpe.sh
fi
DNN_ID=$SBATCH_JOBID
sbatch -p gpu -n $BNF_NJ -t 24:0:0 --dependency=afterok:$TRI5_ID ./run-8a-kaldi-bnf.sh
BNF_ID=$SBATCH_JOBID
sbatch -p normal -n $NJ -t $T_LONG --dependency=afterok:$BNF_ID ./run-8b-kaldi-bnf-sgmm.sh
BNF_SGMM_ID=$SBATCH_JOBID
#Decode DNNs and PLP systems
sbatch -p normal -n 128 -t $T_MEDIUM --dependency=afterok:$DNN_ID:$PLP_ID ./run-5-anydecode.sh --fast-path true --skip-kws true --type dev10h
DECODE_DNN_PLP_ID=$SBATCH_JOBID
sbatch -p normal -n 16 -t $T_MEDIUM --dependency=afterok:$DECODE_DNN_PLP_ID ./run-5-anydecode.sh --fast-path true
#Decode BNF systems
sbatch -p normal -n 128 -t $T_LONG --dependency=afterok:$BNF_SGMM_ID:$DECODE_DNN_PLP_ID ./run-8d-test-kaldi-bnf-sgmm.sh --skip-kws true --type dev10h
DECODE_BNF_SGMM_ID=$SBATCH_JOBID
sbatch -p normal -n 16 -t $T_MEDIUM --dependency=afterok:$DECODE_BNF_SGMM_ID ./run-8d-test-kaldi-bnf-sgmm.sh
exit 0
#For the discriminative training, we have to actually decode the unsup.seg
#The unsup.seg needs segmentation to be done, i.e. it depends on the individual systems and on the segmentation
if [ "$TYPE" == "limited" ]; then
#First, setup data
sbatch -p normal -n $NJ -t $T_LONG --dependency=afterok:$SEG_ID ./run-4-anydecode.sh --fast-path true --skip-scoring true --skip-kws true --dir unsup.seg --data-only true
UNSUP_DATA_PREPARED=$SBATCH_JOBID
sbatch -p normal -n 256 -t $T_LONG --dependency=afterok:$UNSUP_DATA_PREPARED:$DNN_ID:$PLP_ID ./run-4-anydecode.sh --fast-path true --skip-scoring true --skip-kws true --dir unsup.seg
SEMI_PARTA_ID=$SBATCH_JOBID
sbatch -p normal -n 256 -t $T_LONG --dependency=afterok:$UNSUP_DATA_PREPARED:$BNF_SGMM_ID:$DECODE_DNN_PLP_ID ./run-8d-test-kaldi-bnf-sgmm.sh --skip-kws true --skip-kws true --type unsup.seg
SEMI_PARTB_ID=$SBATCH_JOBID
fi
#
#
#We do not run BNF on top of the DNN by default (low performance)
#sbatch -p gpu -n $BNF_NJ -t 24:0:0 --dependency=afterok:$BNF_ID ./run-8c-kaldi-bnf-dnn.sh
#BNF_DNN_ID=$SBATCH_JOBID
#The decoding depends on the BNF-SGMM in the sense that it expects the data directories to be prepared.
#It can create the directories on its own, but do not run those two scripts in parallel -- since there is no locking,
#this will result in a crash as the scripts will overwrite each other's files
#sbatch -p normal -n 128 -t $T_LONG --dependency=afterok:$BNF_DNN_ID:$DECODE_DNN_PLP_ID:$DECODE_BNF_SGMM_ID ./run-8e-test-kaldi-bnf-dnn.sh --skip-kws true
#DECODE_BNF_DNN_ID=$SBATCH_JOBID
#sbatch -p normal -n 16 -t $T_MEDIUM --dependency=afterok:$DECODE_BNF_DNN_ID ./run-8e-test-kaldi-bnf-dnn.sh


@ -1,11 +1,10 @@
for x in exp/*/decode_dev; do grep WER $x/wer_* | utils/best_wer.sh; done
%WER 50.00 [ 19571 / 39141, 1893 ins, 4738 del, 12940 sub ] exp/tri1/decode_dev/wer_12
%WER 49.52 [ 19384 / 39141, 1774 ins, 5035 del, 12575 sub ] exp/tri2/decode_dev/wer_13
%WER 42.57 [ 16664 / 39141, 1908 ins, 4080 del, 10676 sub ] exp/tri3a/decode_dev/wer_12
%WER 35.67 [ 13963 / 39141, 1810 ins, 3347 del, 8806 sub ] exp/tri4a/decode_dev/wer_13
%WER 32.09 [ 12560 / 39141, 1680 ins, 3131 del, 7749 sub ] exp/tri5a/decode_dev/wer_14
%WER 49.72 [ 19461 / 39141, 1999 ins, 4578 del, 12884 sub ] exp/tri1/decode_dev/wer_12
%WER 49.00 [ 19181 / 39141, 1812 ins, 4848 del, 12521 sub ] exp/tri2/decode_dev/wer_13
%WER 41.86 [ 16384 / 39141, 1735 ins, 4152 del, 10497 sub ] exp/tri3a/decode_dev/wer_13
%WER 34.73 [ 13593 / 39141, 1719 ins, 3365 del, 8509 sub ] exp/tri4a/decode_dev/wer_14
%WER 31.07 [ 12163 / 39141, 1869 ins, 2705 del, 7589 sub ] exp/tri5a/decode_dev/wer_13
%WER 31.13 [ 12184 / 39141, 1939 ins, 2584 del, 7661 sub ] exp/tri5a_0.1/decode_dev/wer_12
%WER 23.66 [ 9259 / 39141, 1495 ins, 2432 del, 5332 sub ] exp/nnet6c4_gpu/decode_dev/wer_11


@ -5,17 +5,30 @@
stage=0
calldata=
while test $# -gt 0
do
case "$1" in
--calldata) calldata=1
;;
*) break;
;;
esac
shift
done
. utils/parse_options.sh
if [ $# -eq 0 ]; then
echo "$0 <fisher-dir-1> [<fisher-dir-2> ...]"
echo "$0 [--calldata] <fisher-dir-1> [<fisher-dir-2> ...]"
echo " e.g.: $0 /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19\\"
echo " /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13"
echo " (We also support a single directory that has the contents of all of them)"
echo " If specified, --calldata will be used to map Kaldi speaker ID to real"
echo " speaker PIN released with the Fisher corpus."
exit 1;
fi
# Check that the arguments are all absolute pathnames.
for dir in $*; do
@ -178,5 +191,17 @@ if [ $stage -le 4 ]; then
fi
fi
if [ ! -z "$calldata" ]; then # fix speaker IDs
cat $links/fe_03_p{1,2}_tran/doc/*calldata.tbl > $tmpdir/combined-calldata.tbl
local/fisher_fix_speakerid.pl $tmpdir/combined-calldata.tbl data/train_all
utils/utt2spk_to_spk2utt.pl data/train_all/utt2spk.new > data/train_all/spk2utt.new
# patch files
for f in spk2utt utt2spk text segments spk2gender; do
cp data/train_all/$f data/train_all/$f.old || exit 1;
cp data/train_all/$f.new data/train_all/$f || exit 1;
done
rm $tmpdir/combined-calldata.tbl
fi
echo "Data preparation succeeded"


@ -0,0 +1,114 @@
#!/usr/bin/perl -w
# Author: Peng Qi (pengqi@cs.stanford.edu)
# This script maps Fisher speaker IDs to the true physical speakers
# and fixes the utterance IDs accordingly. Expected to be run from one
# directory level above.
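# A worked example of the renaming (all IDs below are hypothetical): if
# combined-calldata.tbl lists PIN 12345 for channel A of conversation 00001,
# then utterance "fe_03_00001-A-0001" becomes "fe_03_12345-00001-A-0001" and
# its speaker ID becomes "fe_03_12345".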
sub trim {
(my $s = $_[0]) =~ s/^\s+|\s+$//g;
return $s;
}
if ($#ARGV != 1) {
print "Usage: swbd1_fix_speakerid.pl <fisher-calldata-tbl-file> <data-dir>\n";
print "E.g.: swbd1_fix_speakerid.pl data/local/train/combined-calldata.tbl data/train_all\n";
}
$tab_file = $ARGV[0];
$dir = $ARGV[1];
%conv_to_spk = ();
open(my $conv_tab, '<', $tab_file) or die "Could not open '$tab_file' $!\n";
while (my $line = <$conv_tab>) {
chomp $line;
my @fields = split "," , $line;
#$fields[0] = trim($fields[0]);
$fields[5] = trim($fields[5]);
$fields[10] = trim($fields[10]);
$conv_to_spk{'fe_03_' . $fields[0] . '-A'} = $fields[5];
$conv_to_spk{'fe_03_' . $fields[0] . '-B'} = $fields[10];
}
close($conv_tab);
# fix utt2spk
%missingconv = ();
open(my $utt2spk, '<', $dir . '/utt2spk') or die "Could not open '$dir/utt2spk' $!\n";
open(my $utt2spk_new, '>', $dir . '/utt2spk.new');
while (my $line = <$utt2spk>) {
chomp $line;
my @fields = split " " , $line;
my $convid = substr $fields[0], 0, 13;
if (exists $conv_to_spk{ $convid }) {
my $spkid = $conv_to_spk{ $convid };
$spkid = "fe_03_" . $spkid;
my $newuttid = $spkid . '-' . (substr $fields[0], 6);
print $utt2spk_new "$newuttid $spkid\n";
} else {
my $convid = substr $convid, 6, 5;
$missingconv{$convid} = 1;
print $utt2spk_new $fields[0]." ".$fields[1]."\n";
}
}
close($utt2spk);
close($utt2spk_new);
foreach my $conv (keys %missingconv) {
print "Warning: Conversation ID '$conv' not found in conv.tab, retaining old speaker IDs\n"
}
# fix spk2gender
if (open(my $spk2gender, '<', $dir . '/spk2gender')) {
open(my $spk2gender_new, '>', $dir . '/spk2gender.new');
while (my $line = <$spk2gender>) {
chomp $line;
my @fields = split " ", $line;
my $convid = $fields[0];
if (exists $conv_to_spk{ $convid }) {
my $spkid = $conv_to_spk{ $convid };
$spkid = "fe_03_" . $spkid;
print $spk2gender_new $spkid." ".$fields[1]."\n";
} else {
print $spk2gender_new $fields[0]." ".$fields[1]."\n";
}
}
close($spk2gender);
close($spk2gender_new);
}
# fix segments and text
foreach my $file ('segments','text') {
open(my $oldfile, '<', "$dir/$file") or die "Could not open '$dir/$file' $!\n";
open(my $newfile, '>', "$dir/$file.new");
while (my $line = <$oldfile>) {
chomp $line;
my $convid = substr $line, 0, 13;
if (exists $conv_to_spk{$convid}) {
my $spkid = $conv_to_spk{$convid};
print $newfile "fe_03_$spkid-" . (substr $line, 6) . "\n";
} else {
print $newfile "$line\n";
}
}
}


@ -0,0 +1,36 @@
#!/bin/bash
# This script shows how you can do data-cleaning, and exclude data that has a
# higher likelihood of being wrongly transcribed. see the RESULTS file; this
# made essentially no difference in our case-- indicating, perhaps, that Fisher
# transcripts are already clean enough.
. cmd.sh
. path.sh
set -e
steps/cleanup/find_bad_utts.sh --nj 200 --cmd "$train_cmd" data/train data/lang \
exp/tri5a exp/tri5a_cleanup
# with threshold of 0.05 we keep 1.1 million out of 1.6 million utterances, and
# around 8.7 million out of 18.1 million words
# with threshold of 0.1 we keep 1.3 million out of 1.6 million utterances, and around
# 13.2 million out of 18.1 million words.
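# (Judging from the awk filter below, each line of all_info.txt begins with
# "<utt-id> <num-errors> <num-reference-words>", so with thresh=0.1 an utterance
# with 50 reference words is kept only if it has at most 5 errors.)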
thresh=0.1
cat exp/tri5a_cleanup/all_info.txt | awk -v threshold=$thresh '{ errs=$2;ref=$3; if (errs <= threshold*ref) { print $1; } }' > uttlist
utils/subset_data_dir.sh --utt-list uttlist data/train data/train.thresh$thresh
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
data/train.thresh$thresh data/lang exp/tri4a exp/tri4a_ali_$thresh
steps/train_sat.sh --cmd "$train_cmd" \
10000 300000 data/train data/lang exp/tri4a_ali_$thresh exp/tri5a_$thresh || exit 1;
utils/mkgraph.sh data/lang_test exp/tri5a_$thresh exp/tri5a_$thresh/graph
steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
exp/tri5a_$thresh/graph data/dev exp/tri5a_$thresh/decode_dev


@ -10,6 +10,12 @@ set -e
# the next command produces the data in local/train_all
local/fisher_data_prep.sh /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \
/export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13
# You could also try specifying the --calldata argument to this command as below.
# If specified, the script will use actual speaker personal identification
# numbers released with the dataset, i.e. real speaker IDs. Note: --calldata has
# to be the first argument of this script.
# local/fisher_data_prep.sh --calldata /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \
# /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13
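# A quick optional sanity check after running with --calldata (the commands below
# are only a suggestion): the second column of utt2spk should now hold the real PINs.
# head -n 3 data/train_all/utt2spk
# awk '{print $2}' data/train_all/utt2spk | sort -u | wc -l   # number of distinct speakers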
# at BUT:
# local/fisher_data_prep.sh /mnt/matylda6/jhu09/qpovey/FISHER/LDC2005T19 /mnt/matylda2/data/FISHER/
@ -156,12 +162,12 @@ steps/train_sat.sh --cmd "$train_cmd" \
exp/tri5a/graph data/dev exp/tri5a/decode_dev
)&
#
# steps/cleanup/find_bad_utts.sh --nj 200 --cmd "$train_cmd" data/train data/lang \
# exp/tri5a exp/tri5a_cleanup
# The step below won't run by default; it demonstrates a data-cleaning method.
# It doesn't seem to help in this setup; maybe the data was clean enough already.
# local/run_data_cleaning.sh
# local/run_for_spkid.sh
# we don't have the results for the step below yet.
# local/run_nnet2.sh


@ -168,6 +168,8 @@ exit 0
# last time I created this.
# Per-frame cross-entropy training
%WER 1.66 [ 208 / 12533, 27 ins, 49 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3
%WER 7.80 [ 978 / 12533, 83 ins, 151 del, 744 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_ug/wer_6
# Sequence-based sMBR training
%WER 1.64 [ 206 / 12533, 24 ins, 49 del, 133 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it1/wer_4
%WER 1.62 [ 203 / 12533, 25 ins, 46 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it2/wer_4


@ -7,8 +7,14 @@
train_cmd="queue.pl -l arch=*64"
decode_cmd="queue.pl -l arch=*64"
# cuda_cmd is used for nnet1 scripts e.g. local/run_dnn.sh, but
# in the nnet2 scripts e.g. local/run_nnet2.sh, this is not
# used and we append options to train_cmd.
cuda_cmd="queue.pl -l arch=*64 -l gpu=1"
#train_cmd="run.pl"
# Do training locally. Note: for jobs on smallish subsets,
# with run.pl we do training locally. Note: for jobs on smallish subsets,
# it's way faster to run on a single machine with a handful of CPUs, as
# you avoid the latency of starting GridEngine jobs.


@ -2,5 +2,3 @@
first_beam=16.0
beam=20.0
lattice_beam=10.0
min_lmwt=2
max_lmwt=10


@ -41,7 +41,7 @@ if [ ! -f $dir/final.mdl ]; then
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
--num-jobs-nnet 4 \
--num-epochs-extra 10 --add-layers-period 1 \
--num-epochs 8 --num-epochs-extra 5 --add-layers-period 1 \
--num-hidden-layers 2 \
--mix-up 4000 \
--initial-learning-rate 0.02 --final-learning-rate 0.004 \


@ -66,6 +66,8 @@ if [ $stage -le 2 ]; then
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmmdir/graph $data_fmllr/test $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmmdir/graph_ug $data_fmllr/test $dir/decode_ug || exit 1;
fi
@ -92,6 +94,9 @@ if [ $stage -le 4 ]; then
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$gmmdir/graph $data_fmllr/test $dir/decode_it${ITER} || exit 1
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$gmmdir/graph_ug $data_fmllr/test $dir/decode_ug_it${ITER} || exit 1
done
fi
@ -100,3 +105,16 @@ exit 0
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
# Showing how model conversion to nnet2 works; note, we use the expanded variable
# names here so be careful in case the script changes.
# steps/nnet2/convert_nnet1_to_nnet2.sh exp/dnn4b_pretrain-dbn_dnn exp/dnn4b_nnet2
# cp exp/tri3b/splice_opts exp/tri3b/cmvn_opts exp/tri3b/final.mat exp/dnn4b_nnet2/
#
# steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
# --config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2/decode
# decoding results are essentially the same (any small difference is probably because
# decode.config != decode_dnn.config).
# %WER 1.58 [ 198 / 12533, 22 ins, 45 del, 131 sub ] exp/dnn4b_nnet2/decode/wer_3
# %WER 1.59 [ 199 / 12533, 23 ins, 45 del, 131 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3


@ -10,11 +10,18 @@
## you unpacked this. We are just doing a "find" command to locate
## the .sph files.
## The second input is optional; it should point to a directory containing the
## Switchboard transcriptions/documentation (specifically, the conv.tab file).
## If specified, the script will try to use the actual speaker PINs provided
## with the corpus instead of the conversation side ID (Kaldi default). We
## will be using "find" to locate this file so we don't make any assumptions
## on the directory structure. (Peng Qi, Aug 2014)
. path.sh
#check existing directories
if [ $# != 1 ]; then
echo "Usage: swbd1_data_prep_edin.sh /path/to/SWBD"
if [ $# != 1 -a $# != 2 ]; then
echo "Usage: swbd1_data_prep_edin.sh /path/to/SWBD [/path/to/SWBD_DOC]"
exit 1;
fi
@ -144,6 +151,17 @@ for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do
cp data/local/train/$f data/train/$f || exit 1;
done
if [ $# == 2 ]; then # fix speaker IDs
find $2 -name conv.tab > $dir/conv.tab
local/swbd1_fix_speakerid.pl `cat $dir/conv.tab` data/train
utils/utt2spk_to_spk2utt.pl data/train/utt2spk.new > data/train/spk2utt.new
# patch files
for f in spk2utt utt2spk text segments; do
cp data/train/$f data/train/$f.old || exit 1;
cp data/train/$f.new data/train/$f || exit 1;
done
rm $dir/conv.tab
fi
echo Switchboard-1 data preparation succeeded.


@ -0,0 +1,89 @@
#!/usr/bin/perl -w
# Author: Peng Qi (pengqi@cs.stanford.edu)
# This script maps Switchboard speaker IDs to the true physical speakers
# and fixes the utterance IDs accordingly. Expected to be run from one
# directory level above.
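# A worked example of the renaming (all IDs below are hypothetical): if conv.tab
# lists speaker 1234 on side A of conversation 2001, then utterance
# "sw02001-A-0001" becomes "sw1234-02001-A-0001" and its speaker ID becomes "sw1234".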
sub trim {
(my $s = $_[0]) =~ s/^\s+|\s+$//g;
return $s;
}
if ($#ARGV != 1) {
print "Usage: swbd1_fix_speakerid.pl <swbd-conv-tab-file> <data-dir>\n";
print "E.g.: swbd1_fix_speakerid.pl /datasets/SWBD1Transcripts/tables/conv.tab data/train\n";
}
$tab_file = $ARGV[0];
$dir = $ARGV[1];
%conv_to_spk = ();
open(my $conv_tab, '<', $tab_file) or die "Could not open '$tab_file' $!\n";
while (my $line = <$conv_tab>) {
chomp $line;
my @fields = split "," , $line;
#$fields[0] = trim($fields[0]);
$fields[2] = trim($fields[2]);
$fields[3] = trim($fields[3]);
$conv_to_spk{'sw0' . $fields[0] . '-A'} = $fields[2];
$conv_to_spk{'sw0' . $fields[0] . '-B'} = $fields[3];
}
close($conv_tab);
# fix utt2spk
%missingconv = ();
open(my $utt2spk, '<', $dir . '/utt2spk') or die "Could not open '$dir/utt2spk' $!\n";
open(my $utt2spk_new, '>', $dir . '/utt2spk.new');
while (my $line = <$utt2spk>) {
chomp $line;
my @fields = split " " , $line;
my $convid = substr $fields[0], 0, 9;
if (exists $conv_to_spk{ $convid }) {
my $spkid = $conv_to_spk{ $convid };
$spkid = "sw" . $spkid;
my $newuttid = $spkid . '-' . (substr $fields[0], 2);
print $utt2spk_new "$newuttid $spkid\n";
} else {
my $convid = substr $convid, 3, 4;
$missingconv{$convid} = 1;
print $utt2spk_new $fields[0]." ".$fields[1]."\n";
}
}
close($utt2spk);
close($utt2spk_new);
foreach my $conv (keys %missingconv) {
print "Warning: Conversation ID '$conv' not found in conv.tab, retaining old speaker IDs\n"
}
# fix segments and text
foreach my $file ('segments','text') {
open(my $oldfile, '<', "$dir/$file") or die "Could not open '$dir/$file' $!\n";
open(my $newfile, '>', "$dir/$file.new");
while (my $line = <$oldfile>) {
chomp $line;
my $convid = substr $line, 0, 9;
if (exists $conv_to_spk{$convid}) {
my $spkid = $conv_to_spk{$convid};
print $newfile "sw$spkid-" . (substr $line, 2) . "\n";
} else {
print $newfile "$line\n";
}
}
}


@ -15,6 +15,15 @@
. path.sh
set -e # exit on error
# Prepare Switchboard data. This command can also take a second optional argument
# which specifies the directory containing the Switchboard documentation. Specifically, if
# this argument is given, the script will look for the conv.tab file and correct
# speaker IDs to the actual speaker personal identification numbers released in
# the documentation. The documentation can be found here:
# https://catalog.ldc.upenn.edu/docs/LDC97S62/
# Note: if you are using this link, make sure you rename conv_tab.csv to conv.tab
# after downloading.
# Usage: local/swbd1_data_prep.sh /path/to/SWBD [/path/to/SWBD_docs]
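# For instance (the second path is hypothetical; point it at wherever the
# downloaded documentation was unpacked):
# local/swbd1_data_prep.sh /export/corpora3/LDC/LDC97S62 /export/corpora3/LDC/LDC97S62_doc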
local/swbd1_data_prep.sh /export/corpora3/LDC/LDC97S62
# local/swbd1_data_prep.sh /home/dpovey/data/LDC97S62
# local/swbd1_data_prep.sh /data/corpora0/LDC97S62


@ -14,12 +14,13 @@ use_graphs=false
# Begin configuration.
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
acoustic_scale=0.1
beam=20.0
lattice_beam=10.0
beam=15.0
lattice_beam=8.0
max_active=750
transform_dir= # directory to find fMLLR transforms in.
top_n_words=100 # Number of common words that we compile into each graph (most frequent
# in $lang/text.
stage=0
stage=-1
cleanup=true
# End configuration options.
@ -64,14 +65,16 @@ cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt <$data/text | \
awk '{for(x=2;x<=NF;x++) print $x;}' | sort | uniq -c | \
sort -rn > $dir/word_counts.int || exit 1;
num_words=$(awk '{x+=$1} END{print x}' < $dir/word_counts.int) || exit 1;
# print top-n words with their unigram probabilities.
if [ $stage -le 0 ]; then
utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt <$data/text | \
awk '{for(x=2;x<=NF;x++) print $x;}' | sort | uniq -c | \
sort -rn > $dir/word_counts.int || exit 1;
num_words=$(awk '{x+=$1} END{print x}' < $dir/word_counts.int) || exit 1;
# print top-n words with their unigram probabilities.
head -n $top_n_words $dir/word_counts.int | awk -v tot=$num_words '{print $1/tot, $2;}' >$dir/top_words.int
utils/int2sym.pl -f 2 $lang/words.txt <$dir/top_words.int >$dir/top_words.txt
head -n $top_n_words $dir/word_counts.int | awk -v tot=$num_words '{print $1/tot, $2;}' >$dir/top_words.int
utils/int2sym.pl -f 2 $lang/words.txt <$dir/top_words.int >$dir/top_words.txt
fi
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
@ -105,9 +108,9 @@ elif [ -f $srcdir/final.alimdl ]; then
fi
echo "$0: decoding $data using utterance-specific decoding graphs using model from $srcdir, output in $dir"
if [ $stage -le 1 ]; then
echo "$0: decoding $data using utterance-specific decoding graphs using model from $srcdir, output in $dir"
if [ $stage -le 0 ]; then
rm $dir/edits.*.txt $dir/aligned_ref.*.txt 2>/dev/null
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
@ -116,7 +119,8 @@ if [ $stage -le 0 ]; then
compile-train-graphs-fsts $scale_opts --read-disambig-syms=$lang/phones/disambig.int \
$dir/tree $dir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
gmm-latgen-faster --acoustic-scale=$acoustic_scale --beam=$beam \
--lattice-beam=$lattice_beam --word-symbol-table=$lang/words.txt \
--max-active=$max_active --lattice-beam=$lattice_beam \
--word-symbol-table=$lang/words.txt \
$dir/final.mdl ark:- "$feats" ark:- \| \
lattice-oracle ark:- "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|" \
ark,t:- ark,t:$dir/edits.JOB.txt \| \
@ -124,15 +128,16 @@ if [ $stage -le 0 ]; then
fi
if [ $stage -le 1 ]; then
if [ $stage -le 2 ]; then
if [ -f $dir/edits.1.txt ]; then
for x in $(seq $nj); do cat $dir/edits.$x.txt; done > $dir/edits.txt
for x in $(seq $nj); do cat $dir/aligned_ref.$x.txt; done > $dir/aligned_ref.txt
# the awk commands below are to ensure that partially-written files don't confuse us.
for x in $(seq $nj); do cat $dir/edits.$x.txt; done | awk '{if(NF==2){print;}}' > $dir/edits.txt
for x in $(seq $nj); do cat $dir/aligned_ref.$x.txt; done | awk '{if(NF>=1){print;}}' > $dir/aligned_ref.txt
else
echo "$0: warning: no file $dir/edits.1.txt, using previously concatenated file if present."
fi
# in case any utterances failed to align, get filtered copy of $data/text that's filtered.
# in case any utterances failed to align, get filtered copy of $data/text
utils/filter_scp.pl $dir/edits.txt < $data/text > $dir/text
cat $dir/text | awk '{print $1, (NF-1);}' > $dir/length.txt
@ -162,4 +167,3 @@ if [ $stage -le 1 ]; then
rm $dir/edits.*.txt $dir/aligned_ref.*.txt
fi
fi


@ -1,12 +1,21 @@
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey)
# Vimal Manohar
# Apache 2.0
##Changes
# Vimal Manohar (Jan 2014):
# Added options to boost silence probabilities in the model before
# decoding. This can help in favoring the silence phones when
# some silence regions are wrongly decoded as speech phones like glottal stops
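# A hypothetical invocation of this decoding script with silence boosting
# (directories and the phone list below are examples only) could look like:
#   <this-script> --boost-silence 1.5 \
#     --silence-phones-list `cat data/lang/phones/silence.csl` \
#     exp/tri4/graph data/dev exp/tri4/decode_dev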
# Begin configuration section.
transform_dir=
iter=
model= # You can specify the model to use (e.g. if you want to use the .alimdl)
boost_silence=1.0 # Boost silence pdfs in the model by this factor before decoding
silence_phones_list= # List of silence phones that would be boosted before decoding
stage=0
nj=4
cmd=run.pl
@ -27,6 +36,8 @@ echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
[ -z $silence_phones_list ] && boost_silence=1.0
if [ $# != 3 ]; then
echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
@ -106,10 +117,13 @@ if [ $stage -le 0 ]; then
words="ark:/dev/null"
fi
[ ! -z "$silence_phones_list" ] && \
model="gmm-boost-silence --boost=$boost_silence $silence_phones_list $model - |"
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
gmm-decode-faster$thread_string --max-active=$max_active --beam=$beam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst "$feats" "$words" "$ali" || exit 1;
"$model" $graphdir/HCLG.fst "$feats" "$words" "$ali" || exit 1;
fi
exit 0;


@ -20,7 +20,7 @@ gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note:
first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
# the 1st pass of decoding (lattice generation).
max_active=7000
max_mem=50000000
#WARNING: This option was renamed to lattice_beam (the rename was made to follow
# the naming used in the other scripts).
lattice_beam=6.0 # Beam we use in lattice generation.
@ -131,7 +131,7 @@ if [ $stage -le 2 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_pass1.JOB.log \
sgmm2-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
--word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \
--word-symbol-table=$graphdir/words.txt --max-mem=$max_mem "$gselect_opt_1stpass" $alignment_model \
$graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1;
fi


@ -0,0 +1,56 @@
#!/bin/bash
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey).
# Apache 2.0.
# This script converts nnet1 into nnet2 models.
# Note, it doesn't support all possible types of nnet1 models.
# Begin configuration section
cleanup=true
cmd=run.pl
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 2 ]; then
echo "Usage: $0 [options] <src-nnet1-dir> <dest-nnet2-dir>"
echo "e.g.: $0 exp/dnn4b_pretrain-dbn_dnn_smbr exp/dnn4b_smbr_nnet2"
exit 1;
fi
src=$1
dir=$2
mkdir -p $dir/log || exit 1;
for f in $src/final.mdl $src/final.nnet $src/final.feature_transform $src/ali_train_pdf.counts; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done
# We could do the following things all as one long piped command,
# but it will be easier to debug if we make them separate.
$cmd $dir/log/convert_feature_transform.log \
nnet1-to-raw-nnet $src/final.feature_transform $dir/0.raw || exit 1;
$cmd $dir/log/convert_model.log \
nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1;
$cmd $dir/log/append_model.log \
raw-nnet-concat $dir/0.raw $dir/1.raw $dir/concat.raw || exit 1;
$cmd $dir/log/init_model.log \
nnet-am-init $src/final.mdl $dir/concat.raw $dir/final_noprior.mdl || exit 1;
$cmd $dir/log/set_priors.log \
nnet-adjust-priors $dir/final_noprior.mdl $src/ali_train_pdf.counts $dir/final.mdl || exit 1;
if $cleanup; then
rm $dir/0.raw $dir/1.raw $dir/concat.raw $dir/final_noprior.mdl
fi


@ -65,6 +65,7 @@ for f in $graphdir/HCLG.fst $data/feats.scp $model $extra_files; do
done
sdata=$data/split$nj;
cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
@ -79,7 +80,6 @@ if [ -z "$feat_type" ]; then
echo "$0: feature type is $feat_type"
fi
cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
case $feat_type in
@ -90,16 +90,30 @@ case $feat_type in
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
if [ "$feat_type" == "lda" ]; then
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
[ ! -s $transform_dir/num_jobs ] && \
echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1;
nj_orig=$(cat $transform_dir/num_jobs)
if [ $feat_type == "raw" ]; then trans=raw_trans;
else trans=trans; fi
if [ $feat_type == "lda" ] && \
! cmp $transform_dir/../final.mat $srcdir/final.mat && \
! cmp $transform_dir/final.mat $srcdir/final.mat; then
echo "$0: LDA transforms differ between $srcdir and $transform_dir"
exit 1;
fi
if [ ! -f $transform_dir/$trans.1 ]; then
echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)"
exit 1;
fi
if [ $nj -ne $nj_orig ]; then
# Copy the transforms into an archive with an index.
for n in $(seq $nj_orig); do cat $transform_dir/$trans.$n; done | \
copy-feats ark:- ark,scp:$dir/$trans.ark,$dir/$trans.scp || exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/$trans.scp ark:- ark:- |"
else
[ ! -f $transform_dir/raw_trans.1 ] && echo "$0: no such file $transform_dir/raw_trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- |"
# number of jobs matches with alignment dir.
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/$trans.JOB ark:- ark:- |"
fi
elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then
echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms,"


@ -36,6 +36,7 @@ perspk=false
first_opt=""
speakers=false
spk_list_specified=false
utt_list_specified=false
if [ "$1" == "--per-spk" ]; then
perspk=true;
@ -55,6 +56,9 @@ elif [ "$1" == "--last" ]; then
elif [ "$1" == "--spk-list" ]; then
spk_list_specified=true
shift;
elif [ "$1" == "--utt-list" ]; then
utt_list_specified=true
shift;
fi
@ -64,6 +68,7 @@ if [ $# != 3 ]; then
echo "Usage: "
echo " subset_data_dir.sh [--speakers|--shortest|--first|--last|--per-spk] <srcdir> <num-utt> <destdir>"
echo " subset_data_dir.sh [--spk-list <speaker-list-file>] <srcdir> <destdir>"
echo " subset_data_dir.sh [--utt-list <utterance-list-file>] <srcdir> <destdir>"
echo "By default, randomly selects <num-utt> utterances from the data directory."
echo "With --speakers, randomly selects enough speakers that we have <num-utt> utterances"
echo "With --per-spk, selects <num-utt> utterances per speaker, if available."
@ -78,6 +83,10 @@ if $spk_list_specified; then
spk_list=$1
srcdir=$2
destdir=$3
elif $utt_list_specified; then
utt_list=$1
srcdir=$2
destdir=$3
else
srcdir=$1
numutt=$2
@ -127,7 +136,13 @@ if $spk_list_specified; then
utils/filter_scp.pl "$spk_list" $srcdir/spk2utt > $destdir/spk2utt || exit 1;
utils/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk || exit 1;
do_filtering; # bash function.
exit 0;
exit 0;
elif $utt_list_specified; then
mkdir -p $destdir
utils/filter_scp.pl "$utt_list" $srcdir/utt2spk > $destdir/utt2spk || exit 1;
utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt || exit 1;
do_filtering; # bash function.
exit 0;
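# A typical use of the new --utt-list option (as in the Fisher data-cleaning
# recipe shown earlier in this change) would be, e.g.:
#   utils/subset_data_dir.sh --utt-list uttlist data/train data/train.thresh0.1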
elif $speakers; then
mkdir -p $destdir
utils/shuffle_list.pl < $srcdir/spk2utt | awk -v numutt=$numutt '{ if (tot < numutt){ print; } tot += (NF-1); }' | \


@ -2,23 +2,7 @@
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
@ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
$dir = $ARGV[0];
! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1;
$dir =~ s:/$::; # Remove trailing slash.
# Group the files into categories where all have the same base-name.
foreach $f (glob ("$dir/*.log")) {
$f_category = $f;
# do next expression twice; s///g doesn't work as they overlap.
$f_category =~ s:\.\d+\.:.*.:;
$f_category =~ s:\.\d+\.:.*.:;
$fmap{$f_category} .= " $f";
}
#scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
sub split_hundreds { # split list of filenames into groups of 100.
my $names = shift @_;
@ -35,7 +19,53 @@ sub split_hundreds { # split list of filenames into groups of 100.
return @ans;
}
foreach $c (keys %fmap) {
sub parse_accounting_entry {
$entry= shift @_;
@elems = split " ", $entry;
$time=undef;
$threads=undef;
foreach $elem (@elems) {
if ( $elem=~ m/time=(\d+)/ ) {
$elem =~ s/time=(\d+)/$1/;
$time = $elem;
} elsif ( $elem=~ m/threads=(\d+)/ ) {
$elem =~ s/threads=(\d+)/$1/g;
$threads = $elem;
} else {
die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
}
}
if (defined($time) and defined($threads) ) {
return ($time, $threads);
} else {
die "The accounting entry \"$entry\" did not contain all necessary attributes";
}
}
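# Example (assumed format, based on the "# Accounting: " lines that the grep
# further below extracts from the logs): an entry like "time=123 threads=4"
# is parsed by parse_accounting_entry() into ($time, $threads) = (123, 4),
# i.e. 123 (presumably seconds) of run time on 4 threads.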
foreach $dir (@ARGV) {
#$dir = $ARGV[0];
print "$dir\n";
! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ;
$dir =~ s:/$::; # Remove trailing slash.
# Group the files into categories where all have the same base-name.
foreach $f (glob ("$dir/*.log")) {
$f_category = $f;
# do next expression twice; s///g doesn't work as they overlap.
$f_category =~ s:\.\d+\.(?!\d+):.*.:;
#$f_category =~ s:\.\d+\.:.*.:;
$fmap{$f_category} .= " $f";
}
}
foreach $c (sort (keys %fmap) ) {
$n = 0;
foreach $fgroup (split_hundreds($fmap{$c})) {
$n += `grep -w WARNING $fgroup | wc -l`;
@ -44,7 +74,7 @@ foreach $c (keys %fmap) {
print "$n warnings in $c\n"
}
}
foreach $c (keys %fmap) {
foreach $c (sort (keys %fmap)) {
$n = 0;
foreach $fgroup (split_hundreds($fmap{$c})) {
$n += `grep -w ERROR $fgroup | wc -l`;
@ -53,3 +83,39 @@ foreach $c (keys %fmap) {
print "$n errors in $c\n"
}
}
$supertotal_cpu_time=0.0;
$supertotal_clock_time=0.0;
$supertotal_threads=0.0;
foreach $c (sort (keys %fmap)) {
$n = 0;
$total_cpu_time=0.0;
$total_clock_time=0.0;
$total_threads=0.0;
foreach $fgroup (split_hundreds($fmap{$c})) {
$lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`;
#print $lines ."\n";
@entries = split "\n", $lines;
foreach $line (@entries) {
($time, $threads) = parse_accounting_entry($line);
$total_cpu_time += $time * $threads;
$total_threads += $threads;
if ( $time > $total_clock_time ) {
$total_clock_time += $time;
}
}
}
print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
$supertotal_cpu_time += $total_cpu_time;
$supertotal_clock_time += $total_clock_time;
$supertotal_threads += $total_threads;
}
print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";


@ -128,8 +128,8 @@ void Randomize(const CuMatrixBase<Real> &src,
template<typename Real>
void Splice(const CuMatrix<Real> &src, const CuArray<int32> &frame_offsets,
CuMatrix<Real> *tgt) {
CuMatrixBase<Real> *tgt) {
KALDI_ASSERT(src.NumCols()*frame_offsets.Dim() == tgt->NumCols());
KALDI_ASSERT(src.NumRows() == tgt->NumRows());
@ -167,7 +167,8 @@ void Splice(const CuMatrix<Real> &src, const CuArray<int32> &frame_offsets,
template<typename Real>
void Copy(const CuMatrix<Real> &src, const CuArray<int32> &copy_from_indices, CuMatrix<Real> *tgt) {
void Copy(const CuMatrix<Real> &src, const CuArray<int32> &copy_from_indices,
CuMatrixBase<Real> *tgt) {
KALDI_ASSERT(copy_from_indices.Dim() == tgt->NumCols());
KALDI_ASSERT(src.NumRows() == tgt->NumRows());
@ -207,13 +208,17 @@ template
void RegularizeL1(CuMatrixBase<double> *weight, CuMatrixBase<double> *grad, double l1, double lr);
template
void Splice(const CuMatrix<float> &src, const CuArray<int32> &frame_offsets, CuMatrix<float> *tgt);
void Splice(const CuMatrix<float> &src, const CuArray<int32> &frame_offsets,
CuMatrixBase<float> *tgt);
template
void Splice(const CuMatrix<double> &src, const CuArray<int32> &frame_offsets, CuMatrix<double> *tgt);
void Splice(const CuMatrix<double> &src, const CuArray<int32> &frame_offsets,
CuMatrixBase<double> *tgt);
template
void Copy(const CuMatrix<float> &src, const CuArray<int32> &copy_from_indices, CuMatrix<float> *tgt);
void Copy(const CuMatrix<float> &src, const CuArray<int32> &copy_from_indices,
CuMatrixBase<float> *tgt);
template
void Copy(const CuMatrix<double> &src, const CuArray<int32> &copy_from_indices, CuMatrix<double> *tgt);
void Copy(const CuMatrix<double> &src, const CuArray<int32> &copy_from_indices,
CuMatrixBase<double> *tgt);
template
void Randomize(const CuMatrixBase<float> &src,


@ -61,7 +61,7 @@ void Randomize(const CuMatrixBase<Real> &src,
template<typename Real>
void Splice(const CuMatrix<Real> &src,
const CuArray<int32> &frame_offsets,
CuMatrix<Real> *tgt);
CuMatrixBase<Real> *tgt);
/// Copies elements from src into tgt as given by copy_from_indices.
/// The matrices src and tgt must have the same dimensions and
@ -71,7 +71,7 @@ void Splice(const CuMatrix<Real> &src,
template<typename Real>
void Copy(const CuMatrix<Real> &src,
const CuArray<int32> &copy_from_indices,
CuMatrix<Real> *tgt);
CuMatrixBase<Real> *tgt);
} // namespace cu


@ -78,10 +78,10 @@ class CuMatrixBase {
CuMatrixBase<Real> *grad, Real l1, Real lr);
friend void cu::Splice<Real>(const CuMatrix<Real> &src,
const CuArray<int32> &frame_offsets,
CuMatrix<Real> *tgt);
CuMatrixBase<Real> *tgt);
friend void cu::Copy<Real>(const CuMatrix<Real> &src,
const CuArray<int32> &copy_from_indices,
CuMatrix<Real> *tgt);
CuMatrixBase<Real> *tgt);
friend void cu::Randomize<Real>(const CuMatrixBase<Real> &src,
const CuArray<int32> &copy_from_idx,
CuMatrixBase<Real> *tgt);
@ -290,9 +290,9 @@ class CuMatrixBase {
void InvertElements();
/// B = alpha * A
void AddMat(Real alpha, const CuMatrixBase<Real> &A, MatrixTransposeType transA = kNoTrans);
/// B = alpha * row + beta * B
/// (for each column c of *this), c = alpha * col + beta * c
void AddVecToCols(Real alpha, const CuVectorBase<Real> &col, Real beta = 1.0);
/// B = alpha * row + beta * B
/// (for each row r of *this), r = alpha * row + beta * r
void AddVecToRows(Real alpha, const CuVectorBase<Real> &row, Real beta = 1.0);
/// C = alpha * A(^T)*B(^T) + beta * C
void AddMatMat(Real alpha, const CuMatrixBase<Real> &A, MatrixTransposeType transA,


@ -59,7 +59,7 @@ class CuVectorBase {
const CuVectorBase<OtherReal> &v2);
friend void cu::Splice<Real>(const CuMatrix<Real> &src,
const CuArray<int32> &frame_offsets,
CuMatrix<Real> *tgt);
CuMatrixBase<Real> *tgt);
friend class CuRand<Real>;
/// Dimensions


@ -171,7 +171,7 @@ class AffineTransform : public UpdatableComponent {
}
/// Accessors to the component parameters
const CuVector<BaseFloat>& GetBias() {
const CuVector<BaseFloat>& GetBias() const {
return bias_;
}
@ -180,7 +180,7 @@ class AffineTransform : public UpdatableComponent {
bias_.CopyFromVec(bias);
}
const CuMatrix<BaseFloat>& GetLinearity() {
const CuMatrix<BaseFloat>& GetLinearity() const {
return linearity_;
}
@ -190,11 +190,11 @@ class AffineTransform : public UpdatableComponent {
linearity_.CopyFromMat(linearity);
}
const CuVector<BaseFloat>& GetBiasCorr() {
const CuVector<BaseFloat>& GetBiasCorr() const {
return bias_corr_;
}
const CuMatrix<BaseFloat>& GetLinearityCorr() {
const CuMatrix<BaseFloat>& GetLinearityCorr() const {
return linearity_corr_;
}


@ -119,7 +119,7 @@ std::string MomentStatistics(const CuMatrix<Real> &mat) {
* in N, out k*N, FrameOffset o_1,o_2,...,o_k
* FrameOffset example 11frames: -5 -4 -3 -2 -1 0 1 2 3 4 5
*/
class Splice : public Component {
class Splice: public Component {
public:
Splice(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out)


@ -506,7 +506,7 @@ void UnitTestAffinePreconInputComponent() {
void UnitTestBlockAffineComponent() {
BaseFloat learning_rate = 0.01,
param_stddev = 0.1, bias_stddev = 1.0;
param_stddev = 0.1, bias_stddev = 0.1;
int32 num_blocks = 1 + rand() % 3,
input_dim = num_blocks * (2 + rand() % 4),
output_dim = num_blocks * (2 + rand() % 4);
@ -655,6 +655,28 @@ void UnitTestFixedAffineComponent() {
}
}
void UnitTestFixedScaleComponent() {
int32 m = 1 + rand() % 20;
{
CuVector<BaseFloat> vec(m);
vec.SetRandn();
FixedScaleComponent component;
component.Init(vec);
UnitTestGenericComponentInternal(component);
}
}
void UnitTestFixedBiasComponent() {
int32 m = 1 + rand() % 20;
{
CuVector<BaseFloat> vec(m);
vec.SetRandn();
FixedBiasComponent component;
component.Init(vec);
UnitTestGenericComponentInternal(component);
}
}
void UnitTestParsing() {
@ -825,6 +847,8 @@ int main() {
UnitTestDctComponent();
UnitTestFixedLinearComponent();
UnitTestFixedAffineComponent();
UnitTestFixedScaleComponent();
UnitTestFixedBiasComponent();
UnitTestAffineComponentPreconditioned();
UnitTestAffineComponentPreconditionedOnline();
UnitTestAffineComponentModified();


@ -98,6 +98,10 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans = new FixedLinearComponent();
} else if (component_type == "FixedAffineComponent") {
ans = new FixedAffineComponent();
} else if (component_type == "FixedScaleComponent") {
ans = new FixedScaleComponent();
} else if (component_type == "FixedBiasComponent") {
ans = new FixedBiasComponent();
} else if (component_type == "SpliceComponent") {
ans = new SpliceComponent();
} else if (component_type == "SpliceMaxComponent") {
@ -290,6 +294,15 @@ Component *PermuteComponent::Copy() const {
ans->reorder_ = reorder_;
return ans;
}
void PermuteComponent::Init(const std::vector<int32> &reorder) {
reorder_ = reorder;
KALDI_ASSERT(!reorder.empty());
std::vector<int32> indexes(reorder);
std::sort(indexes.begin(), indexes.end());
for (int32 i = 0; i < static_cast<int32>(indexes.size()); i++)
KALDI_ASSERT(i == indexes[i] && "Not a permutation");
}
std::string Component::Info() const {
std::stringstream stream;
@ -1071,6 +1084,19 @@ AffineComponent::AffineComponent(const AffineComponent &component):
bias_params_(component.bias_params_),
is_gradient_(component.is_gradient_) { }
AffineComponent::AffineComponent(const CuMatrix<BaseFloat> &linear_params,
const CuVector<BaseFloat> &bias_params,
BaseFloat learning_rate):
UpdatableComponent(learning_rate),
linear_params_(linear_params),
bias_params_(bias_params) {
KALDI_ASSERT(linear_params.NumRows() == bias_params.Dim()&&
bias_params.Dim() != 0);
is_gradient_ = false;
}
void AffineComponent::SetZero(bool treat_as_gradient) {
if (treat_as_gradient) {
SetLearningRate(1.0);
@ -1193,11 +1219,16 @@ void AffineComponent::InitFromString(std::string args) {
void AffineComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
int32, // num_chunks
CuMatrix<BaseFloat> *out) const {
KALDI_LOG << "First element of input is " << in(0, 0);
KALDI_LOG << "Input sum is " << in.Sum();
// No need for asserts as they'll happen within the matrix operations.
out->Resize(in.NumRows(), linear_params_.NumRows());
out->CopyRowsFromVec(bias_params_); // copies bias_params_ to each row
// of *out.
KALDI_LOG << "First element of output is " << (*out)(0, 0);
KALDI_LOG << "Linearity sum is " << linear_params_.Sum();
out->AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 1.0);
KALDI_LOG << "First element of output is " << (*out)(0, 0);
}
void AffineComponent::UpdateSimple(const CuMatrixBase<BaseFloat> &in_value,
@ -3435,86 +3466,50 @@ void SpliceComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
<< "Probably a code error.";
out->Resize(num_chunks * output_chunk_size, output_dim);
if (0) { // rand() % 2 == 0) { // Occasionally do the older code,
// this will flag any inconsistency in the tests.
for (int32 chunk = 0; chunk < num_chunks; chunk++) {
CuSubMatrix<BaseFloat> input_chunk(in,
chunk * input_chunk_size, input_chunk_size,
0, input_dim),
output_chunk(*out,
chunk * output_chunk_size, output_chunk_size,
0, output_dim);
// 'indexes' is, for each index from 0 to (left_context_+right_context_+1)-1,
// then for each row of "out", the corresponding row of "in" that we copy from.
int32 num_splice = left_context_ + right_context_ + 1,
const_dim = const_component_dim_;
std::vector<std::vector<int32> > indexes(num_splice);
// const_component_dim_ != 0, "const_indexes" will be used to determine which
// row of "in" we copy the last part of each row of "out" from (this part is
// not subject to splicing, it's assumed constant for each frame of "input".
std::vector<int32> const_indexes(const_dim == 0 ? 0 : out->NumRows());
for (int32 c = 0; c < left_context_ + right_context_ + 1; c++) {
CuSubMatrix<BaseFloat> input_part(input_chunk,
c, output_chunk_size,
0, input_dim - const_component_dim_),
output_part(output_chunk,
0, output_chunk_size,
(input_dim - const_component_dim_) * c,
input_dim - const_component_dim_);
output_part.CopyFromMat(input_part);
}
//Append the constant component at the end of the output vector
if (const_component_dim_ != 0) {
CuSubMatrix<BaseFloat> input_part(input_chunk,
0, output_chunk_size,
InputDim() - const_component_dim_,
const_component_dim_),
output_part(output_chunk,
0, output_chunk_size,
OutputDim() - const_component_dim_,
const_component_dim_);
output_part.CopyFromMat(input_part);
}
}
} else {
// 'indexes' is, for each index from 0 to (left_context_+right_context_+1)-1,
// then for each row of "out", the corresponding row of "in" that we copy from.
int32 num_splice = left_context_ + right_context_ + 1,
const_dim = const_component_dim_;
std::vector<std::vector<int32> > indexes(num_splice);
// const_component_dim_ != 0, "const_indexes" will be used to determine which
// row of "in" we copy the last part of each row of "out" from (this part is
// not subject to splicing, it's assumed constant for each frame of "input".
std::vector<int32> const_indexes(const_dim == 0 ? 0 : out->NumRows());
for (int32 c = 0; c < num_splice; c++)
indexes[c].resize(out->NumRows());
for (int32 c = 0; c < num_splice; c++)
indexes[c].resize(out->NumRows());
for (int32 chunk = 0; chunk < num_chunks; chunk++) {
for (int32 c = 0; c < num_splice; c++) {
for (int32 offset = 0; offset < output_chunk_size; offset++) {
indexes[c][chunk * output_chunk_size + offset] =
chunk * input_chunk_size + c + offset;
}
}
if (const_dim != 0) {
for (int32 offset = 0; offset < output_chunk_size; offset++)
const_indexes[chunk * output_chunk_size + offset] =
chunk * input_chunk_size + offset; // there is
// an arbitrariness here; since we assume the const_component
// is constant within a chunk, it doesn't matter from where we copy.
}
}
for (int32 chunk = 0; chunk < num_chunks; chunk++) {
for (int32 c = 0; c < num_splice; c++) {
int32 dim = input_dim - const_dim; // dimension we
// are splicing
CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
0, dim),
out_part(*out, 0, out->NumRows(),
c * dim, dim);
out_part.CopyRows(in_part, indexes[c]);
for (int32 offset = 0; offset < output_chunk_size; offset++) {
indexes[c][chunk * output_chunk_size + offset] =
chunk * input_chunk_size + c + offset;
}
}
if (const_dim != 0) {
CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
in.NumCols() - const_dim, const_dim),
out_part(*out, 0, out->NumRows(),
out->NumCols() - const_dim, const_dim);
out_part.CopyRows(in_part, const_indexes);
for (int32 offset = 0; offset < output_chunk_size; offset++)
const_indexes[chunk * output_chunk_size + offset] =
chunk * input_chunk_size + offset; // there is
// an arbitrariness here; since we assume the const_component
// is constant within a chunk, it doesn't matter from where we copy.
}
}
}
for (int32 c = 0; c < num_splice; c++) {
int32 dim = input_dim - const_dim; // dimension we
// are splicing
CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
0, dim),
out_part(*out, 0, out->NumRows(),
c * dim, dim);
out_part.CopyRows(in_part, indexes[c]);
}
if (const_dim != 0) {
CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
in.NumCols() - const_dim, const_dim),
out_part(*out, 0, out->NumRows(),
out->NumCols() - const_dim, const_dim);
out_part.CopyRows(in_part, const_indexes);
}
}
void SpliceComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value
@ -3537,111 +3532,69 @@ void SpliceComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value
KALDI_ASSERT( OutputDim() == output_dim );
if (0) { // old code
in_deriv->Resize(num_chunks * input_chunk_size, input_dim); // Will zero it.
for (int32 chunk = 0; chunk < num_chunks; chunk++) {
CuSubMatrix<BaseFloat> in_deriv_chunk(*in_deriv,
chunk * input_chunk_size, input_chunk_size,
0, input_dim),
out_deriv_chunk(out_deriv,
chunk * output_chunk_size, output_chunk_size,
0, output_dim);
in_deriv->Resize(num_chunks * input_chunk_size, input_dim, kUndefined);
for (int32 c = 0; c < left_context_ + right_context_ + 1; c++) {
CuSubMatrix<BaseFloat> in_deriv_part(in_deriv_chunk,
c, output_chunk_size,
0, input_dim - const_component_dim_),
out_deriv_part(out_deriv_chunk,
0, output_chunk_size,
c * (input_dim - const_component_dim_),
input_dim - const_component_dim_);
in_deriv_part.AddMat(1.0, out_deriv_part);
}
if (const_component_dim_ > 0) {
CuSubMatrix<BaseFloat> out_deriv_const_part(out_deriv_chunk,
0, output_chunk_size,
output_dim - const_component_dim_,
const_component_dim_);
// Because we assume the "constant part" of the input is the same for all
// input rows, it's not clear how to propagate the derivative back. We
// propagate the same value to all copies of it, but you should only take
// one of them, not sum them up. In practice this is only used at the
// start of the network and the derivative probably won't ever be used.
for (int32 c = 0; c < in_deriv_chunk.NumRows(); c++) {
CuSubMatrix<BaseFloat> in_deriv_part(in_deriv_chunk, c, 1,
input_dim - const_component_dim_,
const_component_dim_);
in_deriv_part.Row(0).AddRowSumMat(1.0, out_deriv_const_part);
}
}
}
} else {
in_deriv->Resize(num_chunks * input_chunk_size, input_dim, kUndefined);
int32 num_splice = left_context_ + right_context_ + 1,
const_dim = const_component_dim_;
// 'indexes' is, for each index from 0 to num_splice - 1,
// then for each row of "in_deriv", the corresponding row of "out_deriv" that
// we add, or -1 if.
int32 num_splice = left_context_ + right_context_ + 1,
const_dim = const_component_dim_;
// 'indexes' is, for each index from 0 to num_splice - 1,
// then for each row of "in_deriv", the corresponding row of "out_deriv" that
// we add, or -1 if.
std::vector<std::vector<int32> > indexes(num_splice);
// const_dim != 0, "const_indexes" will be used to determine which
// row of "in" we copy the last part of each row of "out" from (this part is
// not subject to splicing, it's assumed constant for each frame of "input".
std::vector<int32> const_indexes(const_dim == 0 ? 0 : in_deriv->NumRows(),
-1);
for (int32 c = 0; c < indexes.size(); c++)
indexes[c].resize(in_deriv->NumRows(), -1); // set to -1 by default,
// this gets interpreted by the CopyRows() code as a signal to zero the output...
int32 dim = input_dim - const_dim; // dimension we are splicing
for (int32 chunk = 0; chunk < num_chunks; chunk++) {
for (int32 c = 0; c < num_splice; c++)
for (int32 offset = 0; offset < output_chunk_size; offset++)
indexes[c][chunk * input_chunk_size + c + offset] =
chunk * output_chunk_size + offset;
// Note: when changing over to the CUDA code, we also changed
// how the derivatives are propagated through the splicing layer
// for the const-component-dim. The code was never being used,
// so it doesn't matter. The way we now do it probably makes more
// sense (to get the derivative, you'd have to sum over time, not
// pick an arbitrary time)
if (const_dim != 0)
for (int32 offset = 0; offset < output_chunk_size; offset++)
const_indexes[chunk * input_chunk_size + offset] =
chunk * output_chunk_size + offset;
}
CuMatrix<BaseFloat> temp_mat(in_deriv->NumRows(), dim, kUndefined);
for (int32 c = 0; c < num_splice; c++) {
int32 dim = input_dim - const_dim; // dimension we
// are splicing
CuSubMatrix<BaseFloat> out_deriv_part(out_deriv, 0, out_deriv.NumRows(),
c * dim, dim),
in_deriv_part(*in_deriv, 0, in_deriv->NumRows(),
0, dim);
if (c == 0)
in_deriv_part.CopyRows(out_deriv_part, indexes[c]);
else {
temp_mat.CopyRows(out_deriv_part, indexes[c]);
in_deriv_part.AddMat(1.0, temp_mat);
}
}
if (const_dim != 0) {
CuSubMatrix<BaseFloat> out_deriv_part(out_deriv, 0, out_deriv.NumRows(),
out_deriv.NumCols() - const_dim,
const_dim),
in_deriv_part(*in_deriv, 0, in_deriv->NumRows(),
in_deriv->NumCols() - const_dim, const_dim);
in_deriv_part.CopyRows(out_deriv_part, const_indexes);
}
}
}
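A plain-CPU reference sketch (hypothetical data, not part of this file) of what the index-driven backprop above computes: each splice copy scatters rows of "out_deriv" back onto rows of "in_deriv", with -1 meaning "no source row". On the GPU the first copy can simply overwrite via CopyRows(), and later copies are staged in temp_mat and then added, because CopyRows() does not accumulate.

#include <vector>

// Reference accumulation, assuming dest starts zeroed; indexes[c][r] is the
// source row to add into dest row r for splice copy c, or -1 for "nothing".
void AccumulateRows(const std::vector<std::vector<float> > &src,
                    const std::vector<std::vector<int> > &indexes,
                    std::vector<std::vector<float> > *dest) {
  for (size_t c = 0; c < indexes.size(); c++)
    for (size_t r = 0; r < dest->size(); r++) {
      int src_row = indexes[c][r];
      if (src_row < 0) continue;  // -1: this copy contributes nothing here
      for (size_t j = 0; j < (*dest)[r].size(); j++)
        (*dest)[r][j] += src[src_row][j];
    }
}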
Component *SpliceComponent::Copy() const {
@ -4159,6 +4112,142 @@ void FixedAffineComponent::Read(std::istream &is, bool binary) {
}
void FixedScaleComponent::Init(const CuVectorBase<BaseFloat> &scales) {
KALDI_ASSERT(scales.Dim() != 0);
scales_ = scales;
}
void FixedScaleComponent::InitFromString(std::string args) {
std::string orig_args = args;
std::string filename;
bool ok = ParseFromString("scales", &args, &filename);
if (!ok || !args.empty())
KALDI_ERR << "Invalid initializer for layer of type "
<< Type() << ": \"" << orig_args << "\"";
CuVector<BaseFloat> vec;
ReadKaldiObject(filename, &vec);
Init(vec);
}
std::string FixedScaleComponent::Info() const {
std::stringstream stream;
BaseFloat scales_size = static_cast<BaseFloat>(scales_.Dim()),
scales_mean = scales_.Sum() / scales_size,
scales_stddev = std::sqrt(VecVec(scales_, scales_) / scales_size
- (scales_mean * scales_mean));
stream << Component::Info() << ", scales-mean=" << scales_mean
<< ", scales-stddev=" << scales_stddev;
return stream.str();
}
void FixedScaleComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const {
*out = in;
out->MulColsVec(scales_);
}
void FixedScaleComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value
const CuMatrixBase<BaseFloat> &, // out_value
const CuMatrixBase<BaseFloat> &out_deriv,
int32, // num_chunks
Component *, // to_update
CuMatrix<BaseFloat> *in_deriv) const {
*in_deriv = out_deriv;
in_deriv->MulColsVec(scales_);
}
Component* FixedScaleComponent::Copy() const {
FixedScaleComponent *ans = new FixedScaleComponent();
ans->scales_ = scales_;
return ans;
}
void FixedScaleComponent::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<FixedScaleComponent>");
WriteToken(os, binary, "<Scales>");
scales_.Write(os, binary);
WriteToken(os, binary, "</FixedScaleComponent>");
}
void FixedScaleComponent::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<FixedScaleComponent>", "<Scales>");
scales_.Read(is, binary);
ExpectToken(is, binary, "</FixedScaleComponent>");
}
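A minimal usage sketch for the component above (dimensions and values are hypothetical): each column j of the input is multiplied by scales(j) on the forward pass.

#include "cudamatrix/cu-matrix.h"
#include "nnet2/nnet-component.h"

void FixedScaleExample() {
  using namespace kaldi;
  CuVector<BaseFloat> scales(3);
  scales.Set(0.5);
  nnet2::FixedScaleComponent sc;
  sc.Init(scales);
  CuMatrix<BaseFloat> in(4, 3), out;
  in.Set(2.0);
  sc.Propagate(in, 1 /* num_chunks */, &out);  // every element of "out" is 1.0
}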
void FixedBiasComponent::Init(const CuVectorBase<BaseFloat> &bias) {
KALDI_ASSERT(bias.Dim() != 0);
bias_ = bias;
}
void FixedBiasComponent::InitFromString(std::string args) {
std::string orig_args = args;
std::string filename;
bool ok = ParseFromString("bias", &args, &filename);
if (!ok || !args.empty())
KALDI_ERR << "Invalid initializer for layer of type "
<< Type() << ": \"" << orig_args << "\"";
CuVector<BaseFloat> vec;
ReadKaldiObject(filename, &vec);
Init(vec);
}
std::string FixedBiasComponent::Info() const {
std::stringstream stream;
BaseFloat bias_size = static_cast<BaseFloat>(bias_.Dim()),
bias_mean = bias_.Sum() / bias_size,
bias_stddev = std::sqrt(VecVec(bias_, bias_) / bias_size
- (bias_mean * bias_mean));
stream << Component::Info() << ", bias-mean=" << bias_mean
<< ", bias-stddev=" << bias_stddev;
return stream.str();
}
void FixedBiasComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const {
*out = in;
out->AddVecToRows(1.0, bias_, 1.0);
}
void FixedBiasComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value
const CuMatrixBase<BaseFloat> &, // out_value
const CuMatrixBase<BaseFloat> &out_deriv,
int32, // num_chunks
Component *, // to_update
CuMatrix<BaseFloat> *in_deriv) const {
*in_deriv = out_deriv;
}
Component* FixedBiasComponent::Copy() const {
FixedBiasComponent *ans = new FixedBiasComponent();
ans->bias_ = bias_;
return ans;
}
void FixedBiasComponent::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<FixedBiasComponent>");
WriteToken(os, binary, "<Bias>");
bias_.Write(os, binary);
WriteToken(os, binary, "</FixedBiasComponent>");
}
void FixedBiasComponent::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<FixedBiasComponent>", "<Bias>");
bias_.Read(is, binary);
ExpectToken(is, binary, "</FixedBiasComponent>");
}
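A matching sketch for FixedBiasComponent (hypothetical dimensions): bias(j) is added to column j on the forward pass, and, as the Backprop above shows, derivatives pass through unchanged.

#include "cudamatrix/cu-matrix.h"
#include "nnet2/nnet-component.h"

void FixedBiasExample() {
  using namespace kaldi;
  CuVector<BaseFloat> bias(3);
  bias.Set(1.0);
  nnet2::FixedBiasComponent bc;
  bc.Init(bias);
  CuMatrix<BaseFloat> in(4, 3), out;  // "in" is zero-initialized
  bc.Propagate(in, 1 /* num_chunks */, &out);  // out(r, j) == 1.0 for all r, j
}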
std::string DropoutComponent::Info() const {

Просмотреть файл

@ -638,6 +638,11 @@ class AffineComponent: public UpdatableComponent {
friend class SoftmaxComponent; // Friend declaration relates to mixing up.
public:
explicit AffineComponent(const AffineComponent &other);
// The next constructor is used in converting from nnet1.
AffineComponent(const CuMatrix<BaseFloat> &linear_params,
const CuVector<BaseFloat> &bias_params,
BaseFloat learning_rate);
virtual int32 InputDim() const { return linear_params_.NumCols(); }
virtual int32 OutputDim() const { return linear_params_.NumRows(); }
void Init(BaseFloat learning_rate,
@ -1153,6 +1158,7 @@ class SpliceComponent: public Component {
};
/// This is as SpliceComponent but outputs the max of
/// any of the inputs (taking the max across time).
class SpliceMaxComponent: public Component {
@ -1442,12 +1448,16 @@ private:
};
/// PermuteComponent does a random permutation of the dimensions. Useful in
/// conjunction with block-diagonal transforms.
/// PermuteComponent does a permutation of the dimensions (by default, a fixed
/// random permutation, but it may be specified). Useful in conjunction with
/// block-diagonal transforms.
class PermuteComponent: public Component {
public:
void Init(int32 dim);
void Init(const std::vector<int32> &reorder);
PermuteComponent(int32 dim) { Init(dim); }
PermuteComponent(const std::vector<int32> &reorder) { Init(reorder); }
PermuteComponent() { } // e.g. prior to Read() or Init()
virtual int32 InputDim() const { return reorder_.size(); }
@ -1463,17 +1473,17 @@ class PermuteComponent: public Component {
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const;
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value, // dummy
const CuMatrixBase<BaseFloat> &out_value, // dummy
virtual void Backprop(const CuMatrixBase<BaseFloat> &,
const CuMatrixBase<BaseFloat> &,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // dummy
Component *,
CuMatrix<BaseFloat> *in_deriv) const;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(PermuteComponent);
std::vector<int32> reorder_; // This class sends input dimension i to
// output dimension reorder_[i].
};
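A tiny standalone sketch (hypothetical permutation, not taken from the file) of the mapping described in the comment above: input dimension i goes to output dimension reorder_[i].

#include <cassert>
#include <vector>

int main() {
  std::vector<int> reorder = {2, 0, 1};
  std::vector<float> in = {10.0f, 20.0f, 30.0f}, out(3);
  for (size_t i = 0; i < in.size(); i++)
    out[reorder[i]] = in[i];  // dimension i of the input lands at reorder[i]
  assert(out[2] == 10.0f && out[0] == 20.0f && out[1] == 30.0f);
  return 0;
}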
@ -1607,6 +1617,81 @@ class FixedAffineComponent: public Component {
};
/// FixedScaleComponent applies a fixed per-element scale; it's similar
/// to the Rescale component in the nnet1 setup (and only needed for nnet1
/// model conversion).
class FixedScaleComponent: public Component {
public:
FixedScaleComponent() { }
virtual std::string Type() const { return "FixedScaleComponent"; }
virtual std::string Info() const;
void Init(const CuVectorBase<BaseFloat> &scales);
// InitFromString takes only the option scales=<string>,
// where the string is the filename of a Kaldi-format vector to read.
virtual void InitFromString(std::string args);
virtual int32 InputDim() const { return scales_.Dim(); }
virtual int32 OutputDim() const { return scales_.Dim(); }
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const;
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return false; }
virtual Component* Copy() const;
virtual void Read(std::istream &is, bool binary);
virtual void Write(std::ostream &os, bool binary) const;
protected:
CuVector<BaseFloat> scales_;
KALDI_DISALLOW_COPY_AND_ASSIGN(FixedScaleComponent);
};
/// FixedBiasComponent applies a fixed per-element bias; it's similar
/// to the AddShift component in the nnet1 setup (and only needed for nnet1
/// model conversion).
class FixedBiasComponent: public Component {
public:
FixedBiasComponent() { }
virtual std::string Type() const { return "FixedBiasComponent"; }
virtual std::string Info() const;
void Init(const CuVectorBase<BaseFloat> &bias);
// InitFromString takes only the option bias=<string>,
// where the string is the filename of a Kaldi-format vector to read.
virtual void InitFromString(std::string args);
virtual int32 InputDim() const { return bias_.Dim(); }
virtual int32 OutputDim() const { return bias_.Dim(); }
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const;
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return false; }
virtual Component* Copy() const;
virtual void Read(std::istream &is, bool binary);
virtual void Write(std::ostream &os, bool binary) const;
protected:
CuVector<BaseFloat> bias_;
KALDI_DISALLOW_COPY_AND_ASSIGN(FixedBiasComponent);
};
/// This Component, if present, randomly zeroes half of
/// the inputs and multiplies the other half by two.
/// Typically you would use this in training but not in

Просмотреть файл

@ -661,7 +661,6 @@ void Nnet::Collapse(bool match_updatableness) {
KALDI_LOG << "Collapsed " << num_collapsed << " components.";
}
Nnet *GenRandomNnet(int32 input_dim,
int32 output_dim) {
@ -711,6 +710,14 @@ Nnet *GenRandomNnet(int32 input_dim,
int32 Nnet::LastUpdatableComponent() const {
int32 last_updatable_component = NumComponents();
for (int32 i = NumComponents() - 1; i >= 0; i--)
if (dynamic_cast<UpdatableComponent*>(components_[i]) != NULL)
last_updatable_component = i;
return last_updatable_component;
}
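A short sketch of the intended use (mirroring the nnet-update.cc change further down; the function name here is only for illustration): because the function returns the lowest index of any updatable component, a backward pass can stop there, since derivatives for earlier, fixed components would never be used.

#include "nnet2/nnet-nnet.h"

void BackpropRangeSketch(const kaldi::nnet2::Nnet &nnet) {
  for (kaldi::int32 c = nnet.NumComponents() - 1;
       c >= nnet.LastUpdatableComponent(); c--) {
    // ... compute the input derivative of component c and update it if it is
    // an UpdatableComponent; components below this range are skipped ...
  }
}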
} // namespace nnet2
} // namespace kaldi

Просмотреть файл

@ -98,6 +98,12 @@ class Nnet {
/// this neural net, leaving everything else fixed.
void CopyStatsFrom(const Nnet &nnet);
/// Returns the index of the lowest-numbered component which is updatable
/// (i.e. the last one reached during back-propagation),
/// or NumComponents() if none are updatable.
int32 LastUpdatableComponent() const;
/// Returns the number of updatable components.
int32 NumUpdatableComponents() const;
/// Scales the parameters of each of the updatable components.
@ -192,9 +198,10 @@ class Nnet {
/// AffineComponent learning-rate=0.01 l2-penalty=0.001 input-dim=10 output-dim=15 param-stddev=0.1
void Init(std::istream &is);
/// This Init method works from a vector of components. It will take ownership
/// of the pointers and resize the vector to zero to avoid a chance of the
/// caller deallocating them.
/// This Init method works from a vector of components. It will take
/// ownership of the pointers and will resize the vector to zero to avoid a
/// chance of the caller deallocating them (but the vector itself is not
/// deleted).
void Init(std::vector<Component*> *components);
/// Appends this component to the components already in the neural net.

Просмотреть файл

@ -135,7 +135,8 @@ double NnetUpdater::ComputeTotAccuracy(
void NnetUpdater::Backprop(CuMatrix<BaseFloat> *deriv) const {
// We assume ComputeObjfAndDeriv has already been called.
for (int32 c = nnet_.NumComponents() - 1; c >= 0; c--) {
for (int32 c = nnet_.NumComponents() - 1;
c >= nnet_.LastUpdatableComponent(); c--) {
const Component &component = nnet_.GetComponent(c);
Component *component_to_update = (nnet_to_update_ == NULL ? NULL :
&(nnet_to_update_->GetComponent(c)));

Просмотреть файл

@ -26,15 +26,18 @@ BINFILES = nnet-randomize-frames nnet-am-info nnet-init \
nnet-modify-learning-rates nnet-normalize-stddev nnet-perturb-egs \
nnet-perturb-egs-fmllr nnet-get-weighted-egs nnet-adjust-priors \
cuda-compiled nnet-replace-last-layers nnet-am-switch-preconditioning \
nnet-train-simple-perturbed nnet-train-parallel-perturbed
nnet-train-simple-perturbed nnet-train-parallel-perturbed \
nnet1-to-raw-nnet
OBJFILES =
# Add this dependency to force cuda-compiled.o to be rebuilt when we reconfigure.
cuda-compiled.o: ../kaldi.mk
TESTFILES =
ADDLIBS = ../nnet2/kaldi-nnet2.a ../gmm/kaldi-gmm.a \
ADDLIBS = ../nnet2/kaldi-nnet2.a ../nnet/kaldi-nnet.a ../gmm/kaldi-gmm.a \
../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a \
../transform/kaldi-transform.a ../tree/kaldi-tree.a ../thread/kaldi-thread.a \
../cudamatrix/kaldi-cudamatrix.a ../matrix/kaldi-matrix.a \

Просмотреть файл

@ -48,16 +48,20 @@ BaseFloat KlDivergence(const Vector<BaseFloat> &p,
void PrintPriorDiagnostics(const Vector<BaseFloat> &old_priors,
const Vector<BaseFloat> &new_priors) {
Vector<BaseFloat> diff_prior(new_priors);
diff_prior.AddVec(-1.0, old_priors);
diff_prior.ApplyAbs();
int32 max_index;
diff_prior.Max(&max_index);
KALDI_LOG << "Adjusting priors: largest absolute difference was for "
<< "pdf " << max_index << ", " << old_priors(max_index)
<< " -> " << new_priors(max_index);
KALDI_LOG << "Adjusting priors: K-L divergence from old to new is "
<< KlDivergence(old_priors, new_priors);
if (old_priors.Dim() == 0) {
KALDI_LOG << "Model did not previously have priors attached.";
} else {
Vector<BaseFloat> diff_prior(new_priors);
diff_prior.AddVec(-1.0, old_priors);
diff_prior.ApplyAbs();
int32 max_index;
diff_prior.Max(&max_index);
KALDI_LOG << "Adjusting priors: largest absolute difference was for "
<< "pdf " << max_index << ", " << old_priors(max_index)
<< " -> " << new_priors(max_index);
KALDI_LOG << "Adjusting priors: K-L divergence from old to new is "
<< KlDivergence(old_priors, new_priors);
}
}
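A call sketch (hypothetical values, function name assumed visible in this translation unit) for the guard added above: a model with no priors attached hands PrintPriorDiagnostics an empty old_priors vector, which is now reported instead of running the diagnostics on zero-dimensional data.

#include "matrix/kaldi-vector.h"

void PriorDiagnosticsSketch() {
  using namespace kaldi;
  Vector<BaseFloat> old_priors;         // Dim() == 0: model had no priors
  Vector<BaseFloat> new_priors(10);
  new_priors.Set(0.1);
  PrintPriorDiagnostics(old_priors, new_priors);  // logs the "no priors" branch
}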

Просмотреть файл

@ -38,6 +38,7 @@ int main(int argc, char *argv[]) {
"See example scripts to see how this works in practice.\n"
"\n"
"Usage: nnet-am-init [options] <tree-in> <topology-in> <raw-nnet-in> <nnet-am-out>\n"
"or: nnet-am-init [options] <transition-model-in> <raw-nnet-in> <nnet-am-out>\n"
"e.g.:\n"
" nnet-am-init tree topo \"nnet-init nnet.config - |\" 1.mdl\n";
@ -48,25 +49,37 @@ int main(int argc, char *argv[]) {
po.Read(argc, argv);
if (po.NumArgs() != 4) {
if (po.NumArgs() != 3 && po.NumArgs() != 4) {
po.PrintUsage();
exit(1);
}
std::string tree_rxfilename = po.GetArg(1),
topo_rxfilename = po.GetArg(2),
raw_nnet_rxfilename = po.GetArg(3),
nnet_wxfilename = po.GetArg(4);
std::string raw_nnet_rxfilename, nnet_wxfilename;
ContextDependency ctx_dep;
ReadKaldiObject(tree_rxfilename, &ctx_dep);
TransitionModel *trans_model = NULL;
if (po.NumArgs() == 4) {
std::string tree_rxfilename = po.GetArg(1),
topo_rxfilename = po.GetArg(2);
raw_nnet_rxfilename = po.GetArg(3);
nnet_wxfilename = po.GetArg(4);
HmmTopology topo;
ReadKaldiObject(topo_rxfilename, &topo);
// Construct the transition model from the tree and the topology file.
TransitionModel trans_model(ctx_dep, topo);
ContextDependency ctx_dep;
ReadKaldiObject(tree_rxfilename, &ctx_dep);
HmmTopology topo;
ReadKaldiObject(topo_rxfilename, &topo);
// Construct the transition model from the tree and the topology file.
trans_model = new TransitionModel(ctx_dep, topo);
} else {
std::string trans_model_rxfilename = po.GetArg(1);
raw_nnet_rxfilename = po.GetArg(2);
nnet_wxfilename = po.GetArg(3);
trans_model = new TransitionModel();
ReadKaldiObject(trans_model_rxfilename, trans_model);
}
AmNnet am_nnet;
{
Nnet nnet;
@ -76,16 +89,17 @@ int main(int argc, char *argv[]) {
am_nnet.Init(nnet);
}
if (am_nnet.NumPdfs() != trans_model.NumPdfs())
if (am_nnet.NumPdfs() != trans_model->NumPdfs())
KALDI_ERR << "Mismatch in number of pdfs, neural net has "
<< am_nnet.NumPdfs() << ", transition model has "
<< trans_model.NumPdfs();
<< trans_model->NumPdfs();
{
Output ko(nnet_wxfilename, binary_write);
trans_model.Write(ko.Stream(), binary_write);
trans_model->Write(ko.Stream(), binary_write);
am_nnet.Write(ko.Stream(), binary_write);
}
delete trans_model;
KALDI_LOG << "Initialized neural net and wrote it to " << nnet_wxfilename;
return 0;
} catch(const std::exception &e) {

Просмотреть файл

@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
"Initialize the neural network from a config file with a line for each\n"
"component. Note, this only outputs the neural net itself, not the associated\n"
"information such as the transition-model; you'll probably want to pipe\n"
"the output into something like am-nnet-init.\n"
"the output into something like nnet-am-init.\n"
"\n"
"Usage: nnet-init [options] <config-in> <raw-nnet-out>\n"
"e.g.:\n"

Просмотреть файл

@ -0,0 +1,203 @@
// nnet2bin/nnet1-to-raw-nnet.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey, Hainan Xu)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-affine-transform.h"
#include "nnet/nnet-activation.h"
#include "nnet2/nnet-nnet.h"
#include "nnet2/nnet-component.h"
namespace kaldi {
nnet2::Component *ConvertAffineTransformComponent(
const nnet1::Component &nnet1_component) {
const nnet1::AffineTransform *affine =
dynamic_cast<const nnet1::AffineTransform*>(&nnet1_component);
KALDI_ASSERT(affine != NULL);
// The default learning rate is 1.0e-05; you can use the --learning-rate or
// --learning-rates option of nnet-am-copy to change it if you need to.
BaseFloat learning_rate = 1.0e-05;
return new nnet2::AffineComponent(affine->GetLinearity(),
affine->GetBias(),
learning_rate);
}
nnet2::Component *ConvertSoftmaxComponent(
const nnet1::Component &nnet1_component) {
const nnet1::Softmax *softmax =
dynamic_cast<const nnet1::Softmax*>(&nnet1_component);
KALDI_ASSERT(softmax != NULL);
return new nnet2::SoftmaxComponent(softmax->InputDim());
}
nnet2::Component *ConvertSigmoidComponent(
const nnet1::Component &nnet1_component) {
const nnet1::Sigmoid *sigmoid =
dynamic_cast<const nnet1::Sigmoid*>(&nnet1_component);
KALDI_ASSERT(sigmoid != NULL);
return new nnet2::SigmoidComponent(sigmoid->InputDim());
}
nnet2::Component *ConvertSpliceComponent(
const nnet1::Component &nnet1_component) {
const nnet1::Splice *splice =
dynamic_cast<const nnet1::Splice*>(&nnet1_component);
KALDI_ASSERT(splice != NULL);
int32 low, high;
std::vector<int32> frame_offsets;
std::ostringstream ostr;
splice->WriteData(ostr, false);
std::istringstream istr(ostr.str());
ReadIntegerVector(istr, false, &frame_offsets);
for (size_t i = 1; i < frame_offsets.size(); i++) {
KALDI_ASSERT(frame_offsets[i-1] + 1 == frame_offsets[i]);
}
low = frame_offsets[0];
high = frame_offsets[frame_offsets.size() - 1];
nnet2::SpliceComponent *res = new nnet2::SpliceComponent();
res->Init(splice->InputDim(), -low, high);
return res;
}
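A small standalone check (hypothetical offsets) of the conversion rule above: contiguous nnet1 splice offsets {-2, ..., 2} become left_context = 2 and right_context = 2 for the nnet2 SpliceComponent.

#include <cassert>
#include <vector>

int main() {
  std::vector<int> frame_offsets = {-2, -1, 0, 1, 2};
  int low = frame_offsets.front(), high = frame_offsets.back();
  int left_context = -low, right_context = high;
  assert(left_context == 2 && right_context == 2);
  return 0;
}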
nnet2::Component *ConvertAddShiftComponent(
const nnet1::Component &nnet1_component) {
const nnet1::AddShift *add_shift =
dynamic_cast<const nnet1::AddShift*>(&nnet1_component);
KALDI_ASSERT(add_shift != NULL);
Vector<BaseFloat> bias;
add_shift->GetParams(&bias);
CuVector<BaseFloat> cu_bias(bias);
nnet2::FixedBiasComponent *res = new nnet2::FixedBiasComponent();
res->Init(cu_bias);
return res;
}
nnet2::Component *ConvertRescaleComponent(
const nnet1::Component &nnet1_component) {
const nnet1::Rescale *rescale =
dynamic_cast<const nnet1::Rescale*>(&nnet1_component);
KALDI_ASSERT(rescale != NULL);
Vector<BaseFloat> scale;
rescale->GetParams(&scale);
CuVector<BaseFloat> cu_scale(scale);
nnet2::FixedScaleComponent *res = new nnet2::FixedScaleComponent();
res->Init(cu_scale);
return res;
}
nnet2::Component *ConvertComponent(const nnet1::Component &nnet1_component) {
nnet1::Component::ComponentType type_in = nnet1_component.GetType();
switch (type_in) {
case nnet1::Component::kAffineTransform:
return ConvertAffineTransformComponent(nnet1_component);
case nnet1::Component::kSoftmax:
return ConvertSoftmaxComponent(nnet1_component);
case nnet1::Component::kSigmoid:
return ConvertSigmoidComponent(nnet1_component);
case nnet1::Component::kSplice:
return ConvertSpliceComponent(nnet1_component); // note, this will for now only handle the
// special case where all the splice indexes are contiguous, e.g.
// -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5.
case nnet1::Component::kAddShift:
return ConvertAddShiftComponent(nnet1_component); // convert to FixedBiasComponent
case nnet1::Component::kRescale:
return ConvertRescaleComponent(nnet1_component); // convert to FixedScaleComponent
default: KALDI_ERR << "Un-handled nnet1 component type "
<< nnet1::Component::TypeToMarker(type_in);
return NULL;
}
}
nnet2::Nnet *ConvertNnet1ToNnet2(const nnet1::Nnet &nnet1) {
// get a vector of nnet2::Component pointers and initialize the nnet2::Nnet with it.
size_t size = nnet1.NumComponents();
std::vector<nnet2::Component*> *components = new std::vector<nnet2::Component*>();
components->resize(size);
for (size_t i = 0; i < size; i++) {
(*components)[i] = ConvertComponent(nnet1.GetComponent(i));
}
nnet2::Nnet *res = new nnet2::Nnet();
res->Init(components);
// res->Init() takes ownership of the Component pointers and clears the vector,
// but it does not delete the vector object itself, so free it here.
delete components;
return res;
}
}
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
typedef kaldi::int32 int32;
const char *usage =
"Convert nnet1 neural net to nnet2 'raw' neural net\n"
""
"\n"
"Usage: nnet1-to-raw-nnet [options] <nnet1-in> <nnet2-out>\n"
"e.g.:\n"
" nnet1-to-raw-nnet srcdir/final.nnet - | nnet-am-init dest/tree dest/topo - dest/0.mdl\n";
bool binary_write = true;
int32 srand_seed = 0;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Read(argc, argv);
srand(srand_seed);
if (po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
std::string nnet1_rxfilename = po.GetArg(1),
raw_nnet2_wxfilename = po.GetArg(2);
nnet1::Nnet nnet1;
ReadKaldiObject(nnet1_rxfilename, &nnet1);
nnet2::Nnet *nnet2 = ConvertNnet1ToNnet2(nnet1);
WriteKaldiObject(*nnet2, raw_nnet2_wxfilename, binary_write);
KALDI_LOG << "Converted nnet1 neural net to raw nnet2 and wrote it to "
<< PrintableWxfilename(raw_nnet2_wxfilename);
delete nnet2;
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}

Просмотреть файл

@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
"Concatenate two 'raw' neural nets, e.g. as output by nnet-init or\n"
"nnet-to-raw-nnet\n"
"\n"
"Usage: raw-nnet-concat [options] <raw-nnet1-in> <raw-nnet2-in> <raw-nnet-out>\n"
"Usage: raw-nnet-concat [options] <raw-nnet-in1> <raw-nnet-in2> <raw-nnet-out>\n"
"e.g.:\n"
" raw-nnet-concat nnet1 nnet2 nnet_concat\n";

Просмотреть файл

@ -124,15 +124,11 @@ int main(int argc, char *argv[]) {
<< ", " << mat.NumRows() << "frm";
//check for NaN/inf
for (int32 r = 0; r<mat.NumRows(); r++) {
for (int32 c = 0; c<mat.NumCols(); c++) {
BaseFloat val = mat(r,c);
if (val != val) KALDI_ERR << "NaN in features of : " << feature_reader.Key();
if (val == std::numeric_limits<BaseFloat>::infinity())
KALDI_ERR << "inf in features of : " << feature_reader.Key();
}
BaseFloat sum = mat.Sum();
if (!KALDI_ISFINITE(sum)) {
KALDI_ERR << "NaN or inf found in features of " << feature_reader.Key();
}
// push it to gpu
feats = mat;
// fwd-pass
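To close, a note on the feature check above: a single NaN or inf anywhere in the matrix makes mat.Sum() non-finite, so the one KALDI_ISFINITE test on the sum is equivalent to the element-by-element scan it replaces. A minimal plain-C++ sketch (hypothetical values):

#include <cassert>
#include <limits>

int main() {
  float vals[4] = {1.0f, 2.0f, std::numeric_limits<float>::infinity(), 3.0f};
  float sum = 0.0f;
  for (int i = 0; i < 4; i++) sum += vals[i];
  // Finite numbers satisfy x - x == 0; inf - inf and NaN - NaN are NaN, which
  // compares unequal to 0, so the test below holds for any non-finite sum
  // (this mirrors the check that KALDI_ISFINITE performs).
  assert(!(sum - sum == 0.0f));
  return 0;
}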