From 94c4646abac227ed093c3534f9587e7deed9f489 Mon Sep 17 00:00:00 2001 From: Chao Weng Date: Tue, 10 Dec 2013 19:29:00 +0000 Subject: [PATCH] adding chime wsj eg git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3291 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8 --- egs/chime_wsj0/s5/cmd.sh | 29 + egs/chime_wsj0/s5/conf/decode_dnn.config | 2 + egs/chime_wsj0/s5/conf/fbank.conf | 11 + egs/chime_wsj0/s5/conf/mfcc.conf | 1 + .../s5/local/binmask_wsj0_data_prep.sh | 117 +++ egs/chime_wsj0/s5/local/chime_format_data.sh | 86 +++ .../s5/local/clean_wsj0_data_prep.sh | 190 +++++ egs/chime_wsj0/s5/local/copy_clean_ali.sh | 13 + egs/chime_wsj0/s5/local/cstr_ndx2flist.pl | 54 ++ egs/chime_wsj0/s5/local/cstr_wsj_data_prep.sh | 187 +++++ .../s5/local/cstr_wsj_extend_dict.sh | 172 +++++ egs/chime_wsj0/s5/local/dict/add_counts.pl | 31 + egs/chime_wsj0/s5/local/dict/count_rules.pl | 44 ++ egs/chime_wsj0/s5/local/dict/filter_dict.pl | 19 + egs/chime_wsj0/s5/local/dict/find_acronyms.pl | 95 +++ .../s5/local/dict/get_acronym_prons.pl | 123 ++++ .../s5/local/dict/get_candidate_prons.pl | 187 +++++ .../s5/local/dict/get_rule_hierarchy.pl | 73 ++ egs/chime_wsj0/s5/local/dict/get_rules.pl | 204 ++++++ .../s5/local/dict/limit_candidate_prons.pl | 103 +++ .../s5/local/dict/reverse_candidates.pl | 50 ++ egs/chime_wsj0/s5/local/dict/reverse_dict.pl | 14 + egs/chime_wsj0/s5/local/dict/score_prons.pl | 50 ++ egs/chime_wsj0/s5/local/dict/score_rules.pl | 52 ++ .../s5/local/dict/select_candidate_prons.pl | 84 +++ .../s5/local/find_noisy_transcripts.pl | 65 ++ egs/chime_wsj0/s5/local/find_transcripts.pl | 64 ++ egs/chime_wsj0/s5/local/flist2scp.pl | 31 + .../s5/local/generate_example_kws.sh | 110 +++ egs/chime_wsj0/s5/local/kws_data_prep.sh | 60 ++ egs/chime_wsj0/s5/local/ndx2flist.pl | 62 ++ egs/chime_wsj0/s5/local/nnet2/run_5b.sh | 69 ++ egs/chime_wsj0/s5/local/nnet2/run_5c.sh | 24 + .../s5/local/noisy_wsj0_data_prep.sh | 119 +++ .../s5/local/normalize_transcript.pl | 59 ++ .../s5/local/reverb_wsj0_data_prep.sh | 100 +++ egs/chime_wsj0/s5/local/run_basis_fmllr.sh | 42 ++ egs/chime_wsj0/s5/local/run_dnn.sh | 181 +++++ egs/chime_wsj0/s5/local/run_fwdbwd.sh | 41 ++ egs/chime_wsj0/s5/local/run_mmi_tri2b.sh | 60 ++ egs/chime_wsj0/s5/local/run_mmi_tri4b.sh | 50 ++ egs/chime_wsj0/s5/local/run_nnet_cpu.sh | 9 + egs/chime_wsj0/s5/local/run_raw_fmllr.sh | 66 ++ egs/chime_wsj0/s5/local/run_rnnlms_sgmm5b.sh | 42 ++ egs/chime_wsj0/s5/local/run_rnnlms_tri3b.sh | 64 ++ egs/chime_wsj0/s5/local/run_sgmm.sh | 113 +++ egs/chime_wsj0/s5/local/run_sgmm2.sh | 148 ++++ egs/chime_wsj0/s5/local/score.sh | 67 ++ egs/chime_wsj0/s5/local/score_combine.sh | 95 +++ egs/chime_wsj0/s5/local/score_mbr.sh | 58 ++ egs/chime_wsj0/s5/local/wsj_data_prep.sh | 201 +++++ egs/chime_wsj0/s5/local/wsj_extend_dict.sh | 173 +++++ egs/chime_wsj0/s5/local/wsj_format_data.sh | 86 +++ .../s5/local/wsj_format_local_lms.sh | 52 ++ egs/chime_wsj0/s5/local/wsj_prepare_dict.sh | 83 +++ egs/chime_wsj0/s5/local/wsj_train_lms.sh | 202 +++++ egs/chime_wsj0/s5/local/wsj_train_rnnlms.sh | 153 ++++ egs/chime_wsj0/s5/path.sh | 3 + egs/chime_wsj0/s5/run.sh | 261 +++++++ egs/chime_wsj0/s5/steps/align_basis_fmllr.sh | 150 ++++ egs/chime_wsj0/s5/steps/align_fmllr.sh | 148 ++++ egs/chime_wsj0/s5/steps/align_nnet.sh | 99 +++ egs/chime_wsj0/s5/steps/align_raw_fmllr.sh | 142 ++++ egs/chime_wsj0/s5/steps/align_sgmm.sh | 193 +++++ egs/chime_wsj0/s5/steps/align_sgmm2.sh | 193 +++++ egs/chime_wsj0/s5/steps/align_si.sh | 89 +++ egs/chime_wsj0/s5/steps/append_feats.sh | 67 ++ 
egs/chime_wsj0/s5/steps/compute_cmvn_stats.sh | 80 ++ egs/chime_wsj0/s5/steps/decode.sh | 108 +++ egs/chime_wsj0/s5/steps/decode_basis_fmllr.sh | 206 ++++++ egs/chime_wsj0/s5/steps/decode_biglm.sh | 86 +++ egs/chime_wsj0/s5/steps/decode_combine.sh | 59 ++ egs/chime_wsj0/s5/steps/decode_fmllr.sh | 217 ++++++ egs/chime_wsj0/s5/steps/decode_fmllr_extra.sh | 250 +++++++ egs/chime_wsj0/s5/steps/decode_fmmi.sh | 111 +++ egs/chime_wsj0/s5/steps/decode_fromlats.sh | 90 +++ egs/chime_wsj0/s5/steps/decode_fwdbwd.sh | 122 +++ egs/chime_wsj0/s5/steps/decode_nnet.sh | 128 ++++ egs/chime_wsj0/s5/steps/decode_nnet_cpu.sh | 127 ++++ egs/chime_wsj0/s5/steps/decode_raw_fmllr.sh | 235 ++++++ egs/chime_wsj0/s5/steps/decode_sgmm.sh | 257 +++++++ egs/chime_wsj0/s5/steps/decode_sgmm2.sh | 211 ++++++ .../s5/steps/decode_sgmm2_fromlats.sh | 270 +++++++ .../s5/steps/decode_sgmm2_rescore.sh | 111 +++ .../s5/steps/decode_sgmm2_rescore_project.sh | 172 +++++ .../s5/steps/decode_sgmm_fromlats.sh | 273 +++++++ .../s5/steps/decode_sgmm_rescore.sh | 107 +++ egs/chime_wsj0/s5/steps/decode_si.sh | 108 +++ egs/chime_wsj0/s5/steps/decode_with_map.sh | 113 +++ egs/chime_wsj0/s5/steps/get_ctm.sh | 66 ++ egs/chime_wsj0/s5/steps/get_fmllr_basis.sh | 95 +++ egs/chime_wsj0/s5/steps/get_lexicon_probs.sh | 225 ++++++ egs/chime_wsj0/s5/steps/get_train_ctm.sh | 66 ++ egs/chime_wsj0/s5/steps/lmrescore.sh | 122 +++ egs/chime_wsj0/s5/steps/make_bn_feats.sh | 117 +++ egs/chime_wsj0/s5/steps/make_denlats.sh | 146 ++++ egs/chime_wsj0/s5/steps/make_denlats_nnet.sh | 177 +++++ .../s5/steps/make_denlats_nnet_cpu.sh | 146 ++++ egs/chime_wsj0/s5/steps/make_denlats_sgmm.sh | 159 ++++ egs/chime_wsj0/s5/steps/make_denlats_sgmm2.sh | 170 +++++ egs/chime_wsj0/s5/steps/make_fbank.sh | 111 +++ egs/chime_wsj0/s5/steps/make_fmllr_feats.sh | 103 +++ egs/chime_wsj0/s5/steps/make_fmmi_feats.sh | 102 +++ egs/chime_wsj0/s5/steps/make_index.sh | 83 +++ egs/chime_wsj0/s5/steps/make_mfcc.sh | 111 +++ egs/chime_wsj0/s5/steps/make_plp.sh | 111 +++ egs/chime_wsj0/s5/steps/mixup.sh | 148 ++++ egs/chime_wsj0/s5/steps/nnet2/align.sh | 104 +++ egs/chime_wsj0/s5/steps/nnet2/decode.sh | 131 ++++ egs/chime_wsj0/s5/steps/nnet2/get_egs.sh | 276 +++++++ egs/chime_wsj0/s5/steps/nnet2/get_lda.sh | 120 +++ .../s5/steps/nnet2/get_lda_block.sh | 120 +++ .../s5/steps/nnet2/get_perturbed_feats.sh | 89 +++ egs/chime_wsj0/s5/steps/nnet2/retrain_tanh.sh | 216 ++++++ egs/chime_wsj0/s5/steps/nnet2/train_block.sh | 376 ++++++++++ egs/chime_wsj0/s5/steps/nnet2/train_tanh.sh | 377 ++++++++++ egs/chime_wsj0/s5/steps/pretrain_dbn.sh | 257 +++++++ egs/chime_wsj0/s5/steps/rnnlmrescore.sh | 180 +++++ egs/chime_wsj0/s5/steps/search_index.sh | 50 ++ egs/chime_wsj0/s5/steps/tandem/align_fmllr.sh | 185 +++++ egs/chime_wsj0/s5/steps/tandem/align_sgmm.sh | 233 ++++++ egs/chime_wsj0/s5/steps/tandem/align_sgmm2.sh | 232 ++++++ egs/chime_wsj0/s5/steps/tandem/align_si.sh | 130 ++++ egs/chime_wsj0/s5/steps/tandem/decode.sh | 143 ++++ .../s5/steps/tandem/decode_fmllr.sh | 242 ++++++ egs/chime_wsj0/s5/steps/tandem/decode_sgmm.sh | 300 ++++++++ .../s5/steps/tandem/decode_sgmm2.sh | 236 ++++++ egs/chime_wsj0/s5/steps/tandem/decode_si.sh | 143 ++++ .../s5/steps/tandem/make_denlats.sh | 184 +++++ .../s5/steps/tandem/make_denlats_sgmm.sh | 201 +++++ .../s5/steps/tandem/make_denlats_sgmm2.sh | 201 +++++ .../s5/steps/tandem/mk_aslf_lda_mllt.sh | 177 +++++ .../s5/steps/tandem/mk_aslf_sgmm2.sh | 178 +++++ .../s5/steps/tandem/train_deltas.sh | 163 +++++ .../s5/steps/tandem/train_lda_mllt.sh | 257 +++++++ 
egs/chime_wsj0/s5/steps/tandem/train_mllt.sh | 236 ++++++ egs/chime_wsj0/s5/steps/tandem/train_mmi.sh | 184 +++++ .../s5/steps/tandem/train_mmi_sgmm.sh | 190 +++++ .../s5/steps/tandem/train_mmi_sgmm2.sh | 190 +++++ egs/chime_wsj0/s5/steps/tandem/train_mono.sh | 161 ++++ egs/chime_wsj0/s5/steps/tandem/train_sat.sh | 278 +++++++ egs/chime_wsj0/s5/steps/tandem/train_sgmm.sh | 312 ++++++++ egs/chime_wsj0/s5/steps/tandem/train_sgmm2.sh | 334 +++++++++ egs/chime_wsj0/s5/steps/tandem/train_ubm.sh | 168 +++++ egs/chime_wsj0/s5/steps/train_deltas.sh | 142 ++++ egs/chime_wsj0/s5/steps/train_diag_ubm.sh | 125 ++++ egs/chime_wsj0/s5/steps/train_lda_mllt.sh | 209 ++++++ egs/chime_wsj0/s5/steps/train_mmi.sh | 145 ++++ egs/chime_wsj0/s5/steps/train_mmi_fmmi.sh | 223 ++++++ .../s5/steps/train_mmi_fmmi_indirect.sh | 246 +++++++ egs/chime_wsj0/s5/steps/train_mmi_sgmm.sh | 150 ++++ egs/chime_wsj0/s5/steps/train_mmi_sgmm2.sh | 153 ++++ egs/chime_wsj0/s5/steps/train_mono.sh | 138 ++++ egs/chime_wsj0/s5/steps/train_mpe.sh | 158 ++++ egs/chime_wsj0/s5/steps/train_nnet.sh | 329 +++++++++ egs/chime_wsj0/s5/steps/train_nnet_cpu.sh | 535 ++++++++++++++ .../s5/steps/train_nnet_cpu_conv.sh | 692 ++++++++++++++++++ egs/chime_wsj0/s5/steps/train_nnet_cpu_mmi.sh | 293 ++++++++ .../s5/steps/train_nnet_cpu_tanh.sh | 496 +++++++++++++ egs/chime_wsj0/s5/steps/train_nnet_mmi.sh | 190 +++++ egs/chime_wsj0/s5/steps/train_nnet_mpe.sh | 173 +++++ .../s5/steps/train_nnet_scheduler.sh | 178 +++++ egs/chime_wsj0/s5/steps/train_quick.sh | 191 +++++ egs/chime_wsj0/s5/steps/train_raw_sat.sh | 295 ++++++++ egs/chime_wsj0/s5/steps/train_sat.sh | 255 +++++++ egs/chime_wsj0/s5/steps/train_sat_basis.sh | 277 +++++++ egs/chime_wsj0/s5/steps/train_sgmm.sh | 274 +++++++ egs/chime_wsj0/s5/steps/train_sgmm2.sh | 296 ++++++++ egs/chime_wsj0/s5/steps/train_sgmm2_group.sh | 343 +++++++++ egs/chime_wsj0/s5/steps/train_smbr.sh | 152 ++++ egs/chime_wsj0/s5/steps/train_ubm.sh | 139 ++++ .../s5/steps/word_align_lattices.sh | 48 ++ egs/chime_wsj0/s5/utils/add_disambig.pl | 58 ++ egs/chime_wsj0/s5/utils/add_lex_disambig.pl | 118 +++ egs/chime_wsj0/s5/utils/apply_map.pl | 83 +++ egs/chime_wsj0/s5/utils/best_wer.sh | 32 + egs/chime_wsj0/s5/utils/combine_data.sh | 37 + egs/chime_wsj0/s5/utils/convert_ctm.pl | 92 +++ egs/chime_wsj0/s5/utils/convert_slf.pl | 138 ++++ egs/chime_wsj0/s5/utils/copy_data_dir.sh | 99 +++ egs/chime_wsj0/s5/utils/eps2disambig.pl | 23 + egs/chime_wsj0/s5/utils/filter_scp.pl | 50 ++ egs/chime_wsj0/s5/utils/find_arpa_oovs.pl | 64 ++ egs/chime_wsj0/s5/utils/fix_ctm.sh | 32 + egs/chime_wsj0/s5/utils/fix_data_dir.sh | 169 +++++ egs/chime_wsj0/s5/utils/format_lm.sh | 84 +++ egs/chime_wsj0/s5/utils/format_lm_sri.sh | 124 ++++ egs/chime_wsj0/s5/utils/gen_topo.pl | 63 ++ egs/chime_wsj0/s5/utils/int2sym.pl | 71 ++ .../s5/utils/kwslist_post_process.pl | 291 ++++++++ egs/chime_wsj0/s5/utils/ln.pl | 58 ++ egs/chime_wsj0/s5/utils/make_lexicon_fst.pl | 161 ++++ .../s5/utils/make_phone_bigram_lang.sh | 98 +++ .../s5/utils/make_unigram_grammar.pl | 54 ++ egs/chime_wsj0/s5/utils/mkgraph.sh | 124 ++++ .../s5/utils/nnet-cpu/make_nnet_config.pl | 159 ++++ .../utils/nnet-cpu/make_nnet_config_block.pl | 156 ++++ .../make_nnet_config_preconditioned.pl | 277 +++++++ .../utils/nnet-cpu/update_learning_rates.pl | 141 ++++ .../s5/utils/nnet/analyze_alignments.sh | 71 ++ egs/chime_wsj0/s5/utils/nnet/copy_feats.sh | 62 ++ egs/chime_wsj0/s5/utils/nnet/gen_dct_mat.py | 53 ++ egs/chime_wsj0/s5/utils/nnet/gen_hamm_mat.py | 45 ++ 
egs/chime_wsj0/s5/utils/nnet/gen_mlp_init.py | 92 +++ egs/chime_wsj0/s5/utils/nnet/gen_rbm_init.py | 110 +++ .../s5/utils/nnet/gen_recurrent_dnn.py | 46 ++ egs/chime_wsj0/s5/utils/nnet/gen_splice.py | 40 + egs/chime_wsj0/s5/utils/nnet/init_nnet.sh | 136 ++++ egs/chime_wsj0/s5/utils/parse_options.sh | 94 +++ egs/chime_wsj0/s5/utils/pinyin_map.pl | 78 ++ egs/chime_wsj0/s5/utils/prepare_lang.sh | 332 +++++++++ egs/chime_wsj0/s5/utils/queue.pl | 313 ++++++++ egs/chime_wsj0/s5/utils/reduce_data_dir.sh | 52 ++ .../s5/utils/reduce_data_dir_by_reclist.sh | 53 ++ egs/chime_wsj0/s5/utils/remove_oovs.pl | 43 ++ egs/chime_wsj0/s5/utils/reverse_arpa.py | 187 +++++ egs/chime_wsj0/s5/utils/reverse_lm.sh | 91 +++ egs/chime_wsj0/s5/utils/reverse_lm_test.sh | 90 +++ .../s5/utils/rnnlm_compute_scores.sh | 69 ++ egs/chime_wsj0/s5/utils/run.pl | 148 ++++ egs/chime_wsj0/s5/utils/s2eps.pl | 27 + egs/chime_wsj0/s5/utils/shuffle_list.pl | 38 + egs/chime_wsj0/s5/utils/slurm.pl | 131 ++++ egs/chime_wsj0/s5/utils/spk2utt_to_utt2spk.pl | 27 + egs/chime_wsj0/s5/utils/split_data.sh | 120 +++ egs/chime_wsj0/s5/utils/split_scp.pl | 221 ++++++ egs/chime_wsj0/s5/utils/subset_data_dir.sh | 159 ++++ .../s5/utils/subset_data_dir_tr_cv.sh | 104 +++ egs/chime_wsj0/s5/utils/subset_scp.pl | 87 +++ egs/chime_wsj0/s5/utils/summarize_warnings.pl | 46 ++ egs/chime_wsj0/s5/utils/sym2int.pl | 98 +++ egs/chime_wsj0/s5/utils/utt2spk_to_spk2utt.pl | 39 + egs/chime_wsj0/s5/utils/validate_data_dir.sh | 218 ++++++ egs/chime_wsj0/s5/utils/validate_dict_dir.pl | 218 ++++++ egs/chime_wsj0/s5/utils/validate_lang.pl | 534 ++++++++++++++ egs/chime_wsj0/s5/utils/write_kwslist.pl | 333 +++++++++ 236 files changed, 33644 insertions(+) create mode 100644 egs/chime_wsj0/s5/cmd.sh create mode 100644 egs/chime_wsj0/s5/conf/decode_dnn.config create mode 100644 egs/chime_wsj0/s5/conf/fbank.conf create mode 100644 egs/chime_wsj0/s5/conf/mfcc.conf create mode 100755 egs/chime_wsj0/s5/local/binmask_wsj0_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/chime_format_data.sh create mode 100755 egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/copy_clean_ali.sh create mode 100755 egs/chime_wsj0/s5/local/cstr_ndx2flist.pl create mode 100755 egs/chime_wsj0/s5/local/cstr_wsj_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/cstr_wsj_extend_dict.sh create mode 100755 egs/chime_wsj0/s5/local/dict/add_counts.pl create mode 100755 egs/chime_wsj0/s5/local/dict/count_rules.pl create mode 100755 egs/chime_wsj0/s5/local/dict/filter_dict.pl create mode 100755 egs/chime_wsj0/s5/local/dict/find_acronyms.pl create mode 100755 egs/chime_wsj0/s5/local/dict/get_acronym_prons.pl create mode 100755 egs/chime_wsj0/s5/local/dict/get_candidate_prons.pl create mode 100755 egs/chime_wsj0/s5/local/dict/get_rule_hierarchy.pl create mode 100755 egs/chime_wsj0/s5/local/dict/get_rules.pl create mode 100755 egs/chime_wsj0/s5/local/dict/limit_candidate_prons.pl create mode 100755 egs/chime_wsj0/s5/local/dict/reverse_candidates.pl create mode 100755 egs/chime_wsj0/s5/local/dict/reverse_dict.pl create mode 100755 egs/chime_wsj0/s5/local/dict/score_prons.pl create mode 100755 egs/chime_wsj0/s5/local/dict/score_rules.pl create mode 100755 egs/chime_wsj0/s5/local/dict/select_candidate_prons.pl create mode 100755 egs/chime_wsj0/s5/local/find_noisy_transcripts.pl create mode 100755 egs/chime_wsj0/s5/local/find_transcripts.pl create mode 100755 egs/chime_wsj0/s5/local/flist2scp.pl create mode 100755 egs/chime_wsj0/s5/local/generate_example_kws.sh 
create mode 100755 egs/chime_wsj0/s5/local/kws_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/ndx2flist.pl create mode 100755 egs/chime_wsj0/s5/local/nnet2/run_5b.sh create mode 100755 egs/chime_wsj0/s5/local/nnet2/run_5c.sh create mode 100755 egs/chime_wsj0/s5/local/noisy_wsj0_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/normalize_transcript.pl create mode 100755 egs/chime_wsj0/s5/local/reverb_wsj0_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/run_basis_fmllr.sh create mode 100755 egs/chime_wsj0/s5/local/run_dnn.sh create mode 100755 egs/chime_wsj0/s5/local/run_fwdbwd.sh create mode 100755 egs/chime_wsj0/s5/local/run_mmi_tri2b.sh create mode 100755 egs/chime_wsj0/s5/local/run_mmi_tri4b.sh create mode 100755 egs/chime_wsj0/s5/local/run_nnet_cpu.sh create mode 100644 egs/chime_wsj0/s5/local/run_raw_fmllr.sh create mode 100755 egs/chime_wsj0/s5/local/run_rnnlms_sgmm5b.sh create mode 100755 egs/chime_wsj0/s5/local/run_rnnlms_tri3b.sh create mode 100755 egs/chime_wsj0/s5/local/run_sgmm.sh create mode 100755 egs/chime_wsj0/s5/local/run_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/local/score.sh create mode 100755 egs/chime_wsj0/s5/local/score_combine.sh create mode 100755 egs/chime_wsj0/s5/local/score_mbr.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_data_prep.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_extend_dict.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_format_data.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_format_local_lms.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_prepare_dict.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_train_lms.sh create mode 100755 egs/chime_wsj0/s5/local/wsj_train_rnnlms.sh create mode 100755 egs/chime_wsj0/s5/path.sh create mode 100755 egs/chime_wsj0/s5/run.sh create mode 100755 egs/chime_wsj0/s5/steps/align_basis_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/align_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/align_nnet.sh create mode 100755 egs/chime_wsj0/s5/steps/align_raw_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/align_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/align_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/align_si.sh create mode 100755 egs/chime_wsj0/s5/steps/append_feats.sh create mode 100755 egs/chime_wsj0/s5/steps/compute_cmvn_stats.sh create mode 100755 egs/chime_wsj0/s5/steps/decode.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_basis_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_biglm.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_combine.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_fmllr_extra.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_fmmi.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_fromlats.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_fwdbwd.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_nnet.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_nnet_cpu.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_raw_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm2_fromlats.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm2_rescore.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm2_rescore_project.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm_fromlats.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_sgmm_rescore.sh create mode 100755 
egs/chime_wsj0/s5/steps/decode_si.sh create mode 100755 egs/chime_wsj0/s5/steps/decode_with_map.sh create mode 100755 egs/chime_wsj0/s5/steps/get_ctm.sh create mode 100755 egs/chime_wsj0/s5/steps/get_fmllr_basis.sh create mode 100755 egs/chime_wsj0/s5/steps/get_lexicon_probs.sh create mode 100755 egs/chime_wsj0/s5/steps/get_train_ctm.sh create mode 100755 egs/chime_wsj0/s5/steps/lmrescore.sh create mode 100755 egs/chime_wsj0/s5/steps/make_bn_feats.sh create mode 100755 egs/chime_wsj0/s5/steps/make_denlats.sh create mode 100755 egs/chime_wsj0/s5/steps/make_denlats_nnet.sh create mode 100755 egs/chime_wsj0/s5/steps/make_denlats_nnet_cpu.sh create mode 100755 egs/chime_wsj0/s5/steps/make_denlats_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/make_denlats_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/make_fbank.sh create mode 100755 egs/chime_wsj0/s5/steps/make_fmllr_feats.sh create mode 100755 egs/chime_wsj0/s5/steps/make_fmmi_feats.sh create mode 100755 egs/chime_wsj0/s5/steps/make_index.sh create mode 100755 egs/chime_wsj0/s5/steps/make_mfcc.sh create mode 100755 egs/chime_wsj0/s5/steps/make_plp.sh create mode 100755 egs/chime_wsj0/s5/steps/mixup.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/align.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/decode.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/get_egs.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/get_lda.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/get_lda_block.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/get_perturbed_feats.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/retrain_tanh.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/train_block.sh create mode 100755 egs/chime_wsj0/s5/steps/nnet2/train_tanh.sh create mode 100755 egs/chime_wsj0/s5/steps/pretrain_dbn.sh create mode 100755 egs/chime_wsj0/s5/steps/rnnlmrescore.sh create mode 100755 egs/chime_wsj0/s5/steps/search_index.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/align_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/align_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/align_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/align_si.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/decode.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/decode_fmllr.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/decode_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/decode_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/decode_si.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/make_denlats.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/make_denlats_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/make_denlats_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/mk_aslf_lda_mllt.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/mk_aslf_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_deltas.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_lda_mllt.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_mllt.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_mmi.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_mmi_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_mmi_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_mono.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_sat.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/tandem/train_sgmm2.sh create mode 100755 
egs/chime_wsj0/s5/steps/tandem/train_ubm.sh create mode 100755 egs/chime_wsj0/s5/steps/train_deltas.sh create mode 100755 egs/chime_wsj0/s5/steps/train_diag_ubm.sh create mode 100755 egs/chime_wsj0/s5/steps/train_lda_mllt.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mmi.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mmi_fmmi.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mmi_fmmi_indirect.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mmi_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mmi_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mono.sh create mode 100755 egs/chime_wsj0/s5/steps/train_mpe.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_cpu.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_cpu_conv.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_cpu_mmi.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_cpu_tanh.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_mmi.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_mpe.sh create mode 100755 egs/chime_wsj0/s5/steps/train_nnet_scheduler.sh create mode 100755 egs/chime_wsj0/s5/steps/train_quick.sh create mode 100755 egs/chime_wsj0/s5/steps/train_raw_sat.sh create mode 100755 egs/chime_wsj0/s5/steps/train_sat.sh create mode 100755 egs/chime_wsj0/s5/steps/train_sat_basis.sh create mode 100755 egs/chime_wsj0/s5/steps/train_sgmm.sh create mode 100755 egs/chime_wsj0/s5/steps/train_sgmm2.sh create mode 100755 egs/chime_wsj0/s5/steps/train_sgmm2_group.sh create mode 100755 egs/chime_wsj0/s5/steps/train_smbr.sh create mode 100755 egs/chime_wsj0/s5/steps/train_ubm.sh create mode 100755 egs/chime_wsj0/s5/steps/word_align_lattices.sh create mode 100755 egs/chime_wsj0/s5/utils/add_disambig.pl create mode 100755 egs/chime_wsj0/s5/utils/add_lex_disambig.pl create mode 100755 egs/chime_wsj0/s5/utils/apply_map.pl create mode 100755 egs/chime_wsj0/s5/utils/best_wer.sh create mode 100755 egs/chime_wsj0/s5/utils/combine_data.sh create mode 100755 egs/chime_wsj0/s5/utils/convert_ctm.pl create mode 100755 egs/chime_wsj0/s5/utils/convert_slf.pl create mode 100755 egs/chime_wsj0/s5/utils/copy_data_dir.sh create mode 100755 egs/chime_wsj0/s5/utils/eps2disambig.pl create mode 100755 egs/chime_wsj0/s5/utils/filter_scp.pl create mode 100755 egs/chime_wsj0/s5/utils/find_arpa_oovs.pl create mode 100755 egs/chime_wsj0/s5/utils/fix_ctm.sh create mode 100755 egs/chime_wsj0/s5/utils/fix_data_dir.sh create mode 100755 egs/chime_wsj0/s5/utils/format_lm.sh create mode 100755 egs/chime_wsj0/s5/utils/format_lm_sri.sh create mode 100755 egs/chime_wsj0/s5/utils/gen_topo.pl create mode 100755 egs/chime_wsj0/s5/utils/int2sym.pl create mode 100755 egs/chime_wsj0/s5/utils/kwslist_post_process.pl create mode 100755 egs/chime_wsj0/s5/utils/ln.pl create mode 100755 egs/chime_wsj0/s5/utils/make_lexicon_fst.pl create mode 100755 egs/chime_wsj0/s5/utils/make_phone_bigram_lang.sh create mode 100755 egs/chime_wsj0/s5/utils/make_unigram_grammar.pl create mode 100755 egs/chime_wsj0/s5/utils/mkgraph.sh create mode 100755 egs/chime_wsj0/s5/utils/nnet-cpu/make_nnet_config.pl create mode 100755 egs/chime_wsj0/s5/utils/nnet-cpu/make_nnet_config_block.pl create mode 100755 egs/chime_wsj0/s5/utils/nnet-cpu/make_nnet_config_preconditioned.pl create mode 100755 egs/chime_wsj0/s5/utils/nnet-cpu/update_learning_rates.pl create mode 100755 egs/chime_wsj0/s5/utils/nnet/analyze_alignments.sh create mode 100755 egs/chime_wsj0/s5/utils/nnet/copy_feats.sh create 
mode 100755 egs/chime_wsj0/s5/utils/nnet/gen_dct_mat.py create mode 100755 egs/chime_wsj0/s5/utils/nnet/gen_hamm_mat.py create mode 100755 egs/chime_wsj0/s5/utils/nnet/gen_mlp_init.py create mode 100755 egs/chime_wsj0/s5/utils/nnet/gen_rbm_init.py create mode 100755 egs/chime_wsj0/s5/utils/nnet/gen_recurrent_dnn.py create mode 100755 egs/chime_wsj0/s5/utils/nnet/gen_splice.py create mode 100755 egs/chime_wsj0/s5/utils/nnet/init_nnet.sh create mode 100755 egs/chime_wsj0/s5/utils/parse_options.sh create mode 100755 egs/chime_wsj0/s5/utils/pinyin_map.pl create mode 100755 egs/chime_wsj0/s5/utils/prepare_lang.sh create mode 100755 egs/chime_wsj0/s5/utils/queue.pl create mode 100755 egs/chime_wsj0/s5/utils/reduce_data_dir.sh create mode 100755 egs/chime_wsj0/s5/utils/reduce_data_dir_by_reclist.sh create mode 100755 egs/chime_wsj0/s5/utils/remove_oovs.pl create mode 100755 egs/chime_wsj0/s5/utils/reverse_arpa.py create mode 100755 egs/chime_wsj0/s5/utils/reverse_lm.sh create mode 100755 egs/chime_wsj0/s5/utils/reverse_lm_test.sh create mode 100755 egs/chime_wsj0/s5/utils/rnnlm_compute_scores.sh create mode 100755 egs/chime_wsj0/s5/utils/run.pl create mode 100755 egs/chime_wsj0/s5/utils/s2eps.pl create mode 100755 egs/chime_wsj0/s5/utils/shuffle_list.pl create mode 100755 egs/chime_wsj0/s5/utils/slurm.pl create mode 100755 egs/chime_wsj0/s5/utils/spk2utt_to_utt2spk.pl create mode 100755 egs/chime_wsj0/s5/utils/split_data.sh create mode 100755 egs/chime_wsj0/s5/utils/split_scp.pl create mode 100755 egs/chime_wsj0/s5/utils/subset_data_dir.sh create mode 100755 egs/chime_wsj0/s5/utils/subset_data_dir_tr_cv.sh create mode 100755 egs/chime_wsj0/s5/utils/subset_scp.pl create mode 100755 egs/chime_wsj0/s5/utils/summarize_warnings.pl create mode 100755 egs/chime_wsj0/s5/utils/sym2int.pl create mode 100755 egs/chime_wsj0/s5/utils/utt2spk_to_spk2utt.pl create mode 100755 egs/chime_wsj0/s5/utils/validate_data_dir.sh create mode 100755 egs/chime_wsj0/s5/utils/validate_dict_dir.pl create mode 100755 egs/chime_wsj0/s5/utils/validate_lang.pl create mode 100755 egs/chime_wsj0/s5/utils/write_kwslist.pl diff --git a/egs/chime_wsj0/s5/cmd.sh b/egs/chime_wsj0/s5/cmd.sh new file mode 100644 index 000000000..072aa3819 --- /dev/null +++ b/egs/chime_wsj0/s5/cmd.sh @@ -0,0 +1,29 @@ +# "queue.pl" uses qsub. The options to it are +# options to qsub. If you have GridEngine installed, +# change this to a queue you have access to. +# Otherwise, use "run.pl", which will run jobs locally +# (make sure your --num-jobs options are no more than +# the number of cpus on your machine. + +#a) JHU cluster options +export train_cmd="queue.pl -l arch=*64" +export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" +export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" + +#export cuda_cmd="..." + + +#b) BUT cluster options +#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M" +#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M" +#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G" + +#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1" +#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu" +#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G" + +#c) run it locally... 
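For readers new to Kaldi's job-dispatch wrappers, a brief sketch (not taken from this patch; the log path and the echo command are invented for illustration) of how the exported *_cmd variables above are consumed by the steps/ and local/ scripts. run.pl and queue.pl share this calling convention, so option (c), whose commented-out exports follow just below, only swaps the wrapper:

  # Launches 4 parallel jobs; the wrapper substitutes each job's number for JOB
  # in the log path and in the command, and waits for all jobs to finish.
  $train_cmd JOB=1:4 exp/mono/log/demo.JOB.log \
    echo "running parallel job JOB"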
+#export train_cmd=run.pl +#export decode_cmd=run.pl +export cuda_cmd=run.pl +#export mkgraph_cmd=run.pl diff --git a/egs/chime_wsj0/s5/conf/decode_dnn.config b/egs/chime_wsj0/s5/conf/decode_dnn.config new file mode 100644 index 000000000..bfaae8670 --- /dev/null +++ b/egs/chime_wsj0/s5/conf/decode_dnn.config @@ -0,0 +1,2 @@ +beam=18.0 # beam for decoding. Was 13.0 in the scripts. +latbeam=10.0 # this has most effect on size of the lattices. diff --git a/egs/chime_wsj0/s5/conf/fbank.conf b/egs/chime_wsj0/s5/conf/fbank.conf new file mode 100644 index 000000000..5fc7774b3 --- /dev/null +++ b/egs/chime_wsj0/s5/conf/fbank.conf @@ -0,0 +1,11 @@ +# Filterbank feature options. +--window-type=hamming # disable Dan's window, use the standard Hamming window +--use-energy=false # only fbank outputs +--sample-frequency=16000 # WSJ data is sampled at 16kHz + +--low-freq=64 # typical setup from Frantisek Grezl +--high-freq=8000 +--dither=1 + +--num-mel-bins=40 # 40 Mel bins for 16kHz data +--htk-compat=true # try to make it compatible with HTK diff --git a/egs/chime_wsj0/s5/conf/mfcc.conf b/egs/chime_wsj0/s5/conf/mfcc.conf new file mode 100644 index 000000000..736150909 --- /dev/null +++ b/egs/chime_wsj0/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/chime_wsj0/s5/local/binmask_wsj0_data_prep.sh b/egs/chime_wsj0/s5/local/binmask_wsj0_data_prep.sh new file mode 100755 index 000000000..1914b3695 --- /dev/null +++ b/egs/chime_wsj0/s5/local/binmask_wsj0_data_prep.sh @@ -0,0 +1,117 @@ +#!/bin/bash +set -e + +# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 29/05/12 + +if [ $# -ne 1 ]; then + printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0` + echo "The argument should be the top-level WSJ corpus directory." + echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory" + echo "within the top-level corpus directory." + exit 1; +fi + +CORPUS=$1 + +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils + +. ./path.sh # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi + +cd $dir + +# binary-mask (binmask) list for SI-84 + +find $1/si_tr_s -name '*.wav' | sort -u > train_si84_binmask.flist + + + +# Dev-set Hub 1,2 (503, 913 utterances) + +# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. +# Sometimes this gets copied from the CD's with upcasing, don't know +# why (could be older versions of the disks). +find $1/si_dt_05 -name '*.wav' | sort -u > dev_dt_05_binmask.flist + +find $1/si_et_05 -name '*.wav' | sort -u > test_eval92_5k_binmask.flist + + +# Finding the transcript files: +#find -L $CORPUS -iname '*.dot' > dot_files.flist +if [ !
-e $dir/dot_files.flist ]; then + echo "Could not find $dir/dot_files.flist files, first run clean_wsj0_data_prep.sh"; + exit 1; +fi + +# Convert the transcripts into our format (no normalization yet), +# adding a suffix to each utt_id: 8/9/a/b/c/d encodes the SNR +# condition (9dB/6dB/3dB/0dB/m3dB/m6dB) of the utterance. +for x in train_si84_binmask dev_dt_05_binmask test_eval92_5k_binmask; do + cat $x.flist | perl -e ' + while(<>) { + m:^\S+/(\w+)\.wav$: || die "Bad line $_"; + $id = $1; + $id =~ tr/A-Z/a-z/; + print "$id $_"; + } + ' | sort > ${x}_wav_tmp.scp + #cat ${x}_wav_tmp.scp | awk '{print $1}' \ + # | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1 + cat ${x}_wav_tmp.scp | perl -e ' + while(<STDIN>) { + @A=split(" ", $_); + @B=split("/", $_); + $abs_path_len=@B; + $condition=$B[$abs_path_len-3]; + if ($condition eq "9dB") {$key_suffix=8;} + elsif ($condition eq "6dB") {$key_suffix=9;} + elsif ($condition eq "3dB") {$key_suffix=a;} + elsif ($condition eq "0dB") {$key_suffix=b;} + elsif ($condition eq "m3dB") {$key_suffix=c;} + elsif ($condition eq "m6dB") {$key_suffix=d;} + else {print STDERR "error condition $condition";} + print $A[0].$key_suffix." ".$A[1]."\n"; + } + ' | sort -k1 > ${x}_wav.scp + cat ${x}_wav.scp | awk '{print $1}' \ + | $local/find_noisy_transcripts.pl dot_files.flist > ${x}.trans1 +done + + +# Do some basic normalization steps. At this point we don't remove OOVs-- +# that will be done inside the training scripts, as we'd like to make the +# data-preparation stage independent of the specific lexicon used. +noiseword="<NOISE>"; +for x in train_si84_binmask dev_dt_05_binmask test_eval92_5k_binmask; do + cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ + | sort > $x.txt || exit 1; +done + +# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) +#for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do +# awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \ +# > ${x}_wav.scp +#done + +# Make the utt2spk and spk2utt files. +for x in train_si84_binmask dev_dt_05_binmask test_eval92_5k_binmask; do + cat ${x}_wav.scp | awk '{print $1}' \ + | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk + cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; +done + +echo "Data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/chime_format_data.sh b/egs/chime_wsj0/s5/local/chime_format_data.sh new file mode 100755 index 000000000..47bec0b04 --- /dev/null +++ b/egs/chime_wsj0/s5/local/chime_format_data.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# This script takes data prepared in a corpus-dependent way +# in data/local/, and converts it into the "canonical" form, +# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug, +# data/train_si284, data/train_si84, etc. + +# Don't bother doing train_si84 separately (although we have the file lists +# in data/local/) because it's just the first 7138 utterances in train_si284. +# We'll create train_si84 after doing the feature extraction. + +. 
./path.sh || exit 1; + +echo "Preparing train and test data" +srcdir=data/local/data +lmdir=data/local/nist_lm +tmpdir=data/local/lm_tmp +lexicon=data/local/lang_tmp/lexiconp.txt +mkdir -p $tmpdir + +for x in test_eval92_clean test_eval92_noisy test_eval92_5k_clean test_eval92_5k_noisy dev_dt_05_clean dev_dt_05_reverb dev_dt_05_noisy dev_dt_20_clean dev_dt_20_reverb dev_dt_20_noisy train_si84_clean train_si84_reverb train_si84_noisy; do + mkdir -p data/$x + cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1; + cp $srcdir/$x.txt data/$x/text || exit 1; + cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1; + cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1; + utils/filter_scp.pl data/$x/spk2utt $srcdir/spk2gender > data/$x/spk2gender || exit 1; +done + + +# Next, for each type of language model, create the corresponding FST +# and the corresponding lang_test_* directory. + +echo Preparing language models for test + +for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do + test=data/lang_test_${lm_suffix} + mkdir -p $test + for f in phones.txt words.txt L.fst L_disambig.fst \ + phones/; do + cp -r data/lang/$f $test + done + gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \ + utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt + + # grep -v '<s> <s>' because the LM seems to have some strange and useless + # stuff in it with multiple <s>'s in the history. Encountered some other similar + # things in a LM from Geoff. Removing all "illegal" combinations of <s> and </s>, + # which are supposed to occur only at begin/end of utt. These can cause + # determinization failures of CLG [ends up being epsilon cycles]. + gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \ + grep -v '<s> <s>' | \ + grep -v '</s> <s>' | \ + grep -v '</s> </s>' | \ + arpa2fst - | fstprint | \ + utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \ + utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \ + --osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \ + fstrmepsilon > $test/G.fst + fstisstochastic $test/G.fst + # The output is like: + # 9.14233e-05 -0.259833 + # we do expect the first of these 2 numbers to be close to zero (the second is + # nonzero because the backoff weights make the states sum to >1). + # Because of the <s> fiasco for these particular LMs, the first number is not + # as close to zero as it could be. + + # Everything below is only for diagnostic purposes. + # Checking that G has no cycles with empty words on them (e.g. <s>, </s>); + # this might cause determinization failure of CLG. + # #0 is treated as an empty word. + mkdir -p $tmpdir/g + awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \ + < "$lexicon" >$tmpdir/g/select_empty.fst.txt + fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt $tmpdir/g/select_empty.fst.txt | \ + fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst + fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && + echo "Language model has cycles with empty words" && exit 1 + rm -r $tmpdir/g +done + +echo "Succeeded in formatting data." +rm -r $tmpdir diff --git a/egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh b/egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh new file mode 100755 index 000000000..45226cc00 --- /dev/null +++ b/egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh @@ -0,0 +1,190 @@ +#!/bin/bash +set -e + +# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0.
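A worked illustration of the LM filtering in chime_format_data.sh above (the probabilities are made up for the example; the layout is standard ARPA): the three grep -v filters drop n-gram entries whose history contains sentence-boundary tokens in positions where they cannot legally occur, e.g. lines such as

  -0.8409 <s> <s> THE   -0.3010
  -1.2041 </s> <s> A

Since utils/s2eps.pl later maps <s> and </s> to epsilon, entries like these would become epsilon cycles in G.fst, which is what the comment about determinization failures of CLG refers to.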
+ +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 29/05/12 + +if [ $# -ne 1 ]; then + printf "\nUSAGE: %s \n\n" `basename $0` + echo "The argument should be a the top-level WSJ corpus directory." + echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory" + echo "within the top-level corpus directory." + exit 1; +fi + +CORPUS=$1 + +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils + +. ./path.sh # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi + +cd $dir + +# This version for SI-84 +cat $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \ + | $local/cstr_ndx2flist.pl $CORPUS | sort -u > train_si84_clean.flist + +# This version for SI-284 +#cat $CORPUS/wsj1/doc/indices/si_tr_s.ndx \ +# $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \ +# | $local/cstr_ndx2flist.pl $CORPUS | sort \ +# | grep -v wsj0/si_tr_s/401 > train_si284.flist + +# Now for the test sets. +# $CORPUS/wsj1/doc/indices/readme.doc +# describes all the different test sets. +# Note: each test-set seems to come in multiple versions depending +# on different vocabulary sizes, verbalized vs. non-verbalized +# pronunciations, etc. We use the largest vocab and non-verbalized +# pronunciations. +# The most normal one seems to be the "baseline 60k test set", which +# is h1_p0. + +# Nov'92 (333 utts) +# These index files have a slightly different format; +# have to add .wv1, which is done in cstr_ndx2flist.pl +cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_20.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92_clean.flist + +# Nov'92 (330 utts, 5k vocab) +cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_05.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92_5k_clean.flist + +# Nov'93: (213 utts) +# Have to replace a wrong disk-id. +#cat $CORPUS/wsj1/doc/indices/wsj1/eval/h1_p0.ndx | \ +# $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93.flist + +# Nov'93: (215 utts, 5k) +#cat $CORPUS/wsj1/doc/indices/wsj1/eval/h2_p0.ndx | \ +# $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93_5k.flist + +# Dev-set for Nov'93 (503 utts) +#cat $CORPUS/wsj1/doc/indices/h1_p0.ndx | \ +# $local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93.flist + +# Dev-set for Nov'93 (513 utts, 5k vocab) +#cat $CORPUS/wsj1/doc/indices/h2_p0.ndx | \ +# $local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93_5k.flist + + +# Dev-set Hub 1,2 (503, 913 utterances) + +# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. +# Sometimes this gets copied from the CD's with upcasing, don't know +# why (could be older versions of the disks). 
+find $CORPUS/wsj0/si_dt_20 -print | grep -i ".wv1" | sort > dev_dt_20_clean.flist +find $CORPUS/wsj0/si_dt_05 -print | grep -i ".wv1" | sort > dev_dt_05_clean.flist + + +# Finding the transcript files: +find -L $CORPUS -iname '*.dot' > dot_files.flist + +# Convert the transcripts into our format (no normalization yet) +# adding suffix to utt_id +# 0 for clean condition +for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do + $local/flist2scp.pl $x.flist | sort > ${x}_sph_tmp.scp + cat ${x}_sph_tmp.scp | awk '{print $1}' \ + | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1 + cat ${x}_sph_tmp.scp | awk '{printf("%s0 %s\n", $1, $2);}' > ${x}_sph.scp + cat ${x}_tmp.trans1 | awk '{printf("%s0 ", $1); for(i=2;i<=NF;i++) printf("%s ", $i); printf("\n");}' > ${x}.trans1 +done + + + + +# Do some basic normalization steps. At this point we don't remove OOVs-- +# that will be done inside the training scripts, as we'd like to make the +# data-preparation stage independent of the specific lexicon used. +noiseword="<NOISE>"; +for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do + cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ + | sort > $x.txt || exit 1; +done + +# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) +for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do + awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \ + > ${x}_wav.scp +done + +# Make the utt2spk and spk2utt files. +for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do + cat ${x}_sph.scp | awk '{print $1}' \ + | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk + cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; +done + +# in case we want to limit lm's on most frequent words, copy lm training word frequency list +cp $CORPUS/wsj0/doc/lng_modl/vocab/wfl_64.lst $lmdir +chmod u+w $lmdir/*.lst # had weird permissions on source. + +# The 20K vocab, open-vocabulary language model (i.e. the one with UNK), without +# verbalized pronunciations. This is the most common test setup, I understand. + +cp $CORPUS/wsj0/doc/lng_modl/base_lm/bcb20onp.z $lmdir/lm_bg.arpa.gz || exit 1; +chmod u+w $lmdir/lm_bg.arpa.gz + +# trigram would be: +cat $CORPUS/wsj0/doc/lng_modl/base_lm/tcb20onp.z | \ + perl -e 'while(<>){ if(m/^\\data\\/){ print; last; } } while(<>){ print; }' \ + | gzip -c -f > $lmdir/lm_tg.arpa.gz || exit 1; + +prune-lm --threshold=1e-7 $lmdir/lm_tg.arpa.gz $lmdir/lm_tgpr.arpa || exit 1; +gzip -f $lmdir/lm_tgpr.arpa || exit 1; + +# repeat for 5k language models +cp $CORPUS/wsj0/doc/lng_modl/base_lm/bcb05onp.z $lmdir/lm_bg_5k.arpa.gz || exit 1; +chmod u+w $lmdir/lm_bg_5k.arpa.gz + +# trigram would be: !only closed vocabulary here! +cp $CORPUS/wsj0/doc/lng_modl/base_lm/tcb05cnp.z $lmdir/lm_tg_5k.arpa.gz || exit 1; +chmod u+w $lmdir/lm_tg_5k.arpa.gz +gunzip $lmdir/lm_tg_5k.arpa.gz +tail -n 4328839 $lmdir/lm_tg_5k.arpa | gzip -c -f > $lmdir/lm_tg_5k.arpa.gz +rm $lmdir/lm_tg_5k.arpa + +prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit 1; +gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1; + + +if [ !
-f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then + rm -f wsj0-train-spkrinfo.txt + wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt \ + || ( echo "Getting wsj0-train-spkrinfo.txt from backup location" && \ + wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt ); +fi + +if [ ! -f wsj0-train-spkrinfo.txt ]; then + echo "Could not get the spkrinfo.txt file from LDC website (moved)?" + echo "This is possibly omitted from the training disks; couldn't find it." + echo "Everything else may have worked; we just may be missing gender info" + echo "which is only needed for VTLN-related diagnostics anyway." + exit 1 +fi +# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the +# LDC put it on the web. Perhaps it was accidentally omitted from the +# disks. + +cat $CORPUS/wsj0/doc/spkrinfo.txt \ + ./wsj0-train-spkrinfo.txt | \ + perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \ + awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender + + +echo "Data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/copy_clean_ali.sh b/egs/chime_wsj0/s5/local/copy_clean_ali.sh new file mode 100755 index 000000000..367a57b48 --- /dev/null +++ b/egs/chime_wsj0/s5/local/copy_clean_ali.sh @@ -0,0 +1,13 @@ +#!/bin/bash +. path.sh +data=$1 +old_ali_dir=$2 +mix_ali_dir=$3 +mkdir -p $mix_ali_dir + +cp $old_ali_dir/{final.mdl,num_jobs,tree} $mix_ali_dir/ + +gunzip -c $old_ali_dir/ali.*.gz | gzip -c > $old_ali_dir/ali.gz + +feats="ark,s,cs:copy-feats scp:$data/feats.scp ark:- |" +copy-clean-ali "$feats" "ark:gunzip -c $old_ali_dir/ali.gz |" "ark:| gzip -c > $mix_ali_dir/ali.1.gz" diff --git a/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl b/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl new file mode 100755 index 000000000..101834e86 --- /dev/null +++ b/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl @@ -0,0 +1,54 @@ +#!/usr/bin/perl + +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 12/1/12 + +# This program takes as its standard input an .ndx file from the WSJ corpus that looks +# like this: +#;; File: tr_s_wv1.ndx, updated 04/26/94 +#;; +#;; Index for WSJ0 SI-short Sennheiser training data +#;; Data is read WSJ sentences, Sennheiser mic. +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; per speaker TI) = 7236 utts +#;; +#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 + +# and as command-line argument it takes the names of the WSJ disk locations, e.g.: +# /group/corpora/public/wsjcam0/data on DICE machines. +# It outputs a list of absolute pathnames. + +$wsj_dir = $ARGV[0]; + +while(<STDIN>){ + if(m/^;/){ next; } # Comment. Ignore it.
+ else { + m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; + $filename = $2; # as a subdirectory of the distributed disk. + if ($filename !~ m/\.wv1$/) { $filename .= ".wv1"; } + $filename = "$wsj_dir/$filename"; + if (-e $filename) { + print "$filename\n"; + } else { + print STDERR "File $filename found in the index but not on disk\n"; + } + } +} diff --git a/egs/chime_wsj0/s5/local/cstr_wsj_data_prep.sh b/egs/chime_wsj0/s5/local/cstr_wsj_data_prep.sh new file mode 100755 index 000000000..3a447cdc2 --- /dev/null +++ b/egs/chime_wsj0/s5/local/cstr_wsj_data_prep.sh @@ -0,0 +1,187 @@ +#!/bin/bash +set -e + +# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 29/05/12 + +if [ $# -ne 1 ]; then + printf "\nUSAGE: %s \n\n" `basename $0` + echo "The argument should be a the top-level WSJ corpus directory." + echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory" + echo "within the top-level corpus directory." + exit 1; +fi + +CORPUS=$1 + +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils + +. ./path.sh # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi + +cd $dir + +# This version for SI-84 +cat $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \ + | $local/cstr_ndx2flist.pl $CORPUS | sort \ + | grep -v wsj0/si_tr_s/401 > train_si84.flist + +# This version for SI-284 +cat $CORPUS/wsj1/doc/indices/si_tr_s.ndx \ + $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \ + | $local/cstr_ndx2flist.pl $CORPUS | sort \ + | grep -v wsj0/si_tr_s/401 > train_si284.flist + +# Now for the test sets. +# $CORPUS/wsj1/doc/indices/readme.doc +# describes all the different test sets. +# Note: each test-set seems to come in multiple versions depending +# on different vocabulary sizes, verbalized vs. non-verbalized +# pronunciations, etc. We use the largest vocab and non-verbalized +# pronunciations. +# The most normal one seems to be the "baseline 60k test set", which +# is h1_p0. + +# Nov'92 (333 utts) +# These index files have a slightly different format; +# have to add .wv1, which is done in cstr_ndx2flist.pl +cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_20.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92.flist + +# Nov'92 (330 utts, 5k vocab) +cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_05.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92_5k.flist + +# Nov'93: (213 utts) +# Have to replace a wrong disk-id. +cat $CORPUS/wsj1/doc/indices/wsj1/eval/h1_p0.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93.flist + +# Nov'93: (215 utts, 5k) +cat $CORPUS/wsj1/doc/indices/wsj1/eval/h2_p0.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93_5k.flist + +# Dev-set for Nov'93 (503 utts) +cat $CORPUS/wsj1/doc/indices/h1_p0.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93.flist + +# Dev-set for Nov'93 (513 utts, 5k vocab) +cat $CORPUS/wsj1/doc/indices/h2_p0.ndx | \ + $local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93_5k.flist + + +# Dev-set Hub 1,2 (503, 913 utterances) + +# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. 
+# Sometimes this gets copied from the CD's with upcasing, don't know +# why (could be older versions of the disks). +find $CORPUS/???1/??_??_20 -print | grep -i ".wv1" | sort > dev_dt_20.flist +find $CORPUS/???1/??_??_05 -print | grep -i ".wv1" | sort > dev_dt_05.flist + + +# Finding the transcript files: +find -L $CORPUS -iname '*.dot' > dot_files.flist + +# Convert the transcripts into our format (no normalization yet) +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + $local/flist2scp.pl $x.flist | sort > ${x}_sph.scp + cat ${x}_sph.scp | awk '{print $1}' \ + | $local/find_transcripts.pl dot_files.flist > $x.trans1 +done + +# Do some basic normalization steps. At this point we don't remove OOVs-- +# that will be done inside the training scripts, as we'd like to make the +# data-preparation stage independent of the specific lexicon used. +noiseword="<NOISE>"; +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ + | sort > $x.txt || exit 1; +done + +# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \ + > ${x}_wav.scp +done + +# Make the utt2spk and spk2utt files. +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + cat ${x}_sph.scp | awk '{print $1}' \ + | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk + cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; +done + +# in case we want to limit lm's on most frequent words, copy lm training word frequency list +cp $CORPUS/wsj1/doc/lng_modl/vocab/wfl_64.lst $lmdir +chmod u+w $lmdir/*.lst # had weird permissions on source. + +# The 20K vocab, open-vocabulary language model (i.e. the one with UNK), without +# verbalized pronunciations. This is the most common test setup, I understand. + +cp $CORPUS/wsj1/doc/lng_modl/base_lm/bcb20onp.z $lmdir/lm_bg.arpa.gz || exit 1; +chmod u+w $lmdir/lm_bg.arpa.gz + +# trigram would be: +cat $CORPUS/wsj1/doc/lng_modl/base_lm/tcb20onp.z | \ + perl -e 'while(<>){ if(m/^\\data\\/){ print; last; } } while(<>){ print; }' \ + | gzip -c -f > $lmdir/lm_tg.arpa.gz || exit 1; + +prune-lm --threshold=1e-7 $lmdir/lm_tg.arpa.gz $lmdir/lm_tgpr.arpa || exit 1; +gzip -f $lmdir/lm_tgpr.arpa || exit 1; + +# repeat for 5k language models +cp $CORPUS/wsj1/doc/lng_modl/base_lm/bcb05onp.z $lmdir/lm_bg_5k.arpa.gz || exit 1; +chmod u+w $lmdir/lm_bg_5k.arpa.gz + +# trigram would be: !only closed vocabulary here! +cp $CORPUS/wsj1/doc/lng_modl/base_lm/tcb05cnp.z $lmdir/lm_tg_5k.arpa.gz || exit 1; +chmod u+w $lmdir/lm_tg_5k.arpa.gz +gunzip $lmdir/lm_tg_5k.arpa.gz +tail -n 4328839 $lmdir/lm_tg_5k.arpa | gzip -c -f > $lmdir/lm_tg_5k.arpa.gz +rm $lmdir/lm_tg_5k.arpa + +prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit 1; +gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1; + + +if [ !
-f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then + rm -f wsj0-train-spkrinfo.txt + wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt \ + || ( echo "Getting wsj0-train-spkrinfo.txt from backup location" && \ + wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt ); +fi + +if [ ! -f wsj0-train-spkrinfo.txt ]; then + echo "Could not get the spkrinfo.txt file from LDC website (moved)?" + echo "This is possibly omitted from the training disks; couldn't find it." + echo "Everything else may have worked; we just may be missing gender info" + echo "which is only needed for VTLN-related diagnostics anyway." + exit 1 +fi +# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the +# LDC put it on the web. Perhaps it was accidentally omitted from the +# disks. + +cat $CORPUS/wsj0/doc/spkrinfo.txt \ + $CORPUS/wsj1/doc/evl_spok/spkrinfo.txt \ + $CORPUS/wsj1/doc/dev_spok/spkrinfo.txt \ + $CORPUS/wsj1/doc/train/spkrinfo.txt \ + ./wsj0-train-spkrinfo.txt | \ + perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \ + awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender + + +echo "Data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/cstr_wsj_extend_dict.sh b/egs/chime_wsj0/s5/local/cstr_wsj_extend_dict.sh new file mode 100755 index 000000000..b2a9faad7 --- /dev/null +++ b/egs/chime_wsj0/s5/local/cstr_wsj_extend_dict.sh @@ -0,0 +1,172 @@ +#!/bin/bash + +# This script builds a larger word-list and dictionary +# than used for the LMs supplied with the WSJ corpus. +# It uses a couple of strategies to fill-in words in +# the LM training data but not in CMUdict. One is +# to generate special prons for possible acronyms, that +# just consist of the constituent letters. The other +# is designed to handle derivatives of known words +# (e.g. deriving the pron of a plural from the pron of +# the base-word), but in a more general, learned-from-data +# way. +# It makes use of scripts in local/dict/ + +if [ $# -ne 1 ]; then + echo "Usage: local/cstr_wsj_train_lms.sh WSJ1_doc_dir" + exit 1 +fi + +export PATH=$PATH:`pwd`/local/dict/ +srcdir=$1 + +if [ ! -d $srcdir/lng_modl ]; then + echo "Expecting 'lng_modl' under WSJ doc directory '$srcdir'" + exit 1 +fi + +mkdir -p data/local/dict_larger +dir=data/local/dict_larger +cp data/local/dict/* data/local/dict_larger # Various files describing phones etc. + # are there; we just want to copy them as the phoneset is the same. +rm data/local/dict_larger/lexicon.txt # we don't want this. +mincount=2 # Minimum count of an OOV we will try to generate a pron for. + +[ ! -f data/local/dict/cmudict/cmudict.0.7a ] && echo "CMU dict not in expected place" && exit 1; + +# Remove comments from cmudict; print first field; remove +# words like FOO(1) which are alternate prons: our dict format won't +# include these markers. +grep -v ';;;' data/local/dict/cmudict/cmudict.0.7a | + perl -ane 's/^(\S+)\(\d+\)/$1/; print; ' | sort | uniq > $dir/dict.cmu + +cat $dir/dict.cmu | awk '{print $1}' | sort | uniq > $dir/wordlist.cmu + +echo "Getting training data [this should take at least a few seconds; if not, there's a problem]" + +# Convert to uppercase, remove XML-like markings. +# For words ending in "." that are not in CMUdict, we assume that these +# are periods that somehow remained in the data during data preparation, +# and we we replace the "." with "\n". Note: we found this by looking at +# oov.counts below (before adding this rule). 
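To make the two strategies described in the cstr_wsj_extend_dict.sh header concrete, a hypothetical illustration (the words, pronunciations and rule are invented for the example and are not taken from the generated files): an OOV that looks like an acronym, say ABC, would receive a letter-by-letter pronunciation assembled from the letter prons in CMUdict, e.g.

  ABC  EY1 B IY1 S IY1

while an OOV derivative such as REPORTS would get a candidate pronunciation by applying a suffix rule (append "S" to the spelling, append the S phone to the pron) learned from word pairs already in the dictionary, applied to a base entry like REPORT  R IH0 P AO1 R T to give

  REPORTS  R IH0 P AO1 R T S

The forward/backward (suffix/prefix) rule finding and scoring that produces such candidates is implemented by the local/dict/*.pl scripts added later in this patch.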
+ +touch $dir/cleaned.gz +if [ `du -m $dir/cleaned.gz | cut -f 1` -eq 73 ]; then + echo "Not getting cleaned data in $dir/cleaned.gz again [already exists]"; +else + gunzip -c $srcdir/lng_modl/lm_train/np_data/{87,88,89}/*.z \ + | awk '/^){ chop; $isword{$_} = 1; } + while() { + @A = split(" ", $_); + for ($n = 0; $n < @A; $n++) { + $a = $A[$n]; + if (! $isword{$a} && $a =~ s/^([^\.]+)\.$/$1/) { # nonwords that end in "." + # and have no other "." in them: treat as period. + print "$a"; + if ($n+1 < @A) { print "\n"; } + } else { print "$a "; } + } + print "\n"; + } + ' $dir/wordlist.cmu | gzip -c > $dir/cleaned.gz +fi + +# get unigram counts +echo "Getting unigram counts" +gunzip -c $dir/cleaned.gz | tr -s ' ' '\n' | \ + awk '{count[$1]++} END{for (w in count) { print count[w], w; }}' | sort -nr > $dir/unigrams + +cat $dir/unigrams | awk -v dict=$dir/dict.cmu \ + 'BEGIN{while(getline $dir/oov.counts + +echo "Most frequent unseen unigrams are: " +head $dir/oov.counts + +# Prune away singleton counts, and remove things with numbers in +# (which should have been normalized) and with no letters at all. + + +cat $dir/oov.counts | awk -v thresh=$mincount '{if ($1 >= thresh) { print $2; }}' \ + | awk '/[0-9]/{next;} /[A-Z]/{print;}' > $dir/oovlist + +# Automatic rule-finding... + +# First make some prons for possible acronyms. +# Note: we don't do this for things like U.K or U.N, +# or A.B. (which doesn't exist anyway), +# as we consider this normalization/spelling errors. + +cat $dir/oovlist | local/dict/get_acronym_prons.pl $dir/dict.cmu > $dir/dict.acronyms + +mkdir $dir/f $dir/b # forward, backward directions of rules... + # forward is normal suffix + # rules, backward is reversed (prefix rules). These + # dirs contain stuff we create while making the rule-based + # extensions to the dictionary. + +# Remove ; and , from words, if they are present; these +# might crash our scripts, as they are used as separators there. +filter_dict.pl $dir/dict.cmu > $dir/f/dict +cat $dir/oovlist | filter_dict.pl > $dir/f/oovs +reverse_dict.pl $dir/f/dict > $dir/b/dict +reverse_dict.pl $dir/f/oovs > $dir/b/oovs + +# The next stage takes a few minutes. +# Note: the forward stage takes longer, as English is +# mostly a suffix-based language, and there are more rules +# that it finds. +for d in $dir/f $dir/b; do + ( + cd $d + cat dict | get_rules.pl 2>get_rules.log >rules + get_rule_hierarchy.pl rules >hierarchy + awk '{print $1}' dict | get_candidate_prons.pl rules dict | \ + limit_candidate_prons.pl hierarchy | \ + score_prons.pl dict | \ + count_rules.pl >rule.counts + # the sort command below is just for convenience of reading. + score_rules.pl rules.with_scores + get_candidate_prons.pl rules.with_scores dict oovs | \ + limit_candidate_prons.pl hierarchy > oovs.candidates + ) & +done +wait + +# Merge the candidates. 
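Each line being merged here is a ";"-separated candidate tuple in the format produced by get_candidate_prons.pl (word;pron;base-word;base-pron;rule-name;destress;rule-score), for example (rule score hypothetical):

WASTED;W EY1 S T AH0 D;WASTING;W EY1 S T IH0 NG;STED,STING,D,NG;no;0.9

The candidates under $dir/b were generated on the reversed dictionary, so they are un-reversed by reverse_candidates.pl before merging.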
+reverse_candidates.pl $dir/b/oovs.candidates | cat - $dir/f/oovs.candidates | sort > $dir/oovs.candidates +select_candidate_prons.pl <$dir/oovs.candidates | awk -F';' '{printf("%s %s\n", $1, $2);}' \ + > $dir/dict.oovs + +cat $dir/dict.acronyms $dir/dict.oovs | sort | uniq > $dir/dict.oovs_merged + +awk '{print $1}' $dir/dict.oovs_merged | uniq > $dir/oovlist.handled +sort $dir/oovlist | diff - $dir/oovlist.handled | grep -v 'd' | sed 's:< ::' > $dir/oovlist.not_handled + + +# add_counts.pl attaches to original counts to the list of handled/not-handled OOVs +add_counts.pl $dir/oov.counts $dir/oovlist.handled | sort -nr > $dir/oovlist.handled.counts +add_counts.pl $dir/oov.counts $dir/oovlist.not_handled | sort -nr > $dir/oovlist.not_handled.counts + +echo "**Top OOVs we handled are:**"; +head $dir/oovlist.handled.counts +echo "**Top OOVs we didn't handle are as follows (note: they are mostly misspellings):**"; +head $dir/oovlist.not_handled.counts + + +echo "Count of OOVs we handled is `awk '{x+=$1} END{print x}' $dir/oovlist.handled.counts`" +echo "Count of OOVs we couldn't handle is `awk '{x+=$1} END{print x}' $dir/oovlist.not_handled.counts`" +echo "Count of OOVs we didn't handle due to low count is" \ + `awk -v thresh=$mincount '{if ($1 < thresh) x+=$1; } END{print x;}' $dir/oov.counts` +# The two files created above are for humans to look at, as diagnostics. + +cat < $dir/lexicon.txt +!SIL SIL + SPN + SPN + NSN +EOF + +echo "Created $dir/lexicon.txt" diff --git a/egs/chime_wsj0/s5/local/dict/add_counts.pl b/egs/chime_wsj0/s5/local/dict/add_counts.pl new file mode 100755 index 000000000..409277c72 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/add_counts.pl @@ -0,0 +1,31 @@ +#!/usr/bin/perl + + +# Add counts to an oovlist. +# Reads in counts as output by uniq -c, and +# an oovlist, and prints out the counts of the oovlist. + +(@ARGV == 1 || @ARGV == 2) || die "Usage: add_counts.pl count_file [oovlist]\n"; + +$counts = shift @ARGV; + +open(C, "<$counts") || die "Opening counts file $counts"; + +while() { + @A = split(" ", $_); + @A == 2 || die "Bad line in counts file: $_"; + ($count, $word) = @A; + $count =~ m:^\d+$: || die "Bad count $A[0]\n"; + $counts{$word} = $count; +} + +while(<>) { + chop; + $w = $_; + $w =~ m:\S+: || die "Bad word $w"; + defined $counts{$w} || die "Word $w not present in counts file"; + print "\t$counts{$w}\t$w\n"; +} + + + diff --git a/egs/chime_wsj0/s5/local/dict/count_rules.pl b/egs/chime_wsj0/s5/local/dict/count_rules.pl new file mode 100755 index 000000000..2805e98c3 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/count_rules.pl @@ -0,0 +1,44 @@ +#!/usr/bin/perl + +# This program takes the output of score_prons.pl and collates +# it for each (rule, destress) pair so that we get the +# counts of right/partial/wrong for each pair. + +# The input is a 7-tuple on each line, like: +# word;pron;base-word;base-pron;rule-name;de-stress;right|partial|wrong +# +# The output format is a 5-tuple like: +# +# rule;destress;right-count;partial-count;wrong-count +# + +if (@ARGV != 0 && @ARGV != 1) { + die "Usage: count_rules.pl < scored_candidate_prons > rule_counts"; +} + + +while(<>) { + chop; + $line = $_; + my ($word, $pron, $baseword, $basepron, $rulename, $destress, $score) = split(";", $line); + + my $key = $rulename . ";" . $destress; + + if (!defined $counts{$key}) { + $counts{$key} = [ 0, 0, 0 ]; # new anonymous array. 
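    # The three slots are the right/partial/wrong counts for this (rule, destress) key;
    # they are incremented below as the scored candidates are read.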
+ } + $ref = $counts{$key}; + if ($score eq "right") { + $$ref[0]++; + } elsif ($score eq "partial") { + $$ref[1]++; + } elsif ($score eq "wrong") { + $$ref[2]++; + } else { + die "Bad score $score\n"; + } +} + +while ( my ($key, $value) = each(%counts)) { + print $key . ";" . join(";", @$value) . "\n"; +} diff --git a/egs/chime_wsj0/s5/local/dict/filter_dict.pl b/egs/chime_wsj0/s5/local/dict/filter_dict.pl new file mode 100755 index 000000000..1210bb5e6 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/filter_dict.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl + + +# This program reads and writes either a dictionary or just a list +# of words, and it removes any words containing ";" or "," as these +# are used in these programs. It will warn about these. +# It will die if the pronunciations have these symbols in. +while(<>) { + chop; + @A = split(" ", $_); + $word = shift @A; + + if ($word =~ m:[;,]:) { + print STDERR "Omitting line $_ since it has one of the banned characters ; or ,\n" ; + } else { + $_ =~ m:[;,]: && die "Phones cannot have ; or , in them."; + print $_ . "\n"; + } +} diff --git a/egs/chime_wsj0/s5/local/dict/find_acronyms.pl b/egs/chime_wsj0/s5/local/dict/find_acronyms.pl new file mode 100755 index 000000000..ed4655afa --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/find_acronyms.pl @@ -0,0 +1,95 @@ +#!/usr/bin/perl + +# Reads a dictionary, and prints out a list of words that seem to be pronounced +# as acronyms (not including plurals of acronyms, just acronyms). Uses +# the prons of the individual letters (A., B. and so on) to judge this. +# Note: this is somewhat dependent on the convention used in CMUduct, that +# the individual letters are spelled this way (e.g. "A."). + +$max_length = 6; # Max length of words that might be + # acronyms. + +while(<>) { # Read the dict. + chop; + @A = split(" ", $_); + $word = shift @A; + $pron = join(" ", @A); + if ($word =~ m/^([A-Z])\.$/ ) { + chop $word; # Remove trailing "." to get just the letter + $letter = $1; + if (!defined $letter_prons{$letter} ) { + $letter_prons{$letter} = [ ]; # new anonymous array + } + $arrayref = $letter_prons{$letter}; + push @$arrayref, $pron; + } elsif( length($word) <= $max_length ) { + $pronof{$word . "," . $pron} = 1; + $isword{$word} = 1; + #if (!defined $prons{$word} ) { + # $prons{$word} = [ ]; + #} + # push @{$prons{$word}}, $pron; + } +} + +sub get_letter_prons; + +foreach $word (keys %isword) { + my @letter_prons = get_letter_prons($word); + foreach $pron (@letter_prons) { + if (defined $pronof{$word.",".$pron}) { + print "$word $pron\n"; + } + } +} + + +sub get_letter_prons { + @acronym = split("", shift); # The letters in the word. + my @prons = ( "" ); + + while (@acronym > 0) { + $l = shift @acronym; + $n = 1; # num-repeats of letter $l. + while (@acronym > 0 && $acronym[0] eq $l) { + $n++; + shift @acronym; + } + my $arrayref = $letter_prons{$l}; + my @prons_of_block = (); + if ($n == 1) { # Just one repeat. + foreach $lpron ( @$arrayref ) { + push @prons_of_block, $lpron; # typically (always?) just one pron of a letter. + } + } elsif ($n == 2) { # Two repeats. Can be "double a" or "a a" + foreach $lpron ( @$arrayref ) { + push @prons_of_block, "D AH1 B AH0 L " . $lpron; + push @prons_of_block, $lpron . $lpron; + } + } elsif ($n == 3) { # can be "triple a" or "a a a" + foreach $lpron ( @$arrayref ) { + push @prons_of_block, "T R IH1 P AH0 L " . $lpron; + push @prons_of_block, $lpron . $lpron . $lpron; + } + } elsif ($n >= 4) { # let's say it can only be that letter repeated $n times.. 
+ # not sure really. + foreach $lpron ( @$arrayref ) { + $nlpron = ""; + for ($m = 0; $m < $n; $m++) { $nlpron = $nlpron . $lpron; } + push @prons_of_block, $nlpron; + } + } + my @new_prons = (); + foreach $pron (@prons) { + foreach $pron_of_block(@prons_of_block) { + if ($pron eq "") { + push @new_prons, $pron_of_block; + } else { + push @new_prons, $pron . " " . $pron_of_block; + } + } + } + @prons = @new_prons; + } + return @prons; +} diff --git a/egs/chime_wsj0/s5/local/dict/get_acronym_prons.pl b/egs/chime_wsj0/s5/local/dict/get_acronym_prons.pl new file mode 100755 index 000000000..3f9936818 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/get_acronym_prons.pl @@ -0,0 +1,123 @@ +#!/usr/bin/perl + +# Reads a dictionary (for prons of letters), and an OOV list, +# and puts out candidate pronunciations of words in that list +# that could plausibly be acronyms. +# We judge that a word can plausibly be an acronym if it is +# a sequence of just letters (no non-letter characters such +# as "'"), or something like U.K., +# and the number of letters is four or less. +# +# If the text were not already pre-normalized, there would +# be other hints such as capitalization. + +# This program appends +# the prons of the individual letters (A., B. and so on) to work out +# the pron of the acronym. +# Note: this is somewhat dependent on the convention used in CMUduct, that +# the individual letters are spelled this way (e.g. "A."). [it seems +# to also have the separated versions. + +if (!(@ARGV == 1 || @ARGV == 2)) { + print "Usage: get_acronym_prons.pl dict [oovlist]"; +} + +$max_length = 4; # Max #letters in an acronym. (Longer + # acronyms tend to have "pseudo-pronunciations", e.g. think about UNICEF. + +$dict = shift @ARGV; +open(D, "<$dict") || die "Opening dictionary $dict"; + +while() { # Read the dict, to get the prons of the letters. + chop; + @A = split(" ", $_); + $word = shift @A; + $pron = join(" ", @A); + if ($word =~ m/^([A-Z])\.$/ ) { + chop $word; # Remove trailing "." to get just the letter + $letter = $1; + if (!defined $letter_prons{$letter} ) { + $letter_prons{$letter} = [ ]; # new anonymous array + } + $arrayref = $letter_prons{$letter}; + push @$arrayref, $pron; + } elsif( length($word) <= $max_length ) { + $pronof{$word . "," . $pron} = 1; + $isword{$word} = 1; + #if (!defined $prons{$word} ) { + # $prons{$word} = [ ]; + #} + # push @{$prons{$word}}, $pron; + } +} + +sub get_letter_prons; + +while(<>) { # Read OOVs. + # For now, just do the simple cases without "." in + # between... things with "." in the OOV list seem to + # be mostly errors. + chop; + $word = $_; + if ($word =~ m/^[A-Z]{1,5}$/) { + foreach $pron ( get_letter_prons($word) ) { # E.g. UNPO + print "$word $pron\n"; + } + } elsif ($word =~ m:^(\w\.){1,4}\w\.?$:) { # E.g. U.K. Make the final "." optional. + $letters = $word; + $letters =~ s:\.::g; + foreach $pron ( get_letter_prons($letters) ) { + print "$word $pron\n"; + } + } +} + +sub get_letter_prons { + @acronym = split("", shift); # The letters in the word. + my @prons = ( "" ); + + while (@acronym > 0) { + $l = shift @acronym; + $n = 1; # num-repeats of letter $l. + while (@acronym > 0 && $acronym[0] eq $l) { + $n++; + shift @acronym; + } + my $arrayref = $letter_prons{$l}; + my @prons_of_block = (); + if ($n == 1) { # Just one repeat. + foreach $lpron ( @$arrayref ) { + push @prons_of_block, $lpron; # typically (always?) just one pron of a letter. + } + } elsif ($n == 2) { # Two repeats. 
Can be "double a" or "a a" + foreach $lpron ( @$arrayref ) { + push @prons_of_block, "D AH1 B AH0 L " . $lpron; + push @prons_of_block, $lpron . " " . $lpron; + } + } elsif ($n == 3) { # can be "triple a" or "a a a" + foreach $lpron ( @$arrayref ) { + push @prons_of_block, "T R IH1 P AH0 L " . $lpron; + push @prons_of_block, "$lpron $lpron $lpron"; + } + } elsif ($n >= 4) { # let's say it can only be that letter repeated $n times.. + # not sure really. + foreach $lpron ( @$arrayref ) { + $nlpron = $lpron; + for ($m = 1; $m < $n; $m++) { $nlpron = $nlpron . " " . $lpron; } + push @prons_of_block, $nlpron; + } + } + my @new_prons = (); + foreach $pron (@prons) { + foreach $pron_of_block(@prons_of_block) { + if ($pron eq "") { + push @new_prons, $pron_of_block; + } else { + push @new_prons, $pron . " " . $pron_of_block; + } + } + } + @prons = @new_prons; + } + return @prons; +} diff --git a/egs/chime_wsj0/s5/local/dict/get_candidate_prons.pl b/egs/chime_wsj0/s5/local/dict/get_candidate_prons.pl new file mode 100755 index 000000000..b13efd203 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/get_candidate_prons.pl @@ -0,0 +1,187 @@ +#!/usr/bin/perl + +# This script takes three command-line arguments (typically files, or "-"): +# the suffix rules (as output by get_rules.pl), the rule-hierarchy +# (from get_rule_hierarchy.pl), and the words that we want prons to be +# generated for (one per line). + +# The output consists of candidate generated pronunciations for those words, +# together with information about how we generated those pronunciations. +# This does not do pruning of the candidates using the restriction +# "you can't use a more general rule when a more specific one is applicable". +# That is done by limit_candidate_prons.pl. + +# Each line of the output consists of a 4-tuple, separated by ";", of the +# form: +# word;pron;base-word;base-pron;rule-name;destress[;rule-score] +# [the last field is only present if you supplied rules with score information]. +# where: +# - "word" is the input word that we queried for, e.g. WASTED +# - "pron" is the generated pronunciation, e.g. "W EY1 S T AH0 D" +# - rule-name is a 4-tuple separated by commas that describes the rule, e.g. +# "STED,STING,D,NG", +# - "base-word" is the base-word we're getting the pron from, +# e.g. WASTING +# - "base-pron" is the pron of the base-word, e.g. "W EY1 S T IH0 NG" +# - "destress" is either "yes" or "no" and corresponds to whether we destressed the +# base-word or not [de-stressing just corresponds to just taking any 2's down to 1's, +# although we may extend this in future]... +# - "rule-score" is a numeric score of the rule (this field is only present +# if there was score information in your rules. + + +(@ARGV == 2 || @ARGV == 3) || die "Usage: get_candidate_prons.pl rules base-dict [ words ]"; + +$min_prefix_len = 3; # this should probably match with get_rules.pl + +$rules = shift @ARGV; # Note: rules may be with destress "yes/no" indicators or without... + # if without, it's treated as if both "yes" and "no" are present. +$dict = shift @ARGV; + +open(R, "<$rules") || die "Opening rules file: $rules"; + +sub process_word; + +while() { + chop $_; + my ($rule, $destress, $rule_score) = split(";", $_); # We may have "destress" markings (yes|no), + # and scores, or we may have just rule, in which case + # $destress and $rule_score will be undefined. + + my @R = split(",", $rule, 4); # "my" means new instance of @R each + # time we do this loop -> important because we'll be creating + # a reference to @R below. 
+ # Note: the last arg to SPLIT tells it how many fields max to get. + # This stops it from omitting empty trailing fields. + @R == 4 || die "Bad rule $_"; + $suffix = $R[0]; # Suffix of word we want pron for. + if (!defined $isrule{$rule}) { + $isrule{$rule} = 1; # make sure we do this only once for each rule + # (don't repeate for different stresses). + if (!defined $suffix2rule{$suffix}) { + # The syntax [ $x, $y, ... ] means a reference to a newly created array + # containing $x, $y, etc. \@R creates an array reference to R. + # so suffix2rule is a hash from suffix to ref to array of refs to + # 4-dimensional arrays. + $suffix2rule{$suffix} = [ \@R ]; + } else { + # Below, the syntax @{$suffix2rule{$suffix}} dereferences the array + # reference inside the hash; \@R pushes onto that array a new array + # reference pointing to @R. + push @{$suffix2rule{$suffix}}, \@R; + } + } + if (!defined $rule_score) { $rule_score = -1; } # -1 means we don't have the score info. + + # Now store information on which destress markings (yes|no) this rule + # is valid for, and the associated scores (if supplied) + # If just the rule is given (i.e. no destress marking specified), + # assume valid for both. + if (!defined $destress) { # treat as if both "yes" and "no" are valid. + $rule_and_destress_to_rule_score{$rule.";yes"} = $rule_score; + $rule_and_destress_to_rule_score{$rule.";no"} = $rule_score; + } else { + $rule_and_destress_to_rule_score{$rule.";".$destress} = $rule_score; + } + +} + +open(D, "<$dict") || die "Opening base dictionary: $dict"; +while() { + @A = split(" ", $_); + $word = shift @A; + $pron = join(" ", @A); + if (!defined $word2prons{$word}) { + $word2prons{$word} = [ $pron ]; # Ref to new anonymous array containing just "pron". + } else { + push @{$word2prons{$word}}, $pron; # Push $pron onto array referred to (@$ref derefs array). + } +} +foreach $word (%word2prons) { + # Set up the hash "prefixcount", which says how many times a char-sequence + # is a prefix (not necessarily a strict prefix) of a word in the dict. + $len = length($word); + for ($l = 0; $l <= $len; $l++) { + $prefixcount{substr($word, 0, $l)}++; + } +} + +open(R, "<$rules") || die "Opening rules file: $rules"; + + +while(<>) { + chop; + m/^\S+$/ || die; + process_word($_); +} + +sub process_word { + my $word = shift @_; + $len = length($word); + # $owncount is used in evaluating whether a particular prefix is a prefix + # of some other word in the dict... if a word itself may be in the dict + # (usually because we're running this on the dict itself), we need to + # correct for this. + if (defined $word2prons{$word}) { $owncount = 1; } else { $owncount = 0; } + + for ($prefix_len = $min_prefix_len; $prefix_len <= $len; $prefix_len++) { + my $prefix = substr($word, 0, $prefix_len); + my $suffix = substr($word, $prefix_len); + if ($prefixcount{$prefix} - $owncount == 0) { + # This prefix is not a prefix of any word in the dict, so no point + # checking the rules below-- none of them can match. + next; + } + $rules_array_ref = $suffix2rule{$suffix}; + if (defined $rules_array_ref) { + foreach $R (@$rules_array_ref) { # @$rules_array_ref dereferences the array. + # $R is a refernce to a 4-dimensional array, whose elements we access with + # $$R[0], etc. + my $base_suffix = $$R[1]; + my $base_word = $prefix . $base_suffix; + my $base_prons_ref = $word2prons{$base_word}; + if (defined $base_prons_ref) { + my $psuffix = $$R[2]; + my $base_psuffix = $$R[3]; + if ($base_psuffix ne "") { + $base_psuffix = " " . 
$base_psuffix; + # Include " ", the space between phones, to prevent + # matching partial phones below. + } + my $base_psuffix_len = length($base_psuffix); + foreach $base_pron (@$base_prons_ref) { # @$base_prons_ref derefs + # that reference to an array. + my $base_pron_prefix_len = length($base_pron) - $base_psuffix_len; + # Note: these lengths are in characters, not phones. + if ($base_pron_prefix_len >= 0 && + substr($base_pron, $base_pron_prefix_len) eq $base_psuffix) { + # The suffix of the base_pron is what it should be. + my $pron_prefix = substr($base_pron, 0, $base_pron_prefix_len); + my $rule = join(",", @$R); # we'll output this.. + my $len = @R; + for ($destress = 0; $destress <= 1; $destress++) { # Two versions + # of each rule: with destressing and without. + # pron is the generated pron. + if ($destress) { $pron_prefix =~ s/2/1/g; } + my $pron; + if ($psuffix ne "") { $pron = $pron_prefix . " " . $psuffix; } + else { $pron = $pron_prefix; } + # Now print out the info about the generated pron. + my $destress_mark = ($destress ? "yes" : "no"); + my $rule_score = $rule_and_destress_to_rule_score{$rule.";".$destress_mark}; + if (defined $rule_score) { # Means that the (rule,destress) combination was + # seen [note: this if-statement may be pointless, as currently we don't + # do any pruning of rules]. + my @output = ($word, $pron, $base_word, $base_pron, $rule, $destress_mark); + if ($rule_score != -1) { push @output, $rule_score; } # If scores were supplied, + # we also output the score info. + print join(";", @output) . "\n"; + } + } + } + } + } + } + } + } +} diff --git a/egs/chime_wsj0/s5/local/dict/get_rule_hierarchy.pl b/egs/chime_wsj0/s5/local/dict/get_rule_hierarchy.pl new file mode 100755 index 000000000..35805b46b --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/get_rule_hierarchy.pl @@ -0,0 +1,73 @@ +#!/usr/bin/perl + +#This reads in rules, of the form put out by get_rules.pl, e.g.: +# ERT,,ER0 T, +# MENT,ING,M AH0 N T,IH0 NG +# S,TON,Z,T AH0 N +# ,ER,IH0 NG,IH0 NG ER0 +# ,'S,M AH0 N,M AH0 N Z +#TIONS,TIVE,SH AH0 N Z,T IH0 V + +# and it works out a hierarchy that says which rules are sub-cases +# of which rules: it outputs on each line a pair separated by ";", where +# each member of the pair is a rule, first one is the specialization, the +# second one being more general. +# E.g.: +# RED,RE,D,/ED,E,D, +# RED,RE,D,/D,,D, +# GING,GE,IH0 NG,/ING,I,IH0 NG, +# TOR,TING,T ER0,T IH0 NG/OR,OR,T ER0,T ER0 +# ERED,ER,D,/RED,R,D, +# ERED,ER,D,/ED,,D, + + + + +while(<>) { + chop; + $rule = $_; + $isrule{$rule} = 1; + push @rules, $rule; +} + +foreach my $rule (@rules) { + # Truncate the letters and phones in the rule, while we + # can, to get more general rules; if the more general rule + # exists, put out the pair. 
+ @A = split(",", $rule); + @suffixa = split("", $A[0]); + @suffixb = split("", $A[1]); + @psuffixa = split(" ", $A[2]); + @psuffixb = split(" ", $A[3]); + for ($common_suffix_len = 0; $common_suffix_len < @suffixa && $common_suffix_len < @suffixb;) { + if ($suffixa[$common_suffix_len] eq $suffixb[$common_suffix_len]) { + $common_suffix_len++; + } else { + last; + } + } + for ($common_psuffix_len = 0; $common_psuffix_len < @psuffixa && $common_psuffix_len < @psuffixb;) { + if ($psuffixa[$common_psuffix_len] eq $psuffixb[$common_psuffix_len]) { + $common_psuffix_len++; + } else { + last; + } + } + # Get all combinations of pairs of integers <= (common_suffix_len, common_psuffix_len), + # except (0,0), and print out this rule together with the corresponding rule (if it exists). + for ($m = 0; $m <= $common_suffix_len; $m++) { + $sa = join("", @suffixa[$m...$#suffixa]); # @x[a..b] is array slice notation. + $sb = join("", @suffixb[$m...$#suffixb]); + for ($n = 0; $n <= $common_psuffix_len; $n++) { + if (!($m == 0 && $n == 0)) { + $psa = join(" ", @psuffixa[$n...$#psuffixa]); + $psb = join(" ", @psuffixb[$n...$#psuffixb]); + $more_general_rule = join(",", ($sa, $sb, $psa, $psb)); + if (defined $isrule{$more_general_rule}) { + print $rule . ";" . $more_general_rule . "\n"; + } + } + } + } +} + diff --git a/egs/chime_wsj0/s5/local/dict/get_rules.pl b/egs/chime_wsj0/s5/local/dict/get_rules.pl new file mode 100755 index 000000000..a5b57b088 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/get_rules.pl @@ -0,0 +1,204 @@ +#!/usr/bin/perl + +# This program creates suggested suffix rules from a dictionary. +# It outputs quadruples of the form: +# suffix,base-suffix,psuffix,base-psuffix +# where "suffix" is the suffix of the letters of a word, "base-suffix" is +# the suffix of the letters of the base-word, "psuffix" is the suffix of the +# pronunciation of the word (a space-separated list of phonemes), and +# "base-psuffix" is the suffix of the pronunciation of the baseword. +# As far as this program is concerned, there is no distinction between +# "word" and "base-word". To simplify things slightly, what it does +# is return all tuples (a,b,c,d) [with a != b] such that there are +# at least $min_suffix_count instances in the dictionary of +# a (word-prefix, pron-prefix) pair where there exists (word,pron) +# pairs of the form +# ( word-prefix . a, pron-prefix . c) +# and +# ( word-prefix . b, pron-prefix . d) +# For example if (a,b,c,d) equals (USLY,US,S L IY0,S) +# then this quadruple will be output as long as there at least +# e.g. 30 instances of prefixes like (FAM, F EY1 M AH0) +# where there exist (word, pron) pairs like: +# FAMOUS, F EY1 M AH0 S +# FAMOUSLY F EY1 M AH0 S L IY0 +# +# There are some modifications to the picture above, for efficiency. +# If $disallow_empty_suffix != 0, this program will not output 4-tuples where +# the first element (the own-word suffix) is empty, as this would cause +# efficiency problems in get_candidate_prons.pl. If +# $ignore_prefix_stress != 0, this program will ignore stress markings +# while evaluating whether prefixes are the same. +# The minimum count for a quadruple to be output is $min_suffix_count +# (e.g. 30). +# +# The function of this program is not to evaluate the accuracy of these rules; +# it is mostly a pruning step, where we suggest rules that have large enough +# counts to be suitable for our later procedure where we evaluate their +# accuracy in predicting prons. 
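A sketch of the prefix/suffix enumeration the counting below relies on, using the FAMOUS/FAMOUSLY example from the header and $min_prefix_len = 3:

my $min_prefix_len = 3;
my $word = "FAMOUSLY";
for (my $x = $min_prefix_len; $x <= length($word); $x++) {
  printf "%-8s %s\n", substr($word, 0, $x), substr($word, $x);
}
# FAM      OUSLY
# FAMO     USLY
#   ...
# FAMOUSLY          (empty suffix; such splits feed the suffix and suffix-pair counts)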
+ +$disallow_empty_suffix = 1; # Disallow rules where the suffix of the "own-word" is + # empty. This is for efficiency in later stages (e.g. get_candidate_prons.pl). +$min_prefix_len = 3; # this must match with get_candidate_prons.pl +$ignore_prefix_stress = 1; # or 0 to take account of stress in prefix. +$min_suffix_count = 20; + +# Takes in dictionary. + +print STDERR "Reading dict\n"; +while(<>) { + @A = split(" ", $_); + my $word = shift @A; + my $pron = join(" ", @A); + if (!defined $prons{$word}) { + $prons{$word} = $pron; + push @words, $word; + } else { + $prons{$word} = $prons{$word} . ";" . $pron; + } +} + +# Get common suffixes (e.g., count >100). Include empty suffix. + +print STDERR "Getting common suffix counts.\n"; +{ + foreach $word (@words) { + $len = length($word); + for ($x = $min_prefix_len; $x <= $len; $x++) { + $suffix_count{substr($word, $x)}++; + } + } + + foreach $suffix (keys %suffix_count) { + if ($suffix_count{$suffix} >= $min_suffix_count) { + $newsuffix_count{$suffix} = $suffix_count{$suffix}; + } + } + %suffix_count = %newsuffix_count; + undef %newsuffix_count; + + foreach $suffix ( sort { $suffix_count{$b} <=> $suffix_count{$a} } keys %suffix_count ) { + print STDERR "$suffix_count{$suffix} $suffix\n"; + } +} + +print STDERR "Getting common suffix pairs.\n"; + +{ + print STDERR " Getting map from prefix -> suffix-set.\n"; + + # Create map from prefix -> suffix-set. + foreach $word (@words) { + $len = length($word); + for ($x = $min_prefix_len; $x <= $len; $x++) { + $prefix = substr($word, 0, $x); + $suffix = substr($word, $x); + if (defined $suffix_count{$suffix}) { # Suffix is common... + if (!defined $suffixes_of{$prefix}) { + $suffixes_of{$prefix} = [ $suffix ]; # Create a reference to a new array with + # one element. + } else { + push @{$suffixes_of{$prefix}}, $suffix; # Push $suffix onto array that the + # hash member is a reference . + } + } + } + } + my %suffix_set_count; + print STDERR " Getting map from suffix-set -> count.\n"; + while ( my ($key, $value) = each(%suffixes_of) ) { + my @suffixes = sort ( @$value ); + $suffix_set_count{join(";", @suffixes)}++; + } + print STDERR " Getting counts for suffix pairs.\n"; + while ( my ($suffix_set, $count) = each (%suffix_set_count) ) { + my @suffixes = split(";", $suffix_set); + # Consider pairs to be ordered. This is more convenient + # later on. + foreach $suffix_a (@suffixes) { + foreach $suffix_b (@suffixes) { + if ($suffix_a ne $suffix_b) { + $suffix_pair = $suffix_a . "," . $suffix_b; + $suffix_pair_count{$suffix_pair} += $count; + } + } + } + } + + # To save memory, only keep pairs above threshold in the hash. + while ( my ($suffix_pair, $count) = each (%suffix_pair_count) ) { + if ($count >= $min_suffix_count) { + $new_hash{$suffix_pair} = $count; + } + } + %suffix_pair_count = %new_hash; + undef %new_hash; + + # Print out the suffix pairs so the user can see. + foreach $suffix_pair ( + sort { $suffix_pair_count{$b} <=> $suffix_pair_count{$a} } keys %suffix_pair_count ) { + print STDERR "$suffix_pair_count{$suffix_pair} $suffix_pair\n"; + } +} + +print STDERR "Getting common suffix/suffix/psuffix/psuffix quadruples\n"; + +{ + while ( my ($prefix, $suffixes_ref) = each(%suffixes_of) ) { + # Note: suffixes_ref is a reference to an array. We dereference with + # @$suffixes_ref. + # Consider each pair of suffixes (in each order). + foreach my $suffix_a ( @$suffixes_ref ) { + foreach my $suffix_b ( @$suffixes_ref ) { + # could just used "defined" in next line, but this is for clarity. 
+ $suffix_pair = $suffix_a.",".$suffix_b; + if ( $suffix_pair_count{$suffix_pair} >= $min_suffix_count ) { + foreach $pron_a_str (split(";", $prons{$prefix.$suffix_a})) { + @pron_a = split(" ", $pron_a_str); + foreach $pron_b_str (split(";", $prons{$prefix.$suffix_b})) { + @pron_b = split(" ", $pron_b_str); + $len_a = @pron_a; # evaluating array as scalar automatically gives length. + $len_b = @pron_b; + for (my $pos = 0; $pos <= $len_a && $pos <= $len_b; $pos++) { + # $pos is starting-pos of psuffix-pair. + $psuffix_a = join(" ", @pron_a[$pos...$#pron_a]); + $psuffix_b = join(" ", @pron_b[$pos...$#pron_b]); + $quadruple = $suffix_pair . "," . $psuffix_a . "," . $psuffix_b; + $quadruple_count{$quadruple}++; + + my $pron_a_pos = $pron_a[$pos], $pron_b_pos = $pron_b[$pos]; + if ($ignore_prefix_stress) { + $pron_a_pos =~ s/\d//; # e.g convert IH0 to IH. Only affects + $pron_b_pos =~ s/\d//; # whether we exit the loop below. + } + if ($pron_a_pos ne $pron_b_pos) { + # This is important: we don't consider a pron suffix-pair to be + # valid unless the pron prefix is the same. + last; + } + } + } + } + } + } + } + } + # To save memory, only keep pairs above threshold in the hash. + while ( my ($quadruple, $count) = each (%quadruple_count) ) { + if ($count >= $min_suffix_count) { + $new_hash{$quadruple} = $count; + } + } + %quadruple_count = %new_hash; + undef %new_hash; + + # Print out the quadruples for diagnostics. + foreach $quadruple ( + sort { $quadruple_count{$b} <=> $quadruple_count{$a} } keys %quadruple_count ) { + print STDERR "$quadruple_count{$quadruple} $quadruple\n"; + } +} +# Now print out the quadruples; these are the output of this program. +foreach $quadruple (keys %quadruple_count) { + print $quadruple."\n"; +} diff --git a/egs/chime_wsj0/s5/local/dict/limit_candidate_prons.pl b/egs/chime_wsj0/s5/local/dict/limit_candidate_prons.pl new file mode 100755 index 000000000..ceff9fbad --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/limit_candidate_prons.pl @@ -0,0 +1,103 @@ +#!/usr/bin/perl + +# This program enforces the rule that +# if a "more specific" rule applies, we cannot use the more general rule. +# It takes in tuples generated by get_candidate_prons (one per line, separated +# by ";"), of the form: +# word;pron;base-word;base-pron;rule-name;de-stress[;rule-score] +# [note: we mean that the last element, the numeric score of the rule, is optional] +# and it outputs a (generally shorter) list +# of the same form. + + +# For each word: + # For each (base-word,base-pron): + # Eliminate "more-general" rules as follows: + # For each pair of rules applying to this (base-word, base-pron): + # If pair is in more-general hash, disallow more general one. + # Let the output be: for each (base-word, base-pron, rule): + # for (destress-prefix) in [yes, no], do: + # print out the word input, the rule-name, [destressed:yes|no], and the new pron. + + +if (@ARGV != 1 && @ARGV != 2) { + die "Usage: limit_candidate_prons.pl rule_hierarchy [candidate_prons] > limited_candidate_prons"; +} + +$hierarchy = shift @ARGV; +open(H, "<$hierarchy") || die "Opening rule hierarchy $hierarchy"; + +while() { + chop; + m:.+;.+: || die "Bad rule-hierarchy line $_"; + $hierarchy{$_} = 1; # Format is: if $rule1 is the string form of the more specific rule + # and $rule21 is that string form of the more general rule, then $hierarchy{$rule1.";".$rule2} + # is defined, else undefined. 
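  # e.g. after reading the line "RED,RE,D,;ED,E,D," (one of the example pairs listed in
  # get_rule_hierarchy.pl), $hierarchy{"RED,RE,D,;ED,E,D,"} is set, recording that
  # RED,RE,D, is a more specific version of ED,E,D, .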
+} + + +sub process_word; + +undef $cur_word; +@cur_lines = (); + +while(<>) { + # input, output is: + # word;pron;base-word;base-pron;rule-name;destress;score + chop; + m:^([^;]+);: || die "Unexpected input: $_"; + $word = $1; + if (!defined $cur_word || $word eq $cur_word) { + if (!defined $cur_word) { $cur_word = $word; } + push @cur_lines, $_; + } else { + process_word(@cur_lines); # Process a series of suggested prons + # for a particular word. + $cur_word = $word; + @cur_lines = ( $_ ); + } +} +process_word(@cur_lines); + +sub process_word { + my %pair2rule_list; # hash from $baseword.";".$baseword to ref + # to array of [ line1, line2, ... ]. + my @cur_lines = @_; + foreach my $line (@cur_lines) { + my ($word, $pron, $baseword, $basepron, $rulename, $destress, $rule_score) = split(";", $line); + my $key = $baseword.";".$basepron; + if (defined $pair2rule_list{$key}) { + push @{$pair2rule_list{$key}}, $line; # @{...} derefs the array pointed to + # by the array ref inside {}. + } else { + $pair2rule_list{$key} = [ $line ]; # [ $x ] is new anonymous array with 1 elem ($x) + } + } + while ( my ($key, $value) = each(%pair2rule_list) ) { + my @lines = @$value; # array of lines that are for this (baseword,basepron). + my @stress, @rules; # Arrays of stress markers and rule names, indexed by + # same index that indexes @lines. + for (my $n = 0; $n < @lines; $n++) { + my $line = $lines[$n]; + my ($word, $pron, $baseword, $basepron, $rulename, $destress, $rule_score) = split(";", $line); + $stress[$n] = $destress; + $rules[$n] = $rulename; + } + for (my $m = 0; $m < @lines; $m++) { + my $ok = 1; # if stays 1, this line is OK. + for (my $n = 0; $n < @lines; $n++) { + if ($m != $n && $stress[$m] eq $stress[$n]) { + if (defined $hierarchy{$rules[$n].";".$rules[$m]}) { + # Note: this "hierarchy" variable is defined if $rules[$n] is a more + # specific instances of $rules[$m], thus invalidating $rules[$m]. + $ok = 0; + last; # no point iterating further. + } + } + } + if ($ok != 0) { + print $lines[$m] . "\n"; + } + } + } +} diff --git a/egs/chime_wsj0/s5/local/dict/reverse_candidates.pl b/egs/chime_wsj0/s5/local/dict/reverse_candidates.pl new file mode 100755 index 000000000..d5c5effc2 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/reverse_candidates.pl @@ -0,0 +1,50 @@ +#!/usr/bin/perl + +# This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl, +# which is 7-tuples, one per line, of the form: + +# word;pron;base-word;base-pron;rule-name;de-stress;rule-score +# (where rule-score is somtimes listed as optional, but this +# program does expect it, since we don't anticipate it being used +# without it). +# This program assumes that all the words and prons and rules have +# come from a reversed dictionary (reverse_dict.pl) where the order +# of the characters in the words, and the phones in the prons, have +# been reversed, and it un-reverses them. That it, the characters +# in "word" and "base-word", and the phones in "pron" and "base-pron" +# are reversed; and the rule ("rule-name") is parsed as a 4-tuple, +# like: +# suffix,base-suffix,psuffix,base-psuffix +# so this program reverses the characters in "suffix" and "base-suffix" +# and the phones (separated by spaces) in "psuffix" and "base-psuffix". 
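For example, a candidate generated from the reversed dictionary (hypothetical rule score; word and prons borrowed from the example in get_candidate_prons.pl):

DETSAW;D AH0 T S EY1 W;GNITSAW;NG IH0 T S EY1 W;DETS,GNITS,D,NG;no;0.9

is printed by this script as:

WASTED;W EY1 S T AH0 D;WASTING;W EY1 S T IH0 NG;STED,STING,D,NG;no;0.9

Note that prons are reversed as sequences of whole phone symbols, so "NG" stays "NG".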
+ +sub reverse_str { + $str = shift; + return join("", reverse(split("", $str))); +} +sub reverse_pron { + $str = shift; + return join(" ", reverse(split(" ", $str))); +} + +while(<>){ + chop; + @A = split(";", $_); + @A == 7 || die "Bad input line $_: found $len fields, expected 7."; + + ($word,$pron,$baseword,$basepron,$rule,$destress,$score) = @A; + $word = reverse_str($word); + $pron = reverse_pron($pron); + $baseword = reverse_str($baseword); + $basepron = reverse_pron($basepron); + @R = split(",", $rule, 4); + @R == 4 || die "Bad rule $rule"; + + $R[0] = reverse_str($R[0]); # suffix. + $R[1] = reverse_str($R[1]); # base-suffix. + $R[2] = reverse_pron($R[2]); # pron. + $R[3] = reverse_pron($R[3]); # base-pron. + $rule = join(",", @R); + @A = ($word,$pron,$baseword,$basepron,$rule,$destress,$score); + print join(";", @A) . "\n"; +} diff --git a/egs/chime_wsj0/s5/local/dict/reverse_dict.pl b/egs/chime_wsj0/s5/local/dict/reverse_dict.pl new file mode 100755 index 000000000..75681711b --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/reverse_dict.pl @@ -0,0 +1,14 @@ +#!/usr/bin/perl + +# Used in conjunction with get_rules.pl +# example input line: XANTHE Z AE1 N DH +# example output line: EHTNAX DH N AE1 Z + +while(<>){ + @A = split(" ", $_); + $word = shift @A; + $word = join("", reverse(split("", $word))); # Reverse letters of word. + @A = reverse(@A); # Reverse phones in pron. + unshift @A, $word; + print join(" ", @A) . "\n"; +} diff --git a/egs/chime_wsj0/s5/local/dict/score_prons.pl b/egs/chime_wsj0/s5/local/dict/score_prons.pl new file mode 100755 index 000000000..fd5a004d8 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/score_prons.pl @@ -0,0 +1,50 @@ +#!/usr/bin/perl + +# This program takes candidate prons from "get_candidate_prons.pl" or +# "limit_candidate_prons.pl", and a reference dictionary covering those words, +# and outputs the same format but with scoring information added (so we go from +# 6 to 7 fields). The scoring information says, for each generated pron, +# whether we have a match, a partial match, or no match, to some word in the +# dictionary. A partial match means it's correct except for stress. + +# The input is a 6-tuple on each line, like: +# word;pron;base-word;base-pron;rule-name;de-stress +# +# The output is the same except with one more field, the score, +# which may be "right", "wrong", "partial". + +if (@ARGV != 1 && @ARGV != 2) { + die "Usage: score_prons.pl reference_dict [candidate_prons] > scored_candidate_prons"; +} + +$dict = shift @ARGV; +open(D, "<$dict") || die "Opening dictionary $dict"; + +while() { # Set up some hashes that tell us when + # a (word,pron) pair is correct (and the same for + # prons with stress information removed). 
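  # e.g. the dictionary line "WASTED  W EY1 S T AH0 D" produces the keys
  # "WASTED;W EY1 S T AH0 D" and, with stress digits stripped, "WASTED;W EY S T AH D".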
+ chop; + @A = split(" ", $_); + $word = shift @A; + $pron = join(" ", @A); + $pron_nostress = $pron; + $pron_nostress =~ s:\d::g; + $word_and_pron{$word.";".$pron} = 1; + $word_and_pron_nostress{$word.";".$pron_nostress} = 1; +} + +while(<>) { + chop; + $line = $_; + my ($word, $pron, $baseword, $basepron, $rulename, $destress) = split(";", $line); + $pron_nostress = $pron; + $pron_nostress =~ s:\d::g; + if (defined $word_and_pron{$word.";".$pron}) { + $score = "right"; + } elsif (defined $word_and_pron_nostress{$word.";".$pron_nostress}) { + $score = "partial"; + } else { + $score = "wrong"; + } + print $line.";".$score."\n"; +} diff --git a/egs/chime_wsj0/s5/local/dict/score_rules.pl b/egs/chime_wsj0/s5/local/dict/score_rules.pl new file mode 100755 index 000000000..8d165f7f1 --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/score_rules.pl @@ -0,0 +1,52 @@ +#!/usr/bin/perl + +# This program takes the output of count_rules.pl, which is tuples +# of the form +# +# rule;destress;right-count;partial-count;wrong-count +# +# and outputs lines of the form +# +# rule;de-stress;score +# +# where the score, between 0 and 1 (1 better), is +# equal to: +# +# It forms a score between 0 and 1, of the form: +# ((#correct) + $partial_score * (#partial)) / (#correct + #partial + #wrong + $ballast) +# +# where $partial_score (e.g. 0.8) is the score we assign to a "partial" match, +# and $ballast is a small number, e.g. 1, that is treated like "extra" wrong scores, to penalize +# rules with few observations. +# +# It outputs all rules that at are at least the + +$ballast = 1; +$partial_score = 0.8; +$destress_penalty = 1.0e-05; # Give destressed rules a small +# penalty vs. their no-destress counterparts, so if we +# have to choose arbitrarily we won't destress (seems safer)> + +for ($n = 1; $n <= 4; $n++) { + if ($ARGV[0] eq "--ballast") { + shift @ARGV; + $ballast = shift @ARGV; + } + if ($ARGV[0] eq "--partial-score") { + shift @ARGV; + $partial_score = shift @ARGV; + ($partial_score >= 0.0 && $partial_score <= 1.0) || die "Invalid partial_score: $partial_score"; + } +} + +(@ARGV == 0 || @ARGV == 1) || die "Usage: score_rules.pl [--ballast ballast-count] [--partial-score partial-score] [input from count_rules.pl]"; + +while(<>) { + @A = split(";", $_); + @A == 5 || die "Bad input line; $_"; + ($rule,$destress,$right_count,$partial_count,$wrong_count) = @A; + $rule_score = ($right_count + $partial_score*$partial_count) / + ($right_count+$partial_count+$wrong_count+$ballast); + if ($destress eq "yes") { $rule_score -= $destress_penalty; } + print join(";", $rule, $destress, sprintf("%.5f", $rule_score)) . "\n"; +} diff --git a/egs/chime_wsj0/s5/local/dict/select_candidate_prons.pl b/egs/chime_wsj0/s5/local/dict/select_candidate_prons.pl new file mode 100755 index 000000000..d0018c98a --- /dev/null +++ b/egs/chime_wsj0/s5/local/dict/select_candidate_prons.pl @@ -0,0 +1,84 @@ +#!/usr/bin/perl + +# This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl +# or reverse_candidates.pl, which is 7-tuples, one per line, of the form: +# +# word;pron;base-word;base-pron;rule-name;de-stress;rule-score +# +# and selects the most likely prons for the words based on rule +# score. It outputs in the same format as the input (thus, it is +# similar to limit_candidates.pl in its input and output format, +# except it has a different way of selecting the prons to put out). 
+# +# This script will select the $max_prons best pronunciations for +# each candidate word, subject to the constraint that no pron should +# have a rule score worse than $min_rule_score. +# It first merges the candidates by, if there are multiple candidates +# generating the same pron, selecting the candidate that had the +# best associated score. It then sorts the prons on score and +# selects the n best prons (but doesn't print out candidates with +# score beneath the threshold). + + +$max_prons = 4; +$min_rule_score = 0.35; + + +for ($n = 1; $n <= 3; $n++) { + if ($ARGV[0] eq "--max-prons") { + shift @ARGV; + $max_prons = shift @ARGV; + } + if ($ARGV[0] eq "--min-rule-score") { + shift @ARGV; + $min_rule_score = shift @ARGV; + } +} + +if (@ARGV != 0 && @ARGV != 1) { + die "Usage: select_candidates_prons.pl [candidate_prons] > selected_candidate_prons"; +} + +sub process_word; + +undef $cur_word; +@cur_lines = (); + +while(<>) { + # input, output is: + # word;pron;base-word;base-pron;rule-name;destress;score + chop; + m:^([^;]+);: || die "Unexpected input: $_"; + $word = $1; + if (!defined $cur_word || $word eq $cur_word) { + if (!defined $cur_word) { $cur_word = $word; } + push @cur_lines, $_; + } else { + process_word(@cur_lines); # Process a series of suggested prons + # for a particular word. + $cur_word = $word; + @cur_lines = ( $_ ); + } +} +process_word(@cur_lines); + + +sub process_word { + my %pron2rule_score; # hash from generated pron to rule score for that pron. + my %pron2line; # hash from generated pron to best line for that pron. + my @cur_lines = @_; + foreach my $line (@cur_lines) { + my ($word, $pron, $baseword, $basepron, $rulename, $destress, $rule_score) = split(";", $line); + if (!defined $pron2rule_score{$pron} || + $rule_score > $pron2rule_score{$pron}) { + $pron2rule_score{$pron} = $rule_score; + $pron2line{$pron} = $line; + } + } + my @prons = sort { $pron2rule_score{$b} <=> $pron2rule_score{$a} } keys %pron2rule_score; + for (my $n = 0; $n < @prons && $n < $max_prons && + $pron2rule_score{$prons[$n]} >= $min_rule_score; $n++) { + print $pron2line{$prons[$n]} . "\n"; + } +} + diff --git a/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl b/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl new file mode 100755 index 000000000..720c320c0 --- /dev/null +++ b/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl @@ -0,0 +1,65 @@ +#!/usr/bin/perl +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + + +# This program takes on its standard input a list of utterance +# id's, one for each line. (e.g. 4k0c030a is a an utterance id). +# It takes as +# Extracts from the dot files the transcripts for a given +# dataset (represented by a file list). 
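The dot files are expected to be named <speaker>00.dot, with one transcript per line and
the eight-character utterance id in parentheses at the end, e.g. (hypothetical path and text):

/your/path/wsj0/si_tr_s/4k0/4k0c0300.dot     -> transcripts for speaker "4k0c03"
SOME NORMALIZED TRANSCRIPT TEXT (4k0c030a)   <- a line inside that dot file

The utterance ids read on stdin carry an extra noise-condition suffix (see
noisy_wsj0_data_prep.sh), so the first eight characters are taken before the speaker lookup.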
+# + +@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; +$dot_flist = shift @ARGV; + +open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n"; +while(){ + chop; + m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_"; + $spk = $1; + $spk2dot{$spk} = $_; +} + + + +while(){ + chop; + $uttid_orig = $_; + $uttid = substr $uttid_orig, 0, 8; + $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; + $spk = $1; + if($spk ne $curspk) { + %utt2trans = { }; # Don't keep all the transcripts in memory... + $curspk = $spk; + $dotfile = $spk2dot{$spk}; + defined $dotfile || die "No dot file for speaker $spk\n"; + open(F, "<$dotfile") || die "Error opening dot file $dotfile\n"; + while() { + $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n"; + $trans = $1; + $utt = $2; + $utt2trans{$utt} = $trans; + } + } + if(!defined $utt2trans{$uttid}) { + print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n"; + } else { + print "$uttid_orig $utt2trans{$uttid}\n"; + } +} + + diff --git a/egs/chime_wsj0/s5/local/find_transcripts.pl b/egs/chime_wsj0/s5/local/find_transcripts.pl new file mode 100755 index 000000000..0e5d71f79 --- /dev/null +++ b/egs/chime_wsj0/s5/local/find_transcripts.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + + +# This program takes on its standard input a list of utterance +# id's, one for each line. (e.g. 4k0c030a is a an utterance id). +# It takes as +# Extracts from the dot files the transcripts for a given +# dataset (represented by a file list). +# + +@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; +$dot_flist = shift @ARGV; + +open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n"; +while(){ + chop; + m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_"; + $spk = $1; + $spk2dot{$spk} = $_; +} + + + +while(){ + chop; + $uttid = $_; + $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; + $spk = $1; + if($spk ne $curspk) { + %utt2trans = { }; # Don't keep all the transcripts in memory... 
+ $curspk = $spk; + $dotfile = $spk2dot{$spk}; + defined $dotfile || die "No dot file for speaker $spk\n"; + open(F, "<$dotfile") || die "Error opening dot file $dotfile\n"; + while() { + $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n"; + $trans = $1; + $utt = $2; + $utt2trans{$utt} = $trans; + } + } + if(!defined $utt2trans{$uttid}) { + print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n"; + } else { + print "$uttid $utt2trans{$uttid}\n"; + } +} + + diff --git a/egs/chime_wsj0/s5/local/flist2scp.pl b/egs/chime_wsj0/s5/local/flist2scp.pl new file mode 100755 index 000000000..6831d2d7b --- /dev/null +++ b/egs/chime_wsj0/s5/local/flist2scp.pl @@ -0,0 +1,31 @@ +#!/usr/bin/perl +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# takes in a file list with lines like +# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 +# and outputs an scp in kaldi format with lines like +# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 +# (the first thing is the utterance-id, which is the same as the basename of the file. + + +while(<>){ + m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_"; + $id = $1; + $id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames) + print "$id $_"; +} + diff --git a/egs/chime_wsj0/s5/local/generate_example_kws.sh b/egs/chime_wsj0/s5/local/generate_example_kws.sh new file mode 100755 index 000000000..2c8494381 --- /dev/null +++ b/egs/chime_wsj0/s5/local/generate_example_kws.sh @@ -0,0 +1,110 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. + + +if [ $# -ne 2 ]; then + echo "Usage: local/generate_example_kws.sh " + echo " e.g.: local/generate_example_kws.sh data/test_eval92/ " + exit 1; +fi + +datadir=$1; +kwsdatadir=$2; +text=$datadir/text; + +mkdir -p $kwsdatadir; + +# Generate keywords; we generate 20 unigram keywords with at least 20 counts, +# 20 bigram keywords with at least 10 counts and 10 trigram keywords with at +# least 5 counts. +cat $text | perl -e ' + %unigram = (); + %bigram = (); + %trigram = (); + while(<>) { + chomp; + @col=split(" ", $_); + shift @col; + for($i = 0; $i < @col; $i++) { + # unigram case + if (!defined($unigram{$col[$i]})) { + $unigram{$col[$i]} = 0; + } + $unigram{$col[$i]}++; + + # bigram case + if ($i < @col-1) { + $word = $col[$i] . " " . $col[$i+1]; + if (!defined($bigram{$word})) { + $bigram{$word} = 0; + } + $bigram{$word}++; + } + + # trigram case + if ($i < @col-2) { + $word = $col[$i] . " " . $col[$i+1] . " " . 
$col[$i+2]; + if (!defined($trigram{$word})) { + $trigram{$word} = 0; + } + $trigram{$word}++; + } + } + } + + $max_count = 100; + $total = 20; + $current = 0; + $min_count = 20; + while ($current < $total && $min_count <= $max_count) { + foreach $x (keys %unigram) { + if ($unigram{$x} == $min_count) { + print "$x\n"; + $unigram{$x} = 0; + $current++; + } + if ($current == $total) { + last; + } + } + $min_count++; + } + + $total = 20; + $current = 0; + $min_count = 4; + while ($current < $total && $min_count <= $max_count) { + foreach $x (keys %bigram) { + if ($bigram{$x} == $min_count) { + print "$x\n"; + $bigram{$x} = 0; + $current++; + } + if ($current == $total) { + last; + } + } + $min_count++; + } + + $total = 10; + $current = 0; + $min_count = 3; + while ($current < $total && $min_count <= $max_count) { + foreach $x (keys %trigram) { + if ($trigram{$x} == $min_count) { + print "$x\n"; + $trigram{$x} = 0; + $current++; + } + if ($current == $total) { + last; + } + } + $min_count++; + } + ' > $kwsdatadir/raw_keywords.txt + +echo "Keywords generation succeeded" diff --git a/egs/chime_wsj0/s5/local/kws_data_prep.sh b/egs/chime_wsj0/s5/local/kws_data_prep.sh new file mode 100755 index 000000000..5222a88c9 --- /dev/null +++ b/egs/chime_wsj0/s5/local/kws_data_prep.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. + + +if [ $# -ne 3 ]; then + echo "Usage: local/kws_data_prep.sh " + echo " e.g.: local/kws_data_prep.sh data/lang_test_bd_tgpr/ data/test_eval92/ data/kws/" + exit 1; +fi + +langdir=$1; +datadir=$2; +kwsdatadir=$3; + +mkdir -p $kwsdatadir; + +# Create keyword id for each keyword +cat $kwsdatadir/raw_keywords.txt | perl -e ' + $idx=1; + while(<>) { + chomp; + printf "WSJ-%04d $_\n", $idx; + $idx++; + }' > $kwsdatadir/keywords.txt + +# Map the keywords to integers; note that we remove the keywords that +# are not in our $langdir/words.txt, as we won't find them anyway... +cat $kwsdatadir/keywords.txt | \ + sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \ + grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int + +# Compile keywords into FSTs +transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:$kwsdatadir/keywords.fsts + +# Create utterance id for each utterance; Note that by "utterance" here I mean +# the keys that will appear in the lattice archive. You may have to modify here +cat $datadir/wav.scp | \ + awk '{print $1}' | \ + sort | uniq | perl -e ' + $idx=1; + while(<>) { + chomp; + print "$_ $idx\n"; + $idx++; + }' > $kwsdatadir/utter_id + +# Map utterance to the names that will appear in the rttm file. You have +# to modify the commands below accoring to your rttm file. In the WSJ case +# since each file is an utterance, we assume that the actual file names will +# be the "names" in the rttm, so the utterance names map to themselves. +cat $datadir/wav.scp | \ + awk '{print $1}' | \ + sort | uniq | perl -e ' + while(<>) { + chomp; + print "$_ $_\n"; + }' > $kwsdatadir/utter_map; +echo "Kws data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/ndx2flist.pl b/egs/chime_wsj0/s5/local/ndx2flist.pl new file mode 100755 index 000000000..b05704293 --- /dev/null +++ b/egs/chime_wsj0/s5/local/ndx2flist.pl @@ -0,0 +1,62 @@ +#!/usr/bin/perl +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# This program takes as its standard input an .ndx file from the WSJ corpus that looks +# like this: +#;; File: tr_s_wv1.ndx, updated 04/26/94 +#;; +#;; Index for WSJ0 SI-short Sennheiser training data +#;; Data is read WSJ sentences, Sennheiser mic. +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; per speaker TI) = 7236 utts +#;; +#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 + +#and as command-line arguments it takes the names of the WSJ disk locations, e.g.: +#/mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 ... etc. +# It outputs a list of absolute pathnames (it does this by replacing e.g. 11_1_1 with +# /mnt/matylda2/data/WSJ0/11-1.1. +# It also does a slight fix because one of the WSJ disks (WSJ1/13-16.1) was distributed with +# uppercase rather than lower case filenames. + +foreach $fn (@ARGV) { + $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n"; + $disk_id=$1; + $disk_id =~ tr/-\./__/; # replace - and . with - so 11-10.1 becomes 11_10_1 + $fn =~ s:/$::; # Remove final slash, just in case it is present. + $disk2fn{$disk_id} = $fn; +} + +while(){ + if(m/^;/){ next; } # Comment. Ignore it. + else { + m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; + $disk=$1; + if(!defined $disk2fn{$disk}) { + die "Disk id $disk not found"; + } + $filename = $2; # as a subdirectory of the distributed disk. + if($disk eq "13_16_1" && `hostname` =~ m/fit.vutbr.cz/) { + # The disk 13-16.1 has been uppercased for some reason, on the + # BUT system. This is a fix specifically for that case. + $filename =~ tr/a-z/A-Z/; # This disk contains all uppercase filenames. Why? + } + print "$disk2fn{$disk}/$filename\n"; + } +} diff --git a/egs/chime_wsj0/s5/local/nnet2/run_5b.sh b/egs/chime_wsj0/s5/local/nnet2/run_5b.sh new file mode 100755 index 000000000..1e9adfc25 --- /dev/null +++ b/egs/chime_wsj0/s5/local/nnet2/run_5b.sh @@ -0,0 +1,69 @@ +#!/bin/bash + + +stage=0 +train_stage=-100 +# This trains only unadapted (just cepstral mean normalized) features, +# and uses various combinations of VTLN warping factor and time-warping +# factor to artificially expand the amount of data. + +. cmd.sh + +. utils/parse_options.sh # to parse the --stage option, if given + +[ $# != 0 ] && echo "Usage: local/run_4b.sh [--stage --train-stage ]" && exit 1; + +set -e + +if [ $stage -le 0 ]; then + # Create the training data. 
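  # (The two jobs below create VTLN/time-warp perturbed fbank and MFCC copies of
  #  train_si284 in parallel; the MFCC copy is used in stage 1 to get alignments,
  #  while the fbank copy is what the network is trained on in stage 2.)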
+ featdir=`pwd`/mfcc/nnet5b; mkdir -p $featdir + fbank_conf=conf/fbank_40.conf + echo "--num-mel-bins=40" > $fbank_conf + steps/nnet2/get_perturbed_feats.sh --cmd "$train_cmd" \ + $fbank_conf $featdir exp/perturbed_fbanks_si284 data/train_si284 data/train_si284_perturbed_fbank & + steps/nnet2/get_perturbed_feats.sh --cmd "$train_cmd" --feature-type mfcc \ + conf/mfcc.conf $featdir exp/perturbed_mfcc_si284 data/train_si284 data/train_si284_perturbed_mfcc & + wait +fi + +if [ $stage -le 1 ]; then + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train_si284_perturbed_mfcc data/lang exp/tri4b exp/tri4b_ali_si284_perturbed_mfcc +fi + +if [ $stage -le 2 ]; then + steps/nnet2/train_block.sh --stage "$train_stage" \ + --cleanup false \ + --initial-learning-rate 0.01 --final-learning-rate 0.001 \ + --num-epochs 10 --num-epochs-extra 5 \ + --cmd "$decode_cmd" \ + --hidden-layer-dim 1536 \ + --num-block-layers 3 --num-normal-layers 3 \ + data/train_si284_perturbed_fbank data/lang exp/tri4b_ali_si284_perturbed_mfcc exp/nnet5b || exit 1 +fi + +if [ $stage -le 3 ]; then # create testing fbank data. + featdir=`pwd`/mfcc + fbank_conf=conf/fbank_40.conf + for x in test_eval92 test_eval93 test_dev93; do + cp -rT data/$x data/${x}_fbank + rm -r ${x}_fbank/split* || true + steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \ + --cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1; + steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1; + done +fi + +if [ $stage -le 4 ]; then + steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 10 \ + exp/tri4b/graph_bd_tgpr data/test_dev93_fbank exp/nnet5b/decode_bd_tgpr_dev93 + + steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 8 \ + exp/tri4b/graph_bd_tgpr data/test_eval92_fbank exp/nnet5b/decode_bd_tgpr_eval92 +fi + + + +exit 0; + diff --git a/egs/chime_wsj0/s5/local/nnet2/run_5c.sh b/egs/chime_wsj0/s5/local/nnet2/run_5c.sh new file mode 100755 index 000000000..288b56996 --- /dev/null +++ b/egs/chime_wsj0/s5/local/nnet2/run_5c.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# This is neural net training on top of adapted 40-dimensional features. +# + +. ./cmd.sh + +( + steps/nnet2/train_tanh.sh \ + --mix-up 8000 \ + --initial-learning-rate 0.01 --final-learning-rate 0.001 \ + --num-hidden-layers 4 --hidden-layer-dim 1024 \ + --cmd "$decode_cmd" \ + data/train_si284 data/lang exp/tri4b_ali_si284 exp/nnet5c || exit 1 + + steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 10 \ + --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ + exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c/decode_bd_tgpr_dev93 + + steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \ + --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ + exp/tri4b/graph_bd_tgpr data/test_eval92 exp/nnet5c/decode_bd_tgpr_eval92 +) + diff --git a/egs/chime_wsj0/s5/local/noisy_wsj0_data_prep.sh b/egs/chime_wsj0/s5/local/noisy_wsj0_data_prep.sh new file mode 100755 index 000000000..8744f25d6 --- /dev/null +++ b/egs/chime_wsj0/s5/local/noisy_wsj0_data_prep.sh @@ -0,0 +1,119 @@ +#!/bin/bash +set -e + +# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 29/05/12 + +if [ $# -ne 1 ]; then + printf "\nUSAGE: %s \n\n" `basename $0` + echo "The argument should be a the top-level WSJ corpus directory." 
+ echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory" + echo "within the top-level corpus directory." + exit 1; +fi + +CORPUS=$1 + +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils + +. ./path.sh # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi + +cd $dir + +# reverb list for SI-84 + +find $1/si_tr_s -name '*.wav' | sort -u > train_si84_noisy.flist + + + +# Dev-set Hub 1,2 (503, 913 utterances) + +# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. +# Sometimes this gets copied from the CD's with upcasing, don't know +# why (could be older versions of the disks). +find $1/si_dt_20 -name '*.wav' | sort -u > dev_dt_20_noisy.flist +find $1/si_dt_05 -name '*.wav' | sort -u > dev_dt_05_noisy.flist + +find $1/si_et_20 -name '*.wav' | sort -u > test_eval92_noisy.flist +find $1/si_et_05 -name '*.wav' | sort -u > test_eval92_5k_noisy.flist + + +# Finding the transcript files: +#find -L $CORPUS -iname '*.dot' > dot_files.flist +if [ ! -e $dir/dot_files.flist ]; then + echo "Could not find $dir/dot_files.flist files, first run clean_data_prep.sh"; + exit 1; +fi + +# Convert the transcripts into our format (no normalization yet) +# adding suffix to utt_id +# 1 for reverb condition +for x in train_si84_noisy dev_dt_05_noisy dev_dt_20_noisy test_eval92_noisy test_eval92_5k_noisy; do + cat $x.flist | perl -e ' + while(<>) { + m:^\S+/(\w+)\.wav$: || die "Bad line $_"; + $id = $1; + $id =~ tr/A-Z/a-z/; + print "$id $_"; + } + ' | sort > ${x}_wav_tmp.scp + #cat ${x}_wav_tmp.scp | awk '{print $1}' \ + # | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1 + cat ${x}_wav_tmp.scp | perl -e ' + while() { + @A=split(" ", $_); + @B=split("/", $_); + $abs_path_len=@B; + $condition=$B[$abs_path_len-5]; + if ($condition eq "9dB") {$key_suffix=2;} + elsif ($condition eq "6dB") {$key_suffix=3;} + elsif ($condition eq "3dB") {$key_suffix=4;} + elsif ($condition eq "0dB") {$key_suffix=5;} + elsif ($condition eq "m3dB") {$key_suffix=6;} + elsif ($condition eq "m6dB") {$key_suffix=7;} + else {print STDERR "error condition $condition";} + print $A[0].$key_suffix." ".$A[1]."\n"; + } + ' | sort -k1 > ${x}_wav.scp + cat ${x}_wav.scp | awk '{print $1}' \ + | $local/find_noisy_transcripts.pl dot_files.flist > ${x}.trans1 +done + + +# Do some basic normalization steps. At this point we don't remove OOVs-- +# that will be done inside the training scripts, as we'd like to make the +# data-preparation stage independent of the specific lexicon used. +noiseword=""; +for x in train_si84_noisy dev_dt_05_noisy dev_dt_20_noisy test_eval92_noisy test_eval92_5k_noisy; do + cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ + | sort > $x.txt || exit 1; +done + +# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) +#for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do +# awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \ +# > ${x}_wav.scp +#done + +# Make the utt2spk and spk2utt files. 
+for x in train_si84_noisy dev_dt_05_noisy dev_dt_20_noisy test_eval92_noisy test_eval92_5k_noisy; do + cat ${x}_wav.scp | awk '{print $1}' \ + | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk + cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; +done + +echo "Data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/normalize_transcript.pl b/egs/chime_wsj0/s5/local/normalize_transcript.pl new file mode 100755 index 000000000..9dd67af3d --- /dev/null +++ b/egs/chime_wsj0/s5/local/normalize_transcript.pl @@ -0,0 +1,59 @@ +#!/usr/bin/perl +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# This takes data from the standard input that's unnormalized transcripts in the format +# 4k2c0308 Of course there isn\'t any guarantee the company will keep its hot hand [misc_noise] +# 4k2c030a [loud_breath] And new hardware such as the set of personal computers I\. B\. M\. introduced last week can lead to unexpected changes in the software business [door_slam] +# and outputs normalized transcripts. +# c.f. /mnt/matylda2/data/WSJ0/11-10.1/wsj0/transcrp/doc/dot_spec.doc + +@ARGV == 1 || die "usage: normalize_transcript.pl noise_word < transcript > transcript2"; +$noise_word = shift @ARGV; + +while() { + $_ =~ m:^(\S+) (.+): || die "bad line $_"; + $utt = $1; + $trans = $2; + print "$utt"; + foreach $w (split (" ",$trans)) { + $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. . + $w =~ s:\\::g; # Remove backslashes. We don't need the quoting. + $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts. + $w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts. + if($w =~ m:^\[\<\w+\]$: || # E.g. [\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete. + $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon. + $w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon. + $w eq "~" || # This is used to indicate truncation of an utterance. Not a word. + $w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much + # point including this in the transcript. + next; # we won't print this word. + } elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath]. + print " $noise_word"; + } elsif($w =~ m:^\<([\w\']+)\>$:) { + # e.g. replace with and. (the <> means verbal deletion of a word).. but it's pronounced. + print " $1"; + } elsif($w eq "--DASH") { + print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH. +# } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... 
seems the DASH gets combined with previous word +# print " $1 -DASH"; + } else { + print " $w"; + } + } + print "\n"; +} diff --git a/egs/chime_wsj0/s5/local/reverb_wsj0_data_prep.sh b/egs/chime_wsj0/s5/local/reverb_wsj0_data_prep.sh new file mode 100755 index 000000000..c6903f21c --- /dev/null +++ b/egs/chime_wsj0/s5/local/reverb_wsj0_data_prep.sh @@ -0,0 +1,100 @@ +#!/bin/bash +set -e + +# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 29/05/12 + +if [ $# -ne 1 ]; then + printf "\nUSAGE: %s \n\n" `basename $0` + echo "The argument should be a the top-level WSJ corpus directory." + echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory" + echo "within the top-level corpus directory." + exit 1; +fi + +CORPUS=$1 + +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils + +. ./path.sh # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi + +cd $dir + +# reverb list for SI-84 + +find $1/si_tr_s -name '*.wav' | sort -u > train_si84_reverb.flist + + + +# Dev-set Hub 1,2 (503, 913 utterances) + +# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. +# Sometimes this gets copied from the CD's with upcasing, don't know +# why (could be older versions of the disks). +find $1/si_dt_20 -name '*.wav' | sort -u > dev_dt_20_reverb.flist +find $1/si_dt_05 -name '*.wav' | sort -u > dev_dt_05_reverb.flist + + +# Finding the transcript files: +#find -L $CORPUS -iname '*.dot' > dot_files.flist +if [ ! -e $dir/dot_files.flist ]; then + echo "Could not find $dir/dot_files.flist files, first run clean_data_prep.sh"; + exit 1; +fi + +# Convert the transcripts into our format (no normalization yet) +# adding suffix to utt_id +# 1 for reverb condition +for x in train_si84_reverb dev_dt_05_reverb dev_dt_20_reverb; do + cat $x.flist | perl -e ' + while(<>) { + m:^\S+/(\w+)\.wav$: || die "Bad line $_"; + $id = $1; + $id =~ tr/A-Z/a-z/; + print "$id $_"; + } + ' | sort > ${x}_wav_tmp.scp + cat ${x}_wav_tmp.scp | awk '{print $1}' \ + | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1 + cat ${x}_wav_tmp.scp | awk '{printf("%s1 %s\n", $1, $2);}' > ${x}_wav.scp + cat ${x}_tmp.trans1 | awk '{printf("%s1 ", $1); for(i=2;i<=NF;i++) printf("%s ", $i); printf("\n");}' > ${x}.trans1 +done + + +# Do some basic normalization steps. At this point we don't remove OOVs-- +# that will be done inside the training scripts, as we'd like to make the +# data-preparation stage independent of the specific lexicon used. +noiseword=""; +for x in train_si84_reverb dev_dt_05_reverb dev_dt_20_reverb; do + cat $x.trans1 | $local/normalize_transcript.pl $noiseword \ + | sort > $x.txt || exit 1; +done + +# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) +#for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do +# awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \ +# > ${x}_wav.scp +#done + +# Make the utt2spk and spk2utt files. 
+for x in train_si84_reverb dev_dt_05_reverb dev_dt_20_reverb; do + cat ${x}_wav.scp | awk '{print $1}' \ + | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk + cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; +done + +echo "Data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/run_basis_fmllr.sh b/egs/chime_wsj0/s5/local/run_basis_fmllr.sh new file mode 100755 index 000000000..3c04e480a --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_basis_fmllr.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +. cmd.sh + +mfccdir=mfcc + +# Make "per-utterance" versions of the test sets where the speaker +# information corresponds to utterances-- to demonstrate adaptation on +# short utterances, particularly for basis fMLLR +for x in test_eval92 test_eval93 test_dev93 ; do + y=${x}_utt + rm -r data/$y + cp -r data/$x data/$y + cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk; + cp data/$y/utt2spk data/$y/spk2utt; + steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; +done + + + # basis fMLLR experiments. + # First a baseline: decode per-utterance with normal fMLLR. +steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_utt || exit 1; +steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_utt || exit 1; + + # get the fMLLR basis. +steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train_si84 data/lang exp/tri3b + + # decoding tri3b with basis fMLLR +steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgpr data/test_dev93 exp/tri3b/decode_tgpr_dev93_basis || exit 1; +steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgpr data/test_eval92 exp/tri3b/decode_tgpr_eval92_basis || exit 1; + + # The same, per-utterance. +steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_basis_utt || exit 1; +steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_basis_utt || exit 1; + + diff --git a/egs/chime_wsj0/s5/local/run_dnn.sh b/egs/chime_wsj0/s5/local/run_dnn.sh new file mode 100755 index 000000000..680a6ca31 --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_dnn.sh @@ -0,0 +1,181 @@ +#!/bin/bash + +# Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely) +# Apache 2.0 + +# In this recipe we build DNN in four stages: +# 1) Data preparations : the fMLLR features are stored to disk +# 2) RBM pre-training : in this unsupervised stage we train stack of RBMs, a good starting point for Cross-entropy trainig +# 3) Frame-level cross-entropy training : in this stage the objective is to classify frames correctly. +# 4) Sequence-criterion training : in this stage the objective is to classify the whole sequence correctly, +# the idea is similar to the 'Discriminative training' in context of GMM-HMMs. + + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +. 
./path.sh ## Source the tools/utils (import the queue.pl) + + + +#false && \ +{ +gmmdir=exp/tri4b + +### +### Generate the alignments of dev93 +### (held-out set for Cross-entropy training) +### +steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \ + data/test_dev93 data/lang $gmmdir exp/tri4b_ali_dev93 || exit 1 + +### +### Store the fMLLR features, so we can train on them easily +### + +# train si284 +# generate the features +dir=data-fmllr-tri4b/train_si284 +steps/make_fmllr_feats.sh --nj 20 --cmd "$train_cmd" \ + --transform-dir exp/tri4b_ali_si284 \ + $dir data/train_si284 $gmmdir $dir/_log $dir/_data || exit 1 + +# eval92 +dir=data-fmllr-tri4b/test_eval92 +steps/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ + --transform-dir exp/tri4b/decode_tgpr_eval92 \ + $dir data/test_eval92 $gmmdir $dir/_log $dir/_data || exit 1 + +# dev93 (unsupervised fMLLR) +# held-out set of Cross-entropy training +dir=data-fmllr-tri4b/test_dev93 +steps/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir exp/tri4b/decode_tgpr_dev93 \ + $dir data/test_dev93 $gmmdir $dir/_log $dir/_data || exit 1 +} + + + +### +### Now we can pre-train stack of RBMs +### +#false && \ +{ # Pre-train the DBN +dir=exp/tri4b_pretrain-dbn +(tail --pid=$$ -F $dir/_pretrain_dbn.log 2>/dev/null)& +$cuda_cmd $dir/_pretrain_dbn.log \ + steps/pretrain_dbn.sh --rbm-iter 3 data-fmllr-tri4b/train_si284 $dir +} + + + +### +### Now we train the DNN optimizing cross-entropy. +### This will take quite some time. +### + +#false && \ +{ # Train the MLP +dir=exp/tri4b_pretrain-dbn_dnn +ali=exp/tri4b_ali +feature_transform=exp/tri4b_pretrain-dbn/final.feature_transform +dbn=exp/tri4b_pretrain-dbn/6.dbn +(tail --pid=$$ -F $dir/_train_nnet.log 2>/dev/null)& +$cuda_cmd $dir/_train_nnet.log \ + steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + data-fmllr-tri4b/train_si284 data-fmllr-tri4b/test_dev93 data/lang ${ali}_si284 ${ali}_dev93 $dir || exit 1; +# decode with 'big-dictionary' (reuse HCLG graph) +steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --acwt 0.10 --config conf/decode_dnn.config \ + exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_dev93 $dir/decode_bd_tgpr_dev93 || exit 1; +steps/decode_nnet.sh --nj 8 --cmd "$decode_cmd" --acwt 0.10 --config conf/decode_dnn.config \ + exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_eval92 $dir/decode_bd_tgpr_eval92 || exit 1; +} + + + +### +### Finally we train using sMBR criterion. +### We do Stochastic-GD with per-utterance updates. +### +### To get faster convergence, we will re-generate +### the lattices after 1st epoch of sMBR. 
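+### (i.e. re-align and re-generate the denominator lattices with the sMBR-trained
+### network, then run several more sMBR iterations on the new lattices.)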
+### + +dir=exp/tri4b_pretrain-dbn_dnn_smbr +srcdir=exp/tri4b_pretrain-dbn_dnn +acwt=0.10 + +# First we need to generate lattices and alignments: +#false && \ +{ +steps/align_nnet.sh --nj 100 --cmd "$train_cmd" \ + data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_ali_si284 || exit 1; +steps/make_denlats_nnet.sh --nj 100 --cmd "$decode_cmd" \ + --config conf/decode_dnn.config --acwt $acwt \ + data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_denlats_si284 || exit 1; +} +# Now we re-train the hybrid by single iteration of sMBR +#false && \ +{ +steps/train_nnet_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt $acwt --do-smbr true \ + data-fmllr-tri4b/train_si284 data/lang $srcdir \ + ${srcdir}_ali_si284 ${srcdir}_denlats_si284 $dir || exit 1 +} +# Decode +#false && \ +{ +for ITER in 1; do + # decode dev93 with big dict graph_bd_tgpr + steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_dev93 $dir/decode_dev93_bd_tgpr_it${ITER} || exit 1 + # decode eval92 with big dict graph_bd_tgpr + steps/decode_nnet.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_eval92 $dir/decode_eval92_bd_tgpr_it${ITER} || exit 1 +done +} + + +### +### Re-generate lattices and run several more iterations of sMBR +### + +dir=exp/tri4b_pretrain-dbn_dnn_smbr_iter1-lats +srcdir=exp/tri4b_pretrain-dbn_dnn_smbr +acwt=0.10 + +# First we need to generate lattices and alignments: +#false && \ +{ +steps/align_nnet.sh --nj 100 --cmd "$train_cmd" \ + data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_ali_si284 || exit 1; +steps/make_denlats_nnet.sh --nj 100 --cmd "$decode_cmd" \ + --config conf/decode_dnn.config --acwt $acwt \ + data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_denlats_si284 || exit 1; +} +# Now we re-train the hybrid by several iterations of sMBR +#false && \ +{ +steps/train_nnet_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \ + data-fmllr-tri4b/train_si284 data/lang $srcdir \ + ${srcdir}_ali_si284 ${srcdir}_denlats_si284 $dir +} +# Decode +#false && \ +{ +for ITER in 1 2 3 4; do + # decode dev93 with big dict graph_bd_tgpr + steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_dev93 $dir/decode_dev93_bd_tgpr_it${ITER} || exit 1 + # decode eval92 with big dict graph_bd_tgpr + steps/decode_nnet.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_eval92 $dir/decode_eval92_bd_tgpr_it${ITER} || exit 1 +done +} + + +# Getting results [see RESULTS file] +# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done diff --git a/egs/chime_wsj0/s5/local/run_fwdbwd.sh b/egs/chime_wsj0/s5/local/run_fwdbwd.sh new file mode 100755 index 000000000..c84f2f1e0 --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_fwdbwd.sh @@ -0,0 +1,41 @@ +#prepare reverse lexicon and language model for backwards decoding +utils/prepare_lang.sh --reverse true data/local/dict "" data/local/lang_tmp.reverse data/lang.reverse || exit 1; +utils/reverse_lm.sh data/local/nist_lm/lm_bg_5k.arpa.gz data/lang.reverse data/lang_test_bg_5k.reverse || exit 1; +utils/reverse_lm_test.sh data/lang_test_bg_5k data/lang_test_bg_5k.reverse || exit 1; + +# 
normal forward decoding +utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k +steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 8 --cmd "$decode_cmd" \ + exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_10 || exit 1; + +# backward decoding +utils/mkgraph.sh --reverse data/lang_test_bg_5k.reverse exp/tri2a exp/tri2a/graph_bg5k_r +steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 8 --cmd "$decode_cmd" \ + exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_reverse10 || exit 1; + +# pingpong decoding +steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 8 --cmd "$decode_cmd" \ + --first_pass exp/tri2a/decode_eval92_bg5k_10 exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_pingpong10 || exit 1; +steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 8 --cmd "$decode_cmd" \ + --first_pass exp/tri2a/decode_eval92_bg5k_reverse10 exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_pongping10 || exit 1; + +# same for bigger language models (on machine with 8GB RAM, you can run the whole decoding in 3-4 min without SGE) +utils/prepare_lang.sh --reverse true data/local/dict_larger "" data/local/lang_larger.reverse data/lang_bd.reverse || exit; +utils/reverse_lm.sh --lexicon data/local/dict_larger/lexicon.txt data/local/local_lm/3gram-mincount/lm_pr6.0.gz data/lang_bd.reverse data/lang_test_bd_tgpr.reverse || exit 1; +utils/reverse_lm_test.sh data/lang_test_bd_tgpr data/lang_test_bd_tgpr.reverse || exit 1; + +utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri2a exp/tri2a/graph_bd_tgpr +steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 4 --cmd run.pl \ + exp/tri2a/graph_bd_tgpr data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_10 || exit 1; + +utils/mkgraph.sh --reverse data/lang_test_bd_tgpr.reverse exp/tri2a exp/tri2a/graph_bd_tgpr_r +steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 4 --cmd run.pl \ + exp/tri2a/graph_bd_tgpr_r data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_reverse10 || exit 1; + +steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 4 --cmd run.pl \ + --first_pass exp/tri2a/decode_eval92_bdtgpr4_10 exp/tri2a/graph_bd_tgpr_r data/test_eval92 \ + exp/tri2a/decode_eval92_bdtgpr4_pingpong10 || exit 1; + +steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 4 --cmd run.pl \ + --first_pass exp/tri2a/decode_eval92_bdtgpr4_reverse10 exp/tri2a/graph_bd_tgpr data/test_eval92 \ + exp/tri2a/decode_eval92_bdtgpr4_pongping10 || exit 1; diff --git a/egs/chime_wsj0/s5/local/run_mmi_tri2b.sh b/egs/chime_wsj0/s5/local/run_mmi_tri2b.sh new file mode 100755 index 000000000..6517e46a1 --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_mmi_tri2b.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +. ./cmd.sh + +# Train and test MMI (and boosted MMI) on tri2b system. +steps/make_denlats.sh --sub-split 20 --nj 10 --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri2b exp/tri2b_denlats_si84 || exit 1; + +# train the basic MMI system. +steps/train_mmi.sh --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri2b_ali_si84 \ + exp/tri2b_denlats_si84 exp/tri2b_mmi || exit 1; +for iter in 3 4; do + steps/decode_si.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_mmi/decode_tgpr_dev93_it$iter & + steps/decode_si.sh --nj 8 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_eval92 exp/tri2b_mmi/decode_tgpr_eval92_it$iter & +done + +# MMI with 0.1 boosting factor. 
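+# (Boosted MMI weights denominator paths up in proportion to the number of errors
+# they contain, controlled by the --boost factor; 0.1 here.)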
+steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \ + data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 \ + exp/tri2b_mmi_b0.1 || exit 1; + +for iter in 3 4; do + steps/decode_si.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_mmi_b0.1/decode_tgpr_dev93_it$iter & + steps/decode_si.sh --nj 8 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_eval92 exp/tri2b_mmi_b0.1/decode_tgpr_eval92_it$iter & +done + + +# Train a UBM with 400 components, for fMMI. +steps/train_diag_ubm.sh --silence-weight 0.5 --nj 10 --cmd "$train_cmd" \ + 400 data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b + + steps/train_mmi_fmmi.sh --boost 0.1 --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \ + exp/tri2b_fmmi_b0.1 + + for iter in `seq 3 8`; do + steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it$iter & + done + + steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \ + exp/tri2b_fmmi_b0.1_lr0.005 || exit 1; + for iter in `seq 3 8`; do + steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it$iter & + done + + steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \ + exp/tri2b_fmmi_indirect_b0.1 + for iter in `seq 3 8`; do + steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it$iter & + done diff --git a/egs/chime_wsj0/s5/local/run_mmi_tri4b.sh b/egs/chime_wsj0/s5/local/run_mmi_tri4b.sh new file mode 100755 index 000000000..db34f8e1d --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_mmi_tri4b.sh @@ -0,0 +1,50 @@ +#!/bin/bash +. ./cmd.sh + +steps/make_denlats.sh --nj 30 --sub-split 30 --cmd "$train_cmd" \ + --transform-dir exp/tri4b_ali_si284 \ + data/train_si284 data/lang exp/tri4b exp/tri4b_denlats_si284 || exit 1; + +steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \ + data/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b_denlats_si284 \ + exp/tri4b_mmi_b0.1 || exit 1; + +steps/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_tgpr_dev93 \ + exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi_b0.1/decode_tgpr_dev93 + +#first, train UBM for fMMI experiments. +steps/train_diag_ubm.sh --silence-weight 0.5 --nj 30 --cmd "$train_cmd" \ + 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b + +# Next, fMMI+MMI. +steps/train_mmi_fmmi.sh \ + --boost 0.1 --cmd "$train_cmd" data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b exp/tri4b_denlats_si284 \ + exp/tri4b_fmmi_a || exit 1; + +for iter in 3 4 5 6 7 8; do + steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri3b/decode_tgpr_dev93 exp/tri4b/graph_tgpr data/test_dev93 \ + exp/tri4b_fmmi_a/decode_tgpr_dev93_it$iter & +done +# decode the last iter with the bd model. 
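+# ('bd' refers to the big-dictionary setup, i.e. the graph_bd_tgpr decoding graph.)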
+for iter in 8; do + steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri3b/decode_bd_tgpr_dev93 exp/tri4b/graph_bd_tgpr data/test_dev93 \ + exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it$iter & + steps/decode_fmmi.sh --nj 8 --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri3b/decode_bd_tgpr_eval92 exp/tri4b/graph_bd_tgpr data/test_eval92 \ + exp/tri4b_fmmi_a/decode_tgpr_eval92_it$iter & +done + + +# fMMI + mmi with indirect differential. +steps/train_mmi_fmmi_indirect.sh \ + --boost 0.1 --cmd "$train_cmd" data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b exp/tri4b_denlats_si284 \ + exp/tri4b_fmmi_indirect || exit 1; + +for iter in 3 4 5 6 7 8; do + steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri3b/decode_tgpr_dev93 exp/tri4b/graph_tgpr data/test_dev93 \ + exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it$iter & +done + diff --git a/egs/chime_wsj0/s5/local/run_nnet_cpu.sh b/egs/chime_wsj0/s5/local/run_nnet_cpu.sh new file mode 100755 index 000000000..c72e521f1 --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_nnet_cpu.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +. ./cmd.sh + + +# ... + +local/nnet2/run_5c.sh + diff --git a/egs/chime_wsj0/s5/local/run_raw_fmllr.sh b/egs/chime_wsj0/s5/local/run_raw_fmllr.sh new file mode 100644 index 000000000..be7d52e1c --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_raw_fmllr.sh @@ -0,0 +1,66 @@ +#!/bin/bash + + +steps/align_raw_fmllr.sh --nj 10 --cmd "$train_cmd" --use-graphs true \ + data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84_raw + +steps/train_raw_sat.sh --cmd "$train_cmd" \ + 2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84_raw exp/tri3c || exit 1; + + +mfccdir=mfcc +for x in test_eval92 test_eval93 test_dev93 ; do + y=${x}_utt + cp -rT data/$x data/$y + cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk; + cp data/$y/utt2spk data/$y/spk2utt; + steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; +done + +( +utils/mkgraph.sh data/lang_test_tgpr exp/tri3c exp/tri3c/graph_tgpr || exit 1; +steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93 || exit 1; +steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \ + exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92 || exit 1; + +steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \ + exp/tri3c/graph_tgpr data/test_dev93_utt exp/tri3c/decode_tgpr_dev93_utt || exit 1; +steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \ + exp/tri3c/graph_tgpr data/test_eval92_utt exp/tri3c/decode_tgpr_eval92_utt || exit 1; + +steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 10 --cmd "$decode_cmd" \ + exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93_2fmllr || exit 1; +steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 8 --cmd "$decode_cmd" \ + exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92_2fmllr || exit 1; +)& + +( +utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri3c exp/tri3c/graph_bd_tgpr || exit 1; + +steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 8 exp/tri3c/graph_bd_tgpr \ + data/test_eval92 exp/tri3c/decode_bd_tgpr_eval92 + steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 10 exp/tri3c/graph_bd_tgpr \ + data/test_dev93 exp/tri3c/decode_bd_tgpr_dev93 +)& + +steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ + data/train_si284 data/lang exp/tri3c exp/tri3c_ali_si284 || exit 1; + + +steps/train_raw_sat.sh --cmd "$train_cmd" \ + 4200 40000 data/train_si284 data/lang 
exp/tri3c_ali_si284 exp/tri4d || exit 1; +( + utils/mkgraph.sh data/lang_test_tgpr exp/tri4d exp/tri4d/graph_tgpr || exit 1; + steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri4d/graph_tgpr data/test_dev93 exp/tri4d/decode_tgpr_dev93 || exit 1; + steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \ + exp/tri4d/graph_tgpr data/test_eval92 exp/tri4d/decode_tgpr_eval92 || exit 1; +) & + + +wait + + +#for x in exp/tri3{b,c}/decode_tgpr*; do grep WER $x/wer_* | utils/best_wer.sh ; done + diff --git a/egs/chime_wsj0/s5/local/run_rnnlms_sgmm5b.sh b/egs/chime_wsj0/s5/local/run_rnnlms_sgmm5b.sh new file mode 100755 index 000000000..67fcee50a --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_rnnlms_sgmm5b.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +for test in dev93 eval92; do + + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_bd_tgpr data/lang_test_bd_fg \ + data/test_${test} exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 || exit 1; + + +# Note: for N-best-list generation, choosing the acoustic scale (12) that gave +# the best WER on this test set. Ideally we should do this on a dev set. + + # This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM. + steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 12 \ + 0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_${test} \ + exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm30_0.25 \ + || exit 1; + + steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 12 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_${test} \ + exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm100_0.5 \ + || exit 1; + + steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 12 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_${test} \ + exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm200_0.5 \ + || exit 1; + + steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 12 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \ + exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.5 \ + || exit 1; + + steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 12 \ + 0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \ + exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.75 \ + || exit 1; +done diff --git a/egs/chime_wsj0/s5/local/run_rnnlms_tri3b.sh b/egs/chime_wsj0/s5/local/run_rnnlms_tri3b.sh new file mode 100755 index 000000000..b98446e7b --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_rnnlms_tri3b.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +. cmd.sh + + # This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM. 
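+ # (rnnlmrescore.sh works on N-best lists: the N best hypotheses are extracted from
+ # each lattice, rescored with the RNNLM, and the RNNLM score is interpolated with the
+ # existing LM score using the weight given as the first non-option argument.)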
+steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm30_0.25 \ + || exit 1; + +steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm100_0.5 \ + || exit 1; + +steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm200_0.5 \ + || exit 1; + +steps/rnnlmrescore.sh \ + --N 100 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 \ + || exit 1; + +steps/rnnlmrescore.sh \ + --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000 + +dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75_N1000 +rm -rf $dir +cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000 $dir +steps/rnnlmrescore.sh \ + --stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg $dir + +dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75 +rm -rf $dir +cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir +steps/rnnlmrescore.sh \ + --stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg $dir + +dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.25 +rm -rf $dir +cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir +steps/rnnlmrescore.sh \ + --stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.25 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg $dir + +steps/rnnlmrescore.sh \ + --N 10 --cmd "$decode_cmd" --inv-acwt 17 \ + 0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \ + exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N10 \ + || exit 1; + diff --git a/egs/chime_wsj0/s5/local/run_sgmm.sh b/egs/chime_wsj0/s5/local/run_sgmm.sh new file mode 100755 index 000000000..62be4d837 --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_sgmm.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# This script is invoked from ../run.sh +# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity. + +. cmd.sh + +# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for +# training, but this shouldn't have much effect. 
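+# The si84 block below: fMLLR alignments from tri4b, a 400-Gaussian UBM, SGMM training
+# (3500/10000), tgpr decoding, then SGMM alignments, denominator lattices and
+# boosted-MMI (b=0.1) training, with the discriminative models decoded at iterations 1-4.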
+ +( + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1; + + steps/train_ubm.sh --cmd "$train_cmd" \ + 400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1; + + steps/train_sgmm.sh --cmd "$train_cmd" \ + 3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \ + exp/ubm5b/final.ubm exp/sgmm5a || exit 1; + + ( + utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr + steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ + exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 + ) & + + steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \ + --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1; + steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \ + data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 + + steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ + data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1 + + for iter in 1 2 3 4; do + steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ + exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter & + done + + steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ + --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9 + + for iter in 1 2 3 4; do + steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ + exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter & + done + +) & + + +( +# The next commands are the same thing on all the si284 data. 
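+# (mainly with larger models: a 600-Gaussian UBM and a 5500/25000 SGMM instead of
+# 400 and 3500/10000, and decoding with the big-dictionary graph as well.)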
+ +# SGMM system on the si284 data [sgmm5b] + steps/train_ubm.sh --cmd "$train_cmd" \ + 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1; + + steps/train_sgmm.sh --cmd "$train_cmd" \ + 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ + exp/ubm5b/final.ubm exp/sgmm5b || exit 1; + + ( + utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr + steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ + exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93 + steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ + exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92 + + utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1; + steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ + exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93 + steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ + exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92 + ) & + + steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \ + --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284 + + steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \ + data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 + + steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ + data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1 + + for iter in 1 2 3 4; do + for test in dev93 eval92; do + steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \ + exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter & + + steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \ + exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter & + done + done +) & + + + +# Train quinphone SGMM system. + +steps/train_sgmm.sh --cmd "$train_cmd" \ + --context-opts "--context-width=5 --central-position=2" \ + 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ + exp/ubm5b/final.ubm exp/sgmm5c || exit 1; + +# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93. +steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ + data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93 + diff --git a/egs/chime_wsj0/s5/local/run_sgmm2.sh b/egs/chime_wsj0/s5/local/run_sgmm2.sh new file mode 100755 index 000000000..2e9f5d8e1 --- /dev/null +++ b/egs/chime_wsj0/s5/local/run_sgmm2.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +# This script is invoked from ../run.sh +# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity. + +. cmd.sh + +# Note: you might want to try to give the option --spk-dep-weights=false to train_sgmm2.sh; +# this takes out the "symmetric SGMM" part which is not always helpful. + +# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for +# training, but this shouldn't have much effect. 
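+# The structure mirrors run_sgmm.sh, but uses the SGMM2 tools (train_sgmm2.sh,
+# decode_sgmm2.sh, etc.) and different model sizes (7000/9000 for the si84 system below).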
+ +( + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1; + + steps/train_ubm.sh --cmd "$train_cmd" \ + 400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1; + + steps/train_sgmm2.sh --cmd "$train_cmd" \ + 7000 9000 data/train_si84 data/lang exp/tri4b_ali_si84 \ + exp/ubm5a/final.ubm exp/sgmm2_5a || exit 1; + + ( + utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5a exp/sgmm2_5a/graph_tgpr + steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ + exp/sgmm2_5a/graph_tgpr data/test_dev93 exp/sgmm2_5a/decode_tgpr_dev93 + ) & + + steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \ + --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm2_5a exp/sgmm2_5a_ali_si84 || exit 1; + steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \ + data/train_si84 data/lang exp/sgmm2_5a_ali_si84 exp/sgmm2_5a_denlats_si84 + + steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ + data/train_si84 data/lang exp/sgmm2_5a_ali_si84 exp/sgmm2_5a_denlats_si84 exp/sgmm2_5a_mmi_b0.1 + + for iter in 1 2 3 4; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm2_5a/decode_tgpr_dev93 \ + exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it$iter & + done + + steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ + --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm2_5a_ali_si84 exp/sgmm2_5a_denlats_si84 exp/sgmm2_5a_mmi_b0.1_m0.9 + + for iter in 1 2 3 4; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm2_5a/decode_tgpr_dev93 \ + exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter & + done + +) & + + +( +# The next commands are the same thing on all the si284 data. 
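+# (here an 11000/25000 SGMM2 on top of a 600-Gaussian UBM; a quinphone variant and
+# boosted-MMI training follow further down.)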
+ +# SGMM system on the si284 data [sgmm5b] + steps/train_ubm.sh --cmd "$train_cmd" \ + 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1; + + steps/train_sgmm2.sh --cmd "$train_cmd" \ + 11000 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ + exp/ubm5b/final.ubm exp/sgmm2_5b || exit 1; + + ( + utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5b exp/sgmm2_5b/graph_tgpr + steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ + exp/sgmm2_5b/graph_tgpr data/test_dev93 exp/sgmm2_5b/decode_tgpr_dev93 + steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ + exp/sgmm2_5b/graph_tgpr data/test_eval92 exp/sgmm2_5b/decode_tgpr_eval92 + + utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm2_5b exp/sgmm2_5b/graph_bd_tgpr || exit 1; + steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ + exp/sgmm2_5b/graph_bd_tgpr data/test_dev93 exp/sgmm2_5b/decode_bd_tgpr_dev93 + steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ + exp/sgmm2_5b/graph_bd_tgpr data/test_eval92 exp/sgmm2_5b/decode_bd_tgpr_eval92 + ) & + + + # This shows how you would build and test a quinphone SGMM2 system, but + ( + steps/train_sgmm2.sh --cmd "$train_cmd" \ + --context-opts "--context-width=5 --central-position=2" \ + 11000 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ + exp/ubm5b/final.ubm exp/sgmm2_5c || exit 1; + # Decode from lattices in exp/sgmm2_5b + steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ + data/test_dev93 data/lang_test_tgpr exp/sgmm2_5b/decode_tgpr_dev93 exp/sgmm2_5c/decode_tgpr_dev93 + steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ + data/test_eval92 data/lang_test_tgpr exp/sgmm2_5b/decode_tgpr_eval92 exp/sgmm2_5c/decode_tgpr_eval92 + ) & + + + steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \ + --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm2_5b exp/sgmm2_5b_ali_si284 + + steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \ + data/train_si284 data/lang exp/sgmm2_5b_ali_si284 exp/sgmm2_5b_denlats_si284 + + steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ + data/train_si284 data/lang exp/sgmm2_5b_ali_si284 exp/sgmm2_5b_denlats_si284 exp/sgmm2_5b_mmi_b0.1 + + for iter in 1 2 3 4; do + for test in eval92; do # dev93 + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_fg data/test_${test} exp/sgmm2_5b/decode_bd_tgpr_${test} \ + exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter & + done + done + + steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ + --zero-if-disjoint true data/train_si284 data/lang exp/sgmm2_5b_ali_si284 exp/sgmm2_5b_denlats_si284 exp/sgmm2_5b_mmi_b0.1_z + + for iter in 1 2 3 4; do + for test in eval92 dev93; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ + --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_fg data/test_${test} exp/sgmm2_5b/decode_bd_tgpr_${test} \ + exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_${test}_it$iter & + done + done + +) & + +wait + +# Examples of combining some of the best decodings: SGMM+MMI with +# MMI+fMMI on a conventional system. 
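+# (score_combine.sh builds a union of the per-system lattices, normalized by removing
+# the total forward cost from each, and runs MBR decoding on the combined lattice.)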
+ +local/score_combine.sh data/test_eval92 \ + data/lang_test_bd_tgpr \ + exp/tri4b_fmmi_a/decode_tgpr_eval92_it8 \ + exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3 \ + exp/combine_tri4b_fmmi_a_sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it8_3 + + +# %WER 4.43 [ 250 / 5643, 41 ins, 12 del, 197 sub ] exp/tri4b_fmmi_a/decode_tgpr_eval92_it8/wer_11 +# %WER 3.85 [ 217 / 5643, 35 ins, 11 del, 171 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3/wer_10 +# combined to: +# %WER 3.76 [ 212 / 5643, 32 ins, 12 del, 168 sub ] exp/combine_tri4b_fmmi_a_sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it8_3/wer_12 + +# Checking MBR decode of baseline: +cp -r -T exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3{,.mbr} +local/score_mbr.sh data/test_eval92 data/lang_test_bd_tgpr exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3.mbr +# MBR decoding did not seem to help (baseline was 3.85). I think this is normal at such low WERs. +%WER 3.86 [ 218 / 5643, 35 ins, 11 del, 172 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3.mbr/wer_10 diff --git a/egs/chime_wsj0/s5/local/score.sh b/egs/chime_wsj0/s5/local/score.sh new file mode 100755 index 000000000..b18f35041 --- /dev/null +++ b/egs/chime_wsj0/s5/local/score.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +[ -f ./path.sh ] && . ./path.sh + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +reverse=false +word_ins_penalty=0.0 +min_lmwt=5 +max_lmwt=20 +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + echo " --reverse (true/false) # score with time reversed features " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; + +if $reverse; then + for lmwt in `seq $min_lmwt $max_lmwt`; do + mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig + awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ + <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra + done +fi + +# Note: the double level of quoting for the sed command +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +exit 0; diff --git a/egs/chime_wsj0/s5/local/score_combine.sh b/egs/chime_wsj0/s5/local/score_combine.sh new file mode 100755 index 000000000..576962c74 --- /dev/null +++ b/egs/chime_wsj0/s5/local/score_combine.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Copyright 2013 Arnab Ghoshal + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# Script for system combination using minimum Bayes risk decoding. +# This calls lattice-combine to create a union of lattices that have been +# normalized by removing the total forward cost from them. The resulting lattice +# is used as input to lattice-mbr-decode. This should not be put in steps/ or +# utils/ since the scores on the combined lattice must not be scaled. + +# begin configuration section. +cmd=run.pl +min_lmwt=9 +max_lmwt=20 +lat_weights= +#end configuration section. + +help_message="Usage: "$(basename $0)" [options] [decode-dir3 ... ] +Options: + --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. + --min-lmwt INT # minumum LM-weight for lattice rescoring + --max-lmwt INT # maximum LM-weight for lattice rescoring + --lat-weights STR # colon-separated string of lattice weights +"; + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -lt 5 ]; then + printf "$help_message\n"; + exit 1; +fi + +data=$1 +graphdir=$2 +odir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + +symtab=$graphdir/words.txt +[ ! -f $symtab ] && echo "$0: missing word symbol table '$symtab'" && exit 1; +[ ! 
-f $data/text ] && echo "$0: missing reference '$data/text'" && exit 1; + + +mkdir -p $odir/log + +for i in `seq 0 $[num_sys-1]`; do + model=${decode_dirs[$i]}/../final.mdl # model one level up from decode dir + for f in $model ${decode_dirs[$i]}/lat.1.gz ; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + done + lats[$i]="\"ark:gunzip -c ${decode_dirs[$i]}/lat.*.gz |\"" +done + +mkdir -p $odir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' \ + > $odir/scoring/test_filt.txt + +if [ -z "$lat_weights" ]; then + $cmd LMWT=$min_lmwt:$max_lmwt $odir/log/combine_lats.LMWT.log \ + lattice-combine --inv-acoustic-scale=LMWT ${lats[@]} ark:- \| \ + lattice-mbr-decode --word-symbol-table=$symtab ark:- \ + ark,t:$odir/scoring/LMWT.tra || exit 1; +else + $cmd LMWT=$min_lmwt:$max_lmwt $odir/log/combine_lats.LMWT.log \ + lattice-combine --inv-acoustic-scale=LMWT --lat-weights=$lat_weights \ + ${lats[@]} ark:- \| \ + lattice-mbr-decode --word-symbol-table=$symtab ark:- \ + ark,t:$odir/scoring/LMWT.tra || exit 1; +fi + +$cmd LMWT=$min_lmwt:$max_lmwt $odir/scoring/log/score.LMWT.log \ + cat $odir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$odir/scoring/test_filt.txt ark,p:- ">&" $odir/wer_LMWT || exit 1; + +exit 0 diff --git a/egs/chime_wsj0/s5/local/score_mbr.sh b/egs/chime_wsj0/s5/local/score_mbr.sh new file mode 100755 index 000000000..4052512f7 --- /dev/null +++ b/egs/chime_wsj0/s5/local/score_mbr.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Script for minimum bayes risk decoding. + +[ -f ./path.sh ] && . ./path.sh; + +# begin configuration section. +cmd=run.pl +min_lmwt=9 +max_lmwt=20 +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score_mbr.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +# We submit the jobs separately, not as an array, because it's hard +# to get the inverse of the LM scales. 
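+# Each decode needs --acoustic-scale=1/LMWT, so the loop below computes that per
+# LM weight and runs the jobs in the background, using $dir/.error to flag failures.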
+rm $dir/.error 2>/dev/null +for inv_acwt in `seq $min_lmwt $max_lmwt`; do + acwt=`perl -e "print (1.0/$inv_acwt);"` + $cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \ + lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \ + "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \ + || touch $dir/.error & +done +wait; +[ -f $dir/.error ] && echo "score_mbr.sh: errror getting MBR outout."; + + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">" $dir/wer_LMWT || exit 1; + diff --git a/egs/chime_wsj0/s5/local/wsj_data_prep.sh b/egs/chime_wsj0/s5/local/wsj_data_prep.sh new file mode 100755 index 000000000..685b57aa7 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_data_prep.sh @@ -0,0 +1,201 @@ +#!/bin/bash + +# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + + +if [ $# -le 3 ]; then + echo "Arguments should be a list of WSJ directories, see ../run.sh for example." + exit 1; +fi + + +dir=`pwd`/data/local/data +lmdir=`pwd`/data/local/nist_lm +mkdir -p $dir $lmdir +local=`pwd`/local +utils=`pwd`/utils + +. ./path.sh # Needed for KALDI_ROOT +export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin +sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +if [ ! -x $sph2pipe ]; then + echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; + exit 1; +fi + +cd $dir + +# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command +# line arguments being absolute pathnames. +rm -r links/ 2>/dev/null +mkdir links/ +ln -s $* links + +# Do some basic checks that we have what we expected. +if [ ! -d links/11-13.1 -o ! -d links/13-34.1 -o ! -d links/11-2.1 ]; then + echo "wsj_data_prep.sh: Spot check of command line arguments failed" + echo "Command line arguments must be absolute pathnames to WSJ directories" + echo "with names like 11-13.1." + exit 1; +fi + +# This version for SI-84 + +cat links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \ + $local/ndx2flist.pl $* | sort | \ + grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si84.flist + +nl=`cat train_si84.flist | wc -l` +[ "$nl" -eq 7138 ] || echo "Warning: expected 7138 lines in train_si84.flist, got $nl" + +# This version for SI-284 +cat links/13-34.1/wsj1/doc/indices/si_tr_s.ndx \ + links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \ + $local/ndx2flist.pl $* | sort | \ + grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si284.flist + +nl=`cat train_si284.flist | wc -l` +[ "$nl" -eq 37416 ] || echo "Warning: expected 37416 lines in train_si284.flist, got $nl" + +# Now for the test sets. +# links/13-34.1/wsj1/doc/indices/readme.doc +# describes all the different test sets. +# Note: each test-set seems to come in multiple versions depending +# on different vocabulary sizes, verbalized vs. non-verbalized +# pronunciations, etc. We use the largest vocab and non-verbalized +# pronunciations. +# The most normal one seems to be the "baseline 60k test set", which +# is h1_p0. 
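# Each test-set block below follows the same pattern: take the corpus-supplied
# .ndx index, map its disk ids to real paths under links/ with ndx2flist.pl,
# and sort the result into a .flist of sphere files. A sketch of the pattern
# (the output path shown is hypothetical, only to illustrate the format):
#   cat links/11-13.1/wsj0/doc/indices/test/nvp/si_et_20.ndx | \
#     $local/ndx2flist.pl $* | sort > some_set.flist
#   # resulting lines look roughly like .../links/11-14.1/wsj0/si_et_20/440/440c0401.wv1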
+ +# Nov'92 (333 utts) +# These index files have a slightly different format; +# have to add .wv1 +cat links/11-13.1/wsj0/doc/indices/test/nvp/si_et_20.ndx | \ + $local/ndx2flist.pl $* | awk '{printf("%s.wv1\n", $1)}' | \ + sort > test_eval92.flist + +# Nov'92 (330 utts, 5k vocab) +cat links/11-13.1/wsj0/doc/indices/test/nvp/si_et_05.ndx | \ + $local/ndx2flist.pl $* | awk '{printf("%s.wv1\n", $1)}' | \ + sort > test_eval92_5k.flist + +# Nov'93: (213 utts) +# Have to replace a wrong disk-id. +cat links/13-32.1/wsj1/doc/indices/wsj1/eval/h1_p0.ndx | \ + sed s/13_32_1/13_33_1/ | \ + $local/ndx2flist.pl $* | sort > test_eval93.flist + +# Nov'93: (213 utts, 5k) +cat links/13-32.1/wsj1/doc/indices/wsj1/eval/h2_p0.ndx | \ + sed s/13_32_1/13_33_1/ | \ + $local/ndx2flist.pl $* | sort > test_eval93_5k.flist + +# Dev-set for Nov'93 (503 utts) +cat links/13-34.1/wsj1/doc/indices/h1_p0.ndx | \ + $local/ndx2flist.pl $* | sort > test_dev93.flist + +# Dev-set for Nov'93 (513 utts, 5k vocab) +cat links/13-34.1/wsj1/doc/indices/h2_p0.ndx | \ + $local/ndx2flist.pl $* | sort > test_dev93_5k.flist + + +# Dev-set Hub 1,2 (503, 913 utterances) + +# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt. +# Sometimes this gets copied from the CD's with upcasing, don't know +# why (could be older versions of the disks). +find `readlink links/13-16.1`/???1/??_??_20 -print | grep -i ".wv1" | sort > dev_dt_20.flist +find `readlink links/13-16.1`/???1/??_??_05 -print | grep -i ".wv1" | sort > dev_dt_05.flist + + +# Finding the transcript files: +for x in $*; do find -L $x -iname '*.dot'; done > dot_files.flist + +# Convert the transcripts into our format (no normalization yet) +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + $local/flist2scp.pl $x.flist | sort > ${x}_sph.scp + cat ${x}_sph.scp | awk '{print $1}' | $local/find_transcripts.pl dot_files.flist > $x.trans1 +done + +# Do some basic normalization steps. At this point we don't remove OOVs-- +# that will be done inside the training scripts, as we'd like to make the +# data-preparation stage independent of the specific lexicon used. +noiseword=""; +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + cat $x.trans1 | $local/normalize_transcript.pl $noiseword | sort > $x.txt || exit 1; +done + +# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.) +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp > ${x}_wav.scp +done + +# Make the utt2spk and spk2utt files. +for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + cat ${x}_sph.scp | awk '{print $1}' | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk + cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1; +done + + +#in case we want to limit lm's on most frequent words, copy lm training word frequency list +cp links/13-32.1/wsj1/doc/lng_modl/vocab/wfl_64.lst $lmdir +chmod u+w $lmdir/*.lst # had weird permissions on source. + +# The 20K vocab, open-vocabulary language model (i.e. the one with UNK), without +# verbalized pronunciations. This is the most common test setup, I understand. 
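# The base LMs below are taken directly from the corpus discs. The trigram
# source has extra material before the ARPA "\data\" marker, so the perl
# one-liner used below keeps only the part from that marker onwards. A
# conceptually equivalent sketch of that idiom (illustration only, not a
# replacement for the line actually run):
#   sed -n '/^\\data\\/,$p' < arpa_with_header > arpa_clean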
+ +cp links/13-32.1/wsj1/doc/lng_modl/base_lm/bcb20onp.z $lmdir/lm_bg.arpa.gz || exit 1; +chmod u+w $lmdir/lm_bg.arpa.gz + +# trigram would be: +cat links/13-32.1/wsj1/doc/lng_modl/base_lm/tcb20onp.z | \ + perl -e 'while(<>){ if(m/^\\data\\/){ print; last; } } while(<>){ print; }' | \ + gzip -c -f > $lmdir/lm_tg.arpa.gz || exit 1; + +prune-lm --threshold=1e-7 $lmdir/lm_tg.arpa.gz $lmdir/lm_tgpr.arpa || exit 1; +gzip -f $lmdir/lm_tgpr.arpa || exit 1; + +# repeat for 5k language models +cp links/13-32.1/wsj1/doc/lng_modl/base_lm/bcb05onp.z $lmdir/lm_bg_5k.arpa.gz || exit 1; +chmod u+w $lmdir/lm_bg_5k.arpa.gz + +# trigram would be: !only closed vocabulary here! +cp links/13-32.1/wsj1/doc/lng_modl/base_lm/tcb05cnp.z $lmdir/lm_tg_5k.arpa.gz || exit 1; +chmod u+w $lmdir/lm_tg_5k.arpa.gz +gunzip $lmdir/lm_tg_5k.arpa.gz +tail -n 4328839 $lmdir/lm_tg_5k.arpa | gzip -c -f > $lmdir/lm_tg_5k.arpa.gz +rm $lmdir/lm_tg_5k.arpa + +prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit 1; +gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1; + + +if [ ! -f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then + rm wsj0-train-spkrinfo.txt + ! wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt && \ + echo "Getting wsj0-train-spkrinfo.txt from backup location" && \ + wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt +fi + +if [ ! -f wsj0-train-spkrinfo.txt ]; then + echo "Could not get the spkrinfo.txt file from LDC website (moved)?" + echo "This is possibly omitted from the training disks; couldn't find it." + echo "Everything else may have worked; we just may be missing gender info" + echo "which is only needed for VTLN-related diagnostics anyway." + exit 1 +fi +# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the +# LDC put it on the web. Perhaps it was accidentally omitted from the +# disks. + +cat links/11-13.1/wsj0/doc/spkrinfo.txt \ + links/13-32.1/wsj1/doc/evl_spok/spkrinfo.txt \ + links/13-34.1/wsj1/doc/dev_spok/spkrinfo.txt \ + links/13-34.1/wsj1/doc/train/spkrinfo.txt \ + ./wsj0-train-spkrinfo.txt | \ + perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \ + awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender + + +echo "Data preparation succeeded" diff --git a/egs/chime_wsj0/s5/local/wsj_extend_dict.sh b/egs/chime_wsj0/s5/local/wsj_extend_dict.sh new file mode 100755 index 000000000..38a06bb48 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_extend_dict.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +# This script builds a larger word-list and dictionary +# than used for the LMs supplied with the WSJ corpus. +# It uses a couple of strategies to fill-in words in +# the LM training data but not in CMUdict. One is +# to generate special prons for possible acronyms, that +# just consist of the constituent letters. The other +# is designed to handle derivatives of known words +# (e.g. deriving the pron of a plural from the pron of +# the base-word), but in a more general, learned-from-data +# way. +# It makes use of scripts in local/dict/ + +if [ $# -ne 1 ]; then + echo "Usage: local/wsj_train_lms.sh /foo/bar/WSJ/13-32.1/" + exit 1 +fi +if [ "`basename $1`" != 13-32.1 ]; then + echo "Expecting the argument to this script to end in 13-32.1" + exit 1 +fi + +# e.g. 
+#srcdir=/mnt/matylda2/data/WSJ1/13-32.1 +export PATH=$PATH:`pwd`/local/dict/ +srcdir=$1 +mkdir -p data/local/dict_larger +dir=data/local/dict_larger +cp data/local/dict/* data/local/dict_larger # Various files describing phones etc. + # are there; we just want to copy them as the phoneset is the same. +rm data/local/dict_larger/lexicon.txt # we don't want this. +rm data/local/dict_larger/lexiconp.txt # we don't want this either. +mincount=2 # Minimum count of an OOV we will try to generate a pron for. + +[ ! -f data/local/dict/cmudict/cmudict.0.7a ] && echo "CMU dict not in expected place" && exit 1; + +# Remove comments from cmudict; print first field; remove +# words like FOO(1) which are alternate prons: our dict format won't +# include these markers. +grep -v ';;;' data/local/dict/cmudict/cmudict.0.7a | + perl -ane 's/^(\S+)\(\d+\)/$1/; print; ' | sort | uniq > $dir/dict.cmu + +cat $dir/dict.cmu | awk '{print $1}' | sort | uniq > $dir/wordlist.cmu + +echo "Getting training data [this should take at least a few seconds; if not, there's a problem]" + +# Convert to uppercase, remove XML-like markings. +# For words ending in "." that are not in CMUdict, we assume that these +# are periods that somehow remained in the data during data preparation, +# and we we replace the "." with "\n". Note: we found this by looking at +# oov.counts below (before adding this rule). + +touch $dir/cleaned.gz +if [ `du -m $dir/cleaned.gz | cut -f 1` -eq 73 ]; then + echo "Not getting cleaned data in $dir/cleaned.gz again [already exists]"; +else + gunzip -c $srcdir/wsj1/doc/lng_modl/lm_train/np_data/{87,88,89}/*.z \ + | awk '/^){ chop; $isword{$_} = 1; } + while() { + @A = split(" ", $_); + for ($n = 0; $n < @A; $n++) { + $a = $A[$n]; + if (! $isword{$a} && $a =~ s/^([^\.]+)\.$/$1/) { # nonwords that end in "." + # and have no other "." in them: treat as period. + print "$a"; + if ($n+1 < @A) { print "\n"; } + } else { print "$a "; } + } + print "\n"; + } + ' $dir/wordlist.cmu | gzip -c > $dir/cleaned.gz +fi + +# get unigram counts +echo "Getting unigram counts" +gunzip -c $dir/cleaned.gz | tr -s ' ' '\n' | \ + awk '{count[$1]++} END{for (w in count) { print count[w], w; }}' | sort -nr > $dir/unigrams + +cat $dir/unigrams | awk -v dict=$dir/dict.cmu \ + 'BEGIN{while(getline $dir/oov.counts + +echo "Most frequent unseen unigrams are: " +head $dir/oov.counts + +# Prune away singleton counts, and remove things with numbers in +# (which should have been normalized) and with no letters at all. + + +cat $dir/oov.counts | awk -v thresh=$mincount '{if ($1 >= thresh) { print $2; }}' \ + | awk '/[0-9]/{next;} /[A-Z]/{print;}' > $dir/oovlist + +# Automatic rule-finding... + +# First make some prons for possible acronyms. +# Note: we don't do this for things like U.K or U.N, +# or A.B. (which doesn't exist anyway), +# as we consider this normalization/spelling errors. + +cat $dir/oovlist | local/dict/get_acronym_prons.pl $dir/dict.cmu > $dir/dict.acronyms + +mkdir $dir/f $dir/b # forward, backward directions of rules... + # forward is normal suffix + # rules, backward is reversed (prefix rules). These + # dirs contain stuff we create while making the rule-based + # extensions to the dictionary. + +# Remove ; and , from words, if they are present; these +# might crash our scripts, as they are used as separators there. 
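# filter_dict.pl (used below) implements that clean-up; conceptually it is
# close to this sketch (hedged: see the actual script for the precise
# behaviour applied to each entry):
#   sed 's/[;,]//g' dict.in > dict.out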
+filter_dict.pl $dir/dict.cmu > $dir/f/dict +cat $dir/oovlist | filter_dict.pl > $dir/f/oovs +reverse_dict.pl $dir/f/dict > $dir/b/dict +reverse_dict.pl $dir/f/oovs > $dir/b/oovs + +# The next stage takes a few minutes. +# Note: the forward stage takes longer, as English is +# mostly a suffix-based language, and there are more rules +# that it finds. +for d in $dir/f $dir/b; do + ( + cd $d + cat dict | get_rules.pl 2>get_rules.log >rules + get_rule_hierarchy.pl rules >hierarchy + awk '{print $1}' dict | get_candidate_prons.pl rules dict | \ + limit_candidate_prons.pl hierarchy | \ + score_prons.pl dict | \ + count_rules.pl >rule.counts + # the sort command below is just for convenience of reading. + score_rules.pl rules.with_scores + get_candidate_prons.pl rules.with_scores dict oovs | \ + limit_candidate_prons.pl hierarchy > oovs.candidates + ) & +done +wait + +# Merge the candidates. +reverse_candidates.pl $dir/b/oovs.candidates | cat - $dir/f/oovs.candidates | sort > $dir/oovs.candidates +select_candidate_prons.pl <$dir/oovs.candidates | awk -F';' '{printf("%s %s\n", $1, $2);}' \ + > $dir/dict.oovs + +cat $dir/dict.acronyms $dir/dict.oovs | sort | uniq > $dir/dict.oovs_merged + +awk '{print $1}' $dir/dict.oovs_merged | uniq > $dir/oovlist.handled +sort $dir/oovlist | diff - $dir/oovlist.handled | grep -v 'd' | sed 's:< ::' > $dir/oovlist.not_handled + + +# add_counts.pl attaches to original counts to the list of handled/not-handled OOVs +add_counts.pl $dir/oov.counts $dir/oovlist.handled | sort -nr > $dir/oovlist.handled.counts +add_counts.pl $dir/oov.counts $dir/oovlist.not_handled | sort -nr > $dir/oovlist.not_handled.counts + +echo "**Top OOVs we handled are:**"; +head $dir/oovlist.handled.counts +echo "**Top OOVs we didn't handle are as follows (note: they are mostly misspellings):**"; +head $dir/oovlist.not_handled.counts + + +echo "Count of OOVs we handled is `awk '{x+=$1} END{print x}' $dir/oovlist.handled.counts`" +echo "Count of OOVs we couldn't handle is `awk '{x+=$1} END{print x}' $dir/oovlist.not_handled.counts`" +echo "Count of OOVs we didn't handle due to low count is" \ + `awk -v thresh=$mincount '{if ($1 < thresh) x+=$1; } END{print x;}' $dir/oov.counts` +# The two files created above are for humans to look at, as diagnostics. + +cat < $dir/lexicon.txt +!SIL SIL + SPN + SPN + NSN +EOF + +echo "Created $dir/lexicon.txt" diff --git a/egs/chime_wsj0/s5/local/wsj_format_data.sh b/egs/chime_wsj0/s5/local/wsj_format_data.sh new file mode 100755 index 000000000..ee1450f59 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_format_data.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# This script takes data prepared in a corpus-dependent way +# in data/local/, and converts it into the "canonical" form, +# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug, +# data/train_si284, data/train_si84, etc. + +# Don't bother doing train_si84 separately (although we have the file lists +# in data/local/) because it's just the first 7138 utterances in train_si284. +# We'll create train_si84 after doing the feature extraction. + +. 
./path.sh || exit 1; + +echo "Preparing train and test data" +srcdir=data/local/data +lmdir=data/local/nist_lm +tmpdir=data/local/lm_tmp +lexicon=data/local/lang_tmp/lexiconp.txt +mkdir -p $tmpdir + +for x in train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do + mkdir -p data/$x + cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1; + cp $srcdir/$x.txt data/$x/text || exit 1; + cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1; + cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1; + utils/filter_scp.pl data/$x/spk2utt $srcdir/spk2gender > data/$x/spk2gender || exit 1; +done + + +# Next, for each type of language model, create the corresponding FST +# and the corresponding lang_test_* directory. + +echo Preparing language models for test + +for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do + test=data/lang_test_${lm_suffix} + mkdir -p $test + for f in phones.txt words.txt phones.txt L.fst L_disambig.fst \ + phones/; do + cp -r data/lang/$f $test + done + gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \ + utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt + + # grep -v ' ' because the LM seems to have some strange and useless + # stuff in it with multiple 's in the history. Encountered some other similar + # things in a LM from Geoff. Removing all "illegal" combinations of and , + # which are supposed to occur only at being/end of utt. These can cause + # determinization failures of CLG [ends up being epsilon cycles]. + gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \ + grep -v ' ' | \ + grep -v ' ' | \ + grep -v ' ' | \ + arpa2fst - | fstprint | \ + utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \ + utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \ + --osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \ + fstrmepsilon > $test/G.fst + fstisstochastic $test/G.fst + # The output is like: + # 9.14233e-05 -0.259833 + # we do expect the first of these 2 numbers to be close to zero (the second is + # nonzero because the backoff weights make the states sum to >1). + # Because of the fiasco for these particular LMs, the first number is not + # as close to zero as it could be. + + # Everything below is only for diagnostic. + # Checking that G has no cycles with empty words on them (e.g. , ); + # this might cause determinization failure of CLG. + # #0 is treated as an empty word. + mkdir -p $tmpdir/g + awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \ + < "$lexicon" >$tmpdir/g/select_empty.fst.txt + fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt $tmpdir/g/select_empty.fst.txt | \ + fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst + fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && + echo "Language model has cycles with empty words" && exit 1 + rm -r $tmpdir/g +done + +echo "Succeeded in formatting data." +rm -r $tmpdir diff --git a/egs/chime_wsj0/s5/local/wsj_format_local_lms.sh b/egs/chime_wsj0/s5/local/wsj_format_local_lms.sh new file mode 100755 index 000000000..31b1a8662 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_format_local_lms.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# Copyright Johns Hopkins University (Author: Daniel Povey) 2012 + +. ./path.sh + +[ ! -d data/lang_bd ] && echo "Expect data/local/lang_bd to exist" && exit 1; + +lm_srcdir_3g=data/local/local_lm/3gram-mincount +lm_srcdir_4g=data/local/local_lm/4gram-mincount + +[ ! 
-d "$lm_srcdir_3g" ] && echo "No such dir $lm_srcdir_3g" && exit 1; +[ ! -d "$lm_srcdir_4g" ] && echo "No such dir $lm_srcdir_4g" && exit 1; + +for d in data/lang_test_bd_{tg,tgpr,fg,fgpr}; do + rm -r $d 2>/dev/null + cp -r data/lang_bd $d +done + +lang=data/lang_bd + +# Be careful: this time we dispense with the grep -v ' ' so this might +# not work for LMs generated from all toolkits. +gunzip -c $lm_srcdir_3g/lm_pr6.0.gz | \ + arpa2fst - | fstprint | \ + utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \ + --osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ + fstrmepsilon > data/lang_test_bd_tgpr/G.fst || exit 1; + fstisstochastic data/lang_test_bd_tgpr/G.fst + +gunzip -c $lm_srcdir_3g/lm_unpruned.gz | \ + arpa2fst - | fstprint | \ + utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \ + --osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ + fstrmepsilon > data/lang_test_bd_tg/G.fst || exit 1; + fstisstochastic data/lang_test_bd_tg/G.fst + +gunzip -c $lm_srcdir_4g/lm_unpruned.gz | \ + arpa2fst - | fstprint | \ + utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \ + --osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ + fstrmepsilon > data/lang_test_bd_fg/G.fst || exit 1; + fstisstochastic data/lang_test_bd_fg/G.fst + +gunzip -c $lm_srcdir_4g/lm_pr7.0.gz | \ + arpa2fst - | fstprint | \ + utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \ + --osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \ + fstrmepsilon > data/lang_test_bd_fgpr/G.fst || exit 1; + fstisstochastic data/lang_test_bd_fgpr/G.fst + +exit 0; diff --git a/egs/chime_wsj0/s5/local/wsj_prepare_dict.sh b/egs/chime_wsj0/s5/local/wsj_prepare_dict.sh new file mode 100755 index 000000000..82ba8ad94 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_prepare_dict.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# Call this script from one level above, e.g. from the s3/ directory. It puts +# its output in data/local/. + +# The parts of the output of this that will be needed are +# [in data/local/dict/ ] +# lexicon.txt +# extra_questions.txt +# nonsilence_phones.txt +# optional_silence.txt +# silence_phones.txt + +# run this from ../ +dir=data/local/dict +mkdir -p $dir + + +# (1) Get the CMU dictionary +svn co https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \ + $dir/cmudict || exit 1; + +# can add -r 10966 for strict compatibility. + + +#(2) Dictionary preparation: + + +# Make phones symbol-table (adding in silence and verbal and non-verbal noises at this point). +# We are adding suffixes _B, _E, _S for beginning, ending, and singleton phones. + +# silence phones, one per line. 
+(echo SIL; echo SPN; echo NSN) > $dir/silence_phones.txt +echo SIL > $dir/optional_silence.txt + +# nonsilence phones; on each line is a list of phones that correspond +# really to the same base phone. +cat $dir/cmudict/cmudict.0.7a.symbols | perl -ane 's:\r::; print;' | \ + perl -e 'while(<>){ + chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_"; + $phones_of{$1} .= "$_ "; } + foreach $list (values %phones_of) {print $list . "\n"; } ' \ + > $dir/nonsilence_phones.txt || exit 1; + +# A few extra questions that will be added to those obtained by automatically clustering +# the "real" phones. These ask about stress; there's also one for silence. +cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1; +cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) { + $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \ + >> $dir/extra_questions.txt || exit 1; + +grep -v ';;;' $dir/cmudict/cmudict.0.7a | \ + perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; print; }' \ + > $dir/lexicon1_raw_nosil.txt || exit 1; + +# Add to cmudict the silences, noises etc. + +(echo '!SIL SIL'; echo ' SPN'; echo ' SPN'; echo ' NSN'; ) | \ + cat - $dir/lexicon1_raw_nosil.txt > $dir/lexicon2_raw.txt || exit 1; + + +# lexicon.txt is without the _B, _E, _S, _I markers. +# This is the input to wsj_format_data.sh +cp $dir/lexicon2_raw.txt $dir/lexicon.txt + + +echo "Dictionary preparation succeeded" + diff --git a/egs/chime_wsj0/s5/local/wsj_train_lms.sh b/egs/chime_wsj0/s5/local/wsj_train_lms.sh new file mode 100755 index 000000000..060f387f2 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_train_lms.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This script trains LMs on the WSJ LM-training data. +# It requires that you have already run wsj_extend_dict.sh, +# to get the larger-size dictionary including all of CMUdict +# plus any OOVs and possible acronyms that we could easily +# derive pronunciations for. + +# This script takes no command-line arguments + +dir=data/local/local_lm +srcdir=data/local/dict_larger +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH +( # First make sure the kaldi_lm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d kaldi_lm ]; then + echo Not installing the kaldi_lm toolkit since it is already there. + else + echo Downloading and installing the kaldi_lm tools + if [ ! -f kaldi_lm.tar.gz ]; then + wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + fi + tar -xvzf kaldi_lm.tar.gz || exit 1; + cd kaldi_lm + make || exit 1; + echo Done making the kaldi_lm tools + fi +) || exit 1; + + + +if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then + echo "Expecting files $srcdir/cleaned.gz and $srcdir/lexicon.txt to exist"; + echo "You need to run local/wsj_extend_dict.sh before running this script." + exit 1; +fi + +# Get a wordlist-- keep everything but silence, which should not appear in +# the LM. +awk '{print $1}' $srcdir/lexicon.txt | grep -v -w '!SIL' > $dir/wordlist.txt + +# Get training data with OOV words (w.r.t. our current vocab) replaced with . 
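# In this recipe the OOV token is <UNK>; the same token is also used when the
# word map is built below. A minimal sketch of the replacement idiom, with the
# redirection spelled out (wordlist.txt holds the in-vocabulary words):
#   awk -v w=wordlist.txt 'BEGIN{ while((getline line < w) > 0) v[line]=1; }
#        { for(i=1;i<=NF;i++) printf("%s ", ($i in v) ? $i : "<UNK>"); print ""; }' \
#     < cleaned.txt > train_nounk.txt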
+echo "Getting training data with OOV words replaced with (train_nounk.gz)" +gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.txt \ + 'BEGIN{while((getline0) v[$1]=1;} + {for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf " ";print ""}'|sed 's/ $//g' \ + | gzip -c > $dir/train_nounk.gz + +# Get unigram counts (without bos/eos, but this doens't matter here, it's +# only to get the word-map, which treats them specially & doesn't need their +# counts). +# Add a 1-count for each word in word-list by including that in the data, +# so all words appear. +gunzip -c $dir/train_nounk.gz | cat - $dir/wordlist.txt | \ + awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \ + sort -nr > $dir/unigram.counts + +# Get "mapped" words-- a character encoding of the words that makes the common words very short. +cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map + +gunzip -c $dir/train_nounk.gz | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;} + { for(n=1;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz + +# To save disk space, remove the un-mapped training data. We could +# easily generate it again if needed. +rm $dir/train_nounk.gz + +train_lm.sh --arpa --lmtype 3gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826 +# 7.8 million N-grams. + +prune_lm.sh --arpa 6.0 $dir/3gram-mincount/ +# 1.45 million N-grams. +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139 + +train_lm.sh --arpa --lmtype 4gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180 +# 10.3 million N-grams. + +prune_lm.sh --arpa 7.0 $dir/4gram-mincount +# 1.50 million N-grams +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757 + + +exit 0 + +### Below here, this script is showing various commands that +## were run during LM tuning. + +train_lm.sh --arpa --lmtype 3gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826 +# 7.8 million N-grams. + +prune_lm.sh --arpa 3.0 $dir/3gram-mincount/ +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 156.408740 +# 2.5 million N-grams. + +prune_lm.sh --arpa 6.0 $dir/3gram-mincount/ +# 1.45 million N-grams. +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139 + +train_lm.sh --arpa --lmtype 4gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180 +# 10.3 million N-grams. + +prune_lm.sh --arpa 3.0 $dir/4gram-mincount +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 143.206294 +# 2.6 million N-grams. + +prune_lm.sh --arpa 4.0 $dir/4gram-mincount +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 146.927717 +# 2.15 million N-grams. + +prune_lm.sh --arpa 5.0 $dir/4gram-mincount +# 1.86 million N-grams +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 150.162023 + +prune_lm.sh --arpa 7.0 $dir/4gram-mincount +# 1.50 million N-grams +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757 + +train_lm.sh --arpa --lmtype 3gram $dir +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 135.692866 +# 20.0 million N-grams + +! which ngram-count \ + && echo "SRILM tools not installed so not doing the comparison" && exit 1; + +################# +# You could finish the script here if you wanted. +# Below is to show how to do baselines with SRILM. 
+# You'd have to install the SRILM toolkit first. + +heldout_sent=10000 # Don't change this if you want result to be comparable with + # kaldi_lm results +sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities. +mkdir -p $sdir +gunzip -c $srcdir/cleaned.gz | head -$heldout_sent > $sdir/cleaned.heldout +gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent > $sdir/cleaned.train +(echo ""; echo "" ) | cat - $dir/wordlist.txt > $sdir/wordlist.final.s + +# 3-gram: +ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \ + -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz +ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/cleaned.heldout # consider -debug 2 +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs +#0 zeroprobs, logprob= -491456 ppl= 141.457 ppl1= 177.437 + +# Trying 4-gram: +ngram-count -text $sdir/cleaned.train -order 4 -limit-vocab -vocab $sdir/wordlist.final.s -unk \ + -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o4g.kn.gz +ngram -order 4 -lm $sdir/srilm.o4g.kn.gz -ppl $sdir/cleaned.heldout +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs +#0 zeroprobs, logprob= -480939 ppl= 127.233 ppl1= 158.822 + +#3-gram with pruning: +ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \ + -prune 0.0000001 -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.pr7.kn.gz +ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs +#0 zeroprobs, logprob= -510828 ppl= 171.947 ppl1= 217.616 +# Around 2.25M N-grams. +# Note: this is closest to the experiment done with "prune_lm.sh --arpa 3.0 $dir/3gram-mincount/" +# above, which gave 2.5 million N-grams and a perplexity of 156. + +# Note: all SRILM experiments above fully discount all singleton 3 and 4-grams. +# You can use -gt3min=0 and -gt4min=0 to stop this (this will be comparable to +# the kaldi_lm experiments above without "-mincount". + +## From here is how to train with +# IRSTLM. This is not really working at the moment. +export IRSTLM=$KALDI_ROOT/tools/irstlm/ + +idir=$dir/irstlm +mkdir $idir +gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | $IRSTLM/scripts/add-start-end.sh | \ + gzip -c > $idir/train.gz + +$IRSTLM/bin/dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no + cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\ +{print $0;}}' > vocab.irstlm.20k + + +$IRSTLM/bin/build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \ + -n 3 -s improved-kneser-ney -b yes +# Testing perplexity with SRILM tools: +ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout +#data/local/local_lm/irstlm/lm_3gram.gz: line 162049: warning: non-zero probability for in closed-vocabulary LM +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 0 OOVs +#0 zeroprobs, logprob= -513670 ppl= 175.041 ppl1= 221.599 + +# Perplexity is very bad (should be ~141, since we used -p option, +# not 175), +# but adding -debug 3 to the command line shows that +# the IRSTLM LM does not seem to sum to one properly, so it seems that +# it produces an LM that isn't interpretable in the normal way as an ARPA +# LM. 
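# For reference, the perplexities SRILM reports can be reproduced (up to
# rounding) from the logprob line as ppl = 10^(-logprob / (words - OOVs +
# sentences)). Using the 3-gram numbers above (logprob = -491456, 218996
# words, 478 OOVs, 10000 sentences):
#   awk 'BEGIN{ n = 218996 - 478 + 10000;           # 228518 scored tokens
#               print exp(491456 / n * log(10)) }'  # ~141.5, matching ppl= 141.457
# which is also why the kaldi_lm results above are quoted "over 228518 words".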
+ + + diff --git a/egs/chime_wsj0/s5/local/wsj_train_rnnlms.sh b/egs/chime_wsj0/s5/local/wsj_train_rnnlms.sh new file mode 100755 index 000000000..c0d1afaf6 --- /dev/null +++ b/egs/chime_wsj0/s5/local/wsj_train_rnnlms.sh @@ -0,0 +1,153 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson + +# This script trains LMs on the WSJ LM-training data. +# It requires that you have already run wsj_extend_dict.sh, +# to get the larger-size dictionary including all of CMUdict +# plus any OOVs and possible acronyms that we could easily +# derive pronunciations for. + +# This script takes no command-line arguments but takes the --cmd option. + +# Begin configuration section. +rand_seed=0 +cmd=run.pl +nwords=10000 # This is how many words we're putting in the vocab of the RNNLM. +hidden=30 +class=200 # Num-classes... should be somewhat larger than sqrt of nwords. +direct=1000 # Probably number of megabytes to allocate for hash-table for "direct" connections. +rnnlm_ver=rnnlm-0.3e # version of RNNLM to use +# End configuration section. + +[ -f ./path.sh ] && . ./path.sh +. utils/parse_options.sh + +if [ $# != 1 ]; then + echo "Usage: local/wsj_train_rnnlms.sh [options] " + echo "For options, see top of script file" + exit 1; +fi + +dir=$1 +srcdir=data/local/dict_larger +mkdir -p $dir + +export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH + + +( # First make sure the kaldi_lm toolkit is installed. + # Note: this didn't work out of the box for me, I had to + # change the g++ version to just "g++" (no cross-compilation + # needed for me as I ran on a machine that had been setup + # as 64 bit by default. + cd $KALDI_ROOT/tools || exit 1; + if [ -d $rnnlm_ver ]; then + echo Not installing the rnnlm toolkit since it is already there. + else + echo Downloading and installing the rnnlm tools + # http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz + if [ ! -f $rnnlm_ver.tgz ]; then + wget http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz || exit 1; + fi + mkdir $rnnlm_ver + cd $rnnlm_ver + tar -xvzf ../$rnnlm_ver.tgz || exit 1; + make CC=g++ || exit 1; + echo Done making the rnnlm tools + fi +) || exit 1; + + +if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then + echo "Expecting files $srcdir/cleaned.gz and $srcdir/wordlist.final to exist"; + echo "You need to run local/wsj_extend_dict.sh before running this script." + exit 1; +fi + +cat $srcdir/lexicon.txt | awk '{print $1}' | grep -v -w '!SIL' > $dir/wordlist.all + +# Get training data with OOV words (w.r.t. our current vocab) replaced with . +echo "Getting training data with OOV words replaced with (train_nounk.gz)" +gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.all \ + 'BEGIN{while((getline0) v[$1]=1;} + {for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf " ";print ""}'|sed 's/ $//g' \ + | gzip -c > $dir/all.gz + +echo "Splitting data into train and validation sets." +heldout_sent=10000 +gunzip -c $dir/all.gz | head -n $heldout_sent > $dir/valid.in # validation data +gunzip -c $dir/all.gz | tail -n +$heldout_sent | \ + perl -e ' use List::Util qw(shuffle); @A=<>; print join("", shuffle(@A)); ' \ + > $dir/train.in # training data + + + # The rest will consist of a word-class represented by , that + # maps (with probabilities) to a whole class of words. + +# Get unigram counts from our training data, and use this to select word-list +# for RNNLM training; e.g. 10k most frequent words. 
Rest will go in a class +# that we (manually, at the shell level) assign probabilities for words that +# are in that class. Note: this word-list doesn't need to include ; this +# automatically gets added inside the rnnlm program. +# Note: by concatenating with $dir/wordlist.all, we are doing add-one +# smoothing of the counts. + +cat $dir/train.in $dir/wordlist.all | grep -v '' | grep -v '' | \ + awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \ + sort -nr > $dir/unigram.counts + +head -$nwords $dir/unigram.counts | awk '{print $2}' > $dir/wordlist.rnn + +tail -n +$nwords $dir/unigram.counts > $dir/unk_class.counts + +tot=`awk '{x=x+$1} END{print x}' $dir/unk_class.counts` +awk -v tot=$tot '{print $2, ($1*1.0/tot);}' <$dir/unk_class.counts >$dir/unk.probs + + +for type in train valid; do + cat $dir/$type.in | awk -v w=$dir/wordlist.rnn \ + 'BEGIN{while((getline0) v[$1]=1;} + {for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf " ";print ""}'|sed 's/ $//g' \ + > $dir/$type +done +rm $dir/train.in # no longer needed-- and big. + +# Now randomize the order of the training data. +cat $dir/train | awk -v rand_seed=$rand_seed 'BEGIN{srand(rand_seed);} {printf("%f\t%s\n", rand(), $0);}' | \ + sort | cut -f 2 > $dir/foo +mv $dir/foo $dir/train + +# OK we'll train the RNNLM on this data. + +# todo: change 100 to 320. +# using 100 classes as square root of 10k. +echo "Training RNNLM (note: this uses a lot of memory! Run it on a big machine.)" +#time rnnlm -train $dir/train -valid $dir/valid -rnnlm $dir/100.rnnlm \ +# -hidden 100 -rand-seed 1 -debug 2 -class 100 -bptt 2 -bptt-block 20 \ +# -direct-order 4 -direct 1000 -binary >& $dir/rnnlm1.log & + +$cmd $dir/rnnlm.log \ + $KALDI_ROOT/tools/$rnnlm_ver/rnnlm -independent -train $dir/train -valid $dir/valid \ + -rnnlm $dir/rnnlm -hidden $hidden -rand-seed 1 -debug 2 -class $class -bptt 2 -bptt-block 20 \ + -direct-order 4 -direct $direct -binary || exit 1; + + +# make it like a Kaldi table format, with fake utterance-ids. +cat $dir/valid.in | awk '{ printf("uttid-%d ", NR); print; }' > $dir/valid.with_ids + +utils/rnnlm_compute_scores.sh $dir $dir/tmp.valid $dir/valid.with_ids \ + $dir/valid.scores +nw=`wc -w < $dir/valid.with_ids` # Note: valid.with_ids includes utterance-ids which + # is one per word, to account for the at the end of each sentence; this is the + # correct number to normalize buy. +p=`awk -v nw=$nw '{x=x+$2} END{print exp(x/nw);}' <$dir/valid.scores` +echo Perplexity is $p | tee $dir/perplexity.log + +rm $dir/train $dir/all.gz + +# This is a better setup, but takes a long time to train: +#echo "Training RNNLM (note: this uses a lot of memory! Run it on a big machine.)" +#time rnnlm -train $dir/train -valid $dir/valid -rnnlm $dir/320.rnnlm \ +# -hidden 320 -rand-seed 1 -debug 2 -class 300 -bptt 2 -bptt-block 20 \ +# -direct-order 4 -direct 2000 -binary diff --git a/egs/chime_wsj0/s5/path.sh b/egs/chime_wsj0/s5/path.sh new file mode 100755 index 000000000..11fb0b17d --- /dev/null +++ b/egs/chime_wsj0/s5/path.sh @@ -0,0 +1,3 @@ +export KALDI_ROOT=`pwd`/../../.. 
+export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet-cpubin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export LC_ALL=C diff --git a/egs/chime_wsj0/s5/run.sh b/egs/chime_wsj0/s5/run.sh new file mode 100755 index 000000000..0c3d5c906 --- /dev/null +++ b/egs/chime_wsj0/s5/run.sh @@ -0,0 +1,261 @@ +#!/bin/bash + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +# This is a shell script, but it's recommended that you run the commands one by +# one by copying and pasting into the shell. + +case 0 in #goto here + 1) +;; #here: +esac + +#exit 1; +#need wsj0 for the clean version and LMs +wsj0=/mnt/spdb/wall_street_journal +local/clean_wsj0_data_prep.sh $wsj0 + +reverb=/mnt/spdb/CHiME/chime2-wsj0/reverberated +local/reverb_wsj0_data_prep.sh $reverb + +noisy=/mnt/spdb/CHiME/chime2-wsj0/isolated +local/noisy_wsj0_data_prep.sh $noisy + +local/wsj_prepare_dict.sh || exit 1; + +utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang || exit 1; + +local/chime_format_data.sh || exit 1; + +# Now make MFCC features. +# mfccdir should be some place with a largish disk where you +# want to store MFCC features. + +mfccdir=mfcc +for x in test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean; do + steps/make_mfcc.sh --nj 10 \ + data/$x exp/make_mfcc/$x $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; +done + +# Note: the --boost-silence option should probably be omitted by default +# for normal setups. It doesn't always help. [it's to discourage non-silence +# models from modeling silence.] 
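# "Boosting" silence means scaling the silence-model likelihoods before
# alignment, so that non-silence models are less tempted to absorb silence
# frames. Inside the train/align scripts this is done roughly as in the sketch
# below (1.25 is the value passed to train_mono.sh further down):
#   gmm-boost-silence --boost=1.25 `cat data/lang/phones/optional_silence.csl` \
#     exp/mono0a/final.mdl - | ...   # boosted model is piped into gmm-align-compiled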
+mfccdir=mfcc +for x in test_eval92_5k_noisy dev_dt_05_noisy train_si84_noisy; do + steps/make_mfcc.sh --nj 10 \ + data/$x exp/make_mfcc/$x $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; +done + +mfccdir=mfcc +for x in dev_dt_05_reverb train_si84_reverb; do + steps/make_mfcc.sh --nj 10 \ + data/$x exp/make_mfcc/$x $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; +done + +#begin train gmm systems using multi condition data +#train_si84 = clean+reverb+noisy, +for s in train_si84 ; do + mkdir -p data/$s + cp data/${s}_clean/spk2gender data/$s/ + for x in text wav.scp; do + cat data/${s}_clean/$x data/${s}_reverb/$x data/${s}_noisy/$x | sort -k1 > data/$s/$x + done + cat data/$s/wav.scp | awk '{print $1}' | perl -ane 'chop; m:^...:; print "$_ $&\n";' > data/$s/utt2spk + cat data/$s/utt2spk | utils/utt2spk_to_spk2utt.pl > data/$s/spk2utt +done + +mfccdir=mfcc +for x in train_si84; do + steps/make_mfcc.sh --nj 10 \ + data/$x exp/make_mfcc/$x $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; +done + +steps/train_mono.sh --boost-silence 1.25 --nj 10 \ + data/train_si84 data/lang exp/mono0a || exit 1; + + + +utils/mkgraph.sh --mono data/lang_test_tgpr_5k exp/mono0a exp/mono0a/graph_tgpr_5k +#steps/decode.sh --nj 8 \ +# exp/mono0a/graph_tgpr_5k data/test_eval92_5k_clean exp/mono0a/decode_tgpr_eval92_5k_clean +steps/decode.sh --nj 8 \ + exp/mono0a/graph_tgpr_5k data/test_eval92_5k_noisy exp/mono0a/decode_tgpr_eval92_5k_noisy + + +steps/align_si.sh --boost-silence 1.25 --nj 10 \ + data/train_si84 data/lang exp/mono0a exp/mono0a_ali || exit 1; + +steps/train_deltas.sh --boost-silence 1.25 \ + 2000 10000 data/train_si84 data/lang exp/mono0a_ali exp/tri1 || exit 1; + +while [ ! -f data/lang_test_tgpr/tmp/LG.fst ] || \ + [ -z data/lang_test_tgpr/tmp/LG.fst ]; do + sleep 20; +done +sleep 30; +# or the mono mkgraph.sh might be writing +# data/lang_test_tgpr/tmp/LG.fst which will cause this to fail. + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri1 exp/tri1/graph_tgpr_5k || exit 1; + +#steps/decode.sh --nj 8 \ +# exp/tri1/graph_tgpr data/test_eval92_5k_clean exp/tri1/decode_tgpr_eval92_5k_clean || exit 1; +steps/decode.sh --nj 8 \ + exp/tri1/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri1/decode_tgpr_eval92_5k_noisy || exit 1; + + +# test various modes of LM rescoring (4 is the default one). +# This is just confirming they're equivalent. +#for mode in 1 2 3 4; do +#steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ +# data/test_dev93 exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_tg$mode || exit 1; +#done + +# demonstrate how to get lattices that are "word-aligned" (arcs coincide with +# words, with boundaries in the right place). +#sil_label=`grep '!SIL' data/lang_test_tgpr/words.txt | awk '{print $2}'` +#steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label $sil_label \ +# data/lang_test_tgpr exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_aligned || exit 1; + +steps/align_si.sh --nj 10 \ + data/train_si84 data/lang exp/tri1 exp/tri1_ali_si84 || exit 1; + +# Train tri2a, which is deltas + delta-deltas, on si84 data. 
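# "Deltas + delta-deltas" means the base MFCCs are extended with their first
# and second time derivatives (13 -> 39 dimensions with the default config);
# inside train_deltas.sh the per-job feature pipeline is roughly (sketch):
#   apply-cmvn --utt2spk=ark:utt2spk scp:cmvn.scp scp:feats.scp ark:- | \
#     add-deltas ark:- ark:-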
+steps/train_deltas.sh \ + 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2a || exit 1; + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2a exp/tri2a/graph_tgpr_5k || exit 1; + +#steps/decode.sh --nj 8 \ +# exp/tri2a/graph_tgpr_5k data/test_eval92_5k_clean exp/tri2a/decode_tgpr_eval92_5k_clean || exit 1; +steps/decode.sh --nj 8 \ + exp/tri2a/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2a/decode_tgpr_eval92_5k_noisy|| exit 1; + +#utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k +#steps/decode.sh --nj 8 \ +# exp/tri2a/graph_bg5k data/test_eval92_5k_clean exp/tri2a/decode_bg_eval92_5k_clean || exit 1; + +steps/train_lda_mllt.sh \ + --splice-opts "--left-context=3 --right-context=3" \ + 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b || exit 1; + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2b exp/tri2b/graph_tgpr_5k || exit 1; +steps/decode.sh --nj 8 \ + exp/tri2b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2b/decode_tgpr_eval92_5k_noisy || exit 1; +#steps/decode.sh --nj 8 \ +# exp/tri2b/graph_tgpr data/test_eval92_clean exp/tri2b/decode_tgpr_eval92_clean || exit 1; + + +# Align tri2b system with si84 data. +steps/align_si.sh --nj 10 \ + --use-graphs true data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84 || exit 1; + + +# From 2b system, train 3b which is LDA + MLLT + SAT. +steps/train_sat.sh \ + 2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri3b || exit 1; +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b exp/tri3b/graph_tgpr_5k || exit 1; +steps/decode_fmllr.sh --nj 8 \ + exp/tri3b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri3b/decode_tgpr_eval92_5k_noisy || exit 1; + + +# From 3b multi-condition system, align noisy si84 data. +steps/align_fmllr.sh --nj 10 \ + data/train_si84_noisy data/lang exp/tri3b exp/tri3b_ali_si84_noisy || exit 1; + +steps/align_fmllr.sh --nj 10 \ + data/dev_dt_05_noisy data/lang exp/tri3b exp/tri3b_ali_dev_dt_05 || exit 1; + +#begin training DNN-HMM system +#only on noisy si84 + +. 
./path.sh +#RBM pretraining +dir=exp/tri4a_dnn_pretrain +$cuda_cmd $dir/_pretrain_dbn.log \ + steps/pretrain_dbn.sh --use-gpu-id 0 --nn-depth 7 --rbm-iter 3 data-fbank/train_si84_noisy $dir +#BP +dir=exp/tri4a_dnn +ali=exp/tri3b_ali_si84_noisy +ali_dev=exp/tri3b_ali_dev_dt_05 +feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform +dbn=exp/tri4a_dnn_pretrain/7.dbn +$cuda_cmd $dir/_train_nnet.log \ + steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \ + data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1; + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri4a_dnn exp/tri4a_dnn/graph_tgpr_5k || exit 1; +steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \ + exp/tri4a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1; + +#Retrain system using new ali, +#this is essential +#repeat this process for 3 times +srcdir=exp/tri4a_dnn +steps/align_nnet.sh --nj 10 \ + data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1; +steps/align_nnet.sh --nj 10 \ + data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1; + +#no need to do pretraining again +dir=exp/tri5a_dnn +ali=exp/tri4a_dnn_ali_si84_noisy +ali_dev=exp/tri4a_dnn_ali_dt_05_noisy +feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform +dbn=exp/tri4a_dnn_pretrain/7.dbn +$cuda_cmd $dir/_train_nnet.log \ + steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \ + data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1; + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri5a_dnn exp/tri5a_dnn/graph_tgpr_5k || exit 1; +steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \ + exp/tri5a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1; + + +srcdir=exp/tri5a_dnn +steps/align_nnet.sh --nj 10 \ + data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1; +steps/align_nnet.sh --nj 10 \ + data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1; + +. ./path.sh +dir=exp/tri6a_dnn +ali=exp/tri5a_dnn_ali_si84_noisy +ali_dev=exp/tri5a_dnn_ali_dt_05_noisy +feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform +dbn=exp/tri4a_dnn_pretrain/7.dbn +$cuda_cmd $dir/_train_nnet.log \ + steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \ + data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1; + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri6a_dnn exp/tri6a_dnn/graph_tgpr_5k || exit 1; +steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \ + exp/tri6a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1; + +srcdir=exp/tri6a_dnn +steps/align_nnet.sh --nj 10 \ + data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1; +steps/align_nnet.sh --nj 10 \ + data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1; + +. 
./path.sh +dir=exp/tri7a_dnn +ali=exp/tri6a_dnn_ali_si84_noisy +ali_dev=exp/tri6a_dnn_ali_dt_05_noisy +feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform +dbn=exp/tri4a_dnn_pretrain/7.dbn +$cuda_cmd $dir/_train_nnet.log \ + steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \ + data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1; + +utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri7a_dnn exp/tri7a_dnn/graph_tgpr_5k || exit 1; +steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \ + exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1; + + + diff --git a/egs/chime_wsj0/s5/steps/align_basis_fmllr.sh b/egs/chime_wsj0/s5/steps/align_basis_fmllr.sh new file mode 100755 index 000000000..54f35b36a --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_basis_fmllr.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2013 GoVivace Inc (Author: Nagendra Goel) +# Apache 2.0 + +# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta) +# + fMLLR (probably with SAT models). +# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl +# is not present), then does 2 iterations of fMLLR estimation. + +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match the source directory. + + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +boost_silence=1.5 # factor by which to boost silence during alignment. +fmllr_update_type=full +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_fmllr.sh " + echo "e.g.: steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --fmllr-update-type (full|diag|offset|none) # default full." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 +graphdir=$dir + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +sdata=$data/split$nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +cp $srcdir/final.occs $dir; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. 
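# splice_opts holds the frame-splicing context the source model was trained
# with; in this recipe it is "--left-context=3 --right-context=3", i.e. a
# 7-frame window stacked before applying the LDA+MLLT matrix final.mat. The
# lda branch of the case statement below uses it as (sketch with the options
# expanded):
#   splice-feats --left-context=3 --right-context=3 ark:- ark:- | \
#     transform-feats final.mat ark:- ark:-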
+ + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; +alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/boost_phones.csl` $alimdl - |" +mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |" + +## Work out where we're getting the graphs from. +if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + + +if [ $stage -le 1 ]; then + echo "$0: aligning data in $data using $alimdl and speaker-independent features." + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: computing fMLLR transforms" + if [ "$alimdl" != "$mdl" ]; then + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \ + gmm-est-basis-fmllr-gpost --fmllr-min-count=22 --num-iters=10 \ + --size-scale=0.2 --step-size-iters=3 \ + --write-weights=ark:$dir/pre_wgt.JOB \ + $mdl $srcdir/fmllr.basis "$sifeats" ark,s,cs:- \ + ark:$dir/trans.JOB || exit 1; +# else +# $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ +# ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ +# weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ +# gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ +# --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ +# ark,s,cs:- ark:$dir/trans.JOB || exit 1; + fi +fi + +feats="$sifeats transform-feats ark:$dir/pre_trans.JOB ark:- ark:- |" + +if [ $stage -le 3 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +#rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." 
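# A quick sanity check on the output (sketch; the paths are illustrative):
#   gunzip -c exp/tri3b_ali/ali.*.gz | copy-int-vector ark:- ark,t:- | wc -l
#   wc -l < data/train_si84/feats.scp    # the two counts should match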
+ +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/align_fmllr.sh b/egs/chime_wsj0/s5/steps/align_fmllr.sh new file mode 100755 index 000000000..3052eb409 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_fmllr.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta) +# + fMLLR (probably with SAT models). +# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl +# is not present), then does 2 iterations of fMLLR estimation. + +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match the source directory. + + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +boost_silence=1.0 # factor by which to boost silence during alignment. +fmllr_update_type=full +norm_vars=false +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_fmllr.sh " + echo "e.g.: steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --fmllr-update-type (full|diag|offset|none) # default full." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +sdata=$data/split$nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +cp $srcdir/final.occs $dir; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $srcdir/full.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; +alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |" +mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |" + + +## Work out where we're getting the graphs from. 
+if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + + +if [ $stage -le 1 ]; then + echo "$0: aligning data in $data using $alimdl and speaker-independent features." + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: computing fMLLR transforms" + if [ "$alimdl" != "$mdl" ]; then + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ + ark,s,cs:- ark:$dir/trans.JOB || exit 1; + else + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ + ark,s,cs:- ark:$dir/trans.JOB || exit 1; + fi +fi + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +if [ $stage -le 3 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/align_nnet.sh b/egs/chime_wsj0/s5/steps/align_nnet.sh new file mode 100755 index 000000000..fe70416e6 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_nnet.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2012 Brno University of Technology (Author: Karel Vesely) +# Apache 2.0 + +# Computes training alignments using MLP model + +# If you supply the "--use-graphs true" option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match with the source directory. + + +# Begin configuration section. +nj=4 +cmd=run.pl +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +use_gpu_id=-1 # disable gpu +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: $0 " + echo "e.g.: $0 data/train data/lang exp/tri1 exp/tri1_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +mkdir -p $dir/log +echo $nj > $dir/num_jobs +sdata=$data/split$nj +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; + +#Get the files we will need +nnet=$srcdir/final.nnet; +[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1; + +class_frame_counts=$srcdir/ali_train_pdf.counts +[ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1; + +feature_transform=$srcdir/final.feature_transform +if [ ! -f $feature_transform ]; then + echo "Missing feature_transform '$feature_transform'" + exit 1 +fi + +model=$dir/final.mdl +[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1; + +### +### Prepare feature pipeline (same as for decoding) +### +# Create the feature stream: +feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |" +# Optionally add cmvn +if [ -f $srcdir/norm_vars ]; then + norm_vars=$(cat $srcdir/norm_vars 2>/dev/null) + [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1 + feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |" +fi +# Optionally add deltas +if [ -f $srcdir/delta_order ]; then + delta_order=$(cat $srcdir/delta_order) + feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |" +fi + +# Finally add feature_transform and the MLP +feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |" +### +### +### + +echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" + +tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; +# We could just use gmm-align-mapped in the next line, but it's less efficient as it compiles the +# training graphs one by one. +$cmd JOB=1:$nj $dir/log/align.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ + align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \ + "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1; + +echo "$0: done aligning data." diff --git a/egs/chime_wsj0/s5/steps/align_raw_fmllr.sh b/egs/chime_wsj0/s5/steps/align_raw_fmllr.sh new file mode 100755 index 000000000..4e33a8a14 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_raw_fmllr.sh @@ -0,0 +1,142 @@ +#!/bin/bash +# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta) +# + fMLLR (probably with SAT models). +# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl +# is not present), then does 2 iterations of fMLLR estimation. + +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match the source directory. + + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +boost_silence=1.0 # factor by which to boost silence during alignment. +norm_vars=false +# End configuration options. 
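+# (Example invocation, for illustration only -- the directory names are assumptions:
+#    steps/align_raw_fmllr.sh --nj 8 --cmd run.pl \
+#      data/train_si84 data/lang exp/tri3b exp/tri3b_ali_si84
+#  Any variable in the configuration block above can be overridden on the command
+#  line, e.g. "--norm-vars true", since parse_options.sh maps --foo-bar to foo_bar.)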
+ +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_fmllr.sh " + echo "e.g.: steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +sdata=$data/split$nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +cp $srcdir/final.occs $dir; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. + +if [[ ! -f $srcdir/final.mat || ! -f $srcdir/full.mat ]]; then + echo "$0: we require final.mat and full.mat in the source directory $srcdir" +fi + +full_lda_mat="get-full-lda-mat --print-args=false $srcdir/final.mat $srcdir/full.mat -|" +cp $srcdir/full.mat $srcdir/final.mat $dir + +splicedfeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" +sifeats="$splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |" + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; +alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |" +mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |" + + +## Work out where we're getting the graphs from. +if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + + +if [ $stage -le 1 ]; then + echo "$0: aligning data in $data using $alimdl and speaker-independent features." 
+ $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: computing fMLLR transforms" + if [ "$alimdl" != "$mdl" ]; then + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-raw-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $mdl "$full_lda_mat" "$splicedfeats" ark,s,cs:- ark:$dir/raw_trans.JOB || exit 1; + else + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-est-fmllr-raw --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$full_lda_mat" \ + "$splicedfeats" ark,s,cs:- ark:$dir/raw_trans.JOB || exit 1; + fi +fi + +feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/raw_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + +if [ $stage -le 3 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/align_sgmm.sh b/egs/chime_wsj0/s5/steps/align_sgmm.sh new file mode 100755 index 000000000..833afa539 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_sgmm.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments and (if needed) speaker-vectors, given an +# SGMM system. If the system is built on top of SAT, you should supply +# transforms with the --transform-dir option. + +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory. + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false # use graphs from srcdir +use_gselect=false # use gselect info from srcdir [regardless, we use + # Gaussian-selection info, we might have to compute it though.] +gselect=15 # Number of Gaussian-selection indices for SGMMs. +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +transform_dir= # directory to find fMLLR transforms in. +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_sgmm.sh " + echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\" + echo " exp/sgmm4a exp/sgmm5a_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --transform-dir # directory to find fMLLR transforms" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +sdata=$data/split$nj + +mkdir -p $dir/log +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir +cp $srcdir/final.occs $dir; + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option during alignment." +fi +## + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; + +## Work out where we're getting the graphs from. +if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir + ln.pl $srcdir/fsts.*.gz $dir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + +## Work out where we're getting the Gaussian-selection info from +if $use_gselect; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1; + graphdir=$srcdir + gselect_opt="--gselect=ark,s,cs:gunzip -c $srcdir/gselect.JOB.gz|" + ln.pl $srcdir/gselect.*.gz $dir +else + graphdir=$dir + if [ $stage -le 1 ]; then + echo "$0: computing Gaussian-selection info" + # Note: doesn't matter whether we use $alimdl or $mdl, they will + # have the same gselect info. 
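+    # (The gselect archives written below store, per frame, the indices of the $gselect
+    #  best-scoring full-covariance Gaussians. Purely as a sketch, assuming job 1 has
+    #  already been produced, they can be inspected with:
+    #    copy-gselect "ark:gunzip -c $dir/gselect.1.gz|" ark,t:- | head )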
+ $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm-gselect --full-gmm-nbest=$gselect $alimdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; + fi + gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" +fi + + +if [ $alimdl == $mdl ]; then + # Speaker-independent decoding-- just one pass. Not normal. + T=`sgmm-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1; + [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1; + + if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $mdl (no speaker-vectors)" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + echo "$0: done aligning data." + exit 0; +fi + +# Continue with system with speaker vectors. +if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $alimdl" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "$0: computing speaker vectors (1st pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \ + sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: computing speaker vectors (2nd pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1; + rm $dir/pre_vecs.* +fi + +if [ $stage -le 5 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \ + --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/align_sgmm2.sh b/egs/chime_wsj0/s5/steps/align_sgmm2.sh new file mode 100755 index 000000000..38ff02ddc --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_sgmm2.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments and (if needed) speaker-vectors, given an +# SGMM system. If the system is built on top of SAT, you should supply +# transforms with the --transform-dir option. + +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory. + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false # use graphs from srcdir +use_gselect=false # use gselect info from srcdir [regardless, we use + # Gaussian-selection info, we might have to compute it though.] +gselect=15 # Number of Gaussian-selection indices for SGMMs. +# Begin configuration. 
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +transform_dir= # directory to find fMLLR transforms in. +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_sgmm.sh " + echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\" + echo " exp/sgmm4a exp/sgmm5a_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --transform-dir # directory to find fMLLR transforms" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +sdata=$data/split$nj + +mkdir -p $dir/log +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir +cp $srcdir/final.occs $dir; + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option during alignment." +fi +## + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; + +## Work out where we're getting the graphs from. +if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! 
-f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir + ln.pl $srcdir/fsts.*.gz $dir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + +## Work out where we're getting the Gaussian-selection info from +if $use_gselect; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1; + graphdir=$srcdir + gselect_opt="--gselect=ark,s,cs:gunzip -c $srcdir/gselect.JOB.gz|" + ln.pl $srcdir/gselect.*.gz $dir +else + graphdir=$dir + if [ $stage -le 1 ]; then + echo "$0: computing Gaussian-selection info" + # Note: doesn't matter whether we use $alimdl or $mdl, they will + # have the same gselect info. + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm2-gselect --full-gmm-nbest=$gselect $alimdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; + fi + gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" +fi + + +if [ $alimdl == $mdl ]; then + # Speaker-independent decoding-- just one pass. Not normal. + T=`sgmm2-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1; + [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1; + + if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $mdl (no speaker-vectors)" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm2-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + echo "$0: done aligning data." + exit 0; +fi + +# Continue with system with speaker vectors. +if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $alimdl" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm2-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "$0: computing speaker vectors (1st pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm2-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \ + sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: computing speaker vectors (2nd pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1; + rm $dir/pre_vecs.* +fi + +if [ $stage -le 5 ]; then + echo "$0: doing final alignment." 
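+  # (The speaker vectors estimated in the previous stages are passed in below via
+  #  --spk-vecs/--utt2spk, so every utterance is aligned under its speaker-adapted
+  #  SGMM. As an illustrative check only, assuming vecs.1 exists:
+  #    copy-vector ark:$dir/vecs.1 ark,t:- | head -n 2 )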
+ $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + sgmm2-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \ + --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/align_si.sh b/egs/chime_wsj0/s5/steps/align_si.sh new file mode 100755 index 000000000..d525550f1 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/align_si.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments using a model with delta or +# LDA+MLLT features. + +# If you supply the "--use-graphs true" option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match with the source directory. + + +# Begin configuration section. +nj=4 +cmd=run.pl +use_graphs=false +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +boost_silence=1.0 # Factor by which to boost silence during alignment. +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_si.sh " + echo "e.g.: steps/align_si.sh data/train data/lang exp/tri1 exp/tri1_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +mkdir -p $dir/log +echo $nj > $dir/num_jobs +sdata=$data/split$nj +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +cp $srcdir/final.occs $dir; + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" + +mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/final.mdl - |" + +if $use_graphs; then + [ $nj != "`cat $srcdir/num_jobs`" ] && echo "$0: mismatch in num-jobs" && exit 1; + [ ! 
-f $srcdir/fsts.1.gz ] && echo "$0: no such file $srcdir/fsts.1.gz" && exit 1; + + $cmd JOB=1:$nj $dir/log/align.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \ + "ark:gunzip -c $srcdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +else + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + # We could just use gmm-align in the next line, but it's less efficient as it compiles the + # training graphs one by one. + $cmd JOB=1:$nj $dir/log/align.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" ark:- \ + "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +echo "$0: done aligning data." diff --git a/egs/chime_wsj0/s5/steps/append_feats.sh b/egs/chime_wsj0/s5/steps/append_feats.sh new file mode 100755 index 000000000..9ae6e8279 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/append_feats.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# This script appends the features in two data directories. + +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Begin configuration section. +cmd=run.pl +nj=4 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "usage: append_feats.sh [options] "; + echo "options: " + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data_src1=$1 +data_src2=$2 +data=$3 +logdir=$4 +mfccdir=$5 + +# make $mfccdir an absolute pathname. +mfccdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $mfccdir ${PWD}` + +utils/split_data.sh $data_src1 $nj || exit 1; +utils/split_data.sh $data_src2 $nj || exit 1; + +mkdir -p $mfccdir $logdir + +mkdir -p $data +cp $data_src1/* $data/ 2>/dev/null # so we get the other files, such as utt2spk. +rm $data/cmvn.scp 2>/dev/null +rm -r $data/split* 2>/dev/null + +# use "name" as part of name of the archive. +name=`basename $data` + +$cmd JOB=1:$nj $logdir/append.JOB.log \ + append-feats --truncate-frames=true \ + scp:$data_src1/split$nj/JOB/feats.scp scp:$data_src2/split$nj/JOB/feats.scp \ + ark,scp:$mfccdir/appended_$name.JOB.ark,$mfccdir/appended_$name.JOB.scp || exit 1; + +# concatenate the .scp files together. +for ((n=1; n<=nj; n++)); do + cat $mfccdir/appended_$name.$n.scp >> $data/feats.scp || exit 1; +done > $data/feats.scp || exit 1; + + +nf=`cat $data/feats.scp | wc -l` +nu=`cat $data/utt2spk | wc -l` +if [ $nf -ne $nu ]; then + echo "It seems not all of the feature files were successfully ($nf != $nu);" + echo "consider using utils/fix_data_dir.sh $data" +fi + +echo "Succeeded creating MFCC features for $name" diff --git a/egs/chime_wsj0/s5/steps/compute_cmvn_stats.sh b/egs/chime_wsj0/s5/steps/compute_cmvn_stats.sh new file mode 100755 index 000000000..17eb62e83 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/compute_cmvn_stats.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Compute cepstral mean and variance statistics per speaker. +# We do this in just one job; it's fast. +# This script takes no options. +# +# Note: there is no option to do CMVN per utterance. 
The idea is +# that if you did it per utterance it would not make sense to do +# per-speaker fMLLR on top of that (since you'd be doing fMLLR on +# top of different offsets). Therefore what would be the use +# of the speaker information? In this case you should probably +# make the speaker-ids identical to the utterance-ids. The +# speaker information does not have to correspond to actual +# speakers, it's just the level you want to adapt at. + +echo "$0 $@" # Print the command line for logging + +fake=false +if [ $1 == "--fake" ]; then + fake=true + shift +fi + +if [ $# != 3 ]; then + echo "usage: compute_cmvn_stats.sh [--fake] "; + echo "(note: --fake gives you fake cmvn stats that do no normalization.)" + exit 1; +fi + +if [ -f path.sh ]; then . ./path.sh; fi + +data=$1 +logdir=$2 +cmvndir=$3 + +# make $cmvndir an absolute pathname. +cmvndir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $cmvndir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $cmvndir || exit 1; +mkdir -p $logdir || exit 1; + + +required="$data/feats.scp $data/spk2utt" + +for f in $required; do + if [ ! -f $f ]; then + echo "make_cmvn.sh: no such file $f" + exit 1; + fi +done + +if $fake; then + dim=`feat-to-dim scp:$data/feats.scp -` + ! cat $data/spk2utt | awk -v dim=$dim '{print $1, "["; for (n=0; n < dim; n++) { printf("0 "); } print "1"; + for (n=0; n < dim; n++) { printf("1 "); } print "0 ]";}' | \ + copy-matrix ark:- ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp && \ + echo "Error creating fake CMVN stats" && exit 1; +else + ! compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp \ + 2> $logdir/cmvn_$name.log && echo "Error computing CMVN stats" && exit 1; +fi + +cp $cmvndir/cmvn_$name.scp $data/cmvn.scp || exit 1; + +nc=`cat $data/cmvn.scp | wc -l` +nu=`cat $data/spk2utt | wc -l` +if [ $nc -ne $nu ]; then + echo "Error: it seems not all of the speakers got cmvn stats ($nc != $nu);" + exit 1; +fi + +echo "Succeeded creating CMVN stats for $name" diff --git a/egs/chime_wsj0/s5/steps/decode.sh b/egs/chime_wsj0/s5/steps/decode.sh new file mode 100755 index 000000000..f41ba6349 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration section. +transform_dir= +iter= +model= # You can specify the model to use (e.g. if you want to use the .alimdl) +stage=0 +nj=4 +cmd=run.pl +max_active=7000 +max_arcs=-1 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +scoring_opts= +# note: there are no more min-lmwt and max-lmwt options, instead use +# e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20" +skip_scoring=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." 
+ echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --model # which model to use (e.g. to" + echo " # specify the final.alimdl)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # dir to find fMLLR transforms " + echo " --acwt # acoustic scale used for lattice generation " + echo " --scoring-opts # options to local/score.sh" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$model" ]; then # if --model was not specified on the command line... + if [ -z $iter ]; then model=$srcdir/final.mdl; + else model=$srcdir/$iter.mdl; fi +fi + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode.sh: feature type is $feat_type"; + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + +if [ $stage -le 0 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster$thread_string --max-arcs=$max_arcs --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; + local/score.sh --cmd "$cmd" $scoring_opts $data $graphdir $dir +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_basis_fmllr.sh b/egs/chime_wsj0/s5/steps/decode_basis_fmllr.sh new file mode 100755 index 000000000..b0521aa59 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_basis_fmllr.sh @@ -0,0 +1,206 @@ +#!/bin/bash + +# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao) +# Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that does basis fMLLR. This can be on top of delta+delta-delta, +# or LDA+MLLT features. + +# There are 3 models involved potentially in this script, +# and for a standard, speaker-independent system they will all be the same. +# The "alignment model" is for the 1st-pass decoding and to get the +# Gaussian-level alignments for the "adaptation model" the first time we +# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms +# and to generate state-level lattices. The lattices are then rescored +# with the "final model". +# +# The following table explains where we get these 3 models from. +# Note: $srcdir is one level up from the decoding directory. +# +# Model Default source: +# +# "alignment model" $srcdir/final.alimdl --alignment-model +# (or $srcdir/final.mdl if alimdl absent) +# "adaptation model" $srcdir/final.mdl --adapt-model +# "final model" $srcdir/final.mdl --final-model + + +# Begin configuration section +first_beam=10.0 # Beam used in initial, speaker-indep. pass +first_max_active=2000 # max-active used in initial pass. +alignment_model= +adapt_model= +final_model= +stage=0 +acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in + # lattice generation. + +# Parameters in alignment of training data +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +align_beam=10 +retry_beam=40 + +max_active=7000 +beam=13.0 +lattice_beam=6.0 +nj=4 +silence_weight=0.01 +cmd=run.pl +si_dir= +# End configuration section + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_basis_fmllr.sh [options] " + echo " e.g.: steps/decode_basis_fmllr.sh exp/tri2b/graph_tgpr data/train_si84 data/test_dev93 exp/tri2b/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --adapt-model # Model to compute transforms with" + echo " --alignment-model # Model to get Gaussian-level alignments for" + echo " # 1st pass of transform computation." + echo " --final-model # Model to finally decode with" + echo " --si-dir # use this to skip 1st pass of decoding" + echo " # Caution-- must be with same tree" + echo " --acwt # default 0.08333 ... used to get posteriors" + + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash. + +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1; + +# Some checks. Note: we don't need $srcdir/tree but we expect +# it should exist, given the current structure of the scripts. 
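+# (The fmllr.basis checked below must have been estimated beforehand on training data,
+#  e.g. with steps/get_fmllr_basis.sh if that script is available; a sketch, with an
+#  assumed <data> <lang> <exp-dir> interface and illustrative directory names:
+#    steps/get_fmllr_basis.sh --cmd run.pl data/train_si84 data/lang exp/tri4b )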
+for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree $srcdir/fmllr.basis; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; +## + +## Do the speaker-independent decoding, if --si-dir option not present. ## +if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass. + si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si". + if [ $stage -le 0 ]; then + steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model --max-active $first_max_active $graphdir $data $si_dir || exit 1; + fi +fi +## + +## Some checks, and setting of defaults for variables. +[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1; +[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1; +[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl +[ -z "$final_model" ] && final_model=$srcdir/final.mdl +for f in $adapt_model $final_model; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done +## + +## Set up the unadapted features "$sifeats" for testing set +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +## + +## Now get the first-pass fMLLR transforms. +## We give all the default parameters in gmm-est-basis-fmllr +if [ $stage -le 1 ]; then + echo "$0: getting first-pass fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \ + gunzip -c $si_dir/lat.JOB.gz \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \ + gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \ + gmm-est-basis-fmllr-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + --fmllr-min-count=200 --num-iters=10 --size-scale=0.2 \ + --step-size-iters=3 --write-weights=ark:$dir/pre_wgt.JOB \ + $adapt_model $srcdir/fmllr.basis "$sifeats" ark,s,cs:- \ + ark:$dir/pre_trans.JOB || exit 1; +fi +## + +pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- |" + +## Do the main lattice generation pass. Note: we don't determinize the lattices at +## this stage, as we're going to use them in acoustic rescoring with the larger +## model, and it's more correct to store the full state-level lattice for this purpose. 
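+# (Sketch only, not part of the recipe: should determinized first-pass lattices be
+#  needed for some other purpose, they can be derived afterwards without re-decoding:
+#    lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam \
+#      "ark:gunzip -c $dir/lat.tmp.1.gz|" "ark:|gzip -c >$dir/det_lat.1.gz"
+#  here the undeterminized lattices are kept because they get rescored below.)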
+if [ $stage -le 2 ]; then + echo "$0: doing main lattice generation phase" + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt \ + --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \ + || exit 1; +fi +## + +## Do a second pass of estimating the transform-- this time with the lattices +## generated from the alignment model. Compose the transforms to get +## $dir/trans.1, etc. +if [ $stage -le 3 ]; then + echo "$0: estimating fMLLR transforms a second time." + $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \ + "ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-basis-fmllr --fmllr-min-count=200 \ + --spk2utt=ark:$sdata/JOB/spk2utt --write-weights=ark:$dir/trans_tmp_wgt.JOB \ + $adapt_model $srcdir/fmllr.basis "$pass1feats" ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \ + ark:$dir/trans.JOB || exit 1; +fi +## + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +# Rescore the state-level lattices with the final adapted features, and the final model +# (which by default is $srcdir/final.mdl, but which may be specified on the command line, +# useful in case of discriminatively trained systems). +# At this point we prune and determinize the lattices and write them out, ready for +# language model rescoring. + +if [ $stage -le 4 ]; then + echo "$0: doing a final pass of acoustic rescoring." + $cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1; +fi + +[ ! -x local/score.sh ] && \ + echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +rm $dir/{trans_tmp,pre_trans}.* + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_biglm.sh b/egs/chime_wsj0/s5/steps/decode_biglm.sh new file mode 100755 index 000000000..ec2d0667c --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_biglm.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration. +nj=4 +cmd=run.pl +maxactive=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 +# End configuration. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "Usage: steps/decode_si_biglm.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." 
+ echo "e.g.: steps/decode_si.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + + +graphdir=$1 +oldlm_fst=$2 +newlm_fst=$3 +data=$4 +dir=$5 + +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $srcdir/final.mdl $graphdir/HCLG.fst $oldlm_fst $newlm_fst; do + [ ! -f $f ] && echo "decode_si.sh: no such file $f" && exit 1; +done + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +[ -f `dirname $oldlm_fst`/words.txt ] && ! cmp `dirname $oldlm_fst`/words.txt $graphdir/words.txt && \ + echo "Warning: old LM words.txt does not match with that in $graphdir .. probably will not work."; +[ -f `dirname $newlm_fst`/words.txt ] && ! cmp `dirname $oldlm_fst`/words.txt $graphdir/words.txt && \ + echo "Warning: new LM words.txt does not match with that in $graphdir .. probably will not work."; + +# fstproject replaces the disambiguation symbol #0, which only appears on the +# input side, with the that appears in the corresponding arcs on the output side. +oldlm_cmd="fstproject --project_output=true $oldlm_fst | fstarcsort --sort_type=ilabel |" +newlm_cmd="fstproject --project_output=true $newlm_fst | fstarcsort --sort_type=ilabel |" + +$cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-biglm-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $srcdir/final.mdl $graphdir/HCLG.fst "$oldlm_cmd" "$newlm_cmd" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_combine.sh b/egs/chime_wsj0/s5/steps/decode_combine.sh new file mode 100755 index 000000000..b8ac5ede1 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_combine.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Combine two decoding directories by composing the lattices (we +# apply a weight to each of the original weights, by default 0.5 each). + +# Begin configuration section. +weight1=0.5 # Weight on 1st set of lattices. +cmd=run.pl +# End configuration section. 
+ +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 5 ]; then + echo "Usage: steps/decode_combine.sh [options] " + echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --weight1 # Weight on 1st set of lattices (default 0.5)" + exit 1; +fi + +data=$1 +lang_or_graphdir=$2 +srcdir1=$3 +srcdir2=$4 +dir=$5 + +for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj1=`cat $srcdir1/num_jobs` || exit 1; +nj2=`cat $srcdir2/num_jobs` || exit 1; +[ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1; +nj=$nj1 + +mkdir -p $dir/log +echo $nj > $dir/num_jobs + +# The lattice-interp command does the score interpolation (with composition), +# and the lattice-copy-backoff replaces the result with the 1st lattice, in +# cases where the composed result was empty. +$cmd JOB=1:$nj $dir/log/interp.JOB.log \ + lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \ + "ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \ + lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \ + "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_fmllr.sh b/egs/chime_wsj0/s5/steps/decode_fmllr.sh new file mode 100755 index 000000000..4d171a2a4 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_fmllr.sh @@ -0,0 +1,217 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or +# LDA+MLLT features. + +# There are 3 models involved potentially in this script, +# and for a standard, speaker-independent system they will all be the same. +# The "alignment model" is for the 1st-pass decoding and to get the +# Gaussian-level alignments for the "adaptation model" the first time we +# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms +# and to generate state-level lattices. The lattices are then rescored +# with the "final model". +# +# The following table explains where we get these 3 models from. +# Note: $srcdir is one level up from the decoding directory. +# +# Model Default source: +# +# "alignment model" $srcdir/final.alimdl --alignment-model +# (or $srcdir/final.mdl if alimdl absent) +# "adaptation model" $srcdir/final.mdl --adapt-model +# "final model" $srcdir/final.mdl --final-model + + +# Begin configuration section +first_beam=10.0 # Beam used in initial, speaker-indep. pass +first_max_active=2000 # max-active used in initial pass. +first_max_arcs=-1 +alignment_model= +adapt_model= +final_model= +stage=0 +acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in + # lattice generation. +max_active=7000 +max_arcs=-1 +beam=13.0 +lattice_beam=6.0 +nj=4 +silence_weight=0.01 +cmd=run.pl +si_dir= +fmllr_update_type=full +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. 
+skip_scoring=false +scoring_opts= +norm_vars=false +# End configuration section +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: steps/decode_fmllr.sh [options] " + echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --adapt-model # Model to compute transforms with" + echo " --alignment-model # Model to get Gaussian-level alignments for" + echo " # 1st pass of transform computation." + echo " --final-model # Model to finally decode with" + echo " --si-dir # use this to skip 1st pass of decoding" + echo " # Caution-- must be with same tree" + echo " --acwt # default 0.08333 ... used to get posteriors" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + echo " --scoring-opts # options to local/score.sh" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash. + +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +sdata=$data/split$nj; + +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + + +mkdir -p $dir/log +split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1; + +# Some checks. Note: we don't need $srcdir/tree but we expect +# it should exist, given the current structure of the scripts. +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; +## + +## Do the speaker-independent decoding, if --si-dir option not present. ## +if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass. + si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si". + if [ $stage -le 0 ]; then + steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \ + --num-threads $num_threads --skip-scoring $skip_scoring \ + --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \ + --model $alignment_model --max-arcs $max_arcs --max-active \ + $first_max_active $graphdir $data $si_dir || exit 1; + fi +fi +## + +## Some checks, and setting of defaults for variables. +[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1; +[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1; +[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl +[ -z "$final_model" ] && final_model=$srcdir/final.mdl +for f in $adapt_model $final_model; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; +done +## + +## Set up the unadapted features "$sifeats" +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +## + +## Now get the first-pass fMLLR transforms. +if [ $stage -le 1 ]; then + echo "$0: getting first-pass fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \ + gunzip -c $si_dir/lat.JOB.gz \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \ + gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$sifeats" ark,s,cs:- \ + ark:$dir/pre_trans.JOB || exit 1; +fi +## + +pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- |" + +## Do the main lattice generation pass. Note: we don't determinize the lattices at +## this stage, as we're going to use them in acoustic rescoring with the larger +## model, and it's more correct to store the full state-level lattice for this purpose. +if [ $stage -le 2 ]; then + echo "$0: doing main lattice generation phase" + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --max-arcs=$max_arcs \ + --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \ + || exit 1; +fi +## + +## Do a second pass of estimating the transform-- this time with the lattices +## generated from the alignment model. Compose the transforms to get +## $dir/trans.1, etc. +if [ $stage -le 3 ]; then + echo "$0: estimating fMLLR transforms a second time." + $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \ + "ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass1feats" \ + ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \ + ark:$dir/trans.JOB || exit 1; +fi +## + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +# Rescore the state-level lattices with the final adapted features, and the final model +# (which by default is $srcdir/final.mdl, but which may be specified on the command line, +# useful in case of discriminatively trained systems). +# At this point we prune and determinize the lattices and write them out, ready for +# language model rescoring. 
+ +if [ $stage -le 4 ]; then + echo "$0: doing a final pass of acoustic rescoring." + $cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1; +fi + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir +fi + +rm $dir/{trans_tmp,pre_trans}.* + +exit 0; + diff --git a/egs/chime_wsj0/s5/steps/decode_fmllr_extra.sh b/egs/chime_wsj0/s5/steps/decode_fmllr_extra.sh new file mode 100755 index 000000000..51cc06057 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_fmllr_extra.sh @@ -0,0 +1,250 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or +# LDA+MLLT features. +# This script does an extra pass of lattice generation over and above what the original +# script did-- it's for robustness in the case where your original cepstral mean +# normalization was way off. +# We also added a new option --distribute=true (by default) to +# weight-silence-post. This weights the silence frames in a different way, +# weighting all posteriors on the frame rather than just the silence ones, which +# removes a particular kind of bias that the old approach suffered from. + +# There are 3 models involved potentially in this script, +# and for a standard, speaker-independent system they will all be the same. +# The "alignment model" is for the 1st-pass decoding and to get the +# Gaussian-level alignments for the "adaptation model" the first time we +# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms +# and to generate state-level lattices. The lattices are then rescored +# with the "final model". +# +# The following table explains where we get these 3 models from. +# Note: $srcdir is one level up from the decoding directory. +# +# Model Default source: +# +# "alignment model" $srcdir/final.alimdl --alignment-model +# (or $srcdir/final.mdl if alimdl absent) +# "adaptation model" $srcdir/final.mdl --adapt-model +# "final model" $srcdir/final.mdl --final-model + + +# Begin configuration section +first_beam=10.0 # Beam used in initial, speaker-indep. pass +first_max_active=2000 # max-active used in first two passes. +first_latbeam=4.0 # lattice pruning beam for si decode and first-pass fMLLR decode. + # the different spelling from lattice_beam is unfortunate; these scripts + # have a history. +alignment_model= +adapt_model= +final_model= +cleanup=true +stage=0 +acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in + # lattice generation. +max_active=7000 +beam=13.0 +lattice_beam=6.0 +nj=4 +silence_weight=0.01 +distribute=true # option to weight-silence-post. +cmd=run.pl +si_dir= +fmllr_update_type=full +skip_scoring=false +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +scoring_opts= + +# End configuration section + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_fmllr.sh [options] " + echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --adapt-model # Model to compute transforms with" + echo " --alignment-model # Model to get Gaussian-level alignments for" + echo " # 1st pass of transform computation." + echo " --final-model # Model to finally decode with" + echo " --si-dir # use this to skip 1st pass of decoding" + echo " # Caution-- must be with same tree" + echo " --acwt # default 0.08333 ... used to get posteriors" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + echo " --scoring-opts # options to local/score.sh" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash. + +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +sdata=$data/split$nj; + +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1; + +# Some checks. Note: we don't need $srcdir/tree but we expect +# it should exist, given the current structure of the scripts. +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; +## + +## Do the speaker-independent decoding, if --si-dir option not present. ## +if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass. + si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si". + if [ $stage -le 0 ]; then + steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model\ + --max-active $first_max_active --parallel-opts "${parallel_opts}" --num-threads $num_threads\ + --skip-scoring true $graphdir $data $si_dir || exit 1; + fi +fi +## + +## Some checks, and setting of defaults for variables. +[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1; +[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1; +[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl +[ -z "$final_model" ] && final_model=$srcdir/final.mdl +for f in $adapt_model $final_model; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; +done +## + +## Set up the unadapted features "$sifeats" +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +## + +## Now get the first-pass fMLLR transforms. +if [ $stage -le 1 ]; then + echo "$0: getting first-pass fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \ + gunzip -c $si_dir/lat.JOB.gz \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post --distribute=$distribute $silence_weight $silphonelist $alignment_model ark:- ark:- \| \ + gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$sifeats" ark,s,cs:- \ + ark:$dir/trans1.JOB || exit 1; +fi +## + +pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans1.JOB ark:- ark:- |" + +## Do the first adapted lattice generation pass. +if [ $stage -le 2 ]; then + echo "$0: doing first adapted lattice generation phase" + $cmd $parallel_opts JOB=1:$nj $dir/log/decode1.JOB.log\ + gmm-latgen-faster$thread_string --max-active=$first_max_active --beam=$first_beam --lattice-beam=$first_latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat1.JOB.gz" \ + || exit 1; +fi + + +## Do a second pass of estimating the transform. Compose the transforms to get +## $dir/trans2.*. +if [ $stage -le 3 ]; then + echo "$0: estimating fMLLR transforms a second time." + $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \ + lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $dir/lat1.JOB.gz|" ark:- \| \ + weight-silence-post --distribute=$distribute $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass1feats" \ + ark,s,cs:- ark:$dir/trans1b.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans1b.JOB ark:$dir/trans1.JOB \ + ark:$dir/trans2.JOB || exit 1; + if $cleanup; then + rm $dir/trans1b.* $dir/trans1.* $dir/lat1.*.gz + fi +fi +## + +pass2feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans2.JOB ark:- ark:- |" + +# Generate a 3rd set of lattices, with the "adaptation model"; we'll use these +# to adapt a 3rd time, and we'll rescore them. Since we should be close to the final +# fMLLR, we don't bother dumping un-determinized lattices to disk. + +## Do the final lattice generation pass (but we'll rescore these lattices +## after another stage of adaptation.) 
+if [ $stage -le 4 ]; then + echo "$0: doing final lattice generation phase" + $cmd $parallel_opts JOB=1:$nj $dir/log/decode2.JOB.log\ + gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass2feats" "ark:|gzip -c > $dir/lat2.JOB.gz" \ + || exit 1; +fi + + +## Do a third pass of estimating the transform. Compose the transforms to get +## $dir/trans.*. +if [ $stage -le 5 ]; then + echo "$0: estimating fMLLR transforms a third time." + $cmd JOB=1:$nj $dir/log/fmllr_pass3.JOB.log \ + lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $dir/lat2.JOB.gz|" ark:- \| \ + weight-silence-post --distribute=$distribute $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass2feats" \ + ark,s,cs:- ark:$dir/trans2b.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans2b.JOB ark:$dir/trans2.JOB \ + ark:$dir/trans.JOB || exit 1; + if $cleanup; then + rm $dir/trans2b.* $dir/trans2.* + fi +fi +## + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +if [ $stage -le 6 ]; then + echo "$0: doing a final pass of acoustic rescoring." + $cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat2.JOB.gz|" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + if $cleanup; then + rm $dir/lat2.*.gz + fi +fi + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_fmmi.sh b/egs/chime_wsj0/s5/steps/decode_fmmi.sh new file mode 100755 index 000000000..1e7ab532f --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_fmmi.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# Decoding of fMMI or fMPE models (feature-space discriminative training). +# If transform-dir supplied, expects e.g. fMLLR transforms in that dir. + +# Begin configuration section. +stage=1 +iter=final +nj=4 +cmd=run.pl +maxactive=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +ngselect=2; # Just use the 2 top Gaussians for fMMI/fMPE. Should match train. +transform_dir= +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +scoring_opts= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_fmmi.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." + echo "e.g.: steps/decode_fmmi.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "You can also use fMLLR features-- you have to supply --transform-dir option." 
+ echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --acwt # acoustic scale used for lattice generation " + echo " --transform-dir # where to find fMLLR transforms." + echo " --scoring-opts # options to local/score.sh" + echo " # speaker-adapted decoding" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +model=$srcdir/$iter.mdl + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "decode_fmmi.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_fmmi.sh: feature type is $feat_type"; + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + +fmpefeats="$feats fmpe-apply-transform $srcdir/$iter.fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.JOB.gz|' ark:- |" + +if [ $stage -le 1 ]; then + # Get Gaussian selection info. + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + gmm-gselect --n=$ngselect $srcdir/$iter.fmpe "$feats" \ + "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +if [ $stage -le 2 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster$thread_string --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$fmpefeats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +if [ $stage -le 3 ]; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir +fi +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_fromlats.sh b/egs/chime_wsj0/s5/steps/decode_fromlats.sh new file mode 100755 index 000000000..5b8f41a86 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_fromlats.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Decode, limited to the word-sequences that were present in a set +# of lattices on disk. The other lattices do not have to be built +# with the same tree or the same context size-- however, you do +# have to be using the same vocabulary (words.txt)-- if not you'd +# have to map the vocabulary somehow. + +# Note: if the trees are identical, you can use gmm-rescore-lattice. + +# Mechanism: create an unweighted acceptor (on words) for each utterance, +# compose that with G, determinize, and then use compile-train-graphs-fsts +# to compile a graph for each utterance, to decode with. + +# Begin configuration. +cmd=run.pl +maxactive=7000 +beam=20.0 +latbeam=7.0 +acwt=0.083333 +batch_size=75 # Limits memory blowup in compile-train-graphs-fsts +scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +# End configuration. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + + +if [ $# != 4 ]; then + echo "Usage: steps/decode_si_fromlats.sh [options] " + echo "e.g.: steps/decode_si_fromlats.sh data/test_dev93 data/lang_test_tg exp/tri2b/decode_tgpr_dev93 exp/tri2a/decode_tgpr_dev93_fromlats" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + + +data=$1 +lang=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +mkdir -p $dir/log + +nj=`cat $olddir/num_jobs` || exit 1; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +sdata=$data/split$nj +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj >$dir/num_jobs + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $srcdir/final.mdl $olddir/lat.1.gz \ + $srcdir/tree $lang/L_disambig.fst $lang/phones.txt; do + [ ! 
-f $f ] && echo "decode_si_fromlats.sh: no such file $f" && exit 1; +done + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + + +$cmd JOB=1:$nj $dir/log/decode_lats.JOB.log \ + lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \ + fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \ + fstdeterminizestar ark:- ark:- \| \ + compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \ + --batch-size=$batch_size $scale_opts $srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \ + gmm-latgen-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt \ + --allow-partial=true --word-symbol-table=$lang/words.txt \ + $srcdir/final.mdl ark:- "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $lang $dir + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_fwdbwd.sh b/egs/chime_wsj0/s5/steps/decode_fwdbwd.sh new file mode 100755 index 000000000..b12f0270a --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_fwdbwd.sh @@ -0,0 +1,122 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey), BUT (Author: Mirko Hannemann) +# Apache 2.0 + +# Begin configuration section. +transform_dir= +first_pass= +iter= +model= # You can specify the model to use (e.g. if you want to use the .alimdl) +nj=4 +reverse=false +cmd=run.pl +max_active=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +extra_beam=0.0 # small additional beam over varying beam +max_beam=100.0 # maximum of varying beam +scoring_opts= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_fwdbwd.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." + echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --first_pass # decoding dir of first pass" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --model # which model to use (e.g. to" + echo " # specify the final.alimdl)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ echo " --transform_dir # dir to find fMLLR transforms " + echo " # speaker-adapted decoding" + echo " --scoring-opts # options to local/score.sh" + echo " --reverse [true/false] # time reversal of features" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$model" ]; then # if --model was not specified on the command line... + if [ -z $iter ]; then model=$srcdir/final.mdl; + else model=$srcdir/$iter.mdl; fi +fi + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst $graphdir/words.txt; do + [ ! -f $f ] && echo "decode_fwdbwd.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_fwdbwd.sh: feature type is $feat_type"; + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi +if $reverse; then + feats="$feats reverse-feats ark:- ark:- |" +fi + +if [ -f $first_pass/lat.1.gz ]; then + echo "converting first pass lattice to graph arc acceptor" + $cmd JOB=1:$nj $dir/log/arc_graph.JOB.log \ + time lattice-arcgraph $model $graphdir/HCLG.fst \ + "ark:gunzip -c $first_pass/lat.JOB.gz|" ark,t:$dir/lat.JOB.arcs || exit 1; + # --write-lattices=ark,t:$dir/lat.det + # --acoustic-scale=$acwt --lattice-beam=$latbeam --prune=false \ + + echo "decode with tracking first pass lattice" + $cmd JOB=1:$nj $dir/log/decode_fwdbwd.JOB.log \ + gmm-latgen-tracking --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true \ + --extra-beam=$extra_beam --max-beam=$max_beam \ + --word-symbol-table=$graphdir/words.txt --verbose=2 \ + $model $graphdir/HCLG.fst "$feats" ark:$dir/lat.JOB.arcs "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +else + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh $scoring_opts --cmd "$cmd" --reverse $reverse $scoring_opts $data $graphdir $dir + +echo "Decoding done." 
+exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_nnet.sh b/egs/chime_wsj0/s5/steps/decode_nnet.sh new file mode 100755 index 000000000..e8f0d2865 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_nnet.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +# Copyright 2012-2013 Karel Vesely, Daniel Povey +# Apache 2.0 + +# Begin configuration section. +nnet= # Optionally pre-select network to use for getting state-likelihoods +feature_transform= # Optionally pre-select feature transform (in front of nnet) +model= # Optionally pre-select transition model +class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors + +stage=0 # stage=1 skips lattice generation +nj=4 +cmd=run.pl +max_active=7000 # maximum of active tokens +max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized) +beam=13.0 # GMM:13.0 +latbeam=8.0 # GMM:6.0 +acwt=0.10 # GMM:0.0833, note: only really affects pruning (scoring is on lattices). +scoring_opts="--min-lmwt 4 --max-lmwt 15" +skip_scoring=false +use_gpu_id=-1 # disable gpu +parallel_opts="-pe smp 2" # use 2 CPUs (1 DNN-forward, 1 decoder) +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: $0 [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the DNN + transition model is." + echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr" + echo "" + echo "This script works on plain or modified features (CMN,delta+delta-delta)," + echo "which are then sent through feature-transform. It works out what type" + echo "of features you used from content of srcdir." + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo "" + echo " --nnet # which nnet to use (opt.)" + echo " --feature-transform # select transform in front of nnet (opt.)" + echo " --class-frame-counts # file with frame counts (used to compute priors) (opt.)" + echo " --model # which transition model to use (opt.)" + echo "" + echo " --acwt # select acoustic scale for decoding" + echo " --scoring-opts # options forwarded to local/score.sh" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$nnet" ]; then # if --nnet was not specified on the command line... + nnet=$srcdir/final.nnet; +fi +[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1; + +if [ -z "$model" ]; then # if --model was not specified on the command line... + model=$srcdir/final.mdl; +fi + +# find the feature_transform to use +if [ -z "$feature_transform" ]; then + feature_transform=$srcdir/final.feature_transform +fi +if [ ! -f $feature_transform ]; then + echo "Missing feature_transform '$feature_transform'" + exit 1 +fi + +# check that files exist +for f in $sdata/1/feats.scp $nnet_i $nnet $model $graphdir/HCLG.fst; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; +done + +# PREPARE THE LOG-POSTERIOR COMPUTATION PIPELINE +if [ -z "$class_frame_counts" ]; then + class_frame_counts=$srcdir/ali_train_pdf.counts +else + echo "Overriding class_frame_counts by $class_frame_counts" +fi + +# Create the feature stream: +feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |" +# Optionally add cmvn +if [ -f $srcdir/norm_vars ]; then + norm_vars=$(cat $srcdir/norm_vars 2>/dev/null) + [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1 + feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |" +fi +# Optionally add deltas +if [ -f $srcdir/delta_order ]; then + delta_order=$(cat $srcdir/delta_order) + feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |" +fi + + +# Run the decoding in the queue +if [ $stage -le 0 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet "$feats" ark:- \| \ + latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +# Run the scoring +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir || exit 1; +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_nnet_cpu.sh b/egs/chime_wsj0/s5/steps/decode_nnet_cpu.sh new file mode 100755 index 000000000..8d2851608 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_nnet_cpu.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). +# Apache 2.0. + +# This script does decoding with a neural-net. If the neural net was built on +# top of fMLLR transforms from a conventional system, you should provide the +# --transform-dir option. + +# Begin configuration section. +stage=1 +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. If --transform-dir set, must match that number! +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +cmd=run.pl +beam=15.0 +max_active=7000 + +#WARNING: This option is renamed lat_beam (it was renamed to follow the naming +# in the other scripts +lattice_beam=8.0 # Beam we use in lattice generation. +iter=final +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +scoring_opts= +skip_scoring=false +feat_type= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: steps/decode_nnet_cpu.sh [options] " + echo " e.g.: steps/decode_nnet_cpu.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/tri3b/graph_tgpr data/test_dev93 exp/tri4a_nnet/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." 
+ echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 15.0" + echo " --iter # Iteration of model to decode; default is final." + echo " --scoring-opts # options to local/score.sh" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + exit 1; +fi + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +model=$srcdir/$iter.mdl + +for f in $graphdir/HCLG.fst $data/feats.scp $model; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +## Set up features. +if [ -z "$feat_type" ]; then + if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi + echo "$0: feature type is $feat_type" +fi + +case $feat_type in + raw) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |";; + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + if [ "$feat_type" == "raw" ]; then + [ ! -f $transform_dir/raw_trans.1 ] && echo "$0: no such file $transform_dir/raw_trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- |" + else + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" + fi +elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then + echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi +## + + +if [ $stage -le 1 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + nnet-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt "$model" \ + $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. + + +if [ $stage -le 2 ]; then + if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; + echo "score best paths" + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir + echo "score confidence and timing with sclite" + fi +fi +echo "Decoding done." +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_raw_fmllr.sh b/egs/chime_wsj0/s5/steps/decode_raw_fmllr.sh new file mode 100755 index 000000000..7cd929ed1 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_raw_fmllr.sh @@ -0,0 +1,235 @@ +#!/bin/bash + +# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) + +# This decoding script is like decode_fmllr.sh, but it does the fMLLR on +# the raw cepstra, using the model in the LDA+MLLT space +# +# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or +# LDA+MLLT features. + +# There are 3 models involved potentially in this script, +# and for a standard, speaker-independent system they will all be the same. +# The "alignment model" is for the 1st-pass decoding and to get the +# Gaussian-level alignments for the "adaptation model" the first time we +# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms +# and to generate state-level lattices. The lattices are then rescored +# with the "final model". +# +# The following table explains where we get these 3 models from. +# Note: $srcdir is one level up from the decoding directory. +# +# Model Default source: +# +# "alignment model" $srcdir/final.alimdl --alignment-model +# (or $srcdir/final.mdl if alimdl absent) +# "adaptation model" $srcdir/final.mdl --adapt-model +# "final model" $srcdir/final.mdl --final-model + + +# Begin configuration section +first_beam=10.0 # Beam used in initial, speaker-indep. pass +first_max_active=2000 # max-active used in initial pass. +first_max_arcs=-1 +alignment_model= +adapt_model= +final_model= +stage=0 +acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in + # lattice generation. +max_active=7000 +use_normal_fmllr=false +max_arcs=-1 +beam=13.0 +lattice_beam=6.0 +nj=4 +silence_weight=0.01 +cmd=run.pl +si_dir= +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +skip_scoring=false +scoring_opts= +norm_vars=false +# End configuration section +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Wrong #arguments ($#, expected 3)" + echo "Usage: steps/decode_fmllr.sh [options] " + echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --adapt-model # Model to compute transforms with" + echo " --alignment-model # Model to get Gaussian-level alignments for" + echo " # 1st pass of transform computation." + echo " --final-model # Model to finally decode with" + echo " --si-dir # use this to skip 1st pass of decoding" + echo " # Caution-- must be with same tree" + echo " --acwt # default 0.08333 ... used to get posteriors" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + echo " --scoring-opts # options to local/score.sh" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash. 
+ +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +sdata=$data/split$nj; + +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + + +mkdir -p $dir/log +split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1; + +# Some checks. Note: we don't need $srcdir/tree but we expect +# it should exist, given the current structure of the scripts. +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; +## + +## Do the speaker-independent decoding, if --si-dir option not present. ## +if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass. + si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si". + if [ $stage -le 0 ]; then + steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \ + --num-threads $num_threads --skip-scoring $skip_scoring \ + --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \ + --model $alignment_model --max-arcs $max_arcs --max-active \ + $first_max_active $graphdir $data $si_dir || exit 1; + fi +fi +## + +## Some checks, and setting of defaults for variables. +[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1; +[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1; +[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl +[ -z "$final_model" ] && final_model=$srcdir/final.mdl +for f in $adapt_model $final_model; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done +## + +if [[ ! -f $srcdir/final.mat || ! -f $srcdir/full.mat ]]; then + echo "$0: we require final.mat and full.mat in the source directory $srcdir" +fi + +splicedfeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" +sifeats="$splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |" + +full_lda_mat="get-full-lda-mat --print-args=false $srcdir/final.mat $srcdir/full.mat -|" + +## + +## Now get the first-pass fMLLR transforms. +if [ $stage -le 1 ]; then + echo "$0: getting first-pass raw-fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \ + gunzip -c $si_dir/lat.JOB.gz \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \ + gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-raw-gpost --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$full_lda_mat" \ + "$splicedfeats" ark,s,cs:- ark:$dir/pre_trans.JOB || exit 1; +fi +## + +pass1splicedfeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- |" +pass1feats="$pass1splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |" + +## Do the main lattice generation pass. 
Note: we don't determinize the lattices at +## this stage, as we're going to use them in acoustic rescoring with the larger +## model, and it's more correct to store the full state-level lattice for this purpose. +if [ $stage -le 2 ]; then + echo "$0: doing main lattice generation phase" + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --max-arcs=$max_arcs \ + --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \ + || exit 1; +fi +## + +## Do a second pass of estimating the transform-- this time with the lattices +## generated from the alignment model. Compose the transforms to get +## $dir/trans.1, etc. +if [ $stage -le 3 ]; then + echo "$0: estimating raw-fMLLR transforms a second time." + $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \ + "ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-fmllr-raw --spk2utt=ark:$sdata/JOB/spk2utt \ + $adapt_model "$full_lda_mat" "$pass1splicedfeats" ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \ + ark:$dir/raw_trans.JOB || exit 1; +fi +## + +feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/raw_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + +if [ $stage -le 4 ] && $use_normal_fmllr; then + echo "$0: estimating normal fMLLR transforms" + $cmd JOB=1:$nj $dir/log/fmllr_pass3.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt \ + $adapt_model "$feats" ark,s,cs:- ark:$dir/trans.JOB || exit 1; +fi + +if $use_normal_fmllr; then + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" +fi + +# Rescore the state-level lattices with the final adapted features, and the final model +# (which by default is $srcdir/final.mdl, but which may be specified on the command line, +# useful in case of discriminatively trained systems). +# At this point we prune and determinize the lattices and write them out, ready for +# language model rescoring. + +if [ $stage -le 5 ]; then + echo "$0: doing a final pass of acoustic rescoring." + $cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1; +fi + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "$0: not scoring because local/score.sh does not exist or not executable." 
&& exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir +fi + +#rm $dir/{trans_tmp,pre_trans}.* + +exit 0; + diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm.sh b/egs/chime_wsj0/s5/steps/decode_sgmm.sh new file mode 100755 index 000000000..ddb6a67e9 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm.sh @@ -0,0 +1,257 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, with speaker vectors. +# If the SGMM system was +# built on top of fMLLR transforms from a conventional system, you should +# provide the --transform-dir option. + +# Begin configuration section. +stage=1 +alignment_model= +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +cmd=run.pl +beam=15.0 +gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: + # the first_pass_gselect variable is used for the 1st pass of + # decoding and can be tighter. +first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in + # the 1st pass of decoding (lattice generation). +max_active=7000 + +#WARNING: This option is renamed lat_beam (it was renamed to follow the naming +# in the other scripts +lattice_beam=8.0 # Beam we use in lattice generation. +vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for + # speaker-vector computation. Can be quite tight (actually we could + # probably just do best-path. +use_fmllr=false +fmllr_iters=10 +fmllr_min_count=1000 + +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: steps/decode_sgmm.sh [options] " + echo " e.g.: steps/decode_sgmm.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --alignment-model # Model for the first-pass decoding." + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 13.0" + exit 1; +fi + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1 +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" +gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +## Set up features. 
+if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi +## + +## Calculate FMLLR pre-transforms if needed. We are doing this here since this +## step is requried by models both with and without speaker vectors +if $use_fmllr; then + if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then + echo "$0: computing pre-transform for fMLLR computation." + sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; + fi +fi + +## Save Gaussian-selection info to disk. +# Note: we can use final.mdl regardless of whether there is an alignment model-- +# they use the same UBM. +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; + +# Generate state-level lattice which we can rescore. This is done with the +# alignment model and no speaker-vectors. +if [ $stage -le 2 ]; then + $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \ + $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; +fi + +## Check if the model has speaker vectors +spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` + +if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: + +# Estimate speaker vectors (1st pass). Prune before determinizing +# because determinization can take a while on un-pruned lattices. +# Note: the sgmm-post-to-gpost stage is necessary because we have +# a separate alignment-model and final model, otherwise we'd skip it +# and use sgmm-est-spkvecs. 
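+# For comparison (an illustrative sketch only, not executed): if there were no
+# separate alignment model, the gpost conversion could be dropped and the
+# speaker vectors estimated directly from posteriors, roughly:
+#   ... | lattice-to-post --acoustic-scale=$acwt ark:- ark:- | \
+#   weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- | \
+#   sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
+#     $srcdir/final.mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB
+# Because final.alimdl and final.mdl differ here, we go through sgmm-post-to-gpost
+# so the Gaussian-level posteriors come from the alignment model.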
+ if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ + sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ + sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; + fi + +# Estimate speaker vectors (2nd pass). Since we already have spk vectors, +# at this point we need to rescore the lattice to get the correct posteriors. + if [ $stage -le 4 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; + fi + rm $dir/pre_vecs.* + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + fi + rm $dir/pre_lat.*.gz + +else ### For models without speaker vectors: + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." 
+ $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ] && $use_fmllr; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + rm $dir/pre_lat.*.gz + else # Already done with decoding if no adaptation needed. + for n in `seq 1 $nj`; do + mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz + done + fi + +fi + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. + + +if [ $stage -le 7 ]; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + echo "score best paths" + local/score.sh --cmd "$cmd" $data $graphdir $dir + echo "score confidence and timing with sclite" + #local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $graphdir $dir +fi +echo "Decoding done." +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm2.sh b/egs/chime_wsj0/s5/steps/decode_sgmm2.sh new file mode 100755 index 000000000..490b582d2 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm2.sh @@ -0,0 +1,211 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, with speaker vectors. +# If the SGMM system was +# built on top of fMLLR transforms from a conventional system, you should +# provide the --transform-dir option. + +# Begin configuration section. +stage=1 +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +cmd=run.pl +beam=13.0 +gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: + # the first_pass_gselect variable is used for the 1st pass of + # decoding and can be tighter. +first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in + # the 1st pass of decoding (lattice generation). +max_active=7000 +max_arcs=-1 + +#WARNING: This option is renamed lat_beam (it was renamed to follow the naming +# in the other scripts +lattice_beam=6.0 # Beam we use in lattice generation. +vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for + # speaker-vector computation. Can be quite tight (actually we could + # probably just do best-path. 
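+# (For orientation: "beam" is the search beam used during first-pass lattice
+# generation; "lattice_beam" bounds what is kept in the state-level lattices and
+# is reused when the final lattices are determinized; "vecs_beam" only prunes the
+# lattice copies used to gather posteriors for speaker-vector and fMLLR
+# estimation, which is why it can be much tighter.)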
+use_fmllr=false +fmllr_iters=10 +fmllr_min_count=1000 +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +skip_scoring=false +scoring_opts= +# note: there are no more min-lmwt and max-lmwt options, instead use +# e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20" +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: steps/decode_sgmm2.sh [options] " + echo " e.g.: steps/decode_sgmm2.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 13.0" + exit 1; +fi + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1 +gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" +gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + if [ -f $transform_dir/trans.1 ]; then + echo "$0: using transforms from $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" + elif [ -f $transform_dir/raw_trans.1 ]; then + feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + else + echo "$0: no such file $transform_dir/trans.1 or $transform_dir/raw_trans.1, invalid --transform-dir option?" 
+ exit 1; + fi +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi +## + +## Save Gaussian-selection info to disk. +# Note: we can use final.mdl regardless of whether there is an alignment model-- +# they use the same UBM. + +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm2-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +# Generate state-level lattice which we can rescore. This is done with the alignment +# model and no speaker-vectors. +if [ $stage -le 2 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + sgmm2-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --max-arcs=$max_arcs --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $srcdir/final.alimdl \ + $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; +fi + +# Estimate speaker vectors (1st pass). Prune before determinizing +# because determinization can take a while on un-pruned lattices. +# Note: the sgmm2-post-to-gpost stage is necessary because we have +# a separate alignment-model and final model, otherwise we'd skip it +# and use sgmm2-est-spkvecs. +if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \| \ + sgmm2-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- ark:- \| \ + sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; +fi + +# Estimate speaker vectors (2nd pass). Since we already have spk vectors, +# at this point we need to rescore the lattice to get the correct posteriors. +if [ $stage -le 4 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; +fi +rm $dir/pre_vecs.* + +if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." + if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then + echo "$0: computing pre-transform for fMLLR computation." 
+ sgmm2-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; + fi + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" +fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. +if [ $stage -le 6 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi +rm $dir/pre_lat.*.gz + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at different +# acoustic scales to get the final output. + + +if [ $stage -le 7 ]; then + if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir + fi +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm2_fromlats.sh b/egs/chime_wsj0/s5/steps/decode_sgmm2_fromlats.sh new file mode 100755 index 000000000..8db01d4a0 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm2_fromlats.sh @@ -0,0 +1,270 @@ +#!/bin/bash + +# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM2 system, with speaker vectors. If the +# SGMM2 system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# This script does not use a decoding graph, but instead you provide +# a previous decoding directory with lattices in it. This script will only +# make use of the word sequences in the lattices; it limits the decoding +# to those sequences. You should also provide a "lang" directory from +# which this script will use the G.fst and L.fst. + +# Begin configuration section. +stage=1 +alignment_model= +transform_dir= # dir to find fMLLR transforms. +acwt=0.08333 # Just a default value, used for adaptation and beam-pruning.. +batch_size=75 # Limits memory blowup in compile-train-graphs-fsts +cmd=run.pl +beam=20.0 +gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: + # the first_pass_gselect variable is used for the 1st pass of + # decoding and can be tighter. +first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in + # the 1st pass of decoding (lattice generation). +max_active=7000 +lattice_beam=8.0 # Beam we use in lattice generation. 
+vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for + # speaker-vector computation. Can be quite tight (actually we could + # probably just do best-path. +use_fmllr=false +fmllr_iters=10 +fmllr_min_count=1000 +scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" + +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: steps/decode_sgmm_fromlats.sh [options] " + echo "" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --alignment-model # Model for the first-pass decoding." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 13.0" + exit 1; +fi + +data=$1 +lang=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir` + +for f in $data/feats.scp $lang/G.fst $lang/L_disambig.fst $lang/phones/disambig.int \ + $srcdir/final.mdl $srcdir/tree $olddir/lat.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +silphonelist=`cat $lang/phones/silence.csl` || exit 1 +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" +gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +## Set up features + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" +if [ -z "$transform_dir" ] && [ -f $olddir/trans.1 ]; then + transform_dir=$olddir +fi + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi + +## Calculate FMLLR pre-transforms if needed. We are doing this here since this +## step is requried by models both with and without speaker vectors +if $use_fmllr; then + if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then + echo "$0: computing pre-transform for fMLLR computation." + sgmm2-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; + fi +fi + +## Save Gaussian-selection info to disk. 
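+# (Background note: Gaussian selection precomputes, per frame, the indices of the
+# $gselect best-scoring components of the shared background GMM; later SGMM2
+# stages then evaluate only those components, which keeps lattice generation and
+# rescoring tractable.  The gzipped archives written below are re-read through
+# $gselect_opt in every subsequent stage.)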
+# Note: we can use final.mdl regardless of whether there is an alignment model-- +# they use the same UBM. +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm2-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; + +# Generate state-level lattice which we can rescore. This is done with the +# alignment model and no speaker-vectors. +if [ $stage -le 2 ]; then + $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \ + fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \ + fstdeterminizestar ark:- ark:- \| \ + compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \ + --batch-size=$batch_size $scale_opts \ + $srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \ + sgmm2-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ + --word-symbol-table=$lang/words.txt "$gselect_opt_1stpass" $alignment_model \ + "ark:-" "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; +fi + +## Check if the model has speaker vectors +spkdim=`sgmm2-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` + +if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: + +# Estimate speaker vectors (1st pass). Prune before determinizing +# because determinization can take a while on un-pruned lattices. +# Note: the sgmm2-post-to-gpost stage is necessary because we have +# a separate alignment-model and final model, otherwise we'd skip it +# and use sgmm2-est-spkvecs. + if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ + sgmm2-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ + sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; + fi + +# Estimate speaker vectors (2nd pass). Since we already have spk vectors, +# at this point we need to rescore the lattice to get the correct posteriors. 
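+# Sketch of the flow (comments only): pre_lat.JOB.gz still carries acoustic
+# scores computed without speaker vectors, so we first rescore it with
+# sgmm2-rescore-lattice using pre_vecs.JOB and only then convert to posteriors
+# and re-estimate the vectors; skipping the rescoring would leave the posteriors
+# based on the unadapted acoustic scores.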
+ if [ $stage -le 4 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; + fi + rm $dir/pre_vecs.* + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + fi + rm $dir/pre_lat.*.gz + +else ### For models without speaker vectors: + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." 
+ $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ] && $use_fmllr; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + rm $dir/pre_lat.*.gz + else # Already done with decoding if no adaptation needed. + for n in `seq 1 $nj`; do + mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz + done + fi + +fi + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. + + +if [ $stage -le 7 ]; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + echo "score best paths" + local/score.sh --cmd "$cmd" $data $lang $dir + echo "score confidence and timing with sclite" + #local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $lang $dir +fi +echo "Decoding done." +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm2_rescore.sh b/egs/chime_wsj0/s5/steps/decode_sgmm2_rescore.sh new file mode 100755 index 000000000..4a752fd06 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm2_rescore.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, by rescoring lattices +# generated from a previous SGMM system. The directory with the lattices +# is assumed to contain speaker vectors, if used. Basically it rescores +# the lattices one final time, using the same setup as the final decoding +# pass of the source dir. The assumption is that the model may have +# been discriminatively trained. + +# If the system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# Begin configuration section. +transform_dir= # dir to find fMLLR transforms. +cmd=run.pl +iter=final +skip_scoring=false +scoring_opts= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: steps/decode_sgmm_rescore.sh [options] " + echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --iter # iteration of model to use (default: final)" + exit 1; +fi + +graphdir=$1 +data=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \ + $srcdir/$iter.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +gselect_opt="--gselect=ark,s,cs:gunzip -c $olddir/gselect.JOB.gz|" +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -f $olddir/vecs.1 ]; then + echo "$0: using speaker vectors from $olddir" + spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + echo "$0: no speaker vectors found." + spkvecs_opt= +fi + + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi + +if [ -f $olddir/trans.1 ]; then + echo "$0: using (in addition to any previous transforms) transforms from $olddir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |" +fi +## + +# Rescore the state-level lattices with the model provided. Just +# one command in this script. +echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl" +$cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm2-rescore-lattice "$gselect_opt" $spkvecs_opt \ + $srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm2_rescore_project.sh b/egs/chime_wsj0/s5/steps/decode_sgmm2_rescore_project.sh new file mode 100755 index 000000000..eb8347f75 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm2_rescore_project.sh @@ -0,0 +1,172 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, by rescoring lattices +# generated from a previous SGMM system. This version does the "predictive" +# SGMM, where we subtract some constant times the log-prob of the left +# few spliced frames, and the same for the right few. +# The directory with the lattices +# is assumed to contain any speaker vectors, if used. This script just +# adds into the acoustic scores, (some constant, default -0.25) times +# the acoustic score of the left model, and the same for the right model. + +# the lattices one final time, using the same setup as the final decoding +# pass of the source dir. The assumption is that the model may have +# been discriminatively trained. + +# If the system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# Begin configuration section. +stage=0 +transform_dir= # dir to find fMLLR transforms. +cmd=run.pl +iter=final +prob_scale=-0.25 +dimensions=0:13:104:117 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 5 ]; then + echo "Usage: steps/decode_sgmm_rescore_project.sh [options] " + echo " e.g.: steps/decode_sgmm_rescore_project.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/tri2b/full.mat exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a/decode_dev93_tgpr_predict" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --prob-scale # Default -0.25, scale on left and right models." + exit 1; +fi + +full_lda_mat=$1 +graphdir=$2 +data=$3 +olddir=$4 +dir=$5 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $full_lda_mat $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz \ + $olddir/gselect.1.gz $srcdir/$iter.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -f $olddir/vecs.1 ]; then + echo "$0: using speaker vectors from $olddir" + spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + echo "$0: no speaker vectors found." + spkvecs_opt= +fi + +if [ $stage -le 0 ]; then + # Get full LDA+MLLT mat and its inverse. Note: the full LDA+MLLT mat is + # the LDA+MLLT mat, plus the "rejected" rows of the LDA matrix. 
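+  # Worked example (assuming the default front end mentioned below: 13-dim MFCCs
+  # spliced across 9 frames, i.e. 13*9=117 spliced dims, the "117 in normal case"
+  # noted further down): final.mat keeps only the LDA+MLLT rows that were
+  # retained, get-full-lda-mat appends the "rejected" rows so the transform
+  # covers the full 117-dim spliced space, and full_inv.mat is used later to map
+  # back into that spliced space before applying left.mat / right.mat.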
+ $cmd $dir/log/get_full_lda.log \ + get-full-lda-mat $srcdir/final.mat $full_lda_mat $dir/full.mat $dir/full_inv.mat || exit 1; +fi + +if [ $stage -le 1 ]; then + left_start=`echo $dimensions | cut '-d:' -f 1`; + left_end=`echo $dimensions | cut '-d:' -f 2`; + right_start=`echo $dimensions | cut '-d:' -f 3`; + right_end=`echo $dimensions | cut '-d:' -f 4`; + + # Prepare left and right models. For now, the dimensions are hardwired (e.g., 13 MFCCs and splice 9 frames). + # Note: the choice of dividing by the prob of the left 4 and the right 4 frames is a bit arbitrary and + # we could investigate different configurations. + $cmd $dir/log/left.log \ + sgmm2-project --start-dim=$left_start --end-dim=$left_end $srcdir/final.mdl $dir/full.mat $dir/left.mdl $dir/left.mat || exit 1; + $cmd $dir/log/right.log \ + sgmm2-project --start-dim=$right_start --end-dim=$right_end $srcdir/final.mdl $dir/full.mat $dir/right.mdl $dir/right.mat || exit 1; +fi + + +# we apply the scaling on the new acoustic probs by adding the inverse +# of that to the old acoustic probs, and then later inverting again. +# this has to do with limitations in sgmm2-rescore-lattice: we can only +# scale the *old* acoustic probs, not the new ones. +inverse_prob_scale=`perl -e "print (1.0 / $prob_scale);"` +cur_lats="ark:gunzip -c $olddir/lat.JOB.gz | lattice-scale --acoustic-scale=$inverse_prob_scale ark:- ark:- |" + +## Set up features. Note: we only support LDA+MLLT features, this +## is inherent in the method, we could not support deltas. + +for model_type in left right; do + + feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" # spliced features. + if [ ! -z "$transform_dir" ]; then # using speaker-specific transforms. + # we want to transform in the sequence: $dir/full.mat, then the result of + # (extend-transform-dim $transform_dir/trans.JOB), then $dir/full_inv.mat to + # get back to the spliced space, then the left.mat or right.mat. But + # note that compose-transforms operates in matrix-multiplication order, + # which is opposite from the "order of applying the transforms" order. + new_dim=$[`copy-matrix --binary=false $dir/full.mat - | wc -l` - 1]; # 117 in normal case. + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk 'ark:extend-transform-dim --new-dimension=$new_dim ark:$transform_dir/trans.JOB ark:- | compose-transforms ark:- $dir/full.mat ark:- | compose-transforms $dir/full_inv.mat ark:- ark:- | compose-transforms $dir/${model_type}.mat ark:- ark:- |' ark:- ark:- |" + else # else, we transform with the "left" or "right" matrix; these transform from the + # spliced space. + feats="$feats transform-feats $dir/${model_type}.mat |" + # If we don't have the --transform-dir option, make sure the model was + # trained in the same way. + if grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." + fi + fi + if [ -f $olddir/trans.1 ]; then + echo "$0: warning: not using transforms in $olddir (this is just a " + echo " limitation of the script right now, and could be fixed)." + fi + + if [ $stage -le 2 ]; then + echo "Getting gselect info for $model_type model." 
+ $cmd JOB=1:$nj $dir/log/gselect.$model_type.JOB.log \ + sgmm2-gselect $dir/$model_type.mdl "$feats" \ + "ark,t:|gzip -c >$dir/gselect.$model_type.JOB.gz" || exit 1; + fi + gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.$model_type.JOB.gz|" + + + # Rescore the state-level lattices with the model provided. Just + # one command in this script. + # The --old-acoustic-scale=1.0 option means we just add the scores + # to the old scores. + if [ $stage -le 3 ]; then + echo "$0: rescoring lattices with $model_type model" + $cmd JOB=1:$nj $dir/log/rescore.${model_type}.JOB.log \ + sgmm2-rescore-lattice --old-acoustic-scale=1.0 "$gselect_opt" $spkvecs_opt \ + $dir/$model_type.mdl "$cur_lats" "$feats" \ + "ark:|gzip -c > $dir/lat.${model_type}.JOB.gz" || exit 1; + fi + cur_lats="ark:gunzip -c $dir/lat.${model_type}.JOB.gz |" +done + +if [ $stage -le 4 ]; then + echo "$0: getting final lattices." + $cmd JOB=1:$nj $dir/log/scale_lats.JOB.log \ + lattice-scale --acoustic-scale=$prob_scale "$cur_lats" "ark:|gzip -c >$dir/lat.JOB.gz" \ + || exit 1; +fi + +rm $dir/lat.{left,right}.*.gz 2>/dev/null # note: if these still exist, it will + # confuse the scoring script. + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm_fromlats.sh b/egs/chime_wsj0/s5/steps/decode_sgmm_fromlats.sh new file mode 100755 index 000000000..a926ed618 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm_fromlats.sh @@ -0,0 +1,273 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, with speaker vectors. +# If the SGMM system was +# built on top of fMLLR transforms from a conventional system, you should +# provide the --transform-dir option. +# This script does not use a decoding graph, but instead you provide +# a previous decoding directory with lattices in it. This script will only +# make use of the word sequences in the lattices; it limits the decoding +# to those sequences. You should also provide a "lang" directory from +# which this script will use the G.fst and L.fst. + +# Begin configuration section. +stage=1 +alignment_model= +transform_dir= # dir to find fMLLR transforms. +acwt=0.08333 # Just a default value, used for adaptation and beam-pruning.. +batch_size=75 # Limits memory blowup in compile-train-graphs-fsts +cmd=run.pl +beam=20.0 +gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: + # the first_pass_gselect variable is used for the 1st pass of + # decoding and can be tighter. +first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in + # the 1st pass of decoding (lattice generation). +max_active=7000 + +#WARNING: This option is renamed lat_beam (it was renamed to follow the naming +# in the other scripts +lattice_beam=8.0 # Beam we use in lattice generation. +vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for + # speaker-vector computation. Can be quite tight (actually we could + # probably just do best-path. +use_fmllr=false +fmllr_iters=10 +fmllr_min_count=1000 +scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" + +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: steps/decode_sgmm_fromlats.sh [options] " + echo "" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --alignment-model # Model for the first-pass decoding." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 13.0" + exit 1; +fi + +data=$1 +lang=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir` + +for f in $data/feats.scp $lang/G.fst $lang/L_disambig.fst $lang/phones/disambig.int \ + $srcdir/final.mdl $srcdir/tree $olddir/lat.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +silphonelist=`cat $lang/phones/silence.csl` || exit 1 +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" +gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +## Set up features + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" +if [ -z "$transform_dir" ] && [ -f $olddir/trans.1 ]; then + transform_dir=$olddir +fi + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi + +## Calculate FMLLR pre-transforms if needed. We are doing this here since this +## step is requried by models both with and without speaker vectors +if $use_fmllr; then + if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then + echo "$0: computing pre-transform for fMLLR computation." + sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; + fi +fi + +## Save Gaussian-selection info to disk. +# Note: we can use final.mdl regardless of whether there is an alignment model-- +# they use the same UBM. +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +## Work out name of alignment model. 
## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; + +# Generate state-level lattice which we can rescore. This is done with the +# alignment model and no speaker-vectors. +if [ $stage -le 2 ]; then + $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \ + fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \ + fstdeterminizestar ark:- ark:- \| \ + compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \ + --batch-size=$batch_size $scale_opts \ + $srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \ + sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ + --word-symbol-table=$lang/words.txt "$gselect_opt_1stpass" $alignment_model \ + "ark:-" "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; +fi + +## Check if the model has speaker vectors +spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` + +if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: + +# Estimate speaker vectors (1st pass). Prune before determinizing +# because determinization can take a while on un-pruned lattices. +# Note: the sgmm-post-to-gpost stage is necessary because we have +# a separate alignment-model and final model, otherwise we'd skip it +# and use sgmm-est-spkvecs. + if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ + sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ + sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; + fi + +# Estimate speaker vectors (2nd pass). Since we already have spk vectors, +# at this point we need to rescore the lattice to get the correct posteriors. + if [ $stage -le 4 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; + fi + rm $dir/pre_vecs.* + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." 
+ $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + fi + rm $dir/pre_lat.*.gz + +else ### For models without speaker vectors: + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ] && $use_fmllr; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + rm $dir/pre_lat.*.gz + else # Already done with decoding if no adaptation needed. + for n in `seq 1 $nj`; do + mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz + done + fi + +fi + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. + + +if [ $stage -le 7 ]; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; + echo "score best paths" + local/score.sh --cmd "$cmd" $data $lang $dir + echo "score confidence and timing with sclite" + #local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $lang $dir +fi +echo "Decoding done." +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_sgmm_rescore.sh b/egs/chime_wsj0/s5/steps/decode_sgmm_rescore.sh new file mode 100755 index 000000000..9b23e8ece --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_sgmm_rescore.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, by rescoring lattices +# generated from a previous SGMM system. The directory with the lattices +# is assumed to contain speaker vectors, if used. Basically it rescores +# the lattices one final time, using the same setup as the final decoding +# pass of the source dir. The assumption is that the model may have +# been discriminatively trained. + +# If the system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# Begin configuration section. +transform_dir= # dir to find fMLLR transforms. +cmd=run.pl +iter=final +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: steps/decode_sgmm_rescore.sh [options] " + echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --iter # iteration of model to use (default: final)" + exit 1; +fi + +graphdir=$1 +data=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \ + $srcdir/$iter.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +gselect_opt="--gselect=ark,s,cs:gunzip -c $olddir/gselect.JOB.gz|" +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -f $olddir/vecs.1 ]; then + echo "$0: using speaker vectors from $olddir" + spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + echo "$0: no speaker vectors found." + spkvecs_opt= +fi + + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! 
-z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi + +if [ -f $olddir/trans.1 ]; then + echo "$0: using (in addition to any previous transforms) transforms from $olddir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |" +fi +## + +# Rescore the state-level lattices with the model provided. Just +# one command in this script. +echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl" +$cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt \ + $srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_si.sh b/egs/chime_wsj0/s5/steps/decode_si.sh new file mode 100755 index 000000000..f41ba6349 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_si.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration section. +transform_dir= +iter= +model= # You can specify the model to use (e.g. if you want to use the .alimdl) +stage=0 +nj=4 +cmd=run.pl +max_active=7000 +max_arcs=-1 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +scoring_opts= +# note: there are no more min-lmwt and max-lmwt options, instead use +# e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20" +skip_scoring=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." + echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --model # which model to use (e.g. to" + echo " # specify the final.alimdl)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # dir to find fMLLR transforms " + echo " --acwt # acoustic scale used for lattice generation " + echo " --scoring-opts # options to local/score.sh" + echo " --num-threads # number of threads to use, default 1." 
+ echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$model" ]; then # if --model was not specified on the command line... + if [ -z $iter ]; then model=$srcdir/final.mdl; + else model=$srcdir/$iter.mdl; fi +fi + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode.sh: feature type is $feat_type"; + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + +if [ $stage -le 0 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster$thread_string --max-arcs=$max_arcs --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh --cmd "$cmd" $scoring_opts $data $graphdir $dir +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/decode_with_map.sh b/egs/chime_wsj0/s5/steps/decode_with_map.sh new file mode 100755 index 000000000..4af3b9987 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/decode_with_map.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# Copyright 2012 Neha Agrawal, Cisco Systems; +# Johns Hopkins University (Author: Daniel Povey); +# +# Apache 2.0 + +# Begin configuration section. +transform_dir= +iter= +model= # You can specify the model to use (e.g. if you want to use the .alimdl) +nj=4 +cmd=run.pl +max_active=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +mean_tau=20 +weight_tau=10 +flags=mw # could also contain "v" for variance; the default + # tau for that is 50. +stage=1 +# End configuration section. + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: steps/decode.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." 
+ echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --model # which model to use (e.g. to" + echo " # specify the final.alimdl)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # dir to find fMLLR transforms " + echo " # speaker-adapted decoding" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$model" ]; then # if --model was not specified on the command line... + if [ -z $iter ]; then model=$srcdir/final.mdl; + else model=$srcdir/$iter.mdl; fi +fi + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode.sh: feature type is $feat_type"; + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + +if [ $stage -le 1 ]; then + echo "Doing first-pass decoding before MAP decoding." + $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + gmm-decode-faster --max-active=$max_active --beam=$beam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$feats" ark:$dir/tmp.JOB.tra ark:$dir/pass1_decode.JOB.ali || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "Computing MAP stats and doing MAP-adapted decoding" + $cmd JOB=1:$nj $dir/log/decode_pass2.JOB.log \ + ali-to-post ark:$dir/pass1_decode.JOB.ali ark:- \| \ + gmm-adapt-map --mean-tau=$mean_tau --weight-tau=$weight_tau \ + --update-flags=$flags --spk2utt=ark:$sdata/JOB/spk2utt \ + $model "$feats" ark:- ark:- \| \ + gmm-latgen-map --lattice-beam=$latbeam --acoustic-scale=$acwt \ + --utt2spk=ark:$sdata/JOB/utt2spk --max-active=$max_active --beam=$beam \ + --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model ark,s,cs:- $graphdir/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" +fi +#rm -f $dir/pass1_decode.*.ali +#rm -f $dir/tmp.*.tra + +[ ! 
-x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/get_ctm.sh b/egs/chime_wsj0/s5/steps/get_ctm.sh new file mode 100755 index 000000000..866fa2ab2 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/get_ctm.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. + +# This script produces CTM files from a decoding directory that has lattices +# present. + + +# begin configuration section. +cmd=run.pl +stage=0 +use_segments=true # if we have a segments file, use it to convert + # the segments to be relative to the original files. +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/get_ctm.sh [options] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --use-segments (true|false) # use segments and reco2file_and_channel files " + echo " # to produce a ctm relative to the original audio" + echo " # files, with channel information (typically needed" + echo " # for NIST scoring)." + echo "e.g.:" + echo "local/get_ctm.sh data/train data/lang exp/tri4a/decode/" + exit 1; +fi + +data=$1 +lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. +dir=$3 + +model=$dir/../final.mdl # assume model one level up from decoding dir. + + +for f in $lang/words.txt $lang/phones/word_boundary.int \ + $model $dir/lat.1.gz; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; +done + +name=`basename $data`; # e.g. eval2000 + +mkdir -p $dir/scoring/log + +if [ $stage -le 0 ]; then + if [ -f $data/segments ]; then + f=$data/reco2file_and_channel + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel" + else + filter_cmd=cat + fi + + $cmd LMWT=5:20 $dir/scoring/log/get_ctm.LMWT.log \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + nbest-to-ctm ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| \ + $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1; +fi + + diff --git a/egs/chime_wsj0/s5/steps/get_fmllr_basis.sh b/egs/chime_wsj0/s5/steps/get_fmllr_basis.sh new file mode 100755 index 000000000..9ae46bc24 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/get_fmllr_basis.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao) +# Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that computes basis for basis-fMLLR (see decode_fmllr_basis.sh). +# This can be on top of delta+delta-delta, or LDA+MLLT features. + +stage=0 +# Parameters in alignment of training data +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +per_utt=true # If true, then treat each utterance as a separate speaker for purposes of + # basis training... this is recommended if the number of actual speakers in your + # training set is less than (feature-dim) * (feature-dim+1). +align_beam=10 +retry_beam=40 +silence_weight=0.01 +cmd=run.pl +# End configuration section + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . 
./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/get_fmllr_basis.sh [options] " + echo " e.g.: steps/decode_basis_fmllr.sh data/train_si84 data/lang exp/tri3b/" + echo "Note: we currently assume that this is the same data you trained the model with." + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + exit 1; +fi + +data=$1 +lang=$2 +dir=$3 + +nj=`cat $dir/num_jobs` || exit 1; +sdata=$data/split$nj; +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +splice_opts=`cat $dir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $lang/phones/silence.csl` || exit 1; + +for f in $data/feats.scp $dir/final.alimdl $dir/final.mdl $dir/ali.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + + +# Set up the unadapted features "$sifeats". +if [ -f $dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + + # Set up the adapted features "$feats" for training set. +if [ -f $srcdir/trans.1 ]; then + feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$sdata/trans.JOB ark:- ark:- |"; +else + feats="$sifeats"; +fi + + +if $per_utt; then + spk2utt_opt= # treat each utterance as separate speaker when computing basis. + echo "Doing per-utterance adaptation for purposes of computing the basis." +else + echo "Doing per-speaker adaptation for purposes of computing the basis." + [ `cat $sdata/spk2utt | wc -l` -lt $[41*40] ] && \ + echo "Warning: number of speakers is small, might be better to use --per-utt=true." + spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt" +fi + +# Note: we get Gaussian level alignments with the "final.mdl" and the +# speaker adapted features. +$cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \ + gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \ + gmm-basis-fmllr-accs-gpost $spk2utt_opt \ + $dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1; + +# Compute the basis matrices. +$cmd $dir/log/basis_training.log \ + gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.* || exit 1; +rm $dir/basis.acc.* 2>/dev/null + +exit 0; + diff --git a/egs/chime_wsj0/s5/steps/get_lexicon_probs.sh b/egs/chime_wsj0/s5/steps/get_lexicon_probs.sh new file mode 100755 index 000000000..22053e2f3 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/get_lexicon_probs.sh @@ -0,0 +1,225 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + + +# From a training or alignment directory, and an original lexicon.txt and lang/ +# directory, obtain a new lexicon with pronunciation probabilities. + + +# Begin configuration section. 
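+# Sketch of the estimate this script computes (see the perl block further
+# down): for each (word, pron) pair,
+#   count(word, pron) = smooth_count + #times the pron is aligned in the data
+#   p(pron | word)    = count(word, pron) / norm(word)
+# where norm(word) is the total count over all prons of the word, or, with
+# --max-one true, the largest single pron count (so the most likely pron of
+# each word gets probability 1.0).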
+stage=0 +smooth_count=1.0 # Amount of count to add corresponding to each original lexicon entry; + # this corresponds to add-one smoothing of the pron-probs. +max_one=true # If true, normalize the pron-probs so the maximum value for each word is 1.0, + # rather than summing to one. This is quite standard. + +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 6 ]; then + echo "Usage: steps/get_lexicon_probs.sh " + echo "e.g.: steps/get_lexicon_probs.sh data/train data/lang exp/tri5 data/local/lexicon.txt \\" + echo " exp/tri5_lexprobs data/local_withprob/lexicon.txt" + echo "Note: we assume you ran using word-position-dependent phones but both the old and new lexicon will not have" + echo "these markings. We also assume the new lexicon will have pron-probs but the old one does not; this limitation" + echo "of the script can be removed later." + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --stage # used to control partial re-running." + echo " --max-one # If true, normalize so max prob of each" + echo " # word is one. Default: true" + echo " --smooth # Amount to smooth each count by (default: 1.0)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +old_lexicon=$4 +dir=$5 +new_lexicon=$6 + +oov=`cat $lang/oov.int` || exit 1; +nj=`cat $srcdir/num_jobs` || exit 1; + +for f in $data/text $lang/L.fst $lang/phones/word_boundary.int $srcdir/ali.1.gz $old_lexicon; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +mkdir -p $dir/log +utils/split_data.sh $data $nj # Make sure split data-dir exists. +sdata=$data/split$nj + + +mkdir -p $dir/log + +if [ $stage -le 0 ]; then + + ( ( for n in `seq $nj`; do gunzip -c $srcdir/ali.$n.gz; done ) | \ + linear-to-nbest ark:- "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $data/text |" '' '' ark:- | \ + lattice-align-words $lang/phones/word_boundary.int $srcdir/final.mdl ark:- ark:- | \ + lattice-to-phone-lattice --replace-words=false $srcdir/final.mdl ark:- ark,t:- | \ + awk '{ if (NF == 4) { word_phones = sprintf("%s %s", $3, $4); count[word_phones]++; } } + END { for(key in count) { print count[key], key; } }' | \ + sed s:0,0,:: | awk '{print $2, $1, $3;}' | sed 's/_/ /g' | \ + utils/int2sym.pl -f 3- $lang/phones.txt | \ + sed -E 's/_I( |$)/ /g' | sed -E 's/_E( |$)/ /g' | sed -E 's/_B( |$)/ /g' | sed -E 's/_S( |$)/ /g' | \ + utils/int2sym.pl -f 1 $lang/words.txt > $dir/lexicon_counts.txt + ) 2>&1 | tee $dir/log/get_fsts.log + +fi + +cat $old_lexicon | awk '{if (!($2 > 0.0 && $2 < 1.0)) { exit(1); }}' && \ + echo "Error: old lexicon $old_lexicon appears to have pron-probs; we don't expect this." 
&& \ + exit 1; + +mkdir -p `dirname $new_lexicon` || exit 1; + +if [ $stage -le 1 ]; then + grep -v -w '^' $dir/lexicon_counts.txt | \ + perl -e ' ($old_lexicon, $smooth_count, $max_one) = @ARGV; + ($smooth_count >= 0) || die "Invalid smooth_count $smooth_count"; + ($max_one eq "true" || $max_one eq "false") || die "Invalid max_one variable $max_one"; + open(O, "<$old_lexicon")||die "Opening old-lexicon file $old_lexicon"; + while() { + $_ =~ m/(\S+)\s+(.+)/ || die "Bad old-lexicon line $_"; + $word = $1; + $orig_pron = $2; + # Remember the mapping from canonical prons to original prons: in the case of + # syllable based systems we want to remember the locations of tabs in + # the original lexicon. + $pron = join(" ", split(" ", $orig_pron)); + $orig_pron{$word,$pron} = $orig_pron; + $count{$word,$pron} += $smooth_count; + $tot_count{$word} += $smooth_count; + } + while () { + $_ =~ m/(\S+)\s+(\S+)\s+(.+)/ || die "Bad new-lexicon line $_"; + $word = $1; + $this_count = $2; + $pron = join(" ", split(" ", $3)); + $count{$word,$pron} += $this_count; + $tot_count{$word} += $this_count; + } + if ($max_one eq "true") { # replace $tot_count{$word} with max count + # of any pron. + %tot_count = {}; # set to empty assoc array. + foreach $key (keys %count) { + ($word, $pron) = split($; , $key); # $; is separator for strings that index assoc. arrays. + $this_count = $count{$key}; + if (!defined $tot_count{$word} || $this_count > $tot_count{$word}) { + $tot_count{$word} = $this_count; + } + } + } + foreach $key (keys %count) { + ($word, $pron) = split($; , $key); # $; is separator for strings that index assoc. arrays. + $this_orig_pron = $orig_pron{$key}; + if (!defined $this_orig_pron) { die "Word $word and pron $pron did not appear in original lexicon."; } + if (!defined $tot_count{$word}) { die "Tot-count not defined for word $word."; } + $prob = $count{$key} / $tot_count{$word}; + print "$word\t$prob\t$this_orig_pron\n"; # Output happens here. + } ' $old_lexicon $smooth_count $max_one > $new_lexicon || exit 1; +fi + +exit 0; + +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +cp $srcdir/final.occs $dir; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; +alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |" +mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |" + + +## Work out where we're getting the graphs from. 
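+# The remainder of this file follows the usual alignment recipe (sketched here
+# for reference): reuse the training graphs fsts.JOB.gz from $srcdir when
+# --use-graphs true is given (the number of jobs must match), otherwise
+# compile new graphs with compile-train-graphs; then align with the alignment
+# model, estimate fMLLR transforms, and re-align with the speaker-adapted
+# features.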
+if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + + +if [ $stage -le 1 ]; then + echo "$0: aligning data in $data using $alimdl and speaker-independent features." + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: computing fMLLR transforms" + if [ "$alimdl" != "$mdl" ]; then + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ + ark,s,cs:- ark:$dir/trans.JOB || exit 1; + else + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ + ark,s,cs:- ark:$dir/trans.JOB || exit 1; + fi +fi + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +if [ $stage -le 3 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/get_train_ctm.sh b/egs/chime_wsj0/s5/steps/get_train_ctm.sh new file mode 100755 index 000000000..e81a20e82 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/get_train_ctm.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. + +# This script produces CTM files from a training directory that has alignments +# present. + + +# begin configuration section. +cmd=run.pl +stage=0 +use_segments=true # if we have a segments file, use it to convert + # the segments to be relative to the original files. +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/get_train_ctm.sh [options] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --use-segments (true|false) # use segments and reco2file_and_channel files " + echo " # to produce a ctm relative to the original audio" + echo " # files, with channel information (typically needed" + echo " # for NIST scoring)." 
+ echo "e.g.:" + echo "local/get_train_ctm.sh data/train data/lang exp/tri3a_ali" + echo "Produces ctm in: exp/tri3a_ali/ctm" + exit 1; +fi + +data=$1 +lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. +dir=$3 + +model=$dir/final.mdl # assume model one level up from decoding dir. + + +for f in $lang/words.txt $lang/phones/word_boundary.int \ + $model $dir/ali.1.gz $lang/oov.int; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; +done + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir/scoring/log + +if [ $stage -le 0 ]; then + if [ -f $data/segments ]; then + f=$data/reco2file_and_channel + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel" + else + filter_cmd=cat + fi + + $cmd $dir/log/get_ctm.log \ + linear-to-nbest "ark:gunzip -c $dir/ali.*.gz|" \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $data/text |" \ + '' '' ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + nbest-to-ctm ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| \ + $filter_cmd '>' $dir/ctm || exit 1; +fi diff --git a/egs/chime_wsj0/s5/steps/lmrescore.sh b/egs/chime_wsj0/s5/steps/lmrescore.sh new file mode 100755 index 000000000..e6150ada9 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/lmrescore.sh @@ -0,0 +1,122 @@ +#!/bin/bash + +# Begin configuration section. +mode=4 +cmd=run.pl +skip_scoring=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +for x in `seq 2`; do + [ "$1" == "--cmd" ] && cmd=$2 && shift 2; + [ "$1" == "--mode" ] && mode=$2 && shift 2; +done + +if [ $# != 5 ]; then + echo "Do language model rescoring of lattices (remove old LM, add new LM)" + echo "Usage: steps/lmrescore.sh [options] " + echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--mode (1|2|3|4)]" + exit 1; +fi + +[ -f path.sh ] && . ./path.sh; + +oldlang=$1 +newlang=$2 +data=$3 +indir=$4 +outdir=$5 + +oldlm=$oldlang/G.fst +newlm=$newlang/G.fst +! cmp $oldlang/words.txt $newlang/words.txt && echo "Warning: vocabularies may be incompatible." +[ ! -f $oldlm ] && echo Missing file $oldlm && exit 1; +[ ! -f $newlm ] && echo Missing file $newlm && exit 1; +! ls $indir/lat.*.gz >/dev/null && echo "No lattices input directory $indir" && exit 1; + +oldlmcommand="fstproject --project_output=true $oldlm |" +newlmcommand="fstproject --project_output=true $newlm |" + +mkdir -p $outdir/log + +phi=`grep -w '#0' $newlang/words.txt | awk '{print $2}'` + +if [ "$mode" == 4 ]; then + # we have to prepare $outdir/Ldet.fst in this case: determinized + # lexicon (determinized on phones), with disambig syms removed. + # take L_disambig.fst; get rid of transition with "#0 #0" on it; determinize + # with epsilon removal; remove disambiguation symbols. + fstprint $newlang/L_disambig.fst | awk '{if($4 != '$phi'){print;}}' | fstcompile | \ + fstdeterminizestar | fstrmsymbols $newlang/phones/disambig.int >$outdir/Ldet.fst || exit 1; +fi + +nj=`cat $indir/num_jobs` || exit 1; +cp $indir/num_jobs $outdir + + +#for lat in $indir/lat.*.gz; do +# number=`basename $lat | cut -d. -f2`; +# newlat=$outdir/`basename $lat` + +case "$mode" in + 1) # 1 is inexact, it's the original way of doing it. 
+ $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ + lattice-lmrescore --lm-scale=-1.0 "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \ + lattice-lmrescore --lm-scale=1.0 ark:- "$newlmcommand" "ark,t:|gzip -c>$outdir/lat.JOB.gz" \ + || exit 1; + ;; + 2) # 2 is equivalent to 1, but using more basic operations, combined. + $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ + gunzip -c $indir/lat.JOB.gz \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \ + lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \ + lattice-determinize ark:- ark:- \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \ + lattice-compose ark:- "fstproject --project_output=true $newlm |" ark:- \| \ + lattice-determinize ark:- ark:- \| \ + gzip -c \>$outdir/lat.JOB.gz || exit 1; + ;; + 3) # 3 is "exact" in that we remove the old LM scores accepting any path + # through G.fst (which is what we want as that happened in lattice + # generation), but we add the new one with "phi matcher", only taking + # backoff arcs if an explicit arc did not exist. + $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ + gunzip -c $indir/lat.JOB.gz \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \ + lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \ + lattice-determinize ark:- ark:- \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \ + lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \ + lattice-determinize ark:- ark:- \| \ + gzip -c \>$outdir/lat.JOB.gz || exit 1; + ;; + 4) # 4 is also exact (like 3), but instead of subtracting the old LM-scores, + # it removes the old graph scores entirely and adds in the lexicon, + # grammar and transition weights. + mdl=`dirname $indir`/final.mdl + [ ! -f $mdl ] && echo No such model $mdl && exit 1; + $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ + gunzip -c $indir/lat.JOB.gz \| \ + lattice-scale --lm-scale=0.0 ark:- ark:- \| \ + lattice-to-phone-lattice $mdl ark:- ark:- \| \ + lattice-compose ark:- $outdir/Ldet.fst ark:- \| \ + lattice-determinize ark:- ark:- \| \ + lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \ + lattice-add-trans-probs --transition-scale=1.0 --self-loop-scale=0.1 \ + $mdl ark:- ark:- \| \ + gzip -c \>$outdir/lat.JOB.gz || exit 1; + ;; +esac + +rm $outdir/Ldet.fst 2>/dev/null + +if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh --cmd "$cmd" $data $newlang $outdir +else + echo "Not scoring because requested so..." +fi + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/make_bn_feats.sh b/egs/chime_wsj0/s5/steps/make_bn_feats.sh new file mode 100755 index 000000000..53bf57778 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_bn_feats.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +# Copyright 2012 Karel Vesely, Daniel Povey +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Begin configuration section. +nj=4 +cmd=run.pl +remove_last_layers=4 # remove N last components from the nnet +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "usage: $0 [options] "; + echo "options: " + echo " --trim-transforms # number of NNet Components to remove from the end" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ exit 1; +fi + +if [ -f path.sh ]; then . path.sh; fi + +data=$1 +srcdata=$2 +nndir=$3 +logdir=$4 +bnfeadir=$5 + +######## CONFIGURATION + +# copy the dataset metadata from srcdata. +mkdir -p $data || exit 1; +cp $srcdata/* $data 2>/dev/null; rm $data/feats.scp $data/cmvn.scp; + +# make $bnfeadir an absolute pathname. +bnfeadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $bnfeadir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $bnfeadir || exit 1; +mkdir -p $data || exit 1; +mkdir -p $logdir || exit 1; + + +srcscp=$srcdata/feats.scp +scp=$data/feats.scp + +required="$srcscp $nndir/final.nnet" + +for f in $required; do + if [ ! -f $f ]; then + echo "$0: no such file $f" + exit 1; + fi +done + +if [ ! -d $srcdata/split$nj -o $srcdata/split$nj -ot $srcdata/feats.scp ]; then + utils/split_data.sh $srcdata $nj +fi + + +#cut the MLP +nnet=$bnfeadir/feature_extractor.nnet +copy-nnet --remove-last-layers=$remove_last_layers --binary=false $nndir/final.nnet $nnet 2>$logdir/feature_extractor.log + +#get the feature transform +feature_transform=$nndir/final.feature_transform + +echo "Creating bn-feats into $data" + +### +### Prepare feature pipeline +feats="ark,s,cs:copy-feats scp:$srcdata/split$nj/JOB/feats.scp ark:- |" +# Optionally add cmvn +if [ -f $nndir/norm_vars ]; then + norm_vars=$(cat $nndir/norm_vars 2>/dev/null) + feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$srcdata/utt2spk scp:$srcdata/cmvn.scp ark:- ark:- |" +fi +# Optionally add deltas +if [ -f $nndir/delta_order ]; then + delta_order=$(cat $nndir/delta_order) + feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |" +fi +### +### + +#Run the forward pass +$cmd JOB=1:$nj $logdir/make_bnfeats.JOB.log \ + nnet-forward --feature-transform=$feature_transform $nnet "$feats" \ + ark,scp:$bnfeadir/raw_bnfea_$name.JOB.ark,$bnfeadir/raw_bnfea_$name.JOB.scp \ + || exit 1; + + +N0=$(cat $srcdata/feats.scp | wc -l) +N1=$(cat $bnfeadir/raw_bnfea_$name.*.scp | wc -l) +if [[ "$N0" != "$N1" ]]; then + echo "Error producing bnfea features for $name:" + echo "Original feats : $N0 Bottleneck feats : $N1" + exit 1; +fi + +# concatenate the .scp files together. +for ((n=1; n<=nj; n++)); do + cat $bnfeadir/raw_bnfea_$name.$n.scp >> $data/feats.scp +done + + +echo "Succeeded creating MLP-BN features for $name ($data)" + diff --git a/egs/chime_wsj0/s5/steps/make_denlats.sh b/egs/chime_wsj0/s5/steps/make_denlats.sh new file mode 100755 index 000000000..786407e1e --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_denlats.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Create denominator lattices for MMI/MPE training. +# Creates its output in $dir/lat.*.gz + +# Begin configuration section. +nj=4 +cmd=run.pl +sub_split=1 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +transform_dir= +max_mem=20000000 # This will stop the processes getting too large. +# This is in bytes, but not "real" bytes-- you have to multiply +# by something like 5 or 10 to get real bytes (not sure why so large) +num_threads=1 +parallel_opts= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/make_denlats.sh [options] " + echo " e.g.: steps/make_denlats.sh data/train data/lang exp/tri1 exp/tri1_denlats" + echo "Works for (delta|lda) features, and (with --transform-dir option) such features" + echo " plus transforms." + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --sub-split # e.g. 40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + echo " --transform-dir # directory to find fMLLR transforms." + echo " --num-threads # number of threads per decoding job" + echo " --parallel-opts # if >1 thread, add this to 'cmd', e.g. -pe smp 6" + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. +new_lang="$dir/"$(basename "$lang") +echo "Making unigram grammar FST in $new_lang" +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. + +echo "Compiling decoding graph in $dir/dengraph" +if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; +fi + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "align_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "$0: using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; + [ -f $srcdir/final.mat ] && ! 
cmp $transform_dir/final.mat $srcdir/final.mat && \ + echo "$0: LDA transforms differ between $srcdir and $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +else + if [ -f $srcdir/final.alimdl ]; then + echo "$0: you seem to have a SAT system but you did not supply the --transform-dir option."; + exit 1; + fi +fi + +if [ $sub_split -eq 1 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \ + gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g` + $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; + echo Merging archives for data subset $n + rm $dir/.error 2>/dev/null; + for k in `seq $sub_split`; do + gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; + done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; + [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; + rm $dir/lat.$n.*.gz + touch $dir/.done.$n + fi + done +fi + + +echo "$0: done generating denominator lattices." diff --git a/egs/chime_wsj0/s5/steps/make_denlats_nnet.sh b/egs/chime_wsj0/s5/steps/make_denlats_nnet.sh new file mode 100755 index 000000000..0ba20982e --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_denlats_nnet.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# Copyright 2012-2013 Karel Vesely, Daniel Povey +# Apache 2.0. + +# Create denominator lattices for MMI/MPE/sMBR training. +# Creates its output in $dir/lat.*.ark,$dir/lat.scp +# The lattices are uncompressed, we need random access for DNN training. + +# Begin configuration section. +nj=4 +cmd=run.pl +sub_split=1 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +nnet= +max_mem=20000000 # This will stop the processes getting too large. +# This is in bytes, but not "real" bytes-- you have to multiply +# by something like 5 or 10 to get real bytes (not sure why so large) +# End configuration section. +use_gpu_id=-1 # disable gpu +parallel_opts="-pe smp 2" + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/$0 [options] " + echo " e.g.: steps/$0 data/train data/lang exp/tri1 exp/tri1_denlats" + echo "Works for plain features (or CMN, delta), forwarded through feature-transform." + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ echo " --sub-split # e.g. 40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. +new_lang="$dir/"$(basename "$lang") +echo "Making unigram grammar FST in $new_lang" +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. + +echo "Compiling decoding graph in $dir/dengraph" +if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; +fi + + + +#Get the files we will need +cp $srcdir/{tree,final.mdl} $dir + +[ -z "$nnet" ] && nnet=$srcdir/final.nnet; +[ ! -f "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1; + +class_frame_counts=$srcdir/ali_train_pdf.counts +[ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1; + +feature_transform=$srcdir/final.feature_transform +if [ ! -f $feature_transform ]; then + echo "Missing feature_transform '$feature_transform'" + exit 1 +fi + +model=$dir/final.mdl +[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1; + +### +### Prepare feature pipeline (same as for decoding) +### +# Create the feature stream: +feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |" +# Optionally add cmvn +if [ -f $srcdir/norm_vars ]; then + norm_vars=$(cat $srcdir/norm_vars 2>/dev/null) + [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1 + feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |" +fi +# Optionally add deltas +if [ -f $srcdir/delta_order ]; then + delta_order=$(cat $srcdir/delta_order) + feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |" +fi + +# Finally add feature_transform and the MLP +feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |" +### +### +### + + + +### +### We will produce lattices, where the correct path is not necessarily present +### + +#1) We don't use reference path here... 
+ +echo "Generating the denlats" +#2) Generate the denominator lattices +if [ $sub_split -eq 1 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \ + latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" "ark,scp:$dir/lat.JOB.ark,$dir/lat.JOB.scp" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=$(echo $feats | sed s:JOB/:$n/split$sub_split/JOB/:g) + $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats_subset" "ark,scp:$dir/lat.$n.JOB.ark,$dir/lat.$n.JOB.scp" || exit 1; + echo Merging lists for data subset $n + for k in `seq $sub_split`; do + cat $dir/lat.$n.$k.scp + done > $dir/lat.$n.all.scp + echo Merge the ark $n + lattice-copy scp:$dir/lat.$n.all.scp ark,scp:$dir/lat.$n.ark,$dir/lat.$n.scp || exit 1; + #remove the data + rm $dir/lat.$n.*.ark $dir/lat.$n.*.scp $dir/lat.$n.all.scp + touch $dir/.done.$n + fi + done +fi + + + +#3) Merge the SCPs to create full list of lattices (will use random access) +echo Merging to single list $dir/lat.scp +for ((n=1; n<=nj; n++)); do + cat $dir/lat.$n.scp +done > $dir/lat.scp + + +echo "$0: done generating denominator lattices." diff --git a/egs/chime_wsj0/s5/steps/make_denlats_nnet_cpu.sh b/egs/chime_wsj0/s5/steps/make_denlats_nnet_cpu.sh new file mode 100755 index 000000000..7dbb9c3f8 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_denlats_nnet_cpu.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Create denominator lattices for MMI/MPE training. +# Creates its output in $dir/lat.*.gz + +# Begin configuration section. +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. If --transform-dir set, must match that number! +cmd=run.pl +sub_split=1 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +transform_dir= +max_mem=20000000 # This will stop the processes getting too large. +# This is in bytes, but not "real" bytes-- you have to multiply +# by something like 5 or 10 to get real bytes (not sure why so large) +# End configuration section. +num_threads=1 # Number of threads used in nnet-logprob computation. If you set + # this to a different value, make sure to also set the appropriate + # queue options. If you set this too high it won't use all the + # threads as most of the time will be taken in the decoder. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/make_denlats_nnet_cpu.sh [options] " + echo " e.g.: steps/make_denlats_nnet_cpu.sh data/train data/lang exp/tri1 exp/tri1_denlats" + echo "Works for (delta|lda) features, and (with --transform-dir option) such features" + echo " plus transforms." 
+ echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --sub-split # e.g. 40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + echo " --transform-dir # directory to find fMLLR transforms." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. +new_lang="$dir/"$(basename "$lang") +echo "Making unigram grammar FST in $new_lang" +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. + +echo "Compiling decoding graph in $dir/dengraph" +if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; +fi + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "align_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "$0: using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; + [ -f $srcdir/final.mat ] && ! 
cmp $transform_dir/final.mat $srcdir/final.mat && \ + echo "$0: LDA transforms differ between $srcdir and $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +else + if [ -f $srcdir/final.alimdl ]; then + echo "$0: you seem to have a SAT system but you did not supply the --transform-dir option."; + exit 1; + fi +fi + +if [ $sub_split -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ + nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats" ark:- \| \ + latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g` + $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats_subset" ark:- \| \ + latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; + echo Merging archives for data subset $n + rm $dir/.error 2>/dev/null; + for k in `seq $sub_split`; do + gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; + done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; + [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; + rm $dir/lat.$n.*.gz + touch $dir/.done.$n + fi + done +fi + + +echo "$0: done generating denominator lattices." diff --git a/egs/chime_wsj0/s5/steps/make_denlats_sgmm.sh b/egs/chime_wsj0/s5/steps/make_denlats_sgmm.sh new file mode 100755 index 000000000..4f63bae5a --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_denlats_sgmm.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Create denominator lattices for MMI/MPE training, with SGMM models. If the +# features have fMLLR transforms you have to supply the --transform-dir option. +# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is +# possibly a slight mismatch because the speaker vectors come from supervised +# adaptation. + +# Begin configuration section. +nj=4 +cmd=run.pl +sub_split=1 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +transform_dir= +max_mem=20000000 # This will stop the processes getting too large. +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/make_denlats_sgmm.sh [options] " + echo " e.g.: steps/make_denlats_sgmm.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats" + echo "Works for (delta|lda) features, and (with --transform-dir option) such features" + echo " plus transforms." 
+ echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --sub-split # e.g. 40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + echo " --transform-dir # directory to find fMLLR transforms." + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 # could also be $srcdir, but only if no vectors supplied. +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $alidir/splice_opts 2>/dev/null` +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. +new_lang="$dir/"$(basename "$lang") +echo "Making unigram grammar FST in $new_lang" +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph. + +echo "Compiling decoding graph in $dir/dengraph" +if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $new_lang $alidir $dir/dengraph || exit 1; +fi + +if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "align_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" + cp $alidir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "$0: using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; + [ -f $alidir/final.mat ] && ! cmp $transform_dir/final.mat $alidir/final.mat && \ + echo "$0: LDA transforms differ between $alidir and $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +else + echo "Assuming you don't have a SAT system, since no --transform-dir option supplied " +fi + +if [ -f $alidir/gselect.1.gz ]; then + gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|" +else + echo "$0: no such file $alidir/gselect.1.gz" && exit 1; +fi + +if [ -f $alidir/vecs.1 ]; then + spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + if [ -f $alidir/final.alimdl ]; then + echo "You seem to have an SGMM system with speaker vectors," + echo "yet we can't find speaker vectors. 
Perhaps you supplied" + echo "the model director instead of the alignment directory?" + exit 1; + fi +fi + +if [ $sub_split -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ + sgmm-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \ + --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $alidir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g` + spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"` + gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"` + $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + sgmm-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \ + --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \ + --word-symbol-table=$lang/words.txt $alidir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; + echo Merging archives for data subset $n + rm $dir/.error 2>/dev/null; + for k in `seq $sub_split`; do + gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; + done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; + [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; + rm $dir/lat.$n.*.gz + touch $dir/.done.$n + fi + done +fi + + +echo "$0: done generating denominator lattices with SGMMs." diff --git a/egs/chime_wsj0/s5/steps/make_denlats_sgmm2.sh b/egs/chime_wsj0/s5/steps/make_denlats_sgmm2.sh new file mode 100755 index 000000000..b6b901252 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_denlats_sgmm2.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Create denominator lattices for MMI/MPE training, with SGMM models. If the +# features have fMLLR transforms you have to supply the --transform-dir option. +# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is +# possibly a slight mismatch because the speaker vectors come from supervised +# adaptation. + +# Begin configuration section. +nj=4 +cmd=run.pl +sub_split=1 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +transform_dir= +max_mem=20000000 # This will stop the processes getting too large. +num_threads=1 +parallel_opts= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/make_denlats_sgmm2.sh [options] " + echo " e.g.: steps/make_denlats_sgmm2.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats" + echo "Works for (delta|lda) features, and (with --transform-dir option) such features" + echo " plus transforms." + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --sub-split # e.g. 
40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + echo " --transform-dir # directory to find fMLLR transforms." + echo " --num-threads # number of threads per decoding job" + echo " --parallel-opts # if >1 thread, add this to 'cmd', e.g. -pe smp 6" + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 # could also be $srcdir, but only if no vectors supplied. +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $alidir/splice_opts 2>/dev/null` +if [ $num_threads -gt 1 ]; then + # the -parallel becomes part of the binary name we decode with. + thread_string="-parallel --num-threads=$num_threads" +fi + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. +new_lang="$dir/"$(basename "$lang") +echo "Making unigram grammar FST in $new_lang" +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph. + +echo "Compiling decoding graph in $dir/dengraph" +if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $new_lang $alidir $dir/dengraph || exit 1; +fi + +if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "align_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" + cp $alidir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "$0: using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; + [ -f $alidir/final.mat ] && ! 
cmp $transform_dir/final.mat $alidir/final.mat && \ + echo "$0: LDA transforms differ between $alidir and $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +else + echo "Assuming you don't have a SAT system, since no --transform-dir option supplied " +fi + +if [ -f $alidir/gselect.1.gz ]; then + gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|" +else + echo "$0: no such file $alidir/gselect.1.gz" && exit 1; +fi + +if [ -f $alidir/vecs.1 ]; then + spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" + [ "`cat $alidir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $alidir" && exit 1; +else + if [ -f $alidir/final.alimdl ]; then + echo "$0: You seem to have an SGMM system with speaker vectors," + echo "yet we can't find speaker vectors. Perhaps you supplied" + echo "the model director instead of the alignment directory?" + exit 1; + fi +fi + +if [ $sub_split -eq 1 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \ + sgmm2-latgen-faster$thread_string $spkvecs_opt "$gselect_opt" --beam=$beam \ + --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $alidir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g` + spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"` + gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"` + $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + sgmm2-latgen-faster$thread_string $spkvecs_opt_subset "$gselect_opt_subset" \ + --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \ + --word-symbol-table=$lang/words.txt $alidir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; + echo Merging archives for data subset $n + rm $dir/.error 2>/dev/null; + for k in `seq $sub_split`; do + gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; + done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; + [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; + rm $dir/lat.$n.*.gz + touch $dir/.done.$n + fi + done +fi + + +echo "$0: done generating denominator lattices with SGMMs." diff --git a/egs/chime_wsj0/s5/steps/make_fbank.sh b/egs/chime_wsj0/s5/steps/make_fbank.sh new file mode 100755 index 000000000..45255058c --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_fbank.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Copyright 2012 Karel Vesely Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Begin configuration section. +nj=4 +cmd=run.pl +fbank_config=conf/fbank.conf +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. 
parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "usage: make_fbank.sh [options] "; + echo "options: " + echo " --fbank-config # config passed to compute-fbank-feats " + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +logdir=$2 +fbankdir=$3 + + +# make $fbankdir an absolute pathname. +fbankdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $fbankdir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $fbankdir || exit 1; +mkdir -p $logdir || exit 1; + +scp=$data/wav.scp + +required="$scp $fbank_config" + +for f in $required; do + if [ ! -f $f ]; then + echo "make_fbank.sh: no such file $f" + exit 1; + fi +done + +# note: in general, the double-parenthesis construct in bash "((" is "C-style +# syntax" where we can get rid of the $ for variable names, and omit spaces. +# The "for" loop in this style is a special construct. + + +if [ -f $data/segments ]; then + echo "$0 [info]: segments file exists: using that." + split_segments="" + for ((n=1; n<=nj; n++)); do + split_segments="$split_segments $logdir/segments.$n" + done + + utils/split_scp.pl $data/segments $split_segments || exit 1; + rm $logdir/.error 2>/dev/null + + $cmd JOB=1:$nj $logdir/make_fbank.JOB.log \ + extract-segments scp:$scp $logdir/segments.JOB ark:- \| \ + compute-fbank-feats --verbose=2 --config=$fbank_config ark:- \ + ark,scp:$fbankdir/raw_fbank_$name.JOB.ark,$fbankdir/raw_fbank_$name.JOB.scp \ + || exit 1; + +else + echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." + split_scps="" + for ((n=1; n<=nj; n++)); do + split_scps="$split_scps $logdir/wav.$n.scp" + done + + utils/split_scp.pl $scp $split_scps || exit 1; + + $cmd JOB=1:$nj $logdir/make_fbank.JOB.log \ + compute-fbank-feats --verbose=2 --config=$fbank_config scp:$logdir/wav.JOB.scp \ + ark,scp:$fbankdir/raw_fbank_$name.JOB.ark,$fbankdir/raw_fbank_$name.JOB.scp \ + || exit 1; + +fi + + +if [ -f $logdir/.error.$name ]; then + echo "Error producing fbank features for $name:" + tail $logdir/make_fbank.*.log + exit 1; +fi + +# concatenate the .scp files together. +for ((n=1; n<=nj; n++)); do + cat $fbankdir/raw_fbank_$name.$n.scp || exit 1; +done > $data/feats.scp + +rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null + +nf=`cat $data/feats.scp | wc -l` +nu=`cat $data/utt2spk | wc -l` +if [ $nf -ne $nu ]; then + echo "It seems not all of the feature files were successfully ($nf != $nu);" + echo "consider using utils/fix_data_dir.sh $data" +fi + +echo "Succeeded creating filterbank features for $name" diff --git a/egs/chime_wsj0/s5/steps/make_fmllr_feats.sh b/egs/chime_wsj0/s5/steps/make_fmllr_feats.sh new file mode 100755 index 000000000..0c4fc1a22 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_fmllr_feats.sh @@ -0,0 +1,103 @@ +#!/bin/bash + +# Copyright 2012 Karel Vesely +# Johns Hopkins University (Author: Daniel Povey), +# +# Apache 2.0. + +# This script is for use in neural network training and testing; it dumps +# (LDA+MLLT or splice+delta) + fMLLR features in a similar format to +# conventional raw MFCC features. + +# Begin configuration section. +nj=4 +cmd=run.pl +transform_dir= +norm_vars=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "Usage: $0 [options] " + echo "e.g.: $0 data-fmllr/train data/train exp/tri5a exp/make_fmllr_feats/log plp/processed/" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "You can also use fMLLR features-- you have to supply --transform-dir option." + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # where to find fMLLR transforms." + exit 1; +fi + + +data=$1 +srcdata=$2 +gmmdir=$3 +logdir=$4 +feadir=$5 + + + +#srcdir=$1 -> gmmdir +#data=$2 -> srcdata +#dir=$3 -> ruzne +#tgtdata=$4 -> feadir + +sdata=$srcdata/split$nj; +splice_opts=`cat $gmmdir/splice_opts 2>/dev/null` + +mkdir -p $data $logdir $feadir +[[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +if [ -f $gmmdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $gmmdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1 +# [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ +# echo "Mismatch in number of jobs with $transform_dir" && exit 1; +# feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk \"ark:cat $transform_dir/trans.* |\" ark:- ark:- |" +fi + + +#prepare the dir +cp $srcdata/* $data; rm $data/{feats.scp,cmvn.scp}; + +# make $bnfeadir an absolute pathname. +feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` + +name=`basename $data` + +#forward the feats +$cmd JOB=1:$nj $logdir/make_fmllr_feats.JOB.log \ + copy-feats "$feats" \ + ark,scp:$feadir/feats_fmllr_$name.JOB.ark,$feadir/feats_fmllr_$name.JOB.scp || exit 1; + +#merge the feats to single SCP +for n in $(seq 1 $nj); do + cat $feadir/feats_fmllr_$name.$n.scp +done > $data/feats.scp + +echo "$0 finished... $srcdata -> $data ($gmmdir)" + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/make_fmmi_feats.sh b/egs/chime_wsj0/s5/steps/make_fmmi_feats.sh new file mode 100755 index 000000000..fe6ceee14 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_fmmi_feats.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# Decoding of fMMI or fMPE models (feature-space discriminative training). +# If transform-dir supplied, expects e.g. fMLLR transforms in that dir. + +# Begin configuration section. 
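+# Example invocation (mirrors the usage message below; the --transform-dir value
+# is an illustrative placeholder, and ngselect must match the value used when
+# the fMMI/fMPE model was trained):
+#   steps/make_fmmi_feats.sh --nj 10 --cmd run.pl --transform-dir exp/tri5a_ali \
+#     data-fmmi/train data/train exp/tri5a_fmmi_b0.1 data-fmmi/train/_log data-fmmi/train/_data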
+iter=final +nj=4 +cmd=run.pl +ngselect=2; # Just use the 2 top Gaussians for fMMI/fMPE. Should match train. +transform_dir= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "Usage: $0 [options] " + echo "e.g.: $0 data-fmmi/train data/train exp/tri5a_fmmi_b0.1 data-fmmi/train/_log data-fmmi/train/_data " + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "You can also use fMLLR features-- you have to supply --transform-dir option." + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # where to find fMLLR transforms." + exit 1; +fi + + +data=$1 +srcdata=$2 +gmmdir=$3 +logdir=$4 +feadir=$5 + + + +#srcdir=$1 -> gmmdir +#data=$2 -> srcdata +#dir=$3 -> ruzne +#tgtdata=$4 -> feadir + +sdata=$srcdata/split$nj; +splice_opts=`cat $gmmdir/splice_opts 2>/dev/null` + +mkdir -p $data $logdir $feadir +[[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $gmmdir/$iter.fmpe; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +if [ -f $gmmdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $gmmdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + +# Get Gaussian selection info. +$cmd JOB=1:$nj $logdir/gselect.JOB.log \ + gmm-gselect --n=$ngselect $gmmdir/$iter.fmpe "$feats" \ + "ark:|gzip -c >$feadir/gselect.JOB.gz" || exit 1; + +#prepare the dir +cp $srcdata/* $data; rm $data/{feats.scp,cmvn.scp}; + +# make $bnfeadir an absolute pathname. +feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` + +#forward the feats +$cmd JOB=1:$nj $logdir/make_fmmi_feats.JOB.log \ + fmpe-apply-transform $gmmdir/$iter.fmpe "$feats" "ark,s,cs:gunzip -c $feadir/gselect.JOB.gz|" \ + ark,scp:$feadir/feats_fmmi.JOB.ark,$feadir/feats_fmmi.JOB.scp || exit 1; + +#merge the feats to single SCP +for n in $(seq 1 $nj); do + cat $feadir/feats_fmmi.$n.scp +done > $data/feats.scp + +echo "$0 finished... 
$srcdata -> $data ($gmmdir)" + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/make_index.sh b/egs/chime_wsj0/s5/steps/make_index.sh new file mode 100755 index 000000000..4eef666ad --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_index.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0 + +# Begin configuration section. +model= # You can specify the model to use +cmd=run.pl +acwt=0.083333 +lmwt=1.0 +max_silence_frames=50 +max_states=1000000 +max_expand=20 # limit memory blowup in lattice-align-words +strict=true +word_ins_penalty=0 +silence_word= # Specify this only if you did so in kws_setup +skip_optimization=false # If you only search for few thousands of keywords, you probablly + # can skip the optimization; but if you're going to search for + # millions of keywords, you'd better do set this optimization to + # false and do the optimization on the final index. +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/make_index.sh [options] " + echo "... where is where you have the lattices, and is assumed to be" + echo " a sub-directory of the directory where the model is." + echo "e.g.: steps/make_index.sh data/kws data/lang exp/sgmm2_5a_mmi/decode/ exp/sgmm2_5a_mmi/decode/kws/" + echo "" + echo "main options (for others, see top of script file)" + echo " --acwt # acoustic scale used for lattice" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --lmwt # lm scale used for lattice" + echo " --model # which model to use" + echo " # speaker-adapted decoding" + echo " --max-silence-frames # maximum #frames for silence" + exit 1; +fi + + +kwsdatadir=$1; +langdir=$2; +decodedir=$3; +kwsdir=$4; +srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory. + +mkdir -p $kwsdir/log; +nj=`cat $decodedir/num_jobs` || exit 1; +echo $nj > $kwsdir/num_jobs; +word_boundary=$langdir/phones/word_boundary.int +utter_id=$kwsdatadir/utter_id + +if [ -z "$model" ]; then # if --model was not specified on the command line... + model=$srcdir/final.mdl; +fi + +for f in $word_boundary $model $decodedir/lat.1.gz; do + [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; +done + +echo "Using model: $model" + +if [ ! 
-z $silence_word ]; then + silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "Error: could not find integer representation of silence word $silence_word" && exit 1; + silence_opt="--silence-label=$silence_int" +fi + +$cmd JOB=1:$nj $kwsdir/log/index.JOB.log \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty "ark:gzip -cdf $decodedir/lat.JOB.gz|" ark:- \| \ + lattice-align-words $silence_opt --max-expand=$max_expand $word_boundary $model ark:- ark:- \| \ + lattice-scale --acoustic-scale=$acwt --lm-scale=$lmwt ark:- ark:- \| \ + lattice-to-kws-index --max-silence-frames=$max_silence_frames --strict=$strict ark:$utter_id ark:- ark:- \| \ + kws-index-union --skip-optimization=$skip_optimization --strict=$strict --max-states=$max_states \ + ark:- "ark:|gzip -c > $kwsdir/index.JOB.gz" + + +exit 0; diff --git a/egs/chime_wsj0/s5/steps/make_mfcc.sh b/egs/chime_wsj0/s5/steps/make_mfcc.sh new file mode 100755 index 000000000..3ca06c50e --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_mfcc.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Begin configuration section. +nj=4 +cmd=run.pl +mfcc_config=conf/mfcc.conf +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "usage: make_mfcc.sh [options] "; + echo "options: " + echo " --mfcc-config # config passed to compute-mfcc-feats " + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +logdir=$2 +mfccdir=$3 + + +# make $mfccdir an absolute pathname. +mfccdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $mfccdir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $mfccdir || exit 1; +mkdir -p $logdir || exit 1; + +scp=$data/wav.scp + +required="$scp $mfcc_config" + +for f in $required; do + if [ ! -f $f ]; then + echo "make_mfcc.sh: no such file $f" + exit 1; + fi +done + +# note: in general, the double-parenthesis construct in bash "((" is "C-style +# syntax" where we can get rid of the $ for variable names, and omit spaces. +# The "for" loop in this style is a special construct. + + +if [ -f $data/segments ]; then + echo "$0 [info]: segments file exists: using that." + split_segments="" + for ((n=1; n<=nj; n++)); do + split_segments="$split_segments $logdir/segments.$n" + done + + utils/split_scp.pl $data/segments $split_segments || exit 1; + rm $logdir/.error 2>/dev/null + + $cmd JOB=1:$nj $logdir/make_mfcc.JOB.log \ + extract-segments scp:$scp $logdir/segments.JOB ark:- \| \ + compute-mfcc-feats --verbose=2 --config=$mfcc_config ark:- \ + ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \ + || exit 1; + +else + echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
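+  # A per-utterance wav.scp has one line per utterance id, e.g. (illustrative):
+  #   011c0201 /export/corpora/wsj0/si_tr_s/011/011c0201.wav
+  # utils/split_scp.pl below just divides those lines into $nj roughly equal
+  # lists, one per parallel compute-mfcc-feats job.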
+ split_scps="" + for ((n=1; n<=nj; n++)); do + split_scps="$split_scps $logdir/wav.$n.scp" + done + + utils/split_scp.pl $scp $split_scps || exit 1; + + $cmd JOB=1:$nj $logdir/make_mfcc.JOB.log \ + compute-mfcc-feats --verbose=2 --config=$mfcc_config scp:$logdir/wav.JOB.scp \ + ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \ + || exit 1; + +fi + + +if [ -f $logdir/.error.$name ]; then + echo "Error producing mfcc features for $name:" + tail $logdir/make_mfcc.*.log + exit 1; +fi + +# concatenate the .scp files together. +for ((n=1; n<=nj; n++)); do + cat $mfccdir/raw_mfcc_$name.$n.scp || exit 1; +done > $data/feats.scp + +rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null + +nf=`cat $data/feats.scp | wc -l` +nu=`cat $data/utt2spk | wc -l` +if [ $nf -ne $nu ]; then + echo "It seems not all of the feature files were successfully processed ($nf != $nu);" + echo "consider using utils/fix_data_dir.sh $data" +fi + +echo "Succeeded creating MFCC features for $name" diff --git a/egs/chime_wsj0/s5/steps/make_plp.sh b/egs/chime_wsj0/s5/steps/make_plp.sh new file mode 100755 index 000000000..0e543817b --- /dev/null +++ b/egs/chime_wsj0/s5/steps/make_plp.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Begin configuration section. +nj=4 +cmd=run.pl +plp_config=conf/plp.conf +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "usage: make_plp.sh [options] "; + echo "options: " + echo " --plp-config # config passed to compute-plp-feats " + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +logdir=$2 +plpdir=$3 + + +# make $plpdir an absolute pathname. +plpdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $plpdir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $plpdir || exit 1; +mkdir -p $logdir || exit 1; + +scp=$data/wav.scp + +required="$scp $plp_config" + +for f in $required; do + if [ ! -f $f ]; then + echo "make_plp.sh: no such file $f" + exit 1; + fi +done + +# note: in general, the double-parenthesis construct in bash "((" is "C-style +# syntax" where we can get rid of the $ for variable names, and omit spaces. +# The "for" loop in this style is a special construct. + + +if [ -f $data/segments ]; then + echo "$0 [info]: segments file exists: using that." + split_segments="" + for ((n=1; n<=nj; n++)); do + split_segments="$split_segments $logdir/segments.$n" + done + + utils/split_scp.pl $data/segments $split_segments || exit 1; + rm $logdir/.error 2>/dev/null + + $cmd JOB=1:$nj $logdir/make_plp.JOB.log \ + extract-segments scp:$scp $logdir/segments.JOB ark:- \| \ + compute-plp-feats --verbose=2 --config=$plp_config ark:- \ + ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \ + || exit 1; + +else + echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
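+  # conf/plp.conf is passed to compute-plp-feats via --config below; for the
+  # 16 kHz WSJ audio a minimal config might contain just (illustrative):
+  #   --sample-frequency=16000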
+ split_scps="" + for ((n=1; n<=nj; n++)); do + split_scps="$split_scps $logdir/wav.$n.scp" + done + + utils/split_scp.pl $scp $split_scps || exit 1; + + $cmd JOB=1:$nj $logdir/make_plp.JOB.log \ + compute-plp-feats --verbose=2 --config=$plp_config scp:$logdir/wav.JOB.scp \ + ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \ + || exit 1; + +fi + + +if [ -f $logdir/.error.$name ]; then + echo "Error producing plp features for $name:" + tail $logdir/make_plp.*.log + exit 1; +fi + +# concatenate the .scp files together. +for ((n=1; n<=nj; n++)); do + cat $plpdir/raw_plp_$name.$n.scp || exit 1; +done > $data/feats.scp + +rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null + +nf=`cat $data/feats.scp | wc -l` +nu=`cat $data/utt2spk | wc -l` +if [ $nf -ne $nu ]; then + echo "It seems not all of the feature files were successfully ($nf != $nu);" + echo "consider using utils/fix_data_dir.sh $data" +fi + +echo "Succeeded creating PLP features for $name" diff --git a/egs/chime_wsj0/s5/steps/mixup.sh b/egs/chime_wsj0/s5/steps/mixup.sh new file mode 100755 index 000000000..6a74eb88d --- /dev/null +++ b/egs/chime_wsj0/s5/steps/mixup.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# mix up (or down); do 3 iters of model training; realign; then do two more +# iterations of model training. + +# Begin configuration section. +cmd=run.pl +beam=10 +retry_beam=40 +boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +num_iters=5 +realign_iters=3 # Space-separated list of iterations to realign on. +stage=0 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh; +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "Usage: steps/mixup.sh " + echo " e.g.: steps/mixup.sh 20000 data/train_si84 data/lang exp/tri3b exp/tri3b_20k" + echo "main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --config # config containing options" + echo " --stage # stage to do partial re-run from." + exit 1; +fi + +numgauss=$1 +data=$2 +lang=$3 +srcdir=$4 +dir=$5 + +for f in $data/feats.scp $srcdir/final.mdl $srcdir/final.mat; do + [ ! -f $f ] && echo "mixup_lda_etc.sh: no such file $f" && exit 1; +done + +nj=`cat $srcdir/num_jobs` || exit 1; +sdata=$data/split$nj; + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +cp $srcdir/splice_opts $dir 2>/dev/null +cp $srcdir/final.mat $dir +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/tree $dir + + +## Set up features. 
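+# The case statement below composes a single rspecifier string that computes the
+# features on the fly; for the lda case it expands (per job) to roughly:
+#   ark,s,cs:apply-cmvn ... scp:$sdata/JOB/feats.scp ark:- | splice-feats ... | \
+#     transform-feats $srcdir/final.mat ark:- ark:- |
+# If fMLLR transforms (trans.*) exist in $srcdir, a further transform-feats stage
+# is appended to give the speaker-adapted features in $feats.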
+if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ -f $srcdir/trans.1 ]; then + echo Using transforms from $srcdir; + rm $dir/trans.* 2>/dev/null + ln.pl $srcdir/trans.* $dir # Link those transforms to current directory. + feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" +else + feats="$sifeats" +fi +## Done setting up features. + +rm $dir/fsts.*.gz 2>/dev/null +ln.pl $srcdir/fsts.*.gz $dir # Link training-graph FSTs to current directory. + +## Mix up old model +if [ $stage -le 0 ]; then + echo Mixing up old model to $numgauss Gaussians +# Note: this script also works for mixing down. + $cmd $dir/log/mixup.log \ + gmm-mixup --mix-up=$numgauss --mix-down=$numgauss \ + $srcdir/final.mdl $srcdir/final.occs $dir/1.mdl || exit 1; +fi +## Done. + +cur_alidir=$srcdir # dir to find alignments. +[ -z "$realign_iters" ] && ln.pl $srcdir/ali.*.gz $dir; # link alignments, if + # we won't be generating them. + +x=1 +while [ $x -le $num_iters ]; do + echo "$0: iteration $x" + if echo $realign_iters | grep -w $x >/dev/null; then + if [ $stage -le $x ]; then + echo "$0: realigning data" + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + cur_alidir=$dir + fi + if [ $stage -le $x ]; then + echo "$0: accumulating statistics" + $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$feats" \ + "ark,s,cs:gunzip -c $cur_alidir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; + echo "$0: re-estimating model" + [ "`ls $dir/$x.*.acc | wc -w`" -ne $nj ] && echo "$0: wrong #accs" && exit 1; + $cmd $dir/log/update.$x.log \ + gmm-est --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \ + "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; + rm $dir/$x.mdl $dir/$x.*.acc + rm $dir/$x.occs 2>/dev/null + fi + x=$[$x+1] +done + +rm $dir/final.mdl $dir/final.occs 2>/dev/null +ln -s $x.mdl $dir/final.mdl +ln -s $x.occs $dir/final.occs + +if [ -f $dir/trans.1 ]; then + echo "$0: accumulating stats for alignment model." + $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$sifeats" \ + ark,s,cs:- $dir/$x.JOB.acc || exit 1; + [ "`ls $dir/$x.*.acc | wc -w`" -ne $nj ] && echo "$0: wrong #accs" && exit 1; + echo "$0: Re-estimating alignment model." 
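+  # gmm-acc-stats-twofeats above uses the speaker-adapted features ($feats) to
+  # compute per-Gaussian posteriors but accumulates the statistics on the
+  # speaker-independent features ($sifeats), so the resulting final.alimdl can
+  # align or decode test data before any fMLLR transforms have been estimated.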
+ $cmd $dir/log/est_alimdl.log \ + gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl \ + "gmm-sum-accs - $dir/$x.*.acc|" $dir/$x.alimdl || exit 1; + rm $dir/$x.*.acc + rm $dir/final.alimdl 2>/dev/null + ln -s $x.alimdl $dir/final.alimdl +fi + +utils/summarize_warnings.pl $dir/log + +echo Done diff --git a/egs/chime_wsj0/s5/steps/nnet2/align.sh b/egs/chime_wsj0/s5/steps/nnet2/align.sh new file mode 100755 index 000000000..c7a395981 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/align.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# Copyright 2012 Brno University of Technology (Author: Karel Vesely) +# 2013 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments using MLP model + +# If you supply the "--use-graphs true" option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match with the source directory. + + +# Begin configuration section. +nj=4 +cmd=run.pl +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +transform_dir= +iter=final +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: $0 [--transform-dir ] " + echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +mkdir -p $dir/log +echo $nj > $dir/num_jobs +sdata=$data/split$nj +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +for f in $srcdir/tree $srcdir/${iter}.mdl $data/feats.scp $lang/L.fst; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +cp $srcdir/{tree,${iter}.mdl} $dir || exit 1; + + +## Set up features. Note: these are different from the normal features +## because we have one rspecifier that has the features for the entire +## training set, not separate ones for each batch. +if [ -z "$feat_type" ]; then + if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=raw; fi +fi +echo "$0: feature type is $feat_type" + +case $feat_type in + raw) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" + ;; + lda) + splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + cp $srcdir/splice_opts $dir 2>/dev/null + cp $srcdir/final.mat $dir || exit 1; + feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp $sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + if ! [ $nj -eq `cat $transform_dir/num_jobs` ]; then + echo "$0: Number of jobs mismatch with transform-dir: $nj versus `cat $transform_dir/num_jobs`"; + exit 1; + fi + if [ $feat_type == "lda" ]; then + [ ! 
-f $transform_dir/raw_trans.1 ] && echo "No such file $transform_dir/raw_trans.1" && exit 1; + echo "$0: using transforms from $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" + fi + if [ $feat_type == "raw" ]; then + [ ! -f $transform_dir/raw_trans.1 ] && echo "No such file $transform_dir/raw_trans.1" && exit 1; + echo "$0: using raw-fMLLR transforms from $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/raw_trans.JOB ark:- ark:- |" + fi +fi + + +echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" + +tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + +$cmd JOB=1:$nj $dir/log/align.JOB.log \ + compile-train-graphs $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \ + nnet-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $srcdir/${iter}.mdl \ + ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + +echo "$0: done aligning data." + diff --git a/egs/chime_wsj0/s5/steps/nnet2/decode.sh b/egs/chime_wsj0/s5/steps/nnet2/decode.sh new file mode 100755 index 000000000..fdf6c4109 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/decode.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey). +# Apache 2.0. + +# This script does decoding with a neural-net. If the neural net was built on +# top of fMLLR transforms from a conventional system, you should provide the +# --transform-dir option. + +# Begin configuration section. +stage=1 +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. If --transform-dir set, must match that number! +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +cmd=run.pl +beam=15.0 +max_active=7000 +lat_beam=8.0 # Beam we use in lattice generation. +iter=final +num_threads=1 # if >1, will use gmm-latgen-faster-parallel +parallel_opts= # If you supply num-threads, you should supply this too. +scoring_opts= +skip_scoring=false +feat_type= +spk_vecs_dir= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: $0 [options] " + echo " e.g.: $0 --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/tri3b/graph_tgpr data/test_dev93 exp/tri4a_nnet/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 15.0" + echo " --iter # Iteration of model to decode; default is final." + echo " --scoring-opts # options to local/score.sh" + echo " --num-threads # number of threads to use, default 1." + echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + exit 1; +fi + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +model=$srcdir/$iter.mdl + +for f in $graphdir/HCLG.fst $data/feats.scp $model; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +thread_string= +[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +## Set up features. +if [ -z "$feat_type" ]; then + if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=raw; fi + echo "$0: feature type is $feat_type" +fi + +case $feat_type in + raw) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + if [ "$feat_type" == "lda" ]; then + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" + else + [ ! -f $transform_dir/raw_trans.1 ] && echo "$0: no such file $transform_dir/raw_trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- |" + fi +elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then + echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi +## + +if [ ! -z $spk_vecs_dir ]; then + [ ! -f $spk_vecs_dir/vecs.1 ] && echo "No such file $spk_vecs_dir/vecs.1" && exit 1; + spk_vecs_opt=("--spk-vecs=ark:cat $spk_vecs_dir/vecs.*|" "--utt2spk=ark:$data/utt2spk") +else + spk_vecs_opt=() +fi + +if [ $stage -le 1 ]; then + $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ + nnet-latgen-faster$thread_string "${spk_vecs_opt[@]}" --max-active=$max_active --beam=$beam \ + --lattice-beam=$lat_beam --acoustic-scale=$acwt --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt "$model" \ + $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. + + +if [ $stage -le 2 ]; then + if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + echo "score best paths" + local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir + echo "score confidence and timing with sclite" + fi +fi +echo "Decoding done." +exit 0; diff --git a/egs/chime_wsj0/s5/steps/nnet2/get_egs.sh b/egs/chime_wsj0/s5/steps/nnet2/get_egs.sh new file mode 100755 index 000000000..9b52067d0 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/get_egs.sh @@ -0,0 +1,276 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 
+# This script, which will generally be called from other neural-net training +# scripts, extracts the training examples used to train the neural net (and also +# the validation examples used for diagnostics), and puts them in separate archives. + +# Begin configuration section. +cmd=run.pl +feat_type= +num_utts_subset=300 # number of utterances in validation and training + # subsets used for shrinkage and diagnostics +hidden_layer_dim=300 +within_class_factor=0.0001 +num_valid_frames_combine=0 # #valid frames for combination weights at the very end. +num_train_frames_combine=10000 # # train frames for the above. +num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs +samples_per_iter=400000 # each iteration of training, see this many samples + # per job. This is just a guideline; it will pick a number + # that divides the number of samples in the entire data. +transform_dir= # If supplied, overrides alidir +num_jobs_nnet=16 # Number of neural net jobs to run in parallel +stage=0 +io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +splice_width=4 # meaning +- 4 frames on each side for second LDA +spk_vecs_dir= +random_copy=false + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 4 ]; then + echo "Usage: steps/nnet2/get_egs.sh [opts] " + echo " e.g.: steps/nnet2/get_egs.sh data/train data/lang exp/tri3_ali exp/tri4_nnet" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --num-jobs-nnet # Number of parallel jobs to use for main neural net" + echo " # training (will affect results as well as speed; try 8, 16)" + echo " # Note: if you increase this, you may want to also increase" + echo " # the learning rate." + echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" + echo " # process." + echo " --splice-width # Number of frames on each side to append for feature input" + echo " # (note: we splice processed, typically 40-dimensional frames" + echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics" + echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the" + echo " # very end." + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 +dir=$4 + +# Check some files. +for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + + +# Set some variables. +oov=`cat $lang/oov.int` +num_leaves=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; + +nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... +# in this dir we'll have just one job. +sdata=$data/split$nj +utils/split_data.sh $data $nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +cp $alidir/tree $dir + + +# Get list of validation utterances. 
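+# (num_utts_subset utterances, 300 by default, are held out at random as a
+# validation set; a further disjoint subset of the same size is taken below via
+# "utils/filter_scp.pl --exclude" as a training subset used only for the
+# compute_prob diagnostics and for the final combination weights.)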
+awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ + > $dir/valid_uttlist || exit 1; + +if [ -f $data/utt2uniq ]; then + echo "File $data/utt2uniq exists, so augmenting valid_uttlist to" + echo "include all perturbed versions of the same 'real' utterances." + mv $dir/valid_uttlist $dir/valid_uttlist.tmp + utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt + cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \ + sort | uniq | utils/apply_map.pl $dir/uniq2utt | \ + awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist + rm $dir/uniq2utt $dir/valid_uttlist.tmp +fi + +awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ + head -$num_utts_subset > $dir/train_subset_uttlist || exit 1; + +[ -z "$transform_dir" ] && transform_dir=$alidir + +## Set up features. Note: these are different from the normal features +## because we have one rspecifier that has the features for the entire +## training set, not separate ones for each batch. +if [ -z $feat_type ]; then + if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi +fi +echo "$0: feature type is $feat_type" + +case $feat_type in + raw) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" + ;; + lda) + splice_opts=`cat $alidir/splice_opts 2>/dev/null` + cp $alidir/splice_opts $dir 2>/dev/null + cp $alidir/final.mat $dir + feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +if [ -f $transform_dir/trans.1 ] && [ $feat_type != "raw" ]; then + echo "$0: using transforms from $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" + valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/trans.*|' ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/trans.*|' ark:- ark:- |" +fi +if [ -f $transform_dir/raw_trans.1 ] && [ $feat_type == "raw" ]; then + echo "$0: using raw-fMLLR transforms from $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/raw_trans.JOB ark:- ark:- |" + 
valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/raw_trans.*|' ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/raw_trans.*|' ark:- ark:- |" +fi + +if [ $stage -le 0 ]; then + echo "$0: working out number of frames of training data" + num_frames=`feat-to-len scp:$data/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1; + echo $num_frames > $dir/num_frames +else + num_frames=`cat $dir/num_frames` || exit 1; +fi + +# Working out number of iterations per epoch. +iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1; +[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1 +samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)] +echo "$0: Every epoch, splitting the data up into $iters_per_epoch iterations," +echo "$0: giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)." + + +## If --est-lda=true, o LDA on top of whatever features we already have; store +## the matrix which we'll put into the neural network as a constant. + +feat_dim=`feat-to-dim "$train_subset_feats" -` || exit 1; +lda_dim=$[$feat_dim*(1+2*($splice_width))]; # No dim reduction. + +nnet_context_opts="--left-context=$splice_width --right-context=$splice_width" +mkdir -p $dir/egs + +if [ ! -z $spk_vecs_dir ]; then + [ ! -f $spk_vecs_dir/vecs.1 ] && echo "No such file $spk_vecs_dir/vecs.1" && exit 1; + spk_vecs_opt=("--spk-vecs=ark:cat $spk_vecs_dir/vecs.*|" "--utt2spk=ark:$data/utt2spk") +else + spk_vecs_opt=() +fi + +if [ $stage -le 2 ]; then + echo "Getting validation and training subset examples." + rm $dir/.error 2>/dev/null + $cmd $dir/log/create_valid_subset.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt[@]}" "$valid_feats" \ + "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark:$dir/egs/valid_all.egs" || touch $dir/.error & + $cmd $dir/log/create_train_subset.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt[@]}" "$train_subset_feats" \ + "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark:$dir/egs/train_subset_all.egs" || touch $dir/.error & + wait; + [ -f $dir/.error ] && exit 1; + echo "Getting subsets of validation examples for diagnostics and combination." + $cmd $dir/log/create_valid_subset_combine.log \ + nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/egs/valid_all.egs \ + ark:$dir/egs/valid_combine.egs || touch $dir/.error & + $cmd $dir/log/create_valid_subset_diagnostic.log \ + nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/valid_all.egs \ + ark:$dir/egs/valid_diagnostic.egs || touch $dir/.error & + + $cmd $dir/log/create_train_subset_combine.log \ + nnet-subset-egs --n=$num_train_frames_combine ark:$dir/egs/train_subset_all.egs \ + ark:$dir/egs/train_combine.egs || touch $dir/.error & + $cmd $dir/log/create_train_subset_diagnostic.log \ + nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/train_subset_all.egs \ + ark:$dir/egs/train_diagnostic.egs || touch $dir/.error & + wait + cat $dir/egs/valid_combine.egs $dir/egs/train_combine.egs > $dir/egs/combine.egs + + for f in $dir/egs/{combine,train_diagnostic,valid_diagnostic}.egs; do + [ ! 
-s $f ] && echo "No examples in file $f" && exit 1; + done + rm $dir/egs/valid_all.egs $dir/egs/train_subset_all.egs $dir/egs/{train,valid}_combine.egs +fi + +if [ $stage -le 3 ]; then + mkdir -p $dir/temp + + # Other scripts might need to know the following info: + echo $num_jobs_nnet >$dir/egs/num_jobs_nnet + echo $iters_per_epoch >$dir/egs/iters_per_epoch + echo $samples_per_iter_real >$dir/egs/samples_per_iter + + echo "Creating training examples"; + # in $dir/egs, create $num_jobs_nnet separate files with training examples. + # The order is not randomized at this point. + + egs_list= + for n in `seq 1 $num_jobs_nnet`; do + egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark" + done + echo "Generating training examples on disk" + # The examples will go round-robin to egs_list. + $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt[@]}" "$feats" \ + "ark,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ + nnet-copy-egs ark:- $egs_list || exit 1; +fi + +if [ $stage -le 4 ]; then + # combine all the "egs_orig.JOB.*.scp" (over the $nj splits of the data) and + # then split into multiple parts egs.JOB.*.scp for different parts of the + # data, 0 .. $iters_per_epoch-1. + + if [ $iters_per_epoch -eq 1 ]; then + echo "Since iters-per-epoch == 1, just concatenating the data." + for n in `seq 1 $num_jobs_nnet`; do + cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1; + rm $dir/egs/egs_orig.$n.*.ark # don't "|| exit 1", due to NFS bugs... + done + else # We'll have to split it up using nnet-copy-egs. + egs_list= + for n in `seq 0 $[$iters_per_epoch-1]`; do + egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark" + done + # note, the "|| true" below is a workaround for NFS bugs + # we encountered running this script with Debian-7, NFS-v4. + $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \ + nnet-copy-egs --random=$random_copy --srand=JOB \ + "ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \ + '(' rm $dir/egs/egs_orig.JOB.*.ark '||' true ')' || exit 1; + fi +fi + +if [ $stage -le 5 ]; then + # Next, shuffle the order of the examples in each of those files. + # Each one should not be too large, so we can do this in memory. + echo "Shuffling the order of training examples" + echo "(in order to avoid stressing the disk, these won't all run at once)." + + + # note, the "|| true" below is a workaround for NFS bugs + # we encountered running this script with Debian-7, NFS-v4. + for n in `seq 0 $[$iters_per_epoch-1]`; do + $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \ + nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \ + ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \ + '(' rm $dir/egs/egs_tmp.JOB.$n.ark '||' true ')' || exit 1; + done +fi + +echo "$0: Finished preparing training examples" diff --git a/egs/chime_wsj0/s5/steps/nnet2/get_lda.sh b/egs/chime_wsj0/s5/steps/nnet2/get_lda.sh new file mode 100755 index 000000000..6c261c423 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/get_lda.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. +# This script, which will generally be called from other neural-net training +# scripts, extracts the training examples used to train the neural net (and also +# the validation examples used for diagnostics), and puts them in separate archives. + +# Begin configuration section. 
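The net effect of stages 3 to 5 above is a three-level renaming of the example archives under $dir/egs. A sketch that simply prints the expected file names, with small assumed values for the three counts involved (the real run uses nj alignment splits, num_jobs_nnet training jobs and iters_per_epoch per-epoch splits):

  nj=4                # assumed: number of alignment-directory data splits
  num_jobs_nnet=2     # assumed: parallel neural-net training jobs
  iters_per_epoch=3   # assumed: splits of each job's data within an epoch
  # stage 3: one archive per (training job n, data split j), filled round-robin
  for n in $(seq 1 $num_jobs_nnet); do for j in $(seq 1 $nj); do echo "egs_orig.$n.$j.ark"; done; done
  # stage 4: concatenated over the data splits, then re-split per iteration of an epoch
  for n in $(seq 1 $num_jobs_nnet); do for i in $(seq 0 $[iters_per_epoch-1]); do echo "egs_tmp.$n.$i.ark"; done; done
  # stage 5: same layout, but with the frame order shuffled within each archive
  for n in $(seq 1 $num_jobs_nnet); do for i in $(seq 0 $[iters_per_epoch-1]); do echo "egs.$n.$i.ark"; done; done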
+cmd=run.pl + +feat_type= +stage=0 +splice_width=4 # meaning +- 4 frames on each side for second LDA +rand_prune=4.0 # Relates to a speedup we do for LDA. +within_class_factor=0.0001 # This affects the scaling of the transform rows... + # sorry for no explanation, you'll have to see the code. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 4 ]; then + echo "Usage: steps/nnet2/get_lda.sh [opts] " + echo " e.g.: steps/nnet2/get_lda.sh data/train data/lang exp/tri3_ali exp/tri4_nnet" + echo " As well as extracting the examples, this script will also do the LDA computation," + echo " if --est-lda=true (default:true)" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --splice-width # Number of frames on each side to append for feature input" + echo " # (note: we splice processed, typically 40-dimensional frames" + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 +dir=$4 + +# Check some files. +for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + + +# Set some variables. +oov=`cat $lang/oov.int` +num_leaves=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; + +nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... +# in this dir we'll have just one job. +sdata=$data/split$nj +utils/split_data.sh $data $nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +cp $alidir/tree $dir + +## Set up features. Note: these are different from the normal features +## because we have one rspecifier that has the features for the entire +## training set, not separate ones for each batch. +if [ -z $feat_type ]; then + if [ -f $alidir/final.mat ] && ! 
[ -f $alidir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi +fi +echo "$0: feature type is $feat_type" + +case $feat_type in + raw) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" + ;; + lda) + splice_opts=`cat $alidir/splice_opts 2>/dev/null` + cp $alidir/splice_opts $dir 2>/dev/null + cp $alidir/final.mat $dir + feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ -f $alidir/trans.1 ] && [ $feat_type != "raw" ]; then + echo "$0: using transforms from $alidir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |" +fi +if [ -f $alidir/raw_trans.1 ] && [ $feat_type == "raw" ]; then + echo "$0: using raw-fMLLR transforms from $alidir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/raw_trans.JOB ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/raw_trans.*|' ark:- ark:- |" +fi + + +feat_dim=`feat-to-dim "$train_subset_feats" -` || exit 1; +lda_dim=$[$feat_dim*(1+2*($splice_width))]; # No dim reduction. + +if [ $stage -le 0 ]; then + echo "$0: Accumulating LDA statistics." + $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \ + ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \ + acc-lda --rand-prune=$rand_prune $alidir/final.mdl "$feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \ + $dir/lda.JOB.acc || exit 1; +fi + +echo $feat_dim > $dir/feat_dim +echo $lda_dim > $dir/lda_dim + +if [ $stage -le 1 ]; then + nnet-get-feature-transform --within-class-factor=$within_class_factor --dim=$lda_dim $dir/lda.mat $dir/lda.*.acc \ + 2>$dir/log/lda_est.log || exit 1; + rm $dir/lda.*.acc +fi + +echo "$0: Finished estimating LDA" diff --git a/egs/chime_wsj0/s5/steps/nnet2/get_lda_block.sh b/egs/chime_wsj0/s5/steps/nnet2/get_lda_block.sh new file mode 100755 index 000000000..e370be05b --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/get_lda_block.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. +# This script, which will generally be called from other neural-net training +# scripts, extracts the training examples used to train the neural net (and also +# the validation examples used for diagnostics), and puts them in separate archives. + +# Begin configuration section. 
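As in get_egs.sh above, the "LDA" computed here does not reduce dimension: lda_dim is the full dimension of the spliced window, and nnet-get-feature-transform mainly decorrelates and rescales it (the within_class_factor comment above hints at the row scaling). A worked example of the dimension bookkeeping, assuming typical 40-dimensional processed features:

  feat_dim=40       # assumed dimension of one processed frame
  splice_width=4    # default above: +-4 frames of context
  context_length=$[1+2*$splice_width]          # 9 frames in the spliced window
  lda_dim=$[$feat_dim*(1+2*($splice_width))]   # 40 * 9 = 360, no reduction
  echo "window of $context_length frames -> lda_dim=$lda_dim"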
+cmd=run.pl + +stage=0 +splice_width=4 # meaning +- 4 frames on each side for second LDA +rand_prune=4.0 # Relates to a speedup we do for LDA. +within_class_factor=0.0001 # This affects the scaling of the transform rows... + # sorry for no explanation, you'll have to see the code. +block_size=10 +block_shift=5 + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 4 ]; then + echo "Usage: steps/nnet2/get_lda_block.sh [opts] " + echo " e.g.: steps/nnet2/get_lda.sh data/train data/lang exp/tri3_ali exp/tri4_nnet" + echo " As well as extracting the examples, this script will also do the LDA computation," + echo " if --est-lda=true (default:true)" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --splice-width # Number of frames on each side to append for feature input" + echo " # (note: we splice processed, typically 40-dimensional frames" + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 +dir=$4 + +# Check some files. +for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + + +# Set some variables. +oov=`cat $lang/oov.int` +num_leaves=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; + +nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... +# in this dir we'll have just one job. +sdata=$data/split$nj +utils/split_data.sh $data $nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +cp $alidir/tree $dir + +## Set up features. Note: these are different from the normal features +## because we have one rspecifier that has the features for the entire +## training set, not separate ones for each batch. + + +feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- |" +train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" + +feat_dim=`feat-to-dim "$train_subset_feats" -` || exit 1; + +if [ $stage -le 0 ]; then + echo "$0: Accumulating LDA statistics." + $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \ + ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \ + acc-lda --rand-prune=$rand_prune $alidir/final.mdl "$feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \ + $dir/lda.JOB.acc || exit 1; +fi + +echo $feat_dim > $dir/feat_dim + +echo -n > $dir/indexes +# Get list of indexes, e.g. a file like: +# 0 1 2 3 4 5 6 7 8 9 +# 5 6 7 8 9 10 11 12 13 14 +# 10 ... 
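Each row of $dir/indexes describes one block: for every base feature dimension in that block it lists the dimension's position in each frame of the spliced window. The spliced vector is laid out frame after frame, so a base dimension recurs with a stride of feat_dim. A toy illustration of the seq call used below, assuming feat_dim=12 and splice_width=1 (a 3-frame window):

  feat_dim=12; splice_width=1      # assumed toy values
  context_length=$[1+2*($splice_width)]
  # positions of base dimension 0 in the spliced vector: 0 12 24
  seq 0 $feat_dim $[0+($feat_dim*($context_length-1))]
  # positions of base dimension 5 in the spliced vector: 5 17 29
  seq 5 $feat_dim $[5+($feat_dim*($context_length-1))]

The loop below strings block_size such runs together on one line and then advances the block start by block_shift, so neighbouring blocks overlap by block_size - block_shift base dimensions.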
+ +cur_index=0 +num_blocks=0 +context_length=$[1+2*($splice_width)] + +while [ $[$cur_index+$block_size] -lt $feat_dim ]; do + for n in `seq $cur_index $[cur_index+$block_size-1]`; do + echo -n `seq $n $feat_dim $[$n+($feat_dim*($context_length-1))]` '' >> $dir/indexes + done + echo >> $dir/indexes + num_blocks=$[$num_blocks+1] + cur_index=$[$cur_index+$block_shift] + if [ $[$cur_index+$block_size] -gt $feat_dim ]; then + cur_index=$[$feat_dim-$block_size]; + fi +done +echo $num_blocks >$dir/num_blocks + +lda_dim=`cat $dir/indexes | wc -w` +echo $lda_dim > $dir/lda_dim + +if [ $stage -le 1 ]; then + nnet-get-feature-transform-multi --within-class-factor=$within_class_factor $dir/indexes $dir/lda.*.acc $dir/lda.mat \ + 2>$dir/log/lda_est.log || exit 1; + rm $dir/lda.*.acc +fi + +echo "$0: Finished estimating LDA" diff --git a/egs/chime_wsj0/s5/steps/nnet2/get_perturbed_feats.sh b/egs/chime_wsj0/s5/steps/nnet2/get_perturbed_feats.sh new file mode 100755 index 000000000..0679da7b2 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/get_perturbed_feats.sh @@ -0,0 +1,89 @@ +#!/bin/bash + + +# begin configuration section + +cmd="run.pl" +pairs="1.1-1.0 1.05-1.2 1.0-0.8 0.95-1.1 0.9-0.9" # Pairs of (VTLN warp factor, time-warp factor) +stage=0 +cleanup=true +feature_type=fbank +# end configuration section + +set -e +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 [options] " + echo "e.g.: $0 mfcc conf/fbank_40.conf exp/perturbed_fbank_train data/train data/train_perturbed_fbank" + echo "Supported options: " + echo "--feature-type (fbank|mfcc|plp) # Type of features we are making" + echo "--cmd 'command-program' # Mechanism to run jobs, e.g. run.pl" + echo "--pairs # Pairs of (vtln-warp, time-warp) factors, " + echo " # default $pairs" + echo "--stage # Use for partial re-run" + echo "--cleanup (true|false) # If false, do not clean up temp files (default: true)" + exit 1; +fi + +base_config=$1 +featdir=$2 +dir=$3 # dir/log* will contain log-files +inputdata=$4 +data=$5 + +for f in $base_config $inputdata/wav.scp; do + if [ ! -f $f ]; then + echo "Expected file $f to exist" + exit 1; + fi +done + +if [ "$feature_type" != "fbank" ] && [ "$feature_type" != "mfcc" ] && \ + [ "$feature_type" != "plp" ]; then + echo "$0: Invalid option --feature-type=$feature_type" + exit 1; +fi + +mkdir -p $featdir +mkdir -p $dir/conf $dir/log + +all_feature_dirs="" + +for pair in $pairs; do + vtln_warp=`echo $pair | cut -d- -f1` + time_warp=`echo $pair | cut -d- -f2` + fs=`perl -e "print ($time_warp*10);"` + conf=$dir/conf/$pair.conf + this_dir=$dir/$pair + + ( cat $base_config; echo; echo "--frame-shift=$fs"; echo "--vtln-warp=$vtln_warp" ) > $conf + + echo "Making ${feature_type} features for VTLN-warp $vtln_warp and time-warp $time_warp" + + feature_data=${data}-$pair + all_feature_dirs="$all_feature_dirs $feature_data" + + utils/copy_data_dir.sh --spk-prefix ${pair}- --utt-prefix ${pair}- $inputdata $feature_data + steps/make_${feature_type}.sh --${feature_type}-config $conf --nj 8 --cmd "$cmd" $feature_data $this_dir $featdir + + steps/compute_cmvn_stats.sh $feature_data $this_dir $featdir +done + +utils/combine_data.sh $data $all_feature_dirs + + +# In the combined feature directory, create a file utt2uniq which maps +# our extended utterance-ids to "unique utterances". This enables the +# script steps/nnet2/get_egs.sh to hold out data in a more proper way. 
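The perl one-liner below reads utt2spk on stdin and strips whichever "<vtln-warp>-<time-warp>-" prefix copy_data_dir.sh added, so every perturbed copy maps back to its original utterance. The same idea as a self-contained sketch with hypothetical utterance-ids:

  pairs="1.1-1.0 0.9-0.9"   # a subset of the default pairs, for illustration
  printf '%s\n' 1.1-1.0-utt_a 0.9-0.9-utt_a 1.1-1.0-utt_b | while read x; do
    y=$x; for p in $pairs; do y=${y#${p}-}; done; echo "$x $y"
  done
  # prints:
  # 1.1-1.0-utt_a utt_a
  # 0.9-0.9-utt_a utt_a
  # 1.1-1.0-utt_b utt_b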
+cat $data/utt2spk | \ + perl -e ' while(){ @A=split; $x=shift @A; $y=$x; + foreach $pair (@ARGV) { $y =~ s/^${pair}-// && last; } print "$x $y\n"; } ' $pairs \ + > $data/utt2uniq + +if $cleanup; then + echo "$0: Cleaning up temporary directories for ${feature_type} features." + # Note, this just removes the .scp files and so on, not the data which is located in + # $featdir and which is still needed. + rm -r $all_feature_dirs +fi diff --git a/egs/chime_wsj0/s5/steps/nnet2/retrain_tanh.sh b/egs/chime_wsj0/s5/steps/nnet2/retrain_tanh.sh new file mode 100755 index 000000000..c58a61dd4 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/retrain_tanh.sh @@ -0,0 +1,216 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script is for training networks with tanh nonlinearities; it starts with +# a given model and supports increasing the hidden-layer dimension. It is +# otherwise similar to train_tanh.sh + +# Begin configuration section. +cmd=run.pl +num_epochs=15 # Number of epochs during which we reduce + # the learning rate; number of iteration is worked out from this. +num_epochs_extra=5 # Number of epochs after we stop reducing + # the learning rate. +num_iters_final=20 # Maximum number of final iterations to give to the + # optimization over the validation set. +initial_learning_rate=0.04 +final_learning_rate=0.004 +softmax_learning_rate_factor=0.5 # Train this layer half as fast as the other layers. + + +minibatch_size=128 # by default use a smallish minibatch size for neural net + # training; this controls instability which would otherwise + # be a problem with multi-threaded update. Note: it also + # interacts with the "preconditioned" update which generally + # works better with larger minibatch size, so it's not + # completely cost free. + +shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples + # on each iter. You could set it to 0 or to a large value for complete + # randomization, but this would both consume memory and cause spikes in + # disk I/O. Smaller is easier on disk and memory but less random. It's + # not a huge deal though, as samples are anyway randomized right at the start. + + +stage=-5 + + +mix_up=0 # Number of components to mix up to (should be > #tree leaves, if + # specified.) Will do this at the start. +widen=0 # If specified, it will increase the hidden-layer dimension + # to this value. Will do this at the start. +bias_stddev=0.5 # will be used for widen + +num_threads=16 +parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns. + # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. +cleanup=true +# End configuration section. + + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: $0 [opts] " + echo " e.g.: $0 --widen 1024 exp/tri4_nnet/egs exp/tri4_nnet exp/tri5_nnet" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ echo " --num-epochs <#epochs|15> # Number of epochs of main training" + echo " # while reducing learning rate (determines #iterations, together" + echo " # with --samples-per-iter and --num-jobs-nnet)" + echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training" + echo " # after learning rate fully reduced" + echo " --initial-learning-rate # Learning rate at start of training, e.g. 0.02 for small" + echo " # data, 0.01 for large data" + echo " --final-learning-rate # Learning rate at end of training, e.g. 0.004 for small" + echo " # data, 0.001 for large data" + echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer," + echo " # per context-dependent state. Try a number several times #states." + echo " --num-threads # Number of parallel threads per job (will affect results" + echo " # as well as speed; may interact with batch size; if you increase" + echo " # this, you may want to decrease the batch size." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads." + echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" + echo " # should not get too large, e.g. >2k)." + echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to " + echo " # interpolate parameters (the weights are learned with a validation set)" + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + exit 1; +fi + +egs_dir=$1 +nnet_dir=$2 +dir=$3 + +# Check some files. +for f in $egs_dir/egs.1.0.ark $nnet_dir/final.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +num_jobs_nnet=`cat $egs_dir/num_jobs_nnet` || exit 1; +iters_per_epoch=`cat $egs_dir/iters_per_epoch` || exit 1; + +mkdir -p $dir/log + +cp $nnet_dir/splice_opts $dir 2>/dev/null +cp $nnet_dir/final.mat $dir 2>/dev/null # any LDA matrix... +cp $nnet_dir/tree $dir + + +if [ $stage -le -2 ] && [ $mix_up -gt 0 ]; then + echo Mixing up to $mix_up components + $cmd $dir/log/mix_up.$x.log \ + nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \ + $nnet_dir/final.mdl $dir/0.mdl || exit 1; +else + cp $nnet_dir/final.mdl $dir/0.mdl || exit 1; +fi + +if [ $stage -le -1 ] && [ $widen -gt 0 ]; then + echo "$0: Widening nnet to hidden-layer-dim=$widen" + $cmd $dir/log/widen.log \ + nnet-am-widen --hidden-layer-dim=$widen $dir/0.mdl $dir/0.mdl || exit 1; +fi + +num_iters_reduce=$[$num_epochs * $iters_per_epoch]; +num_iters_extra=$[$num_epochs_extra * $iters_per_epoch]; +num_iters=$[$num_iters_reduce+$num_iters_extra] + +echo "$0: Will train for $num_epochs + $num_epochs_extra epochs, equalling " +echo "$0: $num_iters_reduce + $num_iters_extra = $num_iters iterations, " +echo "$0: (while reducing learning rate) + (with constant learning rate)." + +x=0 +while [ $x -lt $num_iters ]; do + if [ $x -ge 0 ] && [ $stage -le $x ]; then + # Set off jobs doing some diagnostics, in the background. 
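For scale: with the default 15 + 5 epochs and an assumed 20 iterations per epoch (iters_per_epoch is read from the egs directory), the training loop just entered makes 400 passes, reducing the learning rate for the first 300 and holding it constant for the last 100. The arithmetic from a few lines above, as a small sketch:

  num_epochs=15; num_epochs_extra=5   # defaults above
  iters_per_epoch=20                  # assumed; read from $egs_dir in the real script
  num_iters_reduce=$[$num_epochs * $iters_per_epoch]        # 300
  num_iters_extra=$[$num_epochs_extra * $iters_per_epoch]   # 100
  num_iters=$[$num_iters_reduce+$num_iters_extra]           # 400
  echo "$num_iters_reduce + $num_iters_extra = $num_iters iterations"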
+ $cmd $dir/log/compute_prob_valid.$x.log \ + nnet-compute-prob $dir/$x.mdl ark:$egs_dir/valid_diagnostic.egs & + $cmd $dir/log/compute_prob_train.$x.log \ + nnet-compute-prob $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs & + + echo "Training neural net (pass $x)" + + $cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \ + nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x \ + ark:$egs_dir/egs.JOB.$[$x%$iters_per_epoch].ark ark:- \| \ + nnet-train-parallel --num-threads=$num_threads \ + --minibatch-size=$minibatch_size --srand=$x $dir/$x.mdl \ + ark:- $dir/$[$x+1].JOB.mdl \ + || exit 1; + + nnets_list= + for n in `seq 1 $num_jobs_nnet`; do + nnets_list="$nnets_list $dir/$[$x+1].$n.mdl" + done + + learning_rate=`perl -e '($x,$n,$i,$f)=@ARGV; print ($x >= $n ? $f : $i*exp($x*log($f/$i)/$n));' $[$x+1] $num_iters_reduce $initial_learning_rate $final_learning_rate`; + softmax_learning_rate=`perl -e "print $learning_rate * $softmax_learning_rate_factor;"`; + nnet-am-info $dir/$[$x+1].1.mdl > $dir/foo 2>/dev/null || exit 1 + nu=`cat $dir/foo | grep num-updatable-components | awk '{print $2}'` + na=`cat $dir/foo | grep AffineComponent | wc -l` # number of last AffineComopnent layer [one-based] + lr_string="$learning_rate" + for n in `seq 2 $nu`; do + if [ $n -eq $na ]; then lr=$softmax_learning_rate; + else lr=$learning_rate; fi + lr_string="$lr_string:$lr" + done + + $cmd $dir/log/average.$x.log \ + nnet-am-average $nnets_list - \| \ + nnet-am-copy --learning-rates=$lr_string - $dir/$[$x+1].mdl || exit 1; + + rm $nnets_list + fi + x=$[$x+1] +done + +# Now do combination. +# At the end, final.mdl will be a combination of the last e.g. 10 models. +if [ $num_iters_final -gt $num_iters_extra ]; then + echo "Setting num_iters_final=$num_iters_extra" + num_iters_final=$num_iters_extra +fi +start=$[$num_iters-$num_iters_final+1] +nnets_list= +for x in `seq $start $num_iters`; do + nnets_list="$nnets_list $dir/$x.mdl" +done + +if [ $stage -le $num_iters ]; then + num_egs=`nnet-copy-egs ark:$egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'` + mb=$[($num_egs+$num_threads-1)/$num_threads] + $cmd $parallel_opts $dir/log/combine.log \ + nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \ + $nnets_list ark:$egs_dir/combine.egs $dir/final.mdl || exit 1; +fi + +sleep 2; # make sure final.mdl exists. + +# Compute the probability of the final, combined model with +# the same subset we used for the previous compute_probs, as the +# different subsets will lead to different probs. +$cmd $dir/log/compute_prob_valid.final.log \ + nnet-compute-prob $dir/final.mdl ark:$egs_dir/valid_diagnostic.egs & +$cmd $dir/log/compute_prob_train.final.log \ + nnet-compute-prob $dir/final.mdl ark:$egs_dir/train_diagnostic.egs & + +echo Done + +if $cleanup; then + echo Removing most of the models + for x in `seq 0 $num_iters`; do + if [ $[$x%10] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then + # delete all but every 10th model; don't delete the ones which combine to form the final model. + rm $dir/$x.mdl + fi + done +fi diff --git a/egs/chime_wsj0/s5/steps/nnet2/train_block.sh b/egs/chime_wsj0/s5/steps/nnet2/train_block.sh new file mode 100755 index 000000000..aa6e2e725 --- /dev/null +++ b/egs/chime_wsj0/s5/steps/nnet2/train_block.sh @@ -0,0 +1,376 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 
+# this is as train_tanh3.sh but for on top of fbank feats-- we have block-diagonal +# transforms for the first few layers, on separate frequency bands. +# Otherwise it's tanh. + +# Begin configuration section. +cmd=run.pl +num_epochs=15 # Number of epochs during which we reduce + # the learning rate; number of iteration is worked out from this. +num_epochs_extra=5 # Number of epochs after we stop reducing + # the learning rate. +num_iters_final=20 # Maximum number of final iterations to give to the + # optimization over the validation set. +initial_learning_rate=0.04 +final_learning_rate=0.004 +bias_stddev=0.0 +shrink_interval=5 # shrink every $shrink_interval iters except while we are + # still adding layers, when we do it every iter. +shrink=true +num_frames_shrink=2000 # note: must be <= --num-frames-diagnostic option to get_egs.sh, if + # given. +softmax_learning_rate_factor=0.5 # Train this layer half as fast as the other layers. + +hidden_layer_dim=300 # You may want this larger, e.g. 1024 or 2048. + +minibatch_size=128 # by default use a smallish minibatch size for neural net + # training; this controls instability which would otherwise + # be a problem with multi-threaded update. Note: it also + # interacts with the "preconditioned" update which generally + # works better with larger minibatch size, so it's not + # completely cost free. + +samples_per_iter=200000 # each iteration of training, see this many samples + # per job. This option is passed to get_egs.sh +num_jobs_nnet=16 # Number of neural net jobs to run in parallel. This option + # is passed to get_egs.sh. +get_egs_stage=0 +spk_vecs_dir= + +shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples + # on each iter. You could set it to 0 or to a large value for complete + # randomization, but this would both consume memory and cause spikes in + # disk I/O. Smaller is easier on disk and memory but less random. It's + # not a huge deal though, as samples are anyway randomized right at the start. + +add_layers_period=2 # by default, add new layers every 2 iterations. + +num_block_layers=2 +num_normal_layers=2 +block_size=10 +block_shift=5 + +stage=-5 + +io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +splice_width=7 # meaning +- 7 frames on each side for second LDA +randprune=4.0 # speeds up LDA. +alpha=4.0 +max_change=10.0 +mix_up=0 # Number of components to mix up to (should be > #tree leaves, if + # specified.) +num_threads=16 +parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns. + # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. +cleanup=true +egs_dir= +lda_opts= +egs_opts= +# End configuration section. + + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 4 ]; then + echo "Usage: $0 [opts] " + echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ echo " --num-epochs <#epochs|15> # Number of epochs of main training" + echo " # while reducing learning rate (determines #iterations, together" + echo " # with --samples-per-iter and --num-jobs-nnet)" + echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training" + echo " # after learning rate fully reduced" + echo " --initial-learning-rate # Learning rate at start of training, e.g. 0.02 for small" + echo " # data, 0.01 for large data" + echo " --final-learning-rate # Learning rate at end of training, e.g. 0.004 for small" + echo " # data, 0.001 for large data" + echo " --num-hidden-layers <#hidden-layers|2> # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs" + echo " --initial-num-hidden-layers <#hidden-layers|1> # Number of hidden layers to start with." + echo " --add-layers-period <#iters|2> # Number of iterations between adding hidden layers" + echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer," + echo " # per context-dependent state. Try a number several times #states." + echo " --num-jobs-nnet # Number of parallel jobs to use for main neural net" + echo " # training (will affect results as well as speed; try 8, 16)" + echo " # Note: if you increase this, you may want to also increase" + echo " # the learning rate." + echo " --num-threads # Number of parallel threads per job (will affect results" + echo " # as well as speed; may interact with batch size; if you increase" + echo " # this, you may want to decrease the batch size." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" + echo " # should not get too large, e.g. >2k)." + echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" + echo " # process." + echo " --splice-width # Number of frames on each side to append for feature input" + echo " # (note: we splice processed, typically 40-dimensional frames" + echo " --lda-dim # Dimension to reduce spliced features to with LDA" + echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to " + echo " # interpolate parameters (the weights are learned with a validation set)" + echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics" + echo " # (the validation subset is held out from training)" + echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics" + echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the" + echo " # very end." + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 +dir=$4 + +# Check some files. +for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + + +# Set some variables. +num_leaves=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1; + +nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... +# in this dir we'll have just one job. 
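The alignment directory's num_jobs dictates how the data directory is split below, and each later JOB placeholder indexes one of those per-job subsets. A sketch of the layout split_data.sh is expected to leave behind, for an assumed nj:

  nj=8   # assumed; the real value is read from $alidir/num_jobs
  for j in $(seq 1 $nj); do
    # each per-job subdirectory carries its own feats.scp, utt2spk and cmvn.scp
    echo "data/train/split$nj/$j/{feats.scp,utt2spk,cmvn.scp}"
  done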
+sdata=$data/split$nj +utils/split_data.sh $data $nj + +mkdir -p $dir/log +echo $nj > $dir/num_jobs +splice_opts=`cat $alidir/splice_opts 2>/dev/null` +cp $alidir/splice_opts $dir 2>/dev/null +cp $alidir/tree $dir + + +# Get list of validation utterances. +awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ + > $dir/valid_uttlist || exit 1; +awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ + head -$num_utts_subset > $dir/train_subset_uttlist || exit 1; + + +if [ $stage -le -4 ]; then + echo "$0: calling get_lda.sh" + steps/nnet2/get_lda_block.sh --block-size $block_size --block-shift $block_shift \ + $lda_opts --splice-width $splice_width --cmd "$cmd" $data $lang $alidir $dir || exit 1; +fi + +# these files will have been written by get_lda_block.sh +feat_dim=`cat $dir/feat_dim` || exit 1; +lda_dim=`cat $dir/lda_dim` || exit 1; +num_blocks=`cat $dir/num_blocks` || exit 1; + +if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then + echo "$0: calling get_egs.sh" + [ ! -z $spk_vecs_dir ] && spk_vecs_opt="--spk-vecs-dir $spk_vecs_dir"; + steps/nnet2/get_egs.sh $spk_vecs_opt --samples-per-iter $samples_per_iter --num-jobs-nnet $num_jobs_nnet \ + --splice-width $splice_width --stage $get_egs_stage --cmd "$cmd" $egs_opts --feat-type raw \ + $data $lang $alidir $dir || exit 1; +fi + +if [ -z $egs_dir ]; then + egs_dir=$dir/egs +fi + +iters_per_epoch=`cat $egs_dir/iters_per_epoch` || exit 1; +! [ $num_jobs_nnet -eq `cat $egs_dir/num_jobs_nnet` ] && \ + echo "$0: Warning: using --num-jobs-nnet=`cat $egs_dir/num_jobs_nnet` from $egs_dir" +num_jobs_nnet=`cat $egs_dir/num_jobs_nnet` + + +if [ $stage -le -2 ]; then + echo "$0: initializing neural net"; + + hidden_block_size=`perl -e "print int(sqrt(($hidden_layer_dim*$hidden_layer_dim)/$num_blocks));"` + echo "Hidden block size is $hidden_block_size" + hidden_block_dim=$[$hidden_block_size*$num_blocks] + block_stddev=`perl -e "print 1.0/sqrt($block_size);"` + hidden_block_stddev=`perl -e "print 1.0/sqrt($hidden_block_size);"` + first_hidden_layer_stddev=`perl -e "print 1.0/sqrt($hidden_block_dim);"` + stddev=`perl -e "print 1.0/sqrt($hidden_layer_dim);"` + + + cat >$dir/nnet.config <>$dir/nnet.config <>$dir/nnet.config <>$dir/nnet.config <>$dir/nnet.config <
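The block sizing above gives each of the num_blocks hidden blocks roughly hidden_layer_dim/sqrt(num_blocks) units, which keeps the block-diagonal weight matrices at about the same total parameter count as one dense hidden_layer_dim x hidden_layer_dim layer. A worked example with the default hidden_layer_dim=300 and an assumed num_blocks=7 (the real count comes from $dir/num_blocks, written by get_lda_block.sh):

  hidden_layer_dim=300   # default from the configuration section above
  num_blocks=7           # assumed for illustration
  hidden_block_size=$(perl -e "print int(sqrt(($hidden_layer_dim*$hidden_layer_dim)/$num_blocks));")
  hidden_block_dim=$[$hidden_block_size*$num_blocks]
  echo "hidden_block_size=$hidden_block_size, hidden_block_dim=$hidden_block_dim"
  # prints 113 and 791; 7 * 113^2 = 89383, close to 300^2 = 90000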