From 1ae487048d7afebe6d7c6d1f4d60b99c3066086f Mon Sep 17 00:00:00 2001
From: Guoguo Chen <chenguoguo06@gmail.com>
Date: Thu, 16 Apr 2015 03:18:26 +0000
Subject: [PATCH] trunk: adding silprob to librispeech recipe

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5008 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
---
 egs/librispeech/s5/RESULTS                    | 100 ++++----
 .../s5/local/nnet2/run_6a_clean_460.sh        |  16 +-
 egs/librispeech/s5/local/nnet2/run_7a_960.sh  |  16 +-
 egs/librispeech/s5/local/online/run_nnet2.sh  |  54 ++--
 .../s5/local/online/run_nnet2_disc.sh         |  10 +-
 .../s5/local/online/run_nnet2_ms.sh           |  70 +++---
 .../s5/local/online/run_nnet2_ms_disc.sh      |  14 +-
 egs/librispeech/s5/local/run_data_cleaning.sh |  14 +-
 egs/librispeech/s5/local/run_rnnlm.sh         |   8 +-
 egs/librispeech/s5/local/score.sh             |  33 ++-
 egs/librispeech/s5/run.sh                     | 236 ++++++++++--------
 11 files changed, 300 insertions(+), 271 deletions(-)

diff --git a/egs/librispeech/s5/RESULTS b/egs/librispeech/s5/RESULTS
index b53ecb2b9..885b8bcd9 100644
--- a/egs/librispeech/s5/RESULTS
+++ b/egs/librispeech/s5/RESULTS
@@ -8,70 +8,72 @@
 # whereas "dev-other" and "test-other" sets contain more challenging speech
 
 ### SAT GMM model trained on the "train-clean-100" set (100 hours "clean" speech)
-%WER 8.19 [ 4453 / 54402, 632 ins, 480 del, 3341 sub ] exp/tri4b/decode_fglarge_dev_clean/wer_16
-%WER 8.62 [ 4689 / 54402, 632 ins, 525 del, 3532 sub ] exp/tri4b/decode_tglarge_dev_clean/wer_17
-%WER 10.62 [ 5778 / 54402, 659 ins, 752 del, 4367 sub ] exp/tri4b/decode_tgmed_dev_clean/wer_15
-%WER 12.11 [ 6590 / 54402, 689 ins, 964 del, 4937 sub ] exp/tri4b/decode_tgsmall_dev_clean/wer_15
+### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri4b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done
+%WER 8.20 [ 4459 / 54402, 695 ins, 427 del, 3337 sub ] exp/tri4b/decode_fglarge_dev_clean/wer_14_0.5
+%WER 8.60 [ 4677 / 54402, 763 ins, 399 del, 3515 sub ] exp/tri4b/decode_tglarge_dev_clean/wer_16_0.0
+%WER 10.39 [ 5655 / 54402, 711 ins, 648 del, 4296 sub ] exp/tri4b/decode_tgmed_dev_clean/wer_16_0.0
+%WER 11.69 [ 6361 / 54402, 743 ins, 808 del, 4810 sub ] exp/tri4b/decode_tgsmall_dev_clean/wer_16_0.0
 
-%WER 9.32 [ 4899 / 52576, 749 ins, 496 del, 3654 sub ] exp/tri4b/decode_fglarge_test_clean/wer_16
-%WER 9.60 [ 5047 / 52576, 731 ins, 540 del, 3776 sub ] exp/tri4b/decode_tglarge_test_clean/wer_17
-%WER 11.79 [ 6197 / 52576, 717 ins, 798 del, 4682 sub ] exp/tri4b/decode_tgmed_test_clean/wer_16
-%WER 13.18 [ 6927 / 52576, 718 ins, 998 del, 5211 sub ] exp/tri4b/decode_tgsmall_test_clean/wer_16
+%WER 9.10 [ 4786 / 52576, 708 ins, 464 del, 3614 sub ] exp/tri4b/decode_fglarge_test_clean/wer_17_0.5
+%WER 9.43 [ 4958 / 52576, 751 ins, 492 del, 3715 sub ] exp/tri4b/decode_tglarge_test_clean/wer_15_0.5
+%WER 11.36 [ 5975 / 52576, 799 ins, 642 del, 4534 sub ] exp/tri4b/decode_tgmed_test_clean/wer_17_0.0
+%WER 12.64 [ 6643 / 52576, 795 ins, 817 del, 5031 sub ] exp/tri4b/decode_tgsmall_test_clean/wer_17_0.0
 
-%WER 29.31 [ 14934 / 50948, 1536 ins, 2215 del, 11183 sub ] exp/tri4b/decode_fglarge_dev_other/wer_18
-%WER 30.25 [ 15412 / 50948, 1655 ins, 2188 del, 11569 sub ] exp/tri4b/decode_tglarge_dev_other/wer_17
-%WER 33.01 [ 16817 / 50943, 1358 ins, 3023 del, 12436 sub ] [PARTIAL] exp/tri4b/decode_tgmed_dev_other/wer_19
-%WER 35.06 [ 17862 / 50948, 1378 ins, 3409 del, 13075 sub ] exp/tri4b/decode_tgsmall_dev_other/wer_18
+%WER 28.45 [ 14495 / 50948, 1574 ins, 1925 del, 10996 sub ] exp/tri4b/decode_fglarge_dev_other/wer_17_0.5
+%WER 29.24 [ 14895 / 50948, 1610 ins, 2041 del, 11244 sub ] exp/tri4b/decode_tglarge_dev_other/wer_19_0.5
+%WER 32.04 [ 16325 / 50948, 1753 ins, 2261 del, 12311 sub ] exp/tri4b/decode_tgmed_dev_other/wer_18_0.0
+%WER 33.97 [ 17305 / 50948, 1681 ins, 2661 del, 12963 sub ] exp/tri4b/decode_tgsmall_dev_other/wer_18_0.0
 
-%WER 31.47 [ 16470 / 52343, 1637 ins, 2624 del, 12209 sub ] exp/tri4b/decode_fglarge_test_other/wer_17
-%WER 32.35 [ 16933 / 52343, 1792 ins, 2638 del, 12503 sub ] exp/tri4b/decode_tglarge_test_other/wer_17
-%WER 35.08 [ 18363 / 52343, 1469 ins, 3566 del, 13328 sub ] exp/tri4b/decode_tgmed_test_other/wer_18
-%WER 36.83 [ 19278 / 52343, 1350 ins, 3976 del, 13952 sub ] exp/tri4b/decode_tgsmall_test_other/wer_18
+%WER 30.33 [ 15875 / 52343, 1639 ins, 2375 del, 11861 sub ] exp/tri4b/decode_fglarge_test_other/wer_17_0.5
+%WER 31.07 [ 16264 / 52343, 1728 ins, 2424 del, 12112 sub ] exp/tri4b/decode_tglarge_test_other/wer_18_0.5
+%WER 33.69 [ 17633 / 52343, 1755 ins, 2766 del, 13112 sub ] exp/tri4b/decode_tgmed_test_other/wer_18_0.0
+%WER 35.62 [ 18646 / 52343, 1758 ins, 3039 del, 13849 sub ] exp/tri4b/decode_tgsmall_test_other/wer_17_0.0
 
 
 ### SAT GMM model trained on the combined "train-clean-100" + "train-clean-360" set (460 hours "clean" speech)
-%WER 7.26 [ 3949 / 54402, 543 ins, 420 del, 2986 sub ] exp/tri5b/decode_fglarge_dev_clean/wer_16
-%WER 7.65 [ 4162 / 54402, 592 ins, 436 del, 3134 sub ] exp/tri5b/decode_tglarge_dev_clean/wer_15
-%WER 9.61 [ 5227 / 54402, 591 ins, 684 del, 3952 sub ] exp/tri5b/decode_tgmed_dev_clean/wer_15
-%WER 10.87 [ 5914 / 54402, 584 ins, 863 del, 4467 sub ] exp/tri5b/decode_tgsmall_dev_clean/wer_15
+### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri5b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done
+%WER 7.05 [ 3835 / 54402, 588 ins, 370 del, 2877 sub ] exp/tri5b/decode_fglarge_dev_clean/wer_15_0.5
+%WER 7.49 [ 4077 / 54402, 623 ins, 376 del, 3078 sub ] exp/tri5b/decode_tglarge_dev_clean/wer_14_0.5
+%WER 9.38 [ 5104 / 54402, 701 ins, 533 del, 3870 sub ] exp/tri5b/decode_tgmed_dev_clean/wer_15_0.0
+%WER 10.51 [ 5719 / 54402, 720 ins, 652 del, 4347 sub ] exp/tri5b/decode_tgsmall_dev_clean/wer_15_0.0
 
-%WER 8.31 [ 4369 / 52576, 638 ins, 449 del, 3282 sub ] exp/tri5b/decode_fglarge_test_clean/wer_18
-%WER 8.55 [ 4496 / 52576, 673 ins, 444 del, 3379 sub ] exp/tri5b/decode_tglarge_test_clean/wer_16
-%WER 10.53 [ 5537 / 52576, 645 ins, 719 del, 4173 sub ] exp/tri5b/decode_tgmed_test_clean/wer_16
-%WER 11.71 [ 6159 / 52576, 638 ins, 869 del, 4652 sub ] exp/tri5b/decode_tgsmall_test_clean/wer_16
+%WER 8.14 [ 4279 / 52576, 683 ins, 379 del, 3217 sub ] exp/tri5b/decode_fglarge_test_clean/wer_15_0.5
+%WER 8.50 [ 4469 / 52576, 597 ins, 510 del, 3362 sub ] exp/tri5b/decode_tglarge_test_clean/wer_15_1.0
+%WER 10.10 [ 5311 / 52576, 767 ins, 503 del, 4041 sub ] exp/tri5b/decode_tgmed_test_clean/wer_15_0.0
+%WER 11.20 [ 5886 / 52576, 774 ins, 617 del, 4495 sub ] exp/tri5b/decode_tgsmall_test_clean/wer_15_0.0
 
-%WER 26.27 [ 13384 / 50948, 1450 ins, 1839 del, 10095 sub ] exp/tri5b/decode_fglarge_dev_other/wer_17
-%WER 27.32 [ 13917 / 50948, 1605 ins, 1845 del, 10467 sub ] exp/tri5b/decode_tglarge_dev_other/wer_16
-%WER 30.19 [ 15378 / 50943, 1406 ins, 2423 del, 11549 sub ] [PARTIAL] exp/tri5b/decode_tgmed_dev_other/wer_16
-%WER 32.21 [ 16408 / 50948, 1311 ins, 2994 del, 12103 sub ] exp/tri5b/decode_tgsmall_dev_other/wer_17
+%WER 25.65 [ 13069 / 50948, 1664 ins, 1486 del, 9919 sub ] exp/tri5b/decode_fglarge_dev_other/wer_18_0.0
+%WER 26.60 [ 13552 / 50948, 1549 ins, 1774 del, 10229 sub ] exp/tri5b/decode_tglarge_dev_other/wer_17_0.5
+%WER 29.21 [ 14880 / 50943, 1618 ins, 2026 del, 11236 sub ] exp/tri5b/decode_tgmed_dev_other/wer_18_0.0
+%WER 30.89 [ 15736 / 50948, 1538 ins, 2388 del, 11810 sub ] exp/tri5b/decode_tgsmall_dev_other/wer_18_0.0
 
-%WER 28.11 [ 14714 / 52343, 1524 ins, 2202 del, 10988 sub ] exp/tri5b/decode_fglarge_test_other/wer_16
-%WER 29.16 [ 15263 / 52343, 1616 ins, 2346 del, 11301 sub ] exp/tri5b/decode_tglarge_test_other/wer_17
-%WER 32.09 [ 16798 / 52343, 1342 ins, 3215 del, 12241 sub ] exp/tri5b/decode_tgmed_test_other/wer_18
-%WER 34.08 [ 17837 / 52343, 1412 ins, 3358 del, 13067 sub ] exp/tri5b/decode_tgsmall_test_other/wer_16
+%WER 27.36 [ 14323 / 52343, 1486 ins, 2136 del, 10701 sub ] exp/tri5b/decode_fglarge_test_other/wer_17_0.5
+%WER 28.32 [ 14824 / 52343, 1656 ins, 2118 del, 11050 sub ] exp/tri5b/decode_tglarge_test_other/wer_16_0.5
+%WER 31.01 [ 16233 / 52343, 1577 ins, 2593 del, 12063 sub ] exp/tri5b/decode_tgmed_test_other/wer_19_0.0
+%WER 32.99 [ 17269 / 52343, 1622 ins, 2792 del, 12855 sub ] exp/tri5b/decode_tgsmall_test_other/wer_17_0.0
 
 
 ### SAT GMM model trained on the combined "train-clean-100" + "train-clean-360" + "train-other-500" set (960 hours)
-%WER 7.08 [ 3853 / 54402, 591 ins, 373 del, 2889 sub ] exp/tri6b/decode_fglarge_dev_clean/wer_14
-%WER 7.52 [ 4091 / 54402, 638 ins, 397 del, 3056 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_13
-%WER 9.47 [ 5151 / 54402, 656 ins, 613 del, 3882 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_13
-%WER 10.88 [ 5919 / 54402, 626 ins, 813 del, 4480 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_14
+### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri6b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done
+%WER 7.02 [ 3819 / 54402, 516 ins, 424 del, 2879 sub ] exp/tri6b/decode_fglarge_dev_clean/wer_14_1.0
+%WER 7.33 [ 3988 / 54402, 506 ins, 468 del, 3014 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_15_1.0
+%WER 9.23 [ 5024 / 54402, 744 ins, 481 del, 3799 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_13_0.0
+%WER 10.38 [ 5648 / 54402, 741 ins, 617 del, 4290 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_14_0.0
 
-%WER 8.01 [ 4213 / 52576, 658 ins, 404 del, 3151 sub ] exp/tri6b/decode_fglarge_test_clean/wer_15
-%WER 8.26 [ 4342 / 52576, 661 ins, 449 del, 3232 sub ] exp/tri6b/decode_tglarge_test_clean/wer_15
-%WER 10.06 [ 5289 / 52576, 653 ins, 637 del, 3999 sub ] exp/tri6b/decode_tgmed_test_clean/wer_15
-%WER 11.24 [ 5907 / 52576, 704 ins, 756 del, 4447 sub ] exp/tri6b/decode_tgsmall_test_clean/wer_14
+%WER 7.81 [ 4105 / 52576, 574 ins, 442 del, 3089 sub ] exp/tri6b/decode_fglarge_test_clean/wer_15_1.0
+%WER 8.01 [ 4213 / 52576, 658 ins, 387 del, 3168 sub ] exp/tri6b/decode_tglarge_test_clean/wer_15_0.5
+%WER 9.83 [ 5167 / 52576, 709 ins, 519 del, 3939 sub ] exp/tri6b/decode_tgmed_test_clean/wer_16_0.0
+%WER 10.99 [ 5778 / 52576, 723 ins, 640 del, 4415 sub ] exp/tri6b/decode_tgsmall_test_clean/wer_16_0.0
 
-%WER 21.14 [ 10770 / 50948, 1168 ins, 1493 del, 8109 sub ] exp/tri6b/decode_fglarge_dev_other/wer_17
-%WER 22.14 [ 11278 / 50948, 1342 ins, 1466 del, 8470 sub ] exp/tri6b/decode_tglarge_dev_other/wer_15
-%WER 25.16 [ 12821 / 50948, 1233 ins, 1953 del, 9635 sub ] exp/tri6b/decode_tgmed_dev_other/wer_15
-%WER 27.23 [ 13872 / 50948, 1109 ins, 2426 del, 10337 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_16
-
-%WER 22.49 [ 11772 / 52343, 1289 ins, 1599 del, 8884 sub ] exp/tri6b/decode_fglarge_test_other/wer_15
-%WER 23.46 [ 12278 / 52343, 1341 ins, 1690 del, 9247 sub ] exp/tri6b/decode_tglarge_test_other/wer_16
-%WER 26.87 [ 14063 / 52343, 1334 ins, 2170 del, 10559 sub ] exp/tri6b/decode_tgmed_test_other/wer_14
-%WER 28.90 [ 15128 / 52343, 1239 ins, 2681 del, 11208 sub ] exp/tri6b/decode_tgsmall_test_other/wer_15
+%WER 20.53 [ 10460 / 50948, 1270 ins, 1258 del, 7932 sub ] exp/tri6b/decode_fglarge_dev_other/wer_15_0.5
+%WER 21.31 [ 10857 / 50948, 1299 ins, 1376 del, 8182 sub ] exp/tri6b/decode_tglarge_dev_other/wer_16_0.5
+%WER 24.27 [ 12365 / 50948, 1401 ins, 1558 del, 9406 sub ] exp/tri6b/decode_tgmed_dev_other/wer_16_0.0
+%WER 26.14 [ 13317 / 50948, 1292 ins, 1977 del, 10048 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_17_0.0
 
+%WER 21.79 [ 11405 / 52343, 1263 ins, 1497 del, 8645 sub ] exp/tri6b/decode_fglarge_test_other/wer_15_0.5
+%WER 22.78 [ 11923 / 52343, 1370 ins, 1483 del, 9070 sub ] exp/tri6b/decode_tglarge_test_other/wer_14_0.5
+%WER 25.67 [ 13439 / 52343, 1481 ins, 1767 del, 10191 sub ] exp/tri6b/decode_tgmed_test_other/wer_15_0.0
+%WER 27.79 [ 14545 / 52343, 1371 ins, 2250 del, 10924 sub ] exp/tri6b/decode_tgsmall_test_other/wer_16_0.0
 
 ### p-norm DNN trained on "train-clean-100"
 %WER 5.93 [ 3228 / 54402, 486 ins, 330 del, 2412 sub ] exp/nnet5a_clean_100_gpu/decode_fglarge_dev_clean/wer_13
diff --git a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
index 16d42f1d3..a3a176b63 100755
--- a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
+++ b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
@@ -64,16 +64,16 @@ fi
 
 for test in test_clean test_other dev_clean dev_other; do
   steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
-    --transform-dir exp/tri5b/decode_pp_tgsmall_$test \
-    exp/tri5b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
-  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-    data/$test $dir/decode_pp_{tgsmall,tgmed}_$test  || exit 1;
+    --transform-dir exp/tri5b/decode_tgsmall_$test \
+    exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
+  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+    data/$test $dir/decode_{tgsmall,tgmed}_$test  || exit 1;
   steps/lmrescore_const_arpa.sh \
-    --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-    data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
+    --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+    data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
   steps/lmrescore_const_arpa.sh \
-    --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-    data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
+    --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+    data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
 done
 
 exit 0;
diff --git a/egs/librispeech/s5/local/nnet2/run_7a_960.sh b/egs/librispeech/s5/local/nnet2/run_7a_960.sh
index be9008cb3..7c7ef92f9 100755
--- a/egs/librispeech/s5/local/nnet2/run_7a_960.sh
+++ b/egs/librispeech/s5/local/nnet2/run_7a_960.sh
@@ -64,16 +64,16 @@ fi
 
 for test in test_clean test_other dev_clean dev_other; do
   steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
-    --transform-dir exp/tri6b/decode_pp_tgsmall_$test \
-    exp/tri6b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
-  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-    data/$test $dir/decode_pp_{tgsmall,tgmed}_$test  || exit 1;
+    --transform-dir exp/tri6b/decode_tgsmall_$test \
+    exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
+  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+    data/$test $dir/decode_{tgsmall,tgmed}_$test  || exit 1;
   steps/lmrescore_const_arpa.sh \
-    --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-    data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
+    --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+    data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
   steps/lmrescore_const_arpa.sh \
-    --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-    data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
+    --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+    data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
 done
 
 exit 0;
diff --git a/egs/librispeech/s5/local/online/run_nnet2.sh b/egs/librispeech/s5/local/online/run_nnet2.sh
index 00f1c632a..10cb5511a 100755
--- a/egs/librispeech/s5/local/online/run_nnet2.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2.sh
@@ -83,12 +83,12 @@ if [ $stage -le 9 ]; then
   for test in dev_clean dev_other; do
     steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
       --online-ivector-dir exp/nnet2_online/ivectors_${test} \
-      exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed}  || exit 1;
+      exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed}  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1;
   done
 fi
 
@@ -105,15 +105,15 @@ if [ $stage -le 11 ]; then
   # previous utterances of the same speaker.
   for test in test_clean test_other dev_clean dev_other; do
     steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-      exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}  || exit 1;
+      exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
   done
 fi
 
@@ -122,12 +122,12 @@ if [ $stage -le 12 ]; then
   # without carrying forward speaker information.
   for test in dev_clean dev_other; do
     steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-      --per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt  || exit 1;
+      --per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1;
   done
 fi
 
@@ -137,13 +137,13 @@ if [ $stage -le 13 ]; then
   # of the utterance while computing the iVector (--online false)
   for test in dev_clean dev_other; do
     steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-      --per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \
-        ${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline  || exit 1;
+      --per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
+        ${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
   done
 fi
 
@@ -155,8 +155,8 @@ if [ $stage -le 14 ]; then
   test=dev_clean
   steps/online/nnet2/decode.sh --threaded true \
     --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-    --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
-    ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded || exit 1;
+    --per-utt true exp/tri6b/graph_tgsmall data/$test \
+    ${dir}_online/decode_${test}_tgsmall_utt_threaded || exit 1;
 fi
 
 if [ $stage -le 15 ]; then
@@ -164,8 +164,8 @@ if [ $stage -le 15 ]; then
   test=dev_clean
   steps/online/nnet2/decode.sh --threaded true --do-endpointing true \
     --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-    --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
-    ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1;
+    --per-utt true exp/tri6b/graph_tgsmall data/$test \
+    ${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1;
 fi
 
 exit 0;
diff --git a/egs/librispeech/s5/local/online/run_nnet2_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_disc.sh
index 7f6a54ea1..3314f2507 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_disc.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_disc.sh
@@ -135,14 +135,14 @@ if [ $stage -le 5 ]; then
     for test in test_clean test_other dev_clean dev_other; do
       (
         steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
-          --iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1
-        steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-          data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed}  || exit 1;
+          --iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
+        steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+          data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed}  || exit 1;
         steps/lmrescore_const_arpa.sh \
-          --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
+          --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
           data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
         steps/lmrescore_const_arpa.sh \
-          --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
+          --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
           data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
       ) &
     done
diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms.sh b/egs/librispeech/s5/local/online/run_nnet2_ms.sh
index 868ac4b42..91ace6c22 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_ms.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_ms.sh
@@ -95,15 +95,15 @@ if [ $stage -le 9 ]; then
   for test in dev_clean dev_other; do
     steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
       --online-ivector-dir exp/nnet2_online/ivectors_${test} \
-      exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed}  || exit 1;
+      exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed}  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test $dir/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test $dir/decode_${test}_{tgsmall,fglarge} || exit 1;
   done
 fi
 
@@ -120,15 +120,15 @@ if [ $stage -le 11 ]; then
   # previous utterances of the same speaker.
   for test in dev_clean dev_other; do
     steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-      exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}  || exit 1;
+      exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
   done
 fi
 
@@ -137,15 +137,15 @@ if [ $stage -le 12 ]; then
   # without carrying forward speaker information.
   for test in dev_clean dev_other; do
     steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-      --per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt  || exit 1;
+      --per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt || exit 1;
   done
 fi
 
@@ -155,16 +155,16 @@ if [ $stage -le 13 ]; then
   # of the utterance while computing the iVector (--online false)
   for test in test_clean test_other dev_clean dev_other; do
     steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-      --per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \
-        ${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline  || exit 1;
+      --per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
+        ${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
   done
 fi
 
@@ -176,8 +176,8 @@ if [ $stage -le 14 ]; then
   test=dev_clean
   steps/online/nnet2/decode.sh --threaded true \
     --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-    --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
-    ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded || exit 1;
+    --per-utt true exp/tri6b/graph_tgsmall data/$test \
+    ${dir}_online/decode_${test}_tgsmall_utt_threaded || exit 1;
 fi
 
 if [ $stage -le 15 ]; then
@@ -185,8 +185,8 @@ if [ $stage -le 15 ]; then
   test=dev_clean
   steps/online/nnet2/decode.sh --threaded true --do-endpointing true \
     --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-    --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
-    ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1;
+    --per-utt true exp/tri6b/graph_tgsmall data/$test \
+    ${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1;
 fi
 
 if [ $stage -le 16 ]; then
@@ -195,8 +195,8 @@ if [ $stage -le 16 ]; then
   test=dev_clean
   steps/online/nnet2/decode.sh --threaded true  --silence-weight 0.0 \
     --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-    --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
-    ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_sil0.0 || exit 1;
+    --per-utt true exp/tri6b/graph_tgsmall data/$test \
+    ${dir}_online/decode_${test}_tgsmall_utt_threaded_sil0.0 || exit 1;
 fi
 
 exit 0;
diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
index 0b6637e38..0ff05eec4 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
@@ -136,14 +136,14 @@ if [ $stage -le 5 ]; then
     for test in test_clean test_other dev_clean dev_other; do
       (
         steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
-          --iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1
-        steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-          data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed}  || exit 1;
+          --iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
+        steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+          data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed}  || exit 1;
         steps/lmrescore_const_arpa.sh \
-          --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-          data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
+          --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+          data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
         steps/lmrescore_const_arpa.sh \
-          --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-          data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
+          --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+          data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
       ) &
     done
diff --git a/egs/librispeech/s5/local/run_data_cleaning.sh b/egs/librispeech/s5/local/run_data_cleaning.sh
index 55c2b2f97..3300ad4c4 100755
--- a/egs/librispeech/s5/local/run_data_cleaning.sh
+++ b/egs/librispeech/s5/local/run_data_cleaning.sh
@@ -16,7 +16,7 @@ set -e
 
 
 if [ $stage -le 1 ]; then
-  steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang_pp \
+  steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang \
     exp/tri6b exp/tri6b_cleanup
 fi
 
@@ -28,21 +28,21 @@ fi
 
 if [ $stage -le 3 ]; then
   steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
-    data/train.thresh$thresh data/lang_pp exp/tri6b exp/tri6b_ali_$thresh
+    data/train.thresh$thresh data/lang exp/tri6b exp/tri6b_ali_$thresh
 fi
 
 if [ $stage -le 4 ]; then
   steps/train_sat.sh  --cmd "$train_cmd" \
-    7000 150000 data/train_960_thresh$thresh data/lang_pp exp/tri6b_ali_$thresh  exp/tri6b_$thresh || exit 1;
+    7000 150000 data/train_960_thresh$thresh data/lang exp/tri6b_ali_$thresh  exp/tri6b_$thresh || exit 1;
 fi
 
 if [ $stage -le 5 ]; then
-  utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_pp_tgsmall || exit 1
+  utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_tgsmall || exit 1
   for test in dev_clean dev_other; do
     steps/decode_fmllr.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
-      exp/tri6b_$thresh/graph_pp_tgsmall data/$test exp/tri6b_$thresh/decode_pp_tgsmall_$test || exit 1
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test exp/tri6b_$thresh/decode_pp_{tgsmall,tgmed}_$test  || exit 1;
+      exp/tri6b_$thresh/graph_tgsmall data/$test exp/tri6b_$thresh/decode_tgsmall_$test || exit 1
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test exp/tri6b_$thresh/decode_{tgsmall,tgmed}_$test  || exit 1;
   done
 fi
 
diff --git a/egs/librispeech/s5/local/run_rnnlm.sh b/egs/librispeech/s5/local/run_rnnlm.sh
index c3991bd4c..ea9b997dc 100755
--- a/egs/librispeech/s5/local/run_rnnlm.sh
+++ b/egs/librispeech/s5/local/run_rnnlm.sh
@@ -72,13 +72,13 @@ if [ $stage -le 3 ]; then
   echo "$0: Performing RNNLM rescoring on tri6b decoding results"
   for lm in tgsmall tgmed; do
     for devset in dev_clean dev_other; do
-      sourcedir=exp/tri6b/decode_pp_${lm}_${devset}
+      sourcedir=exp/tri6b/decode_${lm}_${devset}
       resultsdir=${sourcedir}_rnnlm_h${hidden}_me${maxent_order}-${maxent_size}
-      steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 0.5 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
+      steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 0.5 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
       cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.25
       cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.75
-      steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.25 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
-      steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.75 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
+      steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.25 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
+      steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.75 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
     done
   done
 fi
diff --git a/egs/librispeech/s5/local/score.sh b/egs/librispeech/s5/local/score.sh
index 2a7c782b4..f6359c189 100755
--- a/egs/librispeech/s5/local/score.sh
+++ b/egs/librispeech/s5/local/score.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
+#           2014  Guoguo Chen
 # Apache 2.0
 
 [ -f ./path.sh ] && . ./path.sh
@@ -9,7 +10,7 @@ cmd=run.pl
 stage=0
 decode_mbr=true
 reverse=false
-word_ins_penalty=0.0
+word_ins_penalty=0.0,0.5,1.0
 min_lmwt=9
 max_lmwt=20
 #end configuration section.
@@ -43,25 +44,31 @@ mkdir -p $dir/scoring/log
 
 cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
 
-$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
-  lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
-  lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
-  lattice-best-path --word-symbol-table=$symtab \
-    ark:- ark,t:$dir/scoring/LMWT.tra || exit 1;
+for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \
+    lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+    lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+    lattice-best-path --word-symbol-table=$symtab \
+      ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1;
+done
 
 if $reverse; then
-  for lmwt in `seq $min_lmwt $max_lmwt`; do
-    mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig
-    awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
-       <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra
+  for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+    for lmwt in `seq $min_lmwt $max_lmwt`; do
+      mv $dir/scoring/$lmwt.$wip.tra $dir/scoring/$lmwt.$wip.tra.orig
+      awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
+        <$dir/scoring/$lmwt.$wip.tra.orig >$dir/scoring/$lmwt.$wip.tra
+    done
   done
 fi
 
 # Note: the double level of quoting for the sed command
-$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
-   cat $dir/scoring/LMWT.tra \| \
+for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \
+    cat $dir/scoring/LMWT.$wip.tra \| \
     utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
     compute-wer --text --mode=present \
-     ark:$dir/scoring/test_filt.txt  ark,p:- ">&" $dir/wer_LMWT || exit 1;
+    ark:$dir/scoring/test_filt.txt  ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1;
+done
 
 exit 0;
diff --git a/egs/librispeech/s5/run.sh b/egs/librispeech/s5/run.sh
index f6ecbfd8a..02880f374 100755
--- a/egs/librispeech/s5/run.sh
+++ b/egs/librispeech/s5/run.sh
@@ -49,17 +49,18 @@ done
 # when "--stage 3" option is used below we skip the G2P steps, and use the
 # lexicon we have already downloaded from openslr.org/11/
 local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \
-   data/local/lm data/local/lm data/local/dict || exit 1
+   data/local/lm data/local/lm data/local/dict_nosp || exit 1
 
-utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;
+utils/prepare_lang.sh data/local/dict_nosp \
+  "<SPOKEN_NOISE>" data/local/lang_tmp_nosp data/lang_nosp || exit 1;
 
-local/format_lms.sh data/local/lm || exit 1
+local/format_lms.sh --src-dir data/lang_nosp data/local/lm || exit 1
 
 # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
-utils/build_const_arpa_lm.sh \
-  data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
-utils/build_const_arpa_lm.sh \
-  data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1;
+utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \
+  data/lang_nosp data/lang_nosp_test_tglarge || exit 1;
+utils/build_const_arpa_lm.sh data/local/lm/lm_fglarge.arpa.gz \
+  data/lang_nosp data/lang_nosp_test_fglarge || exit 1;
 
 mfccdir=mfcc
 # spread the mfccs over various machines, as this data-set is quite large.
@@ -86,97 +87,143 @@ utils/subset_data_dir.sh data/train_clean_100 10000 data/train_10k
 
 # train a monophone system
 steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \
-  data/train_2kshort data/lang exp/mono || exit 1;
+  data/train_2kshort data/lang_nosp exp/mono || exit 1;
 
 # decode using the monophone model
 (
-  utils/mkgraph.sh --mono data/lang_test_tgsmall exp/mono exp/mono/graph_tgsmall || exit 1
+  utils/mkgraph.sh --mono data/lang_nosp_test_tgsmall \
+    exp/mono exp/mono/graph_nosp_tgsmall || exit 1
   for test in test_clean test_other dev_clean dev_other; do
-    steps/decode.sh --nj 20 --cmd "$decode_cmd" \
-      exp/mono/graph_tgsmall data/$test exp/mono/decode_tgsmall_$test
+    steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/mono/graph_nosp_tgsmall \
+      data/$test exp/mono/decode_nosp_tgsmall_$test || exit 1
   done
 )&
 
 steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
-  data/train_5k data/lang exp/mono exp/mono_ali_5k
+  data/train_5k data/lang_nosp exp/mono exp/mono_ali_5k
 
 # train a first delta + delta-delta triphone system on a subset of 5000 utterances
 steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
-    2000 10000 data/train_5k data/lang exp/mono_ali_5k exp/tri1 || exit 1;
+    2000 10000 data/train_5k data/lang_nosp exp/mono_ali_5k exp/tri1 || exit 1;
 
 # decode using the tri1 model
 (
-  utils/mkgraph.sh data/lang_test_tgsmall exp/tri1 exp/tri1/graph_tgsmall || exit 1;
+  utils/mkgraph.sh data/lang_nosp_test_tgsmall \
+    exp/tri1 exp/tri1/graph_nosp_tgsmall || exit 1;
   for test in test_clean test_other dev_clean dev_other; do
-    steps/decode.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri1/graph_tgsmall data/$test exp/tri1/decode_tgsmall_$test || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
-      data/$test exp/tri1/decode_{tgsmall,tgmed}_$test  || exit 1;
+    steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \
+      data/$test exp/tri1/decode_nosp_tgsmall_$test || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
+      data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
-      data/$test exp/tri1/decode_{tgsmall,tglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
+      data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
   done
 )&
 
 steps/align_si.sh --nj 10 --cmd "$train_cmd" \
-  data/train_10k data/lang exp/tri1 exp/tri1_ali_10k || exit 1;
+  data/train_10k data/lang_nosp exp/tri1 exp/tri1_ali_10k || exit 1;
 
 
 # train an LDA+MLLT system.
 steps/train_lda_mllt.sh --cmd "$train_cmd" \
-   --splice-opts "--left-context=3 --right-context=3" \
-   2500 15000 data/train_10k data/lang exp/tri1_ali_10k exp/tri2b || exit 1;
+   --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
+   data/train_10k data/lang_nosp exp/tri1_ali_10k exp/tri2b || exit 1;
 
 # decode using the LDA+MLLT model
 (
-  utils/mkgraph.sh data/lang_test_tgsmall exp/tri2b exp/tri2b/graph_tgsmall || exit 1;
+  utils/mkgraph.sh data/lang_nosp_test_tgsmall \
+    exp/tri2b exp/tri2b/graph_nosp_tgsmall || exit 1;
   for test in test_clean test_other dev_clean dev_other; do
-    steps/decode.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri2b/graph_tgsmall data/$test exp/tri2b/decode_tgsmall_$test || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
-      data/$test exp/tri2b/decode_{tgsmall,tgmed}_$test  || exit 1;
+    steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \
+      data/$test exp/tri2b/decode_nosp_tgsmall_$test || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
+      data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
-      data/$test exp/tri2b/decode_{tgsmall,tglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
+      data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
   done
 )&
 
 # Align a 10k utts subset using the tri2b model
-steps/align_si.sh  --nj 10 --cmd "$train_cmd" \
-  --use-graphs true data/train_10k data/lang exp/tri2b exp/tri2b_ali_10k || exit 1;
+steps/align_si.sh  --nj 10 --cmd "$train_cmd" --use-graphs true \
+  data/train_10k data/lang_nosp exp/tri2b exp/tri2b_ali_10k || exit 1;
 
 # Train tri3b, which is LDA+MLLT+SAT on 10k utts
-steps/train_sat.sh --cmd "$train_cmd" \
-  2500 15000 data/train_10k data/lang exp/tri2b_ali_10k exp/tri3b || exit 1;
+steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
+  data/train_10k data/lang_nosp exp/tri2b_ali_10k exp/tri3b || exit 1;
 
 # decode using the tri3b model
 (
-  utils/mkgraph.sh data/lang_test_tgsmall exp/tri3b exp/tri3b/graph_tgsmall || exit 1;
+  utils/mkgraph.sh data/lang_nosp_test_tgsmall \
+    exp/tri3b exp/tri3b/graph_nosp_tgsmall || exit 1;
   for test in test_clean test_other dev_clean dev_other; do
     steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri3b/graph_tgsmall data/$test exp/tri3b/decode_tgsmall_$test || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
-      data/$test exp/tri3b/decode_{tgsmall,tgmed}_$test  || exit 1;
+      exp/tri3b/graph_nosp_tgsmall data/$test \
+      exp/tri3b/decode_nosp_tgsmall_$test || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
+      data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
-      data/$test exp/tri3b/decode_{tgsmall,tglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
+      data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
   done
 )&
 
 # align the entire train_clean_100 subset using the tri3b model
 steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
-  data/train_clean_100 data/lang exp/tri3b exp/tri3b_ali_clean_100 || exit 1;
+  data/train_clean_100 data/lang_nosp \
+  exp/tri3b exp/tri3b_ali_clean_100 || exit 1;
 
 # train another LDA+MLLT+SAT system on the entire 100 hour subset
-steps/train_sat.sh  --cmd "$train_cmd" \
-  4200 40000 data/train_clean_100 data/lang exp/tri3b_ali_clean_100 exp/tri4b || exit 1;
+steps/train_sat.sh  --cmd "$train_cmd" 4200 40000 \
+  data/train_clean_100 data/lang_nosp \
+  exp/tri3b_ali_clean_100 exp/tri4b || exit 1;
 
 # decode using the tri4b model
 (
-  utils/mkgraph.sh data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1;
+  utils/mkgraph.sh data/lang_nosp_test_tgsmall \
+    exp/tri4b exp/tri4b/graph_nosp_tgsmall || exit 1;
   for test in test_clean test_other dev_clean dev_other; do
     steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri4b/graph_tgsmall data/$test exp/tri4b/decode_tgsmall_$test || exit 1;
+      exp/tri4b/graph_nosp_tgsmall data/$test \
+      exp/tri4b/decode_nosp_tgsmall_$test || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
+      data/$test exp/tri4b/decode_nosp_{tgsmall,tgmed}_$test  || exit 1;
+    steps/lmrescore_const_arpa.sh \
+      --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
+      data/$test exp/tri4b/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
+    steps/lmrescore_const_arpa.sh \
+      --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,fglarge} \
+      data/$test exp/tri4b/decode_nosp_{tgsmall,fglarge}_$test || exit 1;
+  done
+)&
+
+# Now we compute the pronunciation and silence probabilities from training data,
+# and re-create the lang directory.
+steps/get_prons.sh --cmd "$train_cmd" \
+  data/train_clean_100 data/lang_nosp exp/tri4b
+utils/dict_dir_add_pronprobs.sh --max-normalize true \
+  data/local/dict_nosp \
+  exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \
+  exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict || exit 1
+
+utils/prepare_lang.sh data/local/dict \
+  "<SPOKEN_NOISE>" data/local/lang_tmp data/lang
+local/format_lms.sh --src-dir data/lang data/local/lm
+
+utils/build_const_arpa_lm.sh \
+  data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
+utils/build_const_arpa_lm.sh \
+  data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1;
+
+# decode using the tri4b model with pronunciation and silence probabilities
+(
+  utils/mkgraph.sh \
+    data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1;
+  for test in test_clean test_other dev_clean dev_other; do
+    steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
+      exp/tri4b/graph_tgsmall data/$test \
+      exp/tri4b/decode_tgsmall_$test || exit 1;
     steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
       data/$test exp/tri4b/decode_{tgsmall,tgmed}_$test  || exit 1;
     steps/lmrescore_const_arpa.sh \
@@ -199,75 +246,41 @@ local/nnet2/run_5a_clean_100.sh || exit 1
 local/download_and_untar.sh $data $data_url train-clean-360 || exit 1;
 
 # now add the "clean-360" subset to the mix ...
-local/data_prep.sh $data/LibriSpeech/train-clean-360 data/train_clean_360 || exit 1
+local/data_prep.sh \
+  $data/LibriSpeech/train-clean-360 data/train_clean_360 || exit 1
 steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/train_clean_360 \
   exp/make_mfcc/train_clean_360 $mfccdir || exit 1
-steps/compute_cmvn_stats.sh data/train_clean_360 exp/make_mfcc/train_clean_360 $mfccdir || exit 1
+steps/compute_cmvn_stats.sh \
+  data/train_clean_360 exp/make_mfcc/train_clean_360 $mfccdir || exit 1
 
 # ... and then combine the two sets into a 460 hour one
-utils/combine_data.sh data/train_clean_460 data/train_clean_100 data/train_clean_360 || exit 1
+utils/combine_data.sh \
+  data/train_clean_460 data/train_clean_100 data/train_clean_360 || exit 1
 
 # align the new, combined set, using the tri4b model
 steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
   data/train_clean_460 data/lang exp/tri4b exp/tri4b_ali_clean_460 || exit 1;
 
-# At this point we estimate the probability of the pronunciation variants for
-# the words in our lexicon (of course some rare words won't be present in the
-# training data, so their probabilities will be left unchanged). These pronunciation
-# probabilities will be used in the subsequent _decoding_ steps.
-
-# count how many times every pronunciation variant was used in the training data
-steps/get_prons.sh --cmd "$train_cmd"  data/train_clean_460 data/lang exp/tri4b_ali_clean_460
-
-# use the counts from the above step, to calculate (smoothed) pronunciation probabilities
-utils/dict_dir_add_pronprobs.sh data/local/dict exp/tri4b_ali_clean_460/pron_counts_nowb.txt data/local/dict_pp
-
-# prepare a new "lang" directories to be used for the pronunciation probability setup
-utils/prepare_lang.sh data/local/dict_pp "<SPOKEN_NOISE>" data/local/lang_tmp_pp data/lang_pp
-local/format_lms.sh --src-dir data/lang_pp data/local/lm
-
-# regenerate the full 3-gram and 4-gram directories
-utils/build_const_arpa_lm.sh \
-  data/local/lm/lm_tglarge.arpa.gz data/lang_pp data/lang_pp_test_tglarge || exit 1;
-utils/build_const_arpa_lm.sh \
-  data/local/lm/lm_fglarge.arpa.gz data/lang_pp data/lang_pp_test_fglarge || exit 1;
-
-# decode again using the tri4b model, but this time with pronunciation probability
-(
-  utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri4b exp/tri4b/graph_pp_tgsmall || exit 1;
-  for test in test_clean test_other dev_clean dev_other; do
-    steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri4b/graph_pp_tgsmall data/$test exp/tri4b/decode_pp_tgsmall_$test || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test exp/tri4b/decode_pp_{tgsmall,tgmed}_$test  || exit 1;
-    steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test exp/tri4b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
-    steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test exp/tri4b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
-  done
-)&
-
-
 # create a larger SAT model, trained on the 460 hours of data.
-steps/train_sat.sh  --cmd "$train_cmd" \
-  5000 100000 data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1;
+steps/train_sat.sh  --cmd "$train_cmd" 5000 100000 \
+  data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1;
 
 # decode using the tri5b model
 (
-  utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri5b exp/tri5b/graph_pp_tgsmall || exit 1;
+  utils/mkgraph.sh data/lang_test_tgsmall \
+    exp/tri5b exp/tri5b/graph_tgsmall || exit 1;
   for test in test_clean test_other dev_clean dev_other; do
     steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri5b/graph_pp_tgsmall data/$test exp/tri5b/decode_pp_tgsmall_$test || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test exp/tri5b/decode_pp_{tgsmall,tgmed}_$test  || exit 1;
+      exp/tri5b/graph_tgsmall data/$test \
+      exp/tri5b/decode_tgsmall_$test || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test exp/tri5b/decode_{tgsmall,tgmed}_$test  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test exp/tri5b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test exp/tri5b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test exp/tri5b/decode_{tgsmall,fglarge}_$test || exit 1;
   done
 )&
 
@@ -277,13 +290,16 @@ local/nnet2/run_6a_clean_460.sh || exit 1
 local/download_and_untar.sh $data $data_url train-other-500 || exit 1;
 
 # prepare the 500 hour subset.
-local/data_prep.sh $data/LibriSpeech/train-other-500 data/train_other_500 || exit 1
+local/data_prep.sh \
+  $data/LibriSpeech/train-other-500 data/train_other_500 || exit 1
 steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/train_other_500 \
   exp/make_mfcc/train_other_500 $mfccdir || exit 1
-steps/compute_cmvn_stats.sh data/train_other_500 exp/make_mfcc/train_other_500 $mfccdir || exit 1
+steps/compute_cmvn_stats.sh \
+  data/train_other_500 exp/make_mfcc/train_other_500 $mfccdir || exit 1
 
 # combine all the data
-utils/combine_data.sh data/train_960 data/train_clean_460 data/train_other_500 || exit 1
+utils/combine_data.sh \
+  data/train_960 data/train_clean_460 data/train_other_500 || exit 1
 
 steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
   data/train_960 data/lang exp/tri5b exp/tri5b_ali_960 || exit 1;
@@ -295,18 +311,19 @@ steps/train_quick.sh --cmd "$train_cmd" \
 
 # decode using the tri6b model
 (
-  utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b exp/tri6b/graph_pp_tgsmall || exit 1;
+  utils/mkgraph.sh data/lang_test_tgsmall \
+    exp/tri6b exp/tri6b/graph_tgsmall || exit 1;
   for test in test_clean test_other dev_clean dev_other; do
     steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
-      exp/tri6b/graph_pp_tgsmall data/$test exp/tri6b/decode_pp_tgsmall_$test || exit 1;
-    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
-      data/$test exp/tri6b/decode_pp_{tgsmall,tgmed}_$test  || exit 1;
+      exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1;
+    steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+      data/$test exp/tri6b/decode_{tgsmall,tgmed}_$test  || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
-      data/$test exp/tri6b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+      data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1;
     steps/lmrescore_const_arpa.sh \
-      --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
-      data/$test exp/tri6b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
+      --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+      data/$test exp/tri6b/decode_{tgsmall,fglarge}_$test || exit 1;
   done
 )&
 
@@ -333,3 +350,6 @@ local/nnet2/run_7a_960.sh || exit 1
 # ## The following is an older version of the online-nnet2 recipe, without "multi-splice".  It's faster
 # ## to train but slightly worse.
 # # local/online/run_nnet2.sh
+
+# Wait for decodings in the background
+wait