From 1ae487048d7afebe6d7c6d1f4d60b99c3066086f Mon Sep 17 00:00:00 2001 From: Guoguo Chen Date: Thu, 16 Apr 2015 03:18:26 +0000 Subject: [PATCH] trunk: adding silprob to librispeech recipe git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5008 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8 --- egs/librispeech/s5/RESULTS | 100 ++++---- .../s5/local/nnet2/run_6a_clean_460.sh | 16 +- egs/librispeech/s5/local/nnet2/run_7a_960.sh | 16 +- egs/librispeech/s5/local/online/run_nnet2.sh | 54 ++-- .../s5/local/online/run_nnet2_disc.sh | 10 +- .../s5/local/online/run_nnet2_ms.sh | 70 +++--- .../s5/local/online/run_nnet2_ms_disc.sh | 14 +- egs/librispeech/s5/local/run_data_cleaning.sh | 14 +- egs/librispeech/s5/local/run_rnnlm.sh | 8 +- egs/librispeech/s5/local/score.sh | 33 ++- egs/librispeech/s5/run.sh | 236 ++++++++++-------- 11 files changed, 300 insertions(+), 271 deletions(-) diff --git a/egs/librispeech/s5/RESULTS b/egs/librispeech/s5/RESULTS index b53ecb2b9..885b8bcd9 100644 --- a/egs/librispeech/s5/RESULTS +++ b/egs/librispeech/s5/RESULTS @@ -8,70 +8,72 @@ # whereas "dev-other" and "test-other" sets contain more challenging speech ### SAT GMM model trained on the "train-clean-100" set (100 hours "clean" speech) -%WER 8.19 [ 4453 / 54402, 632 ins, 480 del, 3341 sub ] exp/tri4b/decode_fglarge_dev_clean/wer_16 -%WER 8.62 [ 4689 / 54402, 632 ins, 525 del, 3532 sub ] exp/tri4b/decode_tglarge_dev_clean/wer_17 -%WER 10.62 [ 5778 / 54402, 659 ins, 752 del, 4367 sub ] exp/tri4b/decode_tgmed_dev_clean/wer_15 -%WER 12.11 [ 6590 / 54402, 689 ins, 964 del, 4937 sub ] exp/tri4b/decode_tgsmall_dev_clean/wer_15 +### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri4b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done +%WER 8.20 [ 4459 / 54402, 695 ins, 427 del, 3337 sub ] exp/tri4b/decode_fglarge_dev_clean/wer_14_0.5 +%WER 8.60 [ 4677 / 54402, 763 ins, 399 del, 3515 sub ] 
exp/tri4b/decode_tglarge_dev_clean/wer_16_0.0 +%WER 10.39 [ 5655 / 54402, 711 ins, 648 del, 4296 sub ] exp/tri4b/decode_tgmed_dev_clean/wer_16_0.0 +%WER 11.69 [ 6361 / 54402, 743 ins, 808 del, 4810 sub ] exp/tri4b/decode_tgsmall_dev_clean/wer_16_0.0 -%WER 9.32 [ 4899 / 52576, 749 ins, 496 del, 3654 sub ] exp/tri4b/decode_fglarge_test_clean/wer_16 -%WER 9.60 [ 5047 / 52576, 731 ins, 540 del, 3776 sub ] exp/tri4b/decode_tglarge_test_clean/wer_17 -%WER 11.79 [ 6197 / 52576, 717 ins, 798 del, 4682 sub ] exp/tri4b/decode_tgmed_test_clean/wer_16 -%WER 13.18 [ 6927 / 52576, 718 ins, 998 del, 5211 sub ] exp/tri4b/decode_tgsmall_test_clean/wer_16 +%WER 9.10 [ 4786 / 52576, 708 ins, 464 del, 3614 sub ] exp/tri4b/decode_fglarge_test_clean/wer_17_0.5 +%WER 9.43 [ 4958 / 52576, 751 ins, 492 del, 3715 sub ] exp/tri4b/decode_tglarge_test_clean/wer_15_0.5 +%WER 11.36 [ 5975 / 52576, 799 ins, 642 del, 4534 sub ] exp/tri4b/decode_tgmed_test_clean/wer_17_0.0 +%WER 12.64 [ 6643 / 52576, 795 ins, 817 del, 5031 sub ] exp/tri4b/decode_tgsmall_test_clean/wer_17_0.0 -%WER 29.31 [ 14934 / 50948, 1536 ins, 2215 del, 11183 sub ] exp/tri4b/decode_fglarge_dev_other/wer_18 -%WER 30.25 [ 15412 / 50948, 1655 ins, 2188 del, 11569 sub ] exp/tri4b/decode_tglarge_dev_other/wer_17 -%WER 33.01 [ 16817 / 50943, 1358 ins, 3023 del, 12436 sub ] [PARTIAL] exp/tri4b/decode_tgmed_dev_other/wer_19 -%WER 35.06 [ 17862 / 50948, 1378 ins, 3409 del, 13075 sub ] exp/tri4b/decode_tgsmall_dev_other/wer_18 +%WER 28.45 [ 14495 / 50948, 1574 ins, 1925 del, 10996 sub ] exp/tri4b/decode_fglarge_dev_other/wer_17_0.5 +%WER 29.24 [ 14895 / 50948, 1610 ins, 2041 del, 11244 sub ] exp/tri4b/decode_tglarge_dev_other/wer_19_0.5 +%WER 32.04 [ 16325 / 50948, 1753 ins, 2261 del, 12311 sub ] exp/tri4b/decode_tgmed_dev_other/wer_18_0.0 +%WER 33.97 [ 17305 / 50948, 1681 ins, 2661 del, 12963 sub ] exp/tri4b/decode_tgsmall_dev_other/wer_18_0.0 -%WER 31.47 [ 16470 / 52343, 1637 ins, 2624 del, 12209 sub ] 
exp/tri4b/decode_fglarge_test_other/wer_17 -%WER 32.35 [ 16933 / 52343, 1792 ins, 2638 del, 12503 sub ] exp/tri4b/decode_tglarge_test_other/wer_17 -%WER 35.08 [ 18363 / 52343, 1469 ins, 3566 del, 13328 sub ] exp/tri4b/decode_tgmed_test_other/wer_18 -%WER 36.83 [ 19278 / 52343, 1350 ins, 3976 del, 13952 sub ] exp/tri4b/decode_tgsmall_test_other/wer_18 +%WER 30.33 [ 15875 / 52343, 1639 ins, 2375 del, 11861 sub ] exp/tri4b/decode_fglarge_test_other/wer_17_0.5 +%WER 31.07 [ 16264 / 52343, 1728 ins, 2424 del, 12112 sub ] exp/tri4b/decode_tglarge_test_other/wer_18_0.5 +%WER 33.69 [ 17633 / 52343, 1755 ins, 2766 del, 13112 sub ] exp/tri4b/decode_tgmed_test_other/wer_18_0.0 +%WER 35.62 [ 18646 / 52343, 1758 ins, 3039 del, 13849 sub ] exp/tri4b/decode_tgsmall_test_other/wer_17_0.0 ### SAT GMM model trained on the combined "train-clean-100" + "train-clean-360" set (460 hours "clean" speech) -%WER 7.26 [ 3949 / 54402, 543 ins, 420 del, 2986 sub ] exp/tri5b/decode_fglarge_dev_clean/wer_16 -%WER 7.65 [ 4162 / 54402, 592 ins, 436 del, 3134 sub ] exp/tri5b/decode_tglarge_dev_clean/wer_15 -%WER 9.61 [ 5227 / 54402, 591 ins, 684 del, 3952 sub ] exp/tri5b/decode_tgmed_dev_clean/wer_15 -%WER 10.87 [ 5914 / 54402, 584 ins, 863 del, 4467 sub ] exp/tri5b/decode_tgsmall_dev_clean/wer_15 +### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri5b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done +%WER 7.05 [ 3835 / 54402, 588 ins, 370 del, 2877 sub ] exp/tri5b/decode_fglarge_dev_clean/wer_15_0.5 +%WER 7.49 [ 4077 / 54402, 623 ins, 376 del, 3078 sub ] exp/tri5b/decode_tglarge_dev_clean/wer_14_0.5 +%WER 9.38 [ 5104 / 54402, 701 ins, 533 del, 3870 sub ] exp/tri5b/decode_tgmed_dev_clean/wer_15_0.0 +%WER 10.51 [ 5719 / 54402, 720 ins, 652 del, 4347 sub ] exp/tri5b/decode_tgsmall_dev_clean/wer_15_0.0 -%WER 8.31 [ 4369 / 52576, 638 ins, 449 del, 3282 sub ] exp/tri5b/decode_fglarge_test_clean/wer_18 -%WER 8.55 [ 4496 / 
52576, 673 ins, 444 del, 3379 sub ] exp/tri5b/decode_tglarge_test_clean/wer_16 -%WER 10.53 [ 5537 / 52576, 645 ins, 719 del, 4173 sub ] exp/tri5b/decode_tgmed_test_clean/wer_16 -%WER 11.71 [ 6159 / 52576, 638 ins, 869 del, 4652 sub ] exp/tri5b/decode_tgsmall_test_clean/wer_16 +%WER 8.14 [ 4279 / 52576, 683 ins, 379 del, 3217 sub ] exp/tri5b/decode_fglarge_test_clean/wer_15_0.5 +%WER 8.50 [ 4469 / 52576, 597 ins, 510 del, 3362 sub ] exp/tri5b/decode_tglarge_test_clean/wer_15_1.0 +%WER 10.10 [ 5311 / 52576, 767 ins, 503 del, 4041 sub ] exp/tri5b/decode_tgmed_test_clean/wer_15_0.0 +%WER 11.20 [ 5886 / 52576, 774 ins, 617 del, 4495 sub ] exp/tri5b/decode_tgsmall_test_clean/wer_15_0.0 -%WER 26.27 [ 13384 / 50948, 1450 ins, 1839 del, 10095 sub ] exp/tri5b/decode_fglarge_dev_other/wer_17 -%WER 27.32 [ 13917 / 50948, 1605 ins, 1845 del, 10467 sub ] exp/tri5b/decode_tglarge_dev_other/wer_16 -%WER 30.19 [ 15378 / 50943, 1406 ins, 2423 del, 11549 sub ] [PARTIAL] exp/tri5b/decode_tgmed_dev_other/wer_16 -%WER 32.21 [ 16408 / 50948, 1311 ins, 2994 del, 12103 sub ] exp/tri5b/decode_tgsmall_dev_other/wer_17 +%WER 25.65 [ 13069 / 50948, 1664 ins, 1486 del, 9919 sub ] exp/tri5b/decode_fglarge_dev_other/wer_18_0.0 +%WER 26.60 [ 13552 / 50948, 1549 ins, 1774 del, 10229 sub ] exp/tri5b/decode_tglarge_dev_other/wer_17_0.5 +%WER 29.21 [ 14880 / 50943, 1618 ins, 2026 del, 11236 sub ] exp/tri5b/decode_tgmed_dev_other/wer_18_0.0 +%WER 30.89 [ 15736 / 50948, 1538 ins, 2388 del, 11810 sub ] exp/tri5b/decode_tgsmall_dev_other/wer_18_0.0 -%WER 28.11 [ 14714 / 52343, 1524 ins, 2202 del, 10988 sub ] exp/tri5b/decode_fglarge_test_other/wer_16 -%WER 29.16 [ 15263 / 52343, 1616 ins, 2346 del, 11301 sub ] exp/tri5b/decode_tglarge_test_other/wer_17 -%WER 32.09 [ 16798 / 52343, 1342 ins, 3215 del, 12241 sub ] exp/tri5b/decode_tgmed_test_other/wer_18 -%WER 34.08 [ 17837 / 52343, 1412 ins, 3358 del, 13067 sub ] exp/tri5b/decode_tgsmall_test_other/wer_16 +%WER 27.36 [ 14323 / 52343, 1486 ins, 2136 del, 
10701 sub ] exp/tri5b/decode_fglarge_test_other/wer_17_0.5 +%WER 28.32 [ 14824 / 52343, 1656 ins, 2118 del, 11050 sub ] exp/tri5b/decode_tglarge_test_other/wer_16_0.5 +%WER 31.01 [ 16233 / 52343, 1577 ins, 2593 del, 12063 sub ] exp/tri5b/decode_tgmed_test_other/wer_19_0.0 +%WER 32.99 [ 17269 / 52343, 1622 ins, 2792 del, 12855 sub ] exp/tri5b/decode_tgsmall_test_other/wer_17_0.0 ### SAT GMM model trained on the combined "train-clean-100" + "train-clean-360" + "train-other-500" set (960 hours) -%WER 7.08 [ 3853 / 54402, 591 ins, 373 del, 2889 sub ] exp/tri6b/decode_fglarge_dev_clean/wer_14 -%WER 7.52 [ 4091 / 54402, 638 ins, 397 del, 3056 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_13 -%WER 9.47 [ 5151 / 54402, 656 ins, 613 del, 3882 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_13 -%WER 10.88 [ 5919 / 54402, 626 ins, 813 del, 4480 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_14 +### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri6b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done +%WER 7.02 [ 3819 / 54402, 516 ins, 424 del, 2879 sub ] exp/tri6b/decode_fglarge_dev_clean/wer_14_1.0 +%WER 7.33 [ 3988 / 54402, 506 ins, 468 del, 3014 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_15_1.0 +%WER 9.23 [ 5024 / 54402, 744 ins, 481 del, 3799 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_13_0.0 +%WER 10.38 [ 5648 / 54402, 741 ins, 617 del, 4290 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_14_0.0 -%WER 8.01 [ 4213 / 52576, 658 ins, 404 del, 3151 sub ] exp/tri6b/decode_fglarge_test_clean/wer_15 -%WER 8.26 [ 4342 / 52576, 661 ins, 449 del, 3232 sub ] exp/tri6b/decode_tglarge_test_clean/wer_15 -%WER 10.06 [ 5289 / 52576, 653 ins, 637 del, 3999 sub ] exp/tri6b/decode_tgmed_test_clean/wer_15 -%WER 11.24 [ 5907 / 52576, 704 ins, 756 del, 4447 sub ] exp/tri6b/decode_tgsmall_test_clean/wer_14 +%WER 7.81 [ 4105 / 52576, 574 ins, 442 del, 3089 sub ] exp/tri6b/decode_fglarge_test_clean/wer_15_1.0 +%WER 8.01 [ 4213 / 
52576, 658 ins, 387 del, 3168 sub ] exp/tri6b/decode_tglarge_test_clean/wer_15_0.5 +%WER 9.83 [ 5167 / 52576, 709 ins, 519 del, 3939 sub ] exp/tri6b/decode_tgmed_test_clean/wer_16_0.0 +%WER 10.99 [ 5778 / 52576, 723 ins, 640 del, 4415 sub ] exp/tri6b/decode_tgsmall_test_clean/wer_16_0.0 -%WER 21.14 [ 10770 / 50948, 1168 ins, 1493 del, 8109 sub ] exp/tri6b/decode_fglarge_dev_other/wer_17 -%WER 22.14 [ 11278 / 50948, 1342 ins, 1466 del, 8470 sub ] exp/tri6b/decode_tglarge_dev_other/wer_15 -%WER 25.16 [ 12821 / 50948, 1233 ins, 1953 del, 9635 sub ] exp/tri6b/decode_tgmed_dev_other/wer_15 -%WER 27.23 [ 13872 / 50948, 1109 ins, 2426 del, 10337 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_16 - -%WER 22.49 [ 11772 / 52343, 1289 ins, 1599 del, 8884 sub ] exp/tri6b/decode_fglarge_test_other/wer_15 -%WER 23.46 [ 12278 / 52343, 1341 ins, 1690 del, 9247 sub ] exp/tri6b/decode_tglarge_test_other/wer_16 -%WER 26.87 [ 14063 / 52343, 1334 ins, 2170 del, 10559 sub ] exp/tri6b/decode_tgmed_test_other/wer_14 -%WER 28.90 [ 15128 / 52343, 1239 ins, 2681 del, 11208 sub ] exp/tri6b/decode_tgsmall_test_other/wer_15 +%WER 20.53 [ 10460 / 50948, 1270 ins, 1258 del, 7932 sub ] exp/tri6b/decode_fglarge_dev_other/wer_15_0.5 +%WER 21.31 [ 10857 / 50948, 1299 ins, 1376 del, 8182 sub ] exp/tri6b/decode_tglarge_dev_other/wer_16_0.5 +%WER 24.27 [ 12365 / 50948, 1401 ins, 1558 del, 9406 sub ] exp/tri6b/decode_tgmed_dev_other/wer_16_0.0 +%WER 26.14 [ 13317 / 50948, 1292 ins, 1977 del, 10048 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_17_0.0 +%WER 21.79 [ 11405 / 52343, 1263 ins, 1497 del, 8645 sub ] exp/tri6b/decode_fglarge_test_other/wer_15_0.5 +%WER 22.78 [ 11923 / 52343, 1370 ins, 1483 del, 9070 sub ] exp/tri6b/decode_tglarge_test_other/wer_14_0.5 +%WER 25.67 [ 13439 / 52343, 1481 ins, 1767 del, 10191 sub ] exp/tri6b/decode_tgmed_test_other/wer_15_0.0 +%WER 27.79 [ 14545 / 52343, 1371 ins, 2250 del, 10924 sub ] exp/tri6b/decode_tgsmall_test_other/wer_16_0.0 ### p-norm DNN trained on 
"train-clean-100" %WER 5.93 [ 3228 / 54402, 486 ins, 330 del, 2412 sub ] exp/nnet5a_clean_100_gpu/decode_fglarge_dev_clean/wer_13 diff --git a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh index 16d42f1d3..a3a176b63 100755 --- a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh +++ b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh @@ -64,16 +64,16 @@ fi for test in test_clean test_other dev_clean dev_other; do steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \ - --transform-dir exp/tri5b/decode_pp_tgsmall_$test \ - exp/tri5b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1; + --transform-dir exp/tri5b/decode_tgsmall_$test \ + exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1; done exit 0; diff --git a/egs/librispeech/s5/local/nnet2/run_7a_960.sh b/egs/librispeech/s5/local/nnet2/run_7a_960.sh index be9008cb3..7c7ef92f9 100755 --- a/egs/librispeech/s5/local/nnet2/run_7a_960.sh +++ b/egs/librispeech/s5/local/nnet2/run_7a_960.sh @@ -64,16 +64,16 @@ fi for test in test_clean test_other dev_clean dev_other; do steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \ - 
--transform-dir exp/tri6b/decode_pp_tgsmall_$test \ - exp/tri6b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1; + --transform-dir exp/tri6b/decode_tgsmall_$test \ + exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1; done exit 0; diff --git a/egs/librispeech/s5/local/online/run_nnet2.sh b/egs/librispeech/s5/local/online/run_nnet2.sh index 00f1c632a..10cb5511a 100755 --- a/egs/librispeech/s5/local/online/run_nnet2.sh +++ b/egs/librispeech/s5/local/online/run_nnet2.sh @@ -83,12 +83,12 @@ if [ $stage -le 9 ]; then for test in dev_clean dev_other; do steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \ --online-ivector-dir exp/nnet2_online/ivectors_${test} \ - exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed} || exit 1; + exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || 
exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1; done fi @@ -105,15 +105,15 @@ if [ $stage -le 11 ]; then # previous utterances of the same speaker. for test in test_clean test_other dev_clean dev_other; do steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed} || exit 1; + exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1; done fi @@ -122,12 +122,12 @@ if [ $stage -le 12 ]; then # without carrying forward speaker information. 
for test in dev_clean dev_other; do steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt || exit 1; + --per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1; done fi @@ -137,13 +137,13 @@ if [ $stage -le 13 ]; then # of the utterance while computing the iVector (--online false) for test in dev_clean dev_other; do steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline || exit 1; + --per-utt true --online false exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} 
\ + data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1; done fi @@ -155,8 +155,8 @@ if [ $stage -le 14 ]; then test=dev_clean steps/online/nnet2/decode.sh --threaded true \ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded || exit 1; + --per-utt true exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_threaded || exit 1; fi if [ $stage -le 15 ]; then @@ -164,8 +164,8 @@ if [ $stage -le 15 ]; then test=dev_clean steps/online/nnet2/decode.sh --threaded true --do-endpointing true \ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1; + --per-utt true exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1; fi exit 0; diff --git a/egs/librispeech/s5/local/online/run_nnet2_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_disc.sh index 7f6a54ea1..3314f2507 100755 --- a/egs/librispeech/s5/local/online/run_nnet2_disc.sh +++ b/egs/librispeech/s5/local/online/run_nnet2_disc.sh @@ -135,14 +135,14 @@ if [ $stage -le 5 ]; then for test in test_clean test_other dev_clean dev_other; do ( steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \ - --iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1; + --iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1 + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1; steps/lmrescore_const_arpa.sh \ - 
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1; ) & done diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms.sh b/egs/librispeech/s5/local/online/run_nnet2_ms.sh index 868ac4b42..91ace6c22 100755 --- a/egs/librispeech/s5/local/online/run_nnet2_ms.sh +++ b/egs/librispeech/s5/local/online/run_nnet2_ms.sh @@ -95,15 +95,15 @@ if [ $stage -le 9 ]; then for test in dev_clean dev_other; do steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \ --online-ivector-dir exp/nnet2_online/ivectors_${test} \ - exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed} || exit 1; + exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test $dir/decode_pp_${test}_{tgsmall,fglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test $dir/decode_${test}_{tgsmall,fglarge} || exit 1; done fi @@ -120,15 +120,15 @@ if [ $stage -le 11 ]; then # previous utterances of the same 
speaker. for test in dev_clean dev_other; do steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed} || exit 1; + exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1; done fi @@ -137,15 +137,15 @@ if [ $stage -le 12 ]; then # without carrying forward speaker information. 
for test in dev_clean dev_other; do steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt || exit 1; + --per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt || exit 1; done fi @@ -155,16 +155,16 @@ if [ $stage -le 13 ]; then # of the utterance while computing the iVector (--online false) for test in test_clean test_other dev_clean dev_other; do steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline || exit 1; + --per-utt true --online false exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" 
data/lang_test_{tgsmall,tgmed} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt_offline || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1; done fi @@ -176,8 +176,8 @@ if [ $stage -le 14 ]; then test=dev_clean steps/online/nnet2/decode.sh --threaded true \ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded || exit 1; + --per-utt true exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_threaded || exit 1; fi if [ $stage -le 15 ]; then @@ -185,8 +185,8 @@ if [ $stage -le 15 ]; then test=dev_clean steps/online/nnet2/decode.sh --threaded true --do-endpointing true \ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1; + --per-utt true exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1; fi if [ $stage -le 16 ]; then @@ -195,8 +195,8 @@ if [ $stage -le 16 ]; then test=dev_clean steps/online/nnet2/decode.sh --threaded true --silence-weight 0.0 \ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ - --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \ - ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_sil0.0 || exit 1; + --per-utt true 
exp/tri6b/graph_tgsmall data/$test \ + ${dir}_online/decode_${test}_tgsmall_utt_threaded_sil0.0 || exit 1; fi exit 0; diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh index 0b6637e38..0ff05eec4 100755 --- a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh +++ b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh @@ -136,15 +136,15 @@ if [ $stage -le 5 ]; then for test in test_clean test_other dev_clean dev_other; do ( steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \ - --iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1; + --iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1 + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1; ) & ) & done diff --git a/egs/librispeech/s5/local/run_data_cleaning.sh b/egs/librispeech/s5/local/run_data_cleaning.sh index 55c2b2f97..3300ad4c4 100755 --- a/egs/librispeech/s5/local/run_data_cleaning.sh +++ b/egs/librispeech/s5/local/run_data_cleaning.sh @@ -16,7 +16,7 
@@ set -e if [ $stage -le 1 ]; then - steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang_pp \ + steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang \ exp/tri6b exp/tri6b_cleanup fi @@ -28,21 +28,21 @@ fi if [ $stage -le 3 ]; then steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train.thresh$thresh data/lang_pp exp/tri6b exp/tri6b_ali_$thresh + data/train.thresh$thresh data/lang exp/tri6b exp/tri6b_ali_$thresh fi if [ $stage -le 4 ]; then steps/train_sat.sh --cmd "$train_cmd" \ - 7000 150000 data/train_960_thresh$thresh data/lang_pp exp/tri6b_ali_$thresh exp/tri6b_$thresh || exit 1; + 7000 150000 data/train_960_thresh$thresh data/lang exp/tri6b_ali_$thresh exp/tri6b_$thresh || exit 1; fi if [ $stage -le 5 ]; then - utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_pp_tgsmall || exit 1 + utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_tgsmall || exit 1 for test in dev_clean dev_other; do steps/decode_fmllr.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri6b_$thresh/graph_pp_tgsmall data/$test exp/tri6b_$thresh/decode_pp_tgsmall_$test || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test exp/tri6b_$thresh/decode_pp_{tgsmall,tgmed}_$test || exit 1; + exp/tri6b_$thresh/graph_tgsmall data/$test exp/tri6b_$thresh/decode_tgsmall_$test || exit 1 + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test exp/tri6b_$thresh/decode_{tgsmall,tgmed}_$test || exit 1; done fi diff --git a/egs/librispeech/s5/local/run_rnnlm.sh b/egs/librispeech/s5/local/run_rnnlm.sh index c3991bd4c..ea9b997dc 100755 --- a/egs/librispeech/s5/local/run_rnnlm.sh +++ b/egs/librispeech/s5/local/run_rnnlm.sh @@ -72,13 +72,13 @@ if [ $stage -le 3 ]; then echo "$0: Performing RNNLM rescoring on tri6b decoding results" for lm in tgsmall tgmed; do for devset in dev_clean 
dev_other; do - sourcedir=exp/tri6b/decode_pp_${lm}_${devset} + sourcedir=exp/tri6b/decode_${lm}_${devset} resultsdir=${sourcedir}_rnnlm_h${hidden}_me${maxent_order}-${maxent_size} - steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 0.5 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5 + steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 0.5 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5 cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.25 cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.75 - steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.25 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25 - steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.75 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75 + steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.25 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25 + steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.75 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75 done done fi diff --git a/egs/librispeech/s5/local/score.sh b/egs/librispeech/s5/local/score.sh index 2a7c782b4..f6359c189 100755 --- a/egs/librispeech/s5/local/score.sh +++ b/egs/librispeech/s5/local/score.sh @@ -1,5 +1,6 @@ #!/bin/bash # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# 2014 Guoguo Chen # Apache 2.0 [ -f ./path.sh ] && . ./path.sh @@ -9,7 +10,7 @@ cmd=run.pl stage=0 decode_mbr=true reverse=false -word_ins_penalty=0.0 +word_ins_penalty=0.0,0.5,1.0 min_lmwt=9 max_lmwt=20 #end configuration section. 
@@ -43,25 +44,31 @@ mkdir -p $dir/scoring/log cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab \ - ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done if $reverse; then - for lmwt in `seq $min_lmwt $max_lmwt`; do - mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig - awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ - <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for lmwt in `seq $min_lmwt $max_lmwt`; do + mv $dir/scoring/$lmwt.$wip.tra $dir/scoring/$lmwt.$wip.tra.orig + awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ + <$dir/scoring/$lmwt.$wip.tra.orig >$dir/scoring/$lmwt.$wip.tra + done done fi # Note: the double level of quoting for the sed command -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cat $dir/scoring/LMWT.tra \| \ +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \ compute-wer --text --mode=present \ - ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done exit 0; 
diff --git a/egs/librispeech/s5/run.sh b/egs/librispeech/s5/run.sh index f6ecbfd8a..02880f374 100755 --- a/egs/librispeech/s5/run.sh +++ b/egs/librispeech/s5/run.sh @@ -49,17 +49,18 @@ done # when "--stage 3" option is used below we skip the G2P steps, and use the # lexicon we have already downloaded from openslr.org/11/ local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \ - data/local/lm data/local/lm data/local/dict || exit 1 + data/local/lm data/local/lm data/local/dict_nosp || exit 1 -utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang || exit 1; +utils/prepare_lang.sh data/local/dict_nosp \ + "" data/local/lang_tmp_nosp data/lang_nosp || exit 1; -local/format_lms.sh data/local/lm || exit 1 +local/format_lms.sh --src-dir data/lang_nosp data/local/lm || exit 1 # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs -utils/build_const_arpa_lm.sh \ - data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1; -utils/build_const_arpa_lm.sh \ - data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1; +utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \ + data/lang_nosp data/lang_nosp_test_tglarge || exit 1; +utils/build_const_arpa_lm.sh data/local/lm/lm_fglarge.arpa.gz \ + data/lang_nosp data/lang_nosp_test_fglarge || exit 1; mfccdir=mfcc # spread the mfccs over various machines, as this data-set is quite large. 
@@ -86,97 +87,143 @@ utils/subset_data_dir.sh data/train_clean_100 10000 data/train_10k # train a monophone system steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \ - data/train_2kshort data/lang exp/mono || exit 1; + data/train_2kshort data/lang_nosp exp/mono || exit 1; # decode using the monophone model ( - utils/mkgraph.sh --mono data/lang_test_tgsmall exp/mono exp/mono/graph_tgsmall || exit 1 + utils/mkgraph.sh --mono data/lang_nosp_test_tgsmall \ + exp/mono exp/mono/graph_nosp_tgsmall || exit 1 for test in test_clean test_other dev_clean dev_other; do - steps/decode.sh --nj 20 --cmd "$decode_cmd" \ - exp/mono/graph_tgsmall data/$test exp/mono/decode_tgsmall_$test + steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/mono/graph_nosp_tgsmall \ + data/$test exp/mono/decode_nosp_tgsmall_$test || exit 1 done )& steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \ - data/train_5k data/lang exp/mono exp/mono_ali_5k + data/train_5k data/lang_nosp exp/mono exp/mono_ali_5k # train a first delta + delta-delta triphone system on a subset of 5000 utterances steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ - 2000 10000 data/train_5k data/lang exp/mono_ali_5k exp/tri1 || exit 1; + 2000 10000 data/train_5k data/lang_nosp exp/mono_ali_5k exp/tri1 || exit 1; # decode using the tri1 model ( - utils/mkgraph.sh data/lang_test_tgsmall exp/tri1 exp/tri1/graph_tgsmall || exit 1; + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri1 exp/tri1/graph_nosp_tgsmall || exit 1; for test in test_clean test_other dev_clean dev_other; do - steps/decode.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri1/graph_tgsmall data/$test exp/tri1/decode_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ - data/$test exp/tri1/decode_{tgsmall,tgmed}_$test || exit 1; + steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \ + data/$test exp/tri1/decode_nosp_tgsmall_$test || exit 1; + 
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ - data/$test exp/tri1/decode_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test || exit 1; done )& steps/align_si.sh --nj 10 --cmd "$train_cmd" \ - data/train_10k data/lang exp/tri1 exp/tri1_ali_10k || exit 1; + data/train_10k data/lang_nosp exp/tri1 exp/tri1_ali_10k || exit 1; # train an LDA+MLLT system. steps/train_lda_mllt.sh --cmd "$train_cmd" \ - --splice-opts "--left-context=3 --right-context=3" \ - 2500 15000 data/train_10k data/lang exp/tri1_ali_10k exp/tri2b || exit 1; + --splice-opts "--left-context=3 --right-context=3" 2500 15000 \ + data/train_10k data/lang_nosp exp/tri1_ali_10k exp/tri2b || exit 1; # decode using the LDA+MLLT model ( - utils/mkgraph.sh data/lang_test_tgsmall exp/tri2b exp/tri2b/graph_tgsmall || exit 1; + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri2b exp/tri2b/graph_nosp_tgsmall || exit 1; for test in test_clean test_other dev_clean dev_other; do - steps/decode.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri2b/graph_tgsmall data/$test exp/tri2b/decode_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ - data/$test exp/tri2b/decode_{tgsmall,tgmed}_$test || exit 1; + steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \ + data/$test exp/tri2b/decode_nosp_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ - data/$test exp/tri2b/decode_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} 
\ + data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test || exit 1; done )& # Align a 10k utts subset using the tri2b model -steps/align_si.sh --nj 10 --cmd "$train_cmd" \ - --use-graphs true data/train_10k data/lang exp/tri2b exp/tri2b_ali_10k || exit 1; +steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \ + data/train_10k data/lang_nosp exp/tri2b exp/tri2b_ali_10k || exit 1; # Train tri3b, which is LDA+MLLT+SAT on 10k utts -steps/train_sat.sh --cmd "$train_cmd" \ - 2500 15000 data/train_10k data/lang exp/tri2b_ali_10k exp/tri3b || exit 1; +steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \ + data/train_10k data/lang_nosp exp/tri2b_ali_10k exp/tri3b || exit 1; # decode using the tri3b model ( - utils/mkgraph.sh data/lang_test_tgsmall exp/tri3b exp/tri3b/graph_tgsmall || exit 1; + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri3b exp/tri3b/graph_nosp_tgsmall || exit 1; for test in test_clean test_other dev_clean dev_other; do steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgsmall data/$test exp/tri3b/decode_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ - data/$test exp/tri3b/decode_{tgsmall,tgmed}_$test || exit 1; + exp/tri3b/graph_nosp_tgsmall data/$test \ + exp/tri3b/decode_nosp_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ - data/$test exp/tri3b/decode_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test || exit 1; done )& # align the entire train_clean_100 subset using the tri3b model steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ - data/train_clean_100 data/lang exp/tri3b exp/tri3b_ali_clean_100 || exit 1; + data/train_clean_100 data/lang_nosp \ 
+ exp/tri3b exp/tri3b_ali_clean_100 || exit 1; # train another LDA+MLLT+SAT system on the entire 100 hour subset -steps/train_sat.sh --cmd "$train_cmd" \ - 4200 40000 data/train_clean_100 data/lang exp/tri3b_ali_clean_100 exp/tri4b || exit 1; +steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \ + data/train_clean_100 data/lang_nosp \ + exp/tri3b_ali_clean_100 exp/tri4b || exit 1; # decode using the tri4b model ( - utils/mkgraph.sh data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1; + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri4b exp/tri4b/graph_nosp_tgsmall || exit 1; for test in test_clean test_other dev_clean dev_other; do steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri4b/graph_tgsmall data/$test exp/tri4b/decode_tgsmall_$test || exit 1; + exp/tri4b/graph_nosp_tgsmall data/$test \ + exp/tri4b/decode_nosp_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri4b/decode_nosp_{tgsmall,tgmed}_$test || exit 1; + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri4b/decode_nosp_{tgsmall,tglarge}_$test || exit 1; + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,fglarge} \ + data/$test exp/tri4b/decode_nosp_{tgsmall,fglarge}_$test || exit 1; + done +)& + +# Now we compute the pronunciation and silence probabilities from training data, +# and re-create the lang directory. 
+steps/get_prons.sh --cmd "$train_cmd" \ + data/train_clean_100 data/lang_nosp exp/tri4b +utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp \ + exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \ + exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict || exit 1 + +utils/prepare_lang.sh data/local/dict \ + "" data/local/lang_tmp data/lang +local/format_lms.sh --src-dir data/lang data/local/lm + +utils/build_const_arpa_lm.sh \ + data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1; +utils/build_const_arpa_lm.sh \ + data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1; + +# decode using the tri4b model with pronunciation and silence probabilities +( + utils/mkgraph.sh \ + data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1; + for test in test_clean test_other dev_clean dev_other; do + steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ + exp/tri4b/graph_tgsmall data/$test \ + exp/tri4b/decode_tgsmall_$test || exit 1; steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/$test exp/tri4b/decode_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ @@ -199,75 +246,41 @@ local/nnet2/run_5a_clean_100.sh || exit 1 local/download_and_untar.sh $data $data_url train-clean-360 || exit 1; # now add the "clean-360" subset to the mix ... -local/data_prep.sh $data/LibriSpeech/train-clean-360 data/train_clean_360 || exit 1 +local/data_prep.sh \ + $data/LibriSpeech/train-clean-360 data/train_clean_360 || exit 1 steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/train_clean_360 \ exp/make_mfcc/train_clean_360 $mfccdir || exit 1 -steps/compute_cmvn_stats.sh data/train_clean_360 exp/make_mfcc/train_clean_360 $mfccdir || exit 1 +steps/compute_cmvn_stats.sh \ + data/train_clean_360 exp/make_mfcc/train_clean_360 $mfccdir || exit 1 # ... 
and then combine the two sets into a 460 hour one -utils/combine_data.sh data/train_clean_460 data/train_clean_100 data/train_clean_360 || exit 1 +utils/combine_data.sh \ + data/train_clean_460 data/train_clean_100 data/train_clean_360 || exit 1 # align the new, combined set, using the tri4b model steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \ data/train_clean_460 data/lang exp/tri4b exp/tri4b_ali_clean_460 || exit 1; -# At this point we estimate the probability of the pronunciation variants for -# the words in our lexicon (of course some rare words won't be present in the -# training data, so their probabilities will be left unchanged). These pronunciation -# probabilities will be used in the subsequent _decoding_ steps. - -# count how many times every pronunciation variant was used in the training data -steps/get_prons.sh --cmd "$train_cmd" data/train_clean_460 data/lang exp/tri4b_ali_clean_460 - -# use the counts from the above step, to calculate (smoothed) pronunciation probabilities -utils/dict_dir_add_pronprobs.sh data/local/dict exp/tri4b_ali_clean_460/pron_counts_nowb.txt data/local/dict_pp - -# prepare a new "lang" directories to be used for the pronunciation probability setup -utils/prepare_lang.sh data/local/dict_pp "" data/local/lang_tmp_pp data/lang_pp -local/format_lms.sh --src-dir data/lang_pp data/local/lm - -# regenerate the full 3-gram and 4-gram directories -utils/build_const_arpa_lm.sh \ - data/local/lm/lm_tglarge.arpa.gz data/lang_pp data/lang_pp_test_tglarge || exit 1; -utils/build_const_arpa_lm.sh \ - data/local/lm/lm_fglarge.arpa.gz data/lang_pp data/lang_pp_test_fglarge || exit 1; - -# decode again using the tri4b model, but this time with pronunciation probability -( - utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri4b exp/tri4b/graph_pp_tgsmall || exit 1; - for test in test_clean test_other dev_clean dev_other; do - steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri4b/graph_pp_tgsmall data/$test 
exp/tri4b/decode_pp_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test exp/tri4b/decode_pp_{tgsmall,tgmed}_$test || exit 1; - steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test exp/tri4b/decode_pp_{tgsmall,tglarge}_$test || exit 1; - steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test exp/tri4b/decode_pp_{tgsmall,fglarge}_$test || exit 1; - done -)& - - # create a larger SAT model, trained on the 460 hours of data. -steps/train_sat.sh --cmd "$train_cmd" \ - 5000 100000 data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1; +steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \ + data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1; # decode using the tri5b model ( - utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri5b exp/tri5b/graph_pp_tgsmall || exit 1; + utils/mkgraph.sh data/lang_test_tgsmall \ + exp/tri5b exp/tri5b/graph_tgsmall || exit 1; for test in test_clean test_other dev_clean dev_other; do steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri5b/graph_pp_tgsmall data/$test exp/tri5b/decode_pp_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test exp/tri5b/decode_pp_{tgsmall,tgmed}_$test || exit 1; + exp/tri5b/graph_tgsmall data/$test \ + exp/tri5b/decode_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test exp/tri5b/decode_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test exp/tri5b/decode_pp_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} 
\ - data/$test exp/tri5b/decode_pp_{tgsmall,fglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test exp/tri5b/decode_{tgsmall,fglarge}_$test || exit 1; done )& @@ -277,13 +290,16 @@ local/nnet2/run_6a_clean_460.sh || exit 1 local/download_and_untar.sh $data $data_url train-other-500 || exit 1; # prepare the 500 hour subset. -local/data_prep.sh $data/LibriSpeech/train-other-500 data/train_other_500 || exit 1 +local/data_prep.sh \ + $data/LibriSpeech/train-other-500 data/train_other_500 || exit 1 steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/train_other_500 \ exp/make_mfcc/train_other_500 $mfccdir || exit 1 -steps/compute_cmvn_stats.sh data/train_other_500 exp/make_mfcc/train_other_500 $mfccdir || exit 1 +steps/compute_cmvn_stats.sh \ + data/train_other_500 exp/make_mfcc/train_other_500 $mfccdir || exit 1 # combine all the data -utils/combine_data.sh data/train_960 data/train_clean_460 data/train_other_500 || exit 1 +utils/combine_data.sh \ + data/train_960 data/train_clean_460 data/train_other_500 || exit 1 steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \ data/train_960 data/lang exp/tri5b exp/tri5b_ali_960 || exit 1; @@ -295,18 +311,19 @@ steps/train_quick.sh --cmd "$train_cmd" \ # decode using the tri6b model ( - utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b exp/tri6b/graph_pp_tgsmall || exit 1; + utils/mkgraph.sh data/lang_test_tgsmall \ + exp/tri6b exp/tri6b/graph_tgsmall || exit 1; for test in test_clean test_other dev_clean dev_other; do steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ - exp/tri6b/graph_pp_tgsmall data/$test exp/tri6b/decode_pp_tgsmall_$test || exit 1; - steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \ - data/$test exp/tri6b/decode_pp_{tgsmall,tgmed}_$test || exit 1; + exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1; + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test 
exp/tri6b/decode_{tgsmall,tgmed}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \ - data/$test exp/tri6b/decode_pp_{tgsmall,tglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1; steps/lmrescore_const_arpa.sh \ - --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \ - data/$test exp/tri6b/decode_pp_{tgsmall,fglarge}_$test || exit 1; + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/$test exp/tri6b/decode_{tgsmall,fglarge}_$test || exit 1; done )& @@ -333,3 +350,6 @@ local/nnet2/run_7a_960.sh || exit 1 # ## The following is an older version of the online-nnet2 recipe, without "multi-splice". It's faster # ## to train but slightly worse. # # local/online/run_nnet2.sh + +# Wait for decodings in the background +wait