trunk: adding silprob to librispeech recipe

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5008 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Guoguo Chen 2015-04-16 03:18:26 +00:00
Parent cf90f50e07
Commit 1ae487048d
11 changed files with 300 additions and 271 deletions

View File

@ -8,70 +8,72 @@
# whereas "dev-other" and "test-other" sets contain more challenging speech
### SAT GMM model trained on the "train-clean-100" set (100 hours "clean" speech)
%WER 8.19 [ 4453 / 54402, 632 ins, 480 del, 3341 sub ] exp/tri4b/decode_fglarge_dev_clean/wer_16
%WER 8.62 [ 4689 / 54402, 632 ins, 525 del, 3532 sub ] exp/tri4b/decode_tglarge_dev_clean/wer_17
%WER 10.62 [ 5778 / 54402, 659 ins, 752 del, 4367 sub ] exp/tri4b/decode_tgmed_dev_clean/wer_15
%WER 12.11 [ 6590 / 54402, 689 ins, 964 del, 4937 sub ] exp/tri4b/decode_tgsmall_dev_clean/wer_15
### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri4b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done
%WER 8.20 [ 4459 / 54402, 695 ins, 427 del, 3337 sub ] exp/tri4b/decode_fglarge_dev_clean/wer_14_0.5
%WER 8.60 [ 4677 / 54402, 763 ins, 399 del, 3515 sub ] exp/tri4b/decode_tglarge_dev_clean/wer_16_0.0
%WER 10.39 [ 5655 / 54402, 711 ins, 648 del, 4296 sub ] exp/tri4b/decode_tgmed_dev_clean/wer_16_0.0
%WER 11.69 [ 6361 / 54402, 743 ins, 808 del, 4810 sub ] exp/tri4b/decode_tgsmall_dev_clean/wer_16_0.0
%WER 9.32 [ 4899 / 52576, 749 ins, 496 del, 3654 sub ] exp/tri4b/decode_fglarge_test_clean/wer_16
%WER 9.60 [ 5047 / 52576, 731 ins, 540 del, 3776 sub ] exp/tri4b/decode_tglarge_test_clean/wer_17
%WER 11.79 [ 6197 / 52576, 717 ins, 798 del, 4682 sub ] exp/tri4b/decode_tgmed_test_clean/wer_16
%WER 13.18 [ 6927 / 52576, 718 ins, 998 del, 5211 sub ] exp/tri4b/decode_tgsmall_test_clean/wer_16
%WER 9.10 [ 4786 / 52576, 708 ins, 464 del, 3614 sub ] exp/tri4b/decode_fglarge_test_clean/wer_17_0.5
%WER 9.43 [ 4958 / 52576, 751 ins, 492 del, 3715 sub ] exp/tri4b/decode_tglarge_test_clean/wer_15_0.5
%WER 11.36 [ 5975 / 52576, 799 ins, 642 del, 4534 sub ] exp/tri4b/decode_tgmed_test_clean/wer_17_0.0
%WER 12.64 [ 6643 / 52576, 795 ins, 817 del, 5031 sub ] exp/tri4b/decode_tgsmall_test_clean/wer_17_0.0
%WER 29.31 [ 14934 / 50948, 1536 ins, 2215 del, 11183 sub ] exp/tri4b/decode_fglarge_dev_other/wer_18
%WER 30.25 [ 15412 / 50948, 1655 ins, 2188 del, 11569 sub ] exp/tri4b/decode_tglarge_dev_other/wer_17
%WER 33.01 [ 16817 / 50943, 1358 ins, 3023 del, 12436 sub ] [PARTIAL] exp/tri4b/decode_tgmed_dev_other/wer_19
%WER 35.06 [ 17862 / 50948, 1378 ins, 3409 del, 13075 sub ] exp/tri4b/decode_tgsmall_dev_other/wer_18
%WER 28.45 [ 14495 / 50948, 1574 ins, 1925 del, 10996 sub ] exp/tri4b/decode_fglarge_dev_other/wer_17_0.5
%WER 29.24 [ 14895 / 50948, 1610 ins, 2041 del, 11244 sub ] exp/tri4b/decode_tglarge_dev_other/wer_19_0.5
%WER 32.04 [ 16325 / 50948, 1753 ins, 2261 del, 12311 sub ] exp/tri4b/decode_tgmed_dev_other/wer_18_0.0
%WER 33.97 [ 17305 / 50948, 1681 ins, 2661 del, 12963 sub ] exp/tri4b/decode_tgsmall_dev_other/wer_18_0.0
%WER 31.47 [ 16470 / 52343, 1637 ins, 2624 del, 12209 sub ] exp/tri4b/decode_fglarge_test_other/wer_17
%WER 32.35 [ 16933 / 52343, 1792 ins, 2638 del, 12503 sub ] exp/tri4b/decode_tglarge_test_other/wer_17
%WER 35.08 [ 18363 / 52343, 1469 ins, 3566 del, 13328 sub ] exp/tri4b/decode_tgmed_test_other/wer_18
%WER 36.83 [ 19278 / 52343, 1350 ins, 3976 del, 13952 sub ] exp/tri4b/decode_tgsmall_test_other/wer_18
%WER 30.33 [ 15875 / 52343, 1639 ins, 2375 del, 11861 sub ] exp/tri4b/decode_fglarge_test_other/wer_17_0.5
%WER 31.07 [ 16264 / 52343, 1728 ins, 2424 del, 12112 sub ] exp/tri4b/decode_tglarge_test_other/wer_18_0.5
%WER 33.69 [ 17633 / 52343, 1755 ins, 2766 del, 13112 sub ] exp/tri4b/decode_tgmed_test_other/wer_18_0.0
%WER 35.62 [ 18646 / 52343, 1758 ins, 3039 del, 13849 sub ] exp/tri4b/decode_tgsmall_test_other/wer_17_0.0
### SAT GMM model trained on the combined "train-clean-100" + "train-clean-360" set (460 hours "clean" speech)
%WER 7.26 [ 3949 / 54402, 543 ins, 420 del, 2986 sub ] exp/tri5b/decode_fglarge_dev_clean/wer_16
%WER 7.65 [ 4162 / 54402, 592 ins, 436 del, 3134 sub ] exp/tri5b/decode_tglarge_dev_clean/wer_15
%WER 9.61 [ 5227 / 54402, 591 ins, 684 del, 3952 sub ] exp/tri5b/decode_tgmed_dev_clean/wer_15
%WER 10.87 [ 5914 / 54402, 584 ins, 863 del, 4467 sub ] exp/tri5b/decode_tgsmall_dev_clean/wer_15
### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri5b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done
%WER 7.05 [ 3835 / 54402, 588 ins, 370 del, 2877 sub ] exp/tri5b/decode_fglarge_dev_clean/wer_15_0.5
%WER 7.49 [ 4077 / 54402, 623 ins, 376 del, 3078 sub ] exp/tri5b/decode_tglarge_dev_clean/wer_14_0.5
%WER 9.38 [ 5104 / 54402, 701 ins, 533 del, 3870 sub ] exp/tri5b/decode_tgmed_dev_clean/wer_15_0.0
%WER 10.51 [ 5719 / 54402, 720 ins, 652 del, 4347 sub ] exp/tri5b/decode_tgsmall_dev_clean/wer_15_0.0
%WER 8.31 [ 4369 / 52576, 638 ins, 449 del, 3282 sub ] exp/tri5b/decode_fglarge_test_clean/wer_18
%WER 8.55 [ 4496 / 52576, 673 ins, 444 del, 3379 sub ] exp/tri5b/decode_tglarge_test_clean/wer_16
%WER 10.53 [ 5537 / 52576, 645 ins, 719 del, 4173 sub ] exp/tri5b/decode_tgmed_test_clean/wer_16
%WER 11.71 [ 6159 / 52576, 638 ins, 869 del, 4652 sub ] exp/tri5b/decode_tgsmall_test_clean/wer_16
%WER 8.14 [ 4279 / 52576, 683 ins, 379 del, 3217 sub ] exp/tri5b/decode_fglarge_test_clean/wer_15_0.5
%WER 8.50 [ 4469 / 52576, 597 ins, 510 del, 3362 sub ] exp/tri5b/decode_tglarge_test_clean/wer_15_1.0
%WER 10.10 [ 5311 / 52576, 767 ins, 503 del, 4041 sub ] exp/tri5b/decode_tgmed_test_clean/wer_15_0.0
%WER 11.20 [ 5886 / 52576, 774 ins, 617 del, 4495 sub ] exp/tri5b/decode_tgsmall_test_clean/wer_15_0.0
%WER 26.27 [ 13384 / 50948, 1450 ins, 1839 del, 10095 sub ] exp/tri5b/decode_fglarge_dev_other/wer_17
%WER 27.32 [ 13917 / 50948, 1605 ins, 1845 del, 10467 sub ] exp/tri5b/decode_tglarge_dev_other/wer_16
%WER 30.19 [ 15378 / 50943, 1406 ins, 2423 del, 11549 sub ] [PARTIAL] exp/tri5b/decode_tgmed_dev_other/wer_16
%WER 32.21 [ 16408 / 50948, 1311 ins, 2994 del, 12103 sub ] exp/tri5b/decode_tgsmall_dev_other/wer_17
%WER 25.65 [ 13069 / 50948, 1664 ins, 1486 del, 9919 sub ] exp/tri5b/decode_fglarge_dev_other/wer_18_0.0
%WER 26.60 [ 13552 / 50948, 1549 ins, 1774 del, 10229 sub ] exp/tri5b/decode_tglarge_dev_other/wer_17_0.5
%WER 29.21 [ 14880 / 50943, 1618 ins, 2026 del, 11236 sub ] exp/tri5b/decode_tgmed_dev_other/wer_18_0.0
%WER 30.89 [ 15736 / 50948, 1538 ins, 2388 del, 11810 sub ] exp/tri5b/decode_tgsmall_dev_other/wer_18_0.0
%WER 28.11 [ 14714 / 52343, 1524 ins, 2202 del, 10988 sub ] exp/tri5b/decode_fglarge_test_other/wer_16
%WER 29.16 [ 15263 / 52343, 1616 ins, 2346 del, 11301 sub ] exp/tri5b/decode_tglarge_test_other/wer_17
%WER 32.09 [ 16798 / 52343, 1342 ins, 3215 del, 12241 sub ] exp/tri5b/decode_tgmed_test_other/wer_18
%WER 34.08 [ 17837 / 52343, 1412 ins, 3358 del, 13067 sub ] exp/tri5b/decode_tgsmall_test_other/wer_16
%WER 27.36 [ 14323 / 52343, 1486 ins, 2136 del, 10701 sub ] exp/tri5b/decode_fglarge_test_other/wer_17_0.5
%WER 28.32 [ 14824 / 52343, 1656 ins, 2118 del, 11050 sub ] exp/tri5b/decode_tglarge_test_other/wer_16_0.5
%WER 31.01 [ 16233 / 52343, 1577 ins, 2593 del, 12063 sub ] exp/tri5b/decode_tgmed_test_other/wer_19_0.0
%WER 32.99 [ 17269 / 52343, 1622 ins, 2792 del, 12855 sub ] exp/tri5b/decode_tgsmall_test_other/wer_17_0.0
### SAT GMM model trained on the combined "train-clean-100" + "train-clean-360" + "train-other-500" set (960 hours)
%WER 7.08 [ 3853 / 54402, 591 ins, 373 del, 2889 sub ] exp/tri6b/decode_fglarge_dev_clean/wer_14
%WER 7.52 [ 4091 / 54402, 638 ins, 397 del, 3056 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_13
%WER 9.47 [ 5151 / 54402, 656 ins, 613 del, 3882 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_13
%WER 10.88 [ 5919 / 54402, 626 ins, 813 del, 4480 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_14
### for test in dev_clean test_clean dev_other test_other; do for lm in fglarge tglarge tgmed tgsmall; do grep WER exp/tri6b/decode_${lm}_${test}/wer* | best_wer.sh; done; echo; done
%WER 7.02 [ 3819 / 54402, 516 ins, 424 del, 2879 sub ] exp/tri6b/decode_fglarge_dev_clean/wer_14_1.0
%WER 7.33 [ 3988 / 54402, 506 ins, 468 del, 3014 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_15_1.0
%WER 9.23 [ 5024 / 54402, 744 ins, 481 del, 3799 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_13_0.0
%WER 10.38 [ 5648 / 54402, 741 ins, 617 del, 4290 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_14_0.0
%WER 8.01 [ 4213 / 52576, 658 ins, 404 del, 3151 sub ] exp/tri6b/decode_fglarge_test_clean/wer_15
%WER 8.26 [ 4342 / 52576, 661 ins, 449 del, 3232 sub ] exp/tri6b/decode_tglarge_test_clean/wer_15
%WER 10.06 [ 5289 / 52576, 653 ins, 637 del, 3999 sub ] exp/tri6b/decode_tgmed_test_clean/wer_15
%WER 11.24 [ 5907 / 52576, 704 ins, 756 del, 4447 sub ] exp/tri6b/decode_tgsmall_test_clean/wer_14
%WER 7.81 [ 4105 / 52576, 574 ins, 442 del, 3089 sub ] exp/tri6b/decode_fglarge_test_clean/wer_15_1.0
%WER 8.01 [ 4213 / 52576, 658 ins, 387 del, 3168 sub ] exp/tri6b/decode_tglarge_test_clean/wer_15_0.5
%WER 9.83 [ 5167 / 52576, 709 ins, 519 del, 3939 sub ] exp/tri6b/decode_tgmed_test_clean/wer_16_0.0
%WER 10.99 [ 5778 / 52576, 723 ins, 640 del, 4415 sub ] exp/tri6b/decode_tgsmall_test_clean/wer_16_0.0
%WER 21.14 [ 10770 / 50948, 1168 ins, 1493 del, 8109 sub ] exp/tri6b/decode_fglarge_dev_other/wer_17
%WER 22.14 [ 11278 / 50948, 1342 ins, 1466 del, 8470 sub ] exp/tri6b/decode_tglarge_dev_other/wer_15
%WER 25.16 [ 12821 / 50948, 1233 ins, 1953 del, 9635 sub ] exp/tri6b/decode_tgmed_dev_other/wer_15
%WER 27.23 [ 13872 / 50948, 1109 ins, 2426 del, 10337 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_16
%WER 22.49 [ 11772 / 52343, 1289 ins, 1599 del, 8884 sub ] exp/tri6b/decode_fglarge_test_other/wer_15
%WER 23.46 [ 12278 / 52343, 1341 ins, 1690 del, 9247 sub ] exp/tri6b/decode_tglarge_test_other/wer_16
%WER 26.87 [ 14063 / 52343, 1334 ins, 2170 del, 10559 sub ] exp/tri6b/decode_tgmed_test_other/wer_14
%WER 28.90 [ 15128 / 52343, 1239 ins, 2681 del, 11208 sub ] exp/tri6b/decode_tgsmall_test_other/wer_15
%WER 20.53 [ 10460 / 50948, 1270 ins, 1258 del, 7932 sub ] exp/tri6b/decode_fglarge_dev_other/wer_15_0.5
%WER 21.31 [ 10857 / 50948, 1299 ins, 1376 del, 8182 sub ] exp/tri6b/decode_tglarge_dev_other/wer_16_0.5
%WER 24.27 [ 12365 / 50948, 1401 ins, 1558 del, 9406 sub ] exp/tri6b/decode_tgmed_dev_other/wer_16_0.0
%WER 26.14 [ 13317 / 50948, 1292 ins, 1977 del, 10048 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_17_0.0
%WER 21.79 [ 11405 / 52343, 1263 ins, 1497 del, 8645 sub ] exp/tri6b/decode_fglarge_test_other/wer_15_0.5
%WER 22.78 [ 11923 / 52343, 1370 ins, 1483 del, 9070 sub ] exp/tri6b/decode_tglarge_test_other/wer_14_0.5
%WER 25.67 [ 13439 / 52343, 1481 ins, 1767 del, 10191 sub ] exp/tri6b/decode_tgmed_test_other/wer_15_0.0
%WER 27.79 [ 14545 / 52343, 1371 ins, 2250 del, 10924 sub ] exp/tri6b/decode_tgsmall_test_other/wer_16_0.0
### p-norm DNN trained on "train-clean-100"
%WER 5.93 [ 3228 / 54402, 486 ins, 330 del, 2412 sub ] exp/nnet5a_clean_100_gpu/decode_fglarge_dev_clean/wer_13

View File

@ -64,16 +64,16 @@ fi
for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri5b/decode_pp_tgsmall_$test \
exp/tri5b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1;
--transform-dir exp/tri5b/decode_tgsmall_$test \
exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done
exit 0;

View File

@ -64,16 +64,16 @@ fi
for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri6b/decode_pp_tgsmall_$test \
exp/tri6b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1;
--transform-dir exp/tri6b/decode_tgsmall_$test \
exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done
exit 0;

View File

@ -83,12 +83,12 @@ if [ $stage -le 9 ]; then
for test in dev_clean dev_other; do
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/nnet2_online/ivectors_${test} \
exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1;
done
fi
@ -105,15 +105,15 @@ if [ $stage -le 11 ]; then
# previous utterances of the same speaker.
for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
done
fi
@ -122,12 +122,12 @@ if [ $stage -le 12 ]; then
# without carrying forward speaker information.
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1;
done
fi
@ -137,13 +137,13 @@ if [ $stage -le 13 ]; then
# of the utterance while computing the iVector (--online false)
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
done
fi
@ -155,8 +155,8 @@ if [ $stage -le 14 ]; then
test=dev_clean
steps/online/nnet2/decode.sh --threaded true \
--config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_threaded || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_threaded || exit 1;
fi
if [ $stage -le 15 ]; then
@ -164,8 +164,8 @@ if [ $stage -le 15 ]; then
test=dev_clean
steps/online/nnet2/decode.sh --threaded true --do-endpointing true \
--config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1;
fi
exit 0;

View File

@ -135,14 +135,14 @@ if [ $stage -le 5 ]; then
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
--iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
) &
done

View File

@ -95,15 +95,15 @@ if [ $stage -le 9 ]; then
for test in dev_clean dev_other; do
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/nnet2_online/ivectors_${test} \
exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_${test}_{tgsmall,fglarge} || exit 1;
done
fi
@ -120,15 +120,15 @@ if [ $stage -le 11 ]; then
# previous utterances of the same speaker.
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
done
fi
@ -137,15 +137,15 @@ if [ $stage -le 12 ]; then
# without carrying forward speaker information.
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt || exit 1;
done
fi
@ -155,16 +155,16 @@ if [ $stage -le 13 ]; then
# of the utterance while computing the iVector (--online false)
for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
done
fi
@ -176,8 +176,8 @@ if [ $stage -le 14 ]; then
test=dev_clean
steps/online/nnet2/decode.sh --threaded true \
--config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_threaded || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_threaded || exit 1;
fi
if [ $stage -le 15 ]; then
@ -185,8 +185,8 @@ if [ $stage -le 15 ]; then
test=dev_clean
steps/online/nnet2/decode.sh --threaded true --do-endpointing true \
--config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1;
fi
if [ $stage -le 16 ]; then
@ -195,8 +195,8 @@ if [ $stage -le 16 ]; then
test=dev_clean
steps/online/nnet2/decode.sh --threaded true --silence-weight 0.0 \
--config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_sil0.0 || exit 1;
--per-utt true exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_threaded_sil0.0 || exit 1;
fi
exit 0;

View File

@ -136,15 +136,15 @@ if [ $stage -le 5 ]; then
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
--iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
) &
) &
done

View File

@ -16,7 +16,7 @@ set -e
if [ $stage -le 1 ]; then
steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang_pp \
steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang \
exp/tri6b exp/tri6b_cleanup
fi
@ -28,21 +28,21 @@ fi
if [ $stage -le 3 ]; then
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
data/train.thresh$thresh data/lang_pp exp/tri6b exp/tri6b_ali_$thresh
data/train.thresh$thresh data/lang exp/tri6b exp/tri6b_ali_$thresh
fi
if [ $stage -le 4 ]; then
steps/train_sat.sh --cmd "$train_cmd" \
7000 150000 data/train_960_thresh$thresh data/lang_pp exp/tri6b_ali_$thresh exp/tri6b_$thresh || exit 1;
7000 150000 data/train_960_thresh$thresh data/lang exp/tri6b_ali_$thresh exp/tri6b_$thresh || exit 1;
fi
if [ $stage -le 5 ]; then
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_pp_tgsmall || exit 1
utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_tgsmall || exit 1
for test in dev_clean dev_other; do
steps/decode_fmllr.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
exp/tri6b_$thresh/graph_pp_tgsmall data/$test exp/tri6b_$thresh/decode_pp_tgsmall_$test || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri6b_$thresh/decode_pp_{tgsmall,tgmed}_$test || exit 1;
exp/tri6b_$thresh/graph_tgsmall data/$test exp/tri6b_$thresh/decode_tgsmall_$test || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri6b_$thresh/decode_{tgsmall,tgmed}_$test || exit 1;
done
fi

View File

@ -72,13 +72,13 @@ if [ $stage -le 3 ]; then
echo "$0: Performing RNNLM rescoring on tri6b decoding results"
for lm in tgsmall tgmed; do
for devset in dev_clean dev_other; do
sourcedir=exp/tri6b/decode_pp_${lm}_${devset}
sourcedir=exp/tri6b/decode_${lm}_${devset}
resultsdir=${sourcedir}_rnnlm_h${hidden}_me${maxent_order}-${maxent_size}
steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 0.5 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 0.5 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.25
cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.75
steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.25 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.75 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.25 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver --N 100 --stage 7 0.75 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
done
done
fi

View File

@ -1,5 +1,6 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Guoguo Chen
# Apache 2.0
[ -f ./path.sh ] && . ./path.sh
@ -9,7 +10,7 @@ cmd=run.pl
stage=0
decode_mbr=true
reverse=false
word_ins_penalty=0.0
word_ins_penalty=0.0,0.5,1.0
min_lmwt=9
max_lmwt=20
#end configuration section.
@ -43,25 +44,31 @@ mkdir -p $dir/scoring/log
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
lattice-best-path --word-symbol-table=$symtab \
ark:- ark,t:$dir/scoring/LMWT.tra || exit 1;
for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \
lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
lattice-best-path --word-symbol-table=$symtab \
ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1;
done
if $reverse; then
for lmwt in `seq $min_lmwt $max_lmwt`; do
mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig
awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
<$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra
for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
for lmwt in `seq $min_lmwt $max_lmwt`; do
mv $dir/scoring/$lmwt.$wip.tra $dir/scoring/$lmwt.$wip.tra.orig
awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
<$dir/scoring/$lmwt.$wip.tra.orig >$dir/scoring/$lmwt.$wip.tra
done
done
fi
# Note: the double level of quoting for the sed command
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
cat $dir/scoring/LMWT.tra \| \
for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \
cat $dir/scoring/LMWT.$wip.tra \| \
utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
compute-wer --text --mode=present \
ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1;
done
exit 0;

Просмотреть файл

@ -49,17 +49,18 @@ done
# when "--stage 3" option is used below we skip the G2P steps, and use the
# lexicon we have already downloaded from openslr.org/11/
local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \
data/local/lm data/local/lm data/local/dict || exit 1
data/local/lm data/local/lm data/local/dict_nosp || exit 1
utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;
utils/prepare_lang.sh data/local/dict_nosp \
"<SPOKEN_NOISE>" data/local/lang_tmp_nosp data/lang_nosp || exit 1;
local/format_lms.sh data/local/lm || exit 1
local/format_lms.sh --src-dir data/lang_nosp data/local/lm || exit 1
# Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
utils/build_const_arpa_lm.sh \
data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh \
data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1;
utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \
data/lang_nosp data/lang_nosp_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh data/local/lm/lm_fglarge.arpa.gz \
data/lang_nosp data/lang_nosp_test_fglarge || exit 1;
mfccdir=mfcc
# spread the mfccs over various machines, as this data-set is quite large.
@ -86,97 +87,143 @@ utils/subset_data_dir.sh data/train_clean_100 10000 data/train_10k
# train a monophone system
steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \
data/train_2kshort data/lang exp/mono || exit 1;
data/train_2kshort data/lang_nosp exp/mono || exit 1;
# decode using the monophone model
(
utils/mkgraph.sh --mono data/lang_test_tgsmall exp/mono exp/mono/graph_tgsmall || exit 1
utils/mkgraph.sh --mono data/lang_nosp_test_tgsmall \
exp/mono exp/mono/graph_nosp_tgsmall || exit 1
for test in test_clean test_other dev_clean dev_other; do
steps/decode.sh --nj 20 --cmd "$decode_cmd" \
exp/mono/graph_tgsmall data/$test exp/mono/decode_tgsmall_$test
steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/mono/graph_nosp_tgsmall \
data/$test exp/mono/decode_nosp_tgsmall_$test || exit 1
done
)&
steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
data/train_5k data/lang exp/mono exp/mono_ali_5k
data/train_5k data/lang_nosp exp/mono exp/mono_ali_5k
# train a first delta + delta-delta triphone system on a subset of 5000 utterances
steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
2000 10000 data/train_5k data/lang exp/mono_ali_5k exp/tri1 || exit 1;
2000 10000 data/train_5k data/lang_nosp exp/mono_ali_5k exp/tri1 || exit 1;
# decode using the tri1 model
(
utils/mkgraph.sh data/lang_test_tgsmall exp/tri1 exp/tri1/graph_tgsmall || exit 1;
utils/mkgraph.sh data/lang_nosp_test_tgsmall \
exp/tri1 exp/tri1/graph_nosp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode.sh --nj 20 --cmd "$decode_cmd" \
exp/tri1/graph_tgsmall data/$test exp/tri1/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri1/decode_{tgsmall,tgmed}_$test || exit 1;
steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \
data/$test exp/tri1/decode_nosp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri1/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
done
)&
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
data/train_10k data/lang exp/tri1 exp/tri1_ali_10k || exit 1;
data/train_10k data/lang_nosp exp/tri1 exp/tri1_ali_10k || exit 1;
# train an LDA+MLLT system.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
--splice-opts "--left-context=3 --right-context=3" \
2500 15000 data/train_10k data/lang exp/tri1_ali_10k exp/tri2b || exit 1;
--splice-opts "--left-context=3 --right-context=3" 2500 15000 \
data/train_10k data/lang_nosp exp/tri1_ali_10k exp/tri2b || exit 1;
# decode using the LDA+MLLT model
(
utils/mkgraph.sh data/lang_test_tgsmall exp/tri2b exp/tri2b/graph_tgsmall || exit 1;
utils/mkgraph.sh data/lang_nosp_test_tgsmall \
exp/tri2b exp/tri2b/graph_nosp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode.sh --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph_tgsmall data/$test exp/tri2b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri2b/decode_{tgsmall,tgmed}_$test || exit 1;
steps/decode.sh --nj 20 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \
data/$test exp/tri2b/decode_nosp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri2b/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
done
)&
# Align a 10k utts subset using the tri2b model
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
--use-graphs true data/train_10k data/lang exp/tri2b exp/tri2b_ali_10k || exit 1;
steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
data/train_10k data/lang_nosp exp/tri2b exp/tri2b_ali_10k || exit 1;
# Train tri3b, which is LDA+MLLT+SAT on 10k utts
steps/train_sat.sh --cmd "$train_cmd" \
2500 15000 data/train_10k data/lang exp/tri2b_ali_10k exp/tri3b || exit 1;
steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
data/train_10k data/lang_nosp exp/tri2b_ali_10k exp/tri3b || exit 1;
# decode using the tri3b model
(
utils/mkgraph.sh data/lang_test_tgsmall exp/tri3b exp/tri3b/graph_tgsmall || exit 1;
utils/mkgraph.sh data/lang_nosp_test_tgsmall \
exp/tri3b exp/tri3b/graph_nosp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri3b/graph_tgsmall data/$test exp/tri3b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri3b/decode_{tgsmall,tgmed}_$test || exit 1;
exp/tri3b/graph_nosp_tgsmall data/$test \
exp/tri3b/decode_nosp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri3b/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
done
)&
# align the entire train_clean_100 subset using the tri3b model
steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
data/train_clean_100 data/lang exp/tri3b exp/tri3b_ali_clean_100 || exit 1;
data/train_clean_100 data/lang_nosp \
exp/tri3b exp/tri3b_ali_clean_100 || exit 1;
# train another LDA+MLLT+SAT system on the entire 100 hour subset
steps/train_sat.sh --cmd "$train_cmd" \
4200 40000 data/train_clean_100 data/lang exp/tri3b_ali_clean_100 exp/tri4b || exit 1;
steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \
data/train_clean_100 data/lang_nosp \
exp/tri3b_ali_clean_100 exp/tri4b || exit 1;
# decode using the tri4b model
(
utils/mkgraph.sh data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1;
utils/mkgraph.sh data/lang_nosp_test_tgsmall \
exp/tri4b exp/tri4b/graph_nosp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri4b/graph_tgsmall data/$test exp/tri4b/decode_tgsmall_$test || exit 1;
exp/tri4b/graph_nosp_tgsmall data/$test \
exp/tri4b/decode_nosp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
data/$test exp/tri4b/decode_nosp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \
data/$test exp/tri4b/decode_nosp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,fglarge} \
data/$test exp/tri4b/decode_nosp_{tgsmall,fglarge}_$test || exit 1;
done
)&
# Now we compute the pronunciation and silence probabilities from training data,
# and re-create the lang directory.
steps/get_prons.sh --cmd "$train_cmd" \
data/train_clean_100 data/lang_nosp exp/tri4b
utils/dict_dir_add_pronprobs.sh --max-normalize true \
data/local/dict_nosp \
exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \
exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict || exit 1
utils/prepare_lang.sh data/local/dict \
"<SPOKEN_NOISE>" data/local/lang_tmp data/lang
local/format_lms.sh --src-dir data/lang data/local/lm
utils/build_const_arpa_lm.sh \
data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh \
data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1;
# decode using the tri4b model with pronunciation and silence probabilities
(
utils/mkgraph.sh \
data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri4b/graph_tgsmall data/$test \
exp/tri4b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri4b/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
@ -199,75 +246,41 @@ local/nnet2/run_5a_clean_100.sh || exit 1
local/download_and_untar.sh $data $data_url train-clean-360 || exit 1;
# now add the "clean-360" subset to the mix ...
local/data_prep.sh $data/LibriSpeech/train-clean-360 data/train_clean_360 || exit 1
local/data_prep.sh \
$data/LibriSpeech/train-clean-360 data/train_clean_360 || exit 1
steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/train_clean_360 \
exp/make_mfcc/train_clean_360 $mfccdir || exit 1
steps/compute_cmvn_stats.sh data/train_clean_360 exp/make_mfcc/train_clean_360 $mfccdir || exit 1
steps/compute_cmvn_stats.sh \
data/train_clean_360 exp/make_mfcc/train_clean_360 $mfccdir || exit 1
# ... and then combine the two sets into a 460 hour one
utils/combine_data.sh data/train_clean_460 data/train_clean_100 data/train_clean_360 || exit 1
utils/combine_data.sh \
data/train_clean_460 data/train_clean_100 data/train_clean_360 || exit 1
# align the new, combined set, using the tri4b model
steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
data/train_clean_460 data/lang exp/tri4b exp/tri4b_ali_clean_460 || exit 1;
# At this point we estimate the probability of the pronunciation variants for
# the words in our lexicon (of course some rare words won't be present in the
# training data, so their probabilities will be left unchanged). These pronunciation
# probabilities will be used in the subsequent _decoding_ steps.
# count how many times every pronunciation variant was used in the training data
steps/get_prons.sh --cmd "$train_cmd" data/train_clean_460 data/lang exp/tri4b_ali_clean_460
# use the counts from the above step, to calculate (smoothed) pronunciation probabilities
utils/dict_dir_add_pronprobs.sh data/local/dict exp/tri4b_ali_clean_460/pron_counts_nowb.txt data/local/dict_pp
# prepare a new "lang" directories to be used for the pronunciation probability setup
utils/prepare_lang.sh data/local/dict_pp "<SPOKEN_NOISE>" data/local/lang_tmp_pp data/lang_pp
local/format_lms.sh --src-dir data/lang_pp data/local/lm
# regenerate the full 3-gram and 4-gram directories
utils/build_const_arpa_lm.sh \
data/local/lm/lm_tglarge.arpa.gz data/lang_pp data/lang_pp_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh \
data/local/lm/lm_fglarge.arpa.gz data/lang_pp data/lang_pp_test_fglarge || exit 1;
# decode again using the tri4b model, but this time with pronunciation probability
(
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri4b exp/tri4b/graph_pp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri4b/graph_pp_tgsmall data/$test exp/tri4b/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri4b/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test exp/tri4b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test exp/tri4b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
)&
# create a larger SAT model, trained on the 460 hours of data.
steps/train_sat.sh --cmd "$train_cmd" \
5000 100000 data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1;
steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1;
# decode using the tri5b model
(
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri5b exp/tri5b/graph_pp_tgsmall || exit 1;
utils/mkgraph.sh data/lang_test_tgsmall \
exp/tri5b exp/tri5b/graph_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri5b/graph_pp_tgsmall data/$test exp/tri5b/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri5b/decode_pp_{tgsmall,tgmed}_$test || exit 1;
exp/tri5b/graph_tgsmall data/$test \
exp/tri5b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri5b/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test exp/tri5b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test exp/tri5b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri5b/decode_{tgsmall,fglarge}_$test || exit 1;
done
)&
@ -277,13 +290,16 @@ local/nnet2/run_6a_clean_460.sh || exit 1
local/download_and_untar.sh $data $data_url train-other-500 || exit 1;
# prepare the 500 hour subset.
local/data_prep.sh $data/LibriSpeech/train-other-500 data/train_other_500 || exit 1
local/data_prep.sh \
$data/LibriSpeech/train-other-500 data/train_other_500 || exit 1
steps/make_mfcc.sh --cmd "$train_cmd" --nj 40 data/train_other_500 \
exp/make_mfcc/train_other_500 $mfccdir || exit 1
steps/compute_cmvn_stats.sh data/train_other_500 exp/make_mfcc/train_other_500 $mfccdir || exit 1
steps/compute_cmvn_stats.sh \
data/train_other_500 exp/make_mfcc/train_other_500 $mfccdir || exit 1
# combine all the data
utils/combine_data.sh data/train_960 data/train_clean_460 data/train_other_500 || exit 1
utils/combine_data.sh \
data/train_960 data/train_clean_460 data/train_other_500 || exit 1
steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
data/train_960 data/lang exp/tri5b exp/tri5b_ali_960 || exit 1;
@ -295,18 +311,19 @@ steps/train_quick.sh --cmd "$train_cmd" \
# decode using the tri6b model
(
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b exp/tri6b/graph_pp_tgsmall || exit 1;
utils/mkgraph.sh data/lang_test_tgsmall \
exp/tri6b exp/tri6b/graph_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri6b/graph_pp_tgsmall data/$test exp/tri6b/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri6b/decode_pp_{tgsmall,tgmed}_$test || exit 1;
exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri6b/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test exp/tri6b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test exp/tri6b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri6b/decode_{tgsmall,fglarge}_$test || exit 1;
done
)&
@ -333,3 +350,6 @@ local/nnet2/run_7a_960.sh || exit 1
# ## The following is an older version of the online-nnet2 recipe, without "multi-splice". It's faster
# ## to train but slightly worse.
# # local/online/run_nnet2.sh
# Wait for decodings in the background
wait