trunk: librispeech recipe now uses a lexicon with pronunciation probabilities for the later decoding steps

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4789 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Vassil Panayotov 2015-01-16 09:13:33 +00:00
Родитель f76998d8eb
Коммит 8480fa1dd6
12 изменённых файлов: 238 добавлений и 128 удалений

Просмотреть файл

@ -256,3 +256,68 @@
%WER 14.80 [ 7540 / 50948, 800 ins, 1025 del, 5715 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_other_tgmed/wer_18
%WER 16.10 [ 8201 / 50948, 789 ins, 1240 del, 6172 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_other_tgsmall/wer_18
## Results with a SAT model trained on "train-clean-100", using pronunciation probabilities estimated on the training data
# for x in exp/tri4b/decode_pp_*; do grep WER $x/wer_* | utils/best_wer.sh | egrep -v '\.si' ; done
%WER 8.05 [ 4382 / 54402, 583 ins, 520 del, 3279 sub ] exp/tri4b/decode_pp_fglarge_dev_clean/wer_17
%WER 29.07 [ 14813 / 50948, 1411 ins, 2373 del, 11029 sub ] exp/tri4b/decode_pp_fglarge_dev_other/wer_18
%WER 9.19 [ 4833 / 52576, 621 ins, 579 del, 3633 sub ] exp/tri4b/decode_pp_fglarge_test_clean/wer_20
%WER 31.28 [ 16371 / 52343, 1505 ins, 2773 del, 12093 sub ] exp/tri4b/decode_pp_fglarge_test_other/wer_17
%WER 8.53 [ 4639 / 54402, 635 ins, 516 del, 3488 sub ] exp/tri4b/decode_pp_tglarge_dev_clean/wer_15
%WER 29.98 [ 15274 / 50948, 1501 ins, 2380 del, 11393 sub ] exp/tri4b/decode_pp_tglarge_dev_other/wer_18
%WER 9.45 [ 4969 / 52576, 661 ins, 577 del, 3731 sub ] exp/tri4b/decode_pp_tglarge_test_clean/wer_18
%WER 32.14 [ 16824 / 52343, 1649 ins, 2804 del, 12371 sub ] exp/tri4b/decode_pp_tglarge_test_other/wer_17
%WER 10.47 [ 5694 / 54402, 615 ins, 793 del, 4286 sub ] exp/tri4b/decode_pp_tgmed_dev_clean/wer_15
%WER 32.97 [ 16795 / 50943, 1416 ins, 2874 del, 12505 sub ] [PARTIAL] exp/tri4b/decode_pp_tgmed_dev_other/wer_16
%WER 11.67 [ 6133 / 52576, 685 ins, 831 del, 4617 sub ] exp/tri4b/decode_pp_tgmed_test_clean/wer_16
%WER 34.97 [ 18303 / 52343, 1409 ins, 3660 del, 13234 sub ] exp/tri4b/decode_pp_tgmed_test_other/wer_17
%WER 11.93 [ 6490 / 54402, 641 ins, 1017 del, 4832 sub ] exp/tri4b/decode_pp_tgsmall_dev_clean/wer_15
%WER 34.98 [ 17821 / 50948, 1396 ins, 3344 del, 13081 sub ] exp/tri4b/decode_pp_tgsmall_dev_other/wer_16
%WER 13.07 [ 6874 / 52576, 698 ins, 986 del, 5190 sub ] exp/tri4b/decode_pp_tgsmall_test_clean/wer_15
%WER 36.83 [ 19276 / 52343, 1261 ins, 4092 del, 13923 sub ] exp/tri4b/decode_pp_tgsmall_test_other/wer_17
## Multi-splice version of the online recipe, using pronunciation probabilities estimated on training data
# for x in exp/nnet2_online/nnet_ms_a/decode_pp_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 4.43 [ 2411 / 54402, 339 ins, 258 del, 1814 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_clean_fglarge/wer_14
%WER 4.70 [ 2555 / 54402, 388 ins, 246 del, 1921 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_clean_tglarge/wer_11
%WER 5.86 [ 3186 / 54402, 338 ins, 449 del, 2399 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_clean_tgmed/wer_14
%WER 6.59 [ 3587 / 54402, 381 ins, 486 del, 2720 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_clean_tgsmall/wer_12
%WER 12.50 [ 6371 / 50948, 702 ins, 898 del, 4771 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_other_fglarge/wer_16
%WER 13.05 [ 6648 / 50948, 755 ins, 916 del, 4977 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_other_tglarge/wer_15
%WER 15.57 [ 7935 / 50948, 688 ins, 1327 del, 5920 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_other_tgmed/wer_15
%WER 17.08 [ 8702 / 50948, 694 ins, 1567 del, 6441 sub ] exp/nnet2_online/nnet_ms_a/decode_pp_dev_other_tgsmall/wer_15
# for x in exp/nnet2_online/nnet_ms_a_online/decode_pp_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 4.50 [ 2448 / 54402, 346 ins, 262 del, 1840 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_fglarge/wer_14
%WER 4.66 [ 2537 / 54402, 374 ins, 243 del, 1920 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_fglarge_utt/wer_12
%WER 4.52 [ 2461 / 54402, 359 ins, 252 del, 1850 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_fglarge_utt_offline/wer_13
%WER 4.74 [ 2581 / 54402, 375 ins, 272 del, 1934 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tglarge/wer_12
%WER 4.83 [ 2625 / 54402, 358 ins, 279 del, 1988 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tglarge_utt/wer_13
%WER 4.72 [ 2567 / 54402, 361 ins, 283 del, 1923 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tglarge_utt_offline/wer_13
%WER 5.85 [ 3184 / 54402, 343 ins, 447 del, 2394 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tgmed/wer_14
%WER 6.11 [ 3325 / 54402, 385 ins, 392 del, 2548 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tgmed_utt/wer_12
%WER 5.90 [ 3212 / 54402, 400 ins, 381 del, 2431 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tgmed_utt_offline/wer_12
%WER 6.59 [ 3587 / 54402, 416 ins, 450 del, 2721 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tgsmall/wer_11
%WER 6.92 [ 3762 / 54402, 392 ins, 505 del, 2865 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tgsmall_utt/wer_12
%WER 6.68 [ 3634 / 54402, 434 ins, 451 del, 2749 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_clean_tgsmall_utt_offline/wer_11
%WER 12.85 [ 6548 / 50948, 725 ins, 871 del, 4952 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_fglarge/wer_15
%WER 13.70 [ 6981 / 50948, 812 ins, 895 del, 5274 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_fglarge_utt/wer_15
%WER 13.18 [ 6715 / 50948, 787 ins, 841 del, 5087 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_fglarge_utt_offline/wer_15
%WER 13.36 [ 6805 / 50948, 765 ins, 924 del, 5116 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tglarge/wer_15
%WER 14.29 [ 7282 / 50948, 888 ins, 917 del, 5477 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tglarge_utt/wer_14
%WER 13.65 [ 6955 / 50948, 806 ins, 903 del, 5246 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tglarge_utt_offline/wer_15
%WER 15.64 [ 7969 / 50948, 676 ins, 1372 del, 5921 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tgmed/wer_16
%WER 16.68 [ 8497 / 50948, 771 ins, 1364 del, 6362 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tgmed_utt/wer_15
%WER 15.93 [ 8118 / 50948, 736 ins, 1286 del, 6096 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tgmed_utt_offline/wer_15
%WER 17.11 [ 8718 / 50948, 704 ins, 1547 del, 6467 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tgsmall/wer_15
%WER 18.12 [ 9232 / 50948, 751 ins, 1627 del, 6854 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tgsmall_utt/wer_15
%WER 17.38 [ 8855 / 50948, 736 ins, 1555 del, 6564 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_dev_other_tgsmall_utt_offline/wer_15
%WER 5.21 [ 2739 / 52576, 428 ins, 261 del, 2050 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_clean_fglarge_utt_offline/wer_12
%WER 5.32 [ 2795 / 52576, 402 ins, 298 del, 2095 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_clean_tglarge_utt_offline/wer_13
%WER 6.49 [ 3413 / 52576, 427 ins, 424 del, 2562 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_clean_tgmed_utt_offline/wer_12
%WER 7.18 [ 3774 / 52576, 469 ins, 477 del, 2828 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_clean_tgsmall_utt_offline/wer_11
%WER 13.35 [ 6987 / 52343, 808 ins, 925 del, 5254 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_other_fglarge_utt_offline/wer_14
%WER 13.79 [ 7219 / 52343, 847 ins, 953 del, 5419 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_other_tglarge_utt_offline/wer_13
%WER 16.08 [ 8416 / 52343, 746 ins, 1466 del, 6204 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_other_tgmed_utt_offline/wer_15
%WER 17.64 [ 9231 / 52343, 764 ins, 1662 del, 6805 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_other_tgsmall_utt_offline/wer_14

Просмотреть файл

@ -9,7 +9,6 @@
. ./path.sh || exit 1;
# begin configuration section
src_dict=data/local/dict/lexicon.txt # only needed for diagnostics, to identify empty words.
src_dir=data/lang
# end configuration section

Просмотреть файл

@ -59,15 +59,15 @@ fi
for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri4b/decode_tgsmall_$test \
exp/tri4b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
exp/tri4b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
exit 0;

Просмотреть файл

@ -64,16 +64,16 @@ fi
for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri5b/decode_tgsmall_$test \
exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
--transform-dir exp/tri5b/decode_pp_tgsmall_$test \
exp/tri5b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
exit 0;

Просмотреть файл

@ -64,16 +64,16 @@ fi
for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri6b/decode_tgsmall_$test \
exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
--transform-dir exp/tri6b/decode_pp_tgsmall_$test \
exp/tri6b/graph_pp_tgsmall data/$test $dir/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test $dir/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
exit 0;

Просмотреть файл

@ -83,12 +83,12 @@ if [ $stage -le 9 ]; then
for test in dev_clean dev_other; do
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/nnet2_online/ivectors_${test} \
exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
done
fi
@ -105,15 +105,15 @@ if [ $stage -le 11 ]; then
# previous utterances of the same speaker.
for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
done
fi
@ -122,12 +122,12 @@ if [ $stage -le 12 ]; then
# without carrying forward speaker information.
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt || exit 1;
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1;
done
fi
@ -137,13 +137,13 @@ if [ $stage -le 13 ]; then
# of the utterance while computing the iVector (--online false)
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
--per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
done
fi

Просмотреть файл

@ -135,14 +135,14 @@ if [ $stage -le 5 ]; then
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
--iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
) &
done

Просмотреть файл

@ -95,12 +95,15 @@ if [ $stage -le 9 ]; then
for test in dev_clean dev_other; do
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/nnet2_online/ivectors_${test} \
exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_pp_tgsmall data/${test}_hires $dir/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test}_hires $dir/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
done
fi
@ -117,12 +120,15 @@ if [ $stage -le 11 ]; then
# previous utterances of the same speaker.
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed} || exit 1;
exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge} || exit 1;
done
fi
@ -131,12 +137,15 @@ if [ $stage -le 12 ]; then
# without carrying forward speaker information.
for test in dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt || exit 1;
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test ${dir}_online/decode_pp_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt || exit 1;
done
fi
@ -146,16 +155,16 @@ if [ $stage -le 13 ]; then
# of the utterance while computing the iVector (--online false)
for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
--per-utt true --online false exp/tri6b/graph_pp_tgsmall data/$test \
${dir}_online/decode_pp_${test}_tgsmall_utt_offline || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tgmed}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_pp_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
done
fi

Просмотреть файл

@ -70,12 +70,12 @@ if [ $stage -le 1 ]; then
steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads_denlats" \
--online-ivector-dir exp/nnet2_online/ivectors_train_960_hires \
--nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.config \
data/train_960_hires data/lang $srcdir ${srcdir}_denlats || exit 1;
data/train_960_hires data/lang_pp $srcdir ${srcdir}_denlats || exit 1;
# the command below is a more generic, but slower, way to do it.
#steps/online/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads_denlats" \
# --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.config \
# data/train_960 data/lang ${srcdir}_online ${srcdir}_denlats || exit 1;
# data/train_960 data/lang_pp ${srcdir}_online ${srcdir}_denlats || exit 1;
fi
@ -89,7 +89,7 @@ if [ $stage -le 2 ]; then
steps/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
--online-ivector-dir exp/nnet2_online/ivectors_train_960_hires \
--nj $nj data/train_960_hires data/lang $srcdir ${srcdir}_ali || exit 1;
--nj $nj data/train_960_hires data/lang_pp $srcdir ${srcdir}_ali || exit 1;
# the command below is a more generic, but slower, way to do it.
# steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
@ -109,13 +109,13 @@ if [ $stage -le 3 ]; then
--cmd "$decode_cmd -tc $max_jobs" \
--online-ivector-dir exp/nnet2_online/ivectors_train_960_hires \
--criterion $criterion --drop-frames $drop_frames \
data/train_960_hires data/lang ${srcdir}{_ali,_denlats,/final.mdl,_degs} || exit 1;
data/train_960_hires data/lang_pp ${srcdir}{_ali,_denlats,/final.mdl,_degs} || exit 1;
# the command below is a more generic, but slower, way to do it.
#steps/online/nnet2/get_egs_discriminative2.sh \
# --cmd "$decode_cmd -tc $max_jobs" \
# --criterion $criterion --drop-frames $drop_frames \
# data/train_960 data/lang ${srcdir}{_ali,_denlats,_online,_degs} || exit 1;
# data/train_960 data/lang_pp ${srcdir}{_ali,_denlats,_online,_degs} || exit 1;
fi
if [ $stage -le 4 ]; then
@ -136,15 +136,15 @@ if [ $stage -le 5 ]; then
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/${test} $dir/decode_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
--iter epoch$epoch exp/tri6b/graph_pp_tgsmall data/${test} $dir/decode_pp_epoch${epoch}_${test}_tgsmall || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/${test} $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tgmed} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test $dir/decode_pp_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
) &
) &
done

Просмотреть файл

@ -16,7 +16,7 @@ set -e
if [ $stage -le 1 ]; then
steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang \
steps/cleanup/find_bad_utts.sh --nj 100 --cmd "$train_cmd" data/train_960 data/lang_pp \
exp/tri6b exp/tri6b_cleanup
fi
@ -28,21 +28,21 @@ fi
if [ $stage -le 3 ]; then
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
data/train.thresh$thresh data/lang exp/tri6b exp/tri6b_ali_$thresh
data/train.thresh$thresh data/lang_pp exp/tri6b exp/tri6b_ali_$thresh
fi
if [ $stage -le 4 ]; then
steps/train_sat.sh --cmd "$train_cmd" \
7000 150000 data/train_960_thresh$thresh data/lang exp/tri6b_ali_$thresh exp/tri6b_$thresh || exit 1;
7000 150000 data/train_960_thresh$thresh data/lang_pp exp/tri6b_ali_$thresh exp/tri6b_$thresh || exit 1;
fi
if [ $stage -le 5 ]; then
utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_tgsmall || exit 1
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b_$thresh exp/tri6b_$thresh/graph_pp_tgsmall || exit 1
for test in dev_clean dev_other; do
steps/decode_fmllr.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
exp/tri6b_$thresh/graph_tgsmall data/$test exp/tri6b_$thresh/decode_tgsmall_$test || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri6b_$thresh/decode_{tgsmall,tgmed}_$test || exit 1;
exp/tri6b_$thresh/graph_pp_tgsmall data/$test exp/tri6b_$thresh/decode_pp_tgsmall_$test || exit 1
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri6b_$thresh/decode_pp_{tgsmall,tgmed}_$test || exit 1;
done
fi

Просмотреть файл

@ -71,15 +71,15 @@ fi
if [ $stage -le 3 ]; then
echo "$0: Performing RNNLM rescoring on tri6b decoding results"
for lm in tgsmall tgmed tglarge; do
for devset in dev_clean dev_other; do
sourcedir=exp/tri6b/decode_${lm}_${devset}
resultsdir=${sourcedir}_rnnlm_h${hidden}_me${maxent_order}-${maxent_size}
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 0.5 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.25
cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.75
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 --stage 7 0.25 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 --stage 7 0.75 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
done
for devset in dev_clean dev_other; do
sourcedir=exp/tri6b/decode_pp_${lm}_${devset}
resultsdir=${sourcedir}_rnnlm_h${hidden}_me${maxent_order}-${maxent_size}
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 0.5 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.25
cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.75
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 --stage 7 0.25 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 --stage 7 0.75 data/lang_pp_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
done
done
fi

Просмотреть файл

@ -55,8 +55,7 @@ utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/
local/format_lms.sh data/local/lm || exit 1
# Create ConstArpaLm format language model for full trigram and 4gram language
# model.
# Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
utils/build_const_arpa_lm.sh \
data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh \
@ -66,7 +65,7 @@ mfccdir=mfcc
# spread the mfccs over various machines, as this data-set is quite large.
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename.
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech/s5/$dir/$mfcc/storage \
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech/s5/$mfcc/storage \
$mfccdir/storage
fi
@ -212,24 +211,63 @@ utils/combine_data.sh data/train_clean_460 data/train_clean_100 data/train_clean
steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
data/train_clean_460 data/lang exp/tri4b exp/tri4b_ali_clean_460 || exit 1;
# At this point we estimate the probability of the pronunciation variants for
# the words in our lexicon (of course some rare words won't be present in the
# training data, so their probabilities will be left unchanged). These pronunciation
# probabilities will be used in the subsequent _decoding_ steps.
# count how many times every pronunciation variant was used in the training data
steps/get_prons.sh --cmd "$train_cmd" data/train_clean_460 data/lang exp/tri4b_ali_clean_460
# use the counts from the above step, to calculate (smoothed) pronunciation probabilities
utils/dict_dir_add_pronprobs.sh data/local/dict exp/tri4b_ali_clean_460/pron_counts_nowb.txt data/local/dict_pp
# prepare new "lang" directories to be used for the pronunciation probability setup
utils/prepare_lang.sh data/local/dict_pp "<SPOKEN_NOISE>" data/local/lang_tmp_pp data/lang_pp
local/format_lms.sh --src-dir data/lang_pp data/local/lm
# regenerate the full 3-gram and 4-gram directories
utils/build_const_arpa_lm.sh \
data/local/lm/lm_tglarge.arpa.gz data/lang_pp data/lang_pp_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh \
data/local/lm/lm_fglarge.arpa.gz data/lang_pp data/lang_pp_test_fglarge || exit 1;
# decode again using the tri4b model, but this time with pronunciation probabilities
(
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri4b exp/tri4b/graph_pp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri4b/graph_pp_tgsmall data/$test exp/tri4b/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri4b/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test exp/tri4b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test exp/tri4b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
)&
# create a larger SAT model, trained on the 460 hours of data.
steps/train_sat.sh --cmd "$train_cmd" \
5000 100000 data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b || exit 1;
# decode using the tri5b model
(
utils/mkgraph.sh data/lang_test_tgsmall exp/tri5b exp/tri5b/graph_tgsmall || exit 1;
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri5b exp/tri5b/graph_pp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri5b/graph_tgsmall data/$test exp/tri5b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri5b/decode_{tgsmall,tgmed}_$test || exit 1;
exp/tri5b/graph_pp_tgsmall data/$test exp/tri5b/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri5b/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test exp/tri5b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri5b/decode_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test exp/tri5b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
)&
@ -257,35 +295,34 @@ steps/train_quick.sh --cmd "$train_cmd" \
# decode using the tri6b model
(
utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b exp/tri6b/graph_tgsmall || exit 1;
utils/mkgraph.sh data/lang_pp_test_tgsmall exp/tri6b exp/tri6b/graph_pp_tgsmall || exit 1;
for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test exp/tri6b/decode_{tgsmall,tgmed}_$test || exit 1;
exp/tri6b/graph_pp_tgsmall data/$test exp/tri6b/decode_pp_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tgmed} \
data/$test exp/tri6b/decode_pp_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,tglarge} \
data/$test exp/tri6b/decode_pp_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri6b/decode_{tgsmall,fglarge}_$test || exit 1;
--cmd "$decode_cmd" data/lang_pp_test_{tgsmall,fglarge} \
data/$test exp/tri6b/decode_pp_{tgsmall,fglarge}_$test || exit 1;
done
)&
# steps/cleanup/debug_lexicon.sh --remove-stress true --nj 200 --cmd "$train_cmd" data/train_clean_100 \
# data/lang exp/tri6b data/local/dict/lexicon.txt exp/debug_lexicon_100h
# Perform RNNLM rescoring of tri6b
# Attention: with default settings requires 4 GB of memory per rescoring job, so commenting this out by default
# #Perform RNNLM rescoring of tri6b
# #Attention: with default settings requires 4 GB of memory per rescoring job, so commenting this out by default
# local/run_rnnlm.sh $data data/local/lm
# train NN models on the entire dataset
local/nnet2/run_7a_960.sh || exit 1
## train models on cleaned-up data
## we've found that this isn't helpful-- see the comments in local/run_data_cleaning.sh
#local/run_data_cleaning.sh
# # train models on cleaned-up data
# # we've found that this isn't helpful-- see the comments in local/run_data_cleaning.sh
# local/run_data_cleaning.sh
# # The following is the current online-nnet2 recipe, with "multi-splice".
# local/online/run_nnet2_ms.sh