Merge pull request #786 from freewym/librispeech_nnet2

Add new results for the multi-splice version of the LibriSpeech online recipe, including results on the test sets.
This commit is contained in:
Daniel Povey 2016-05-18 19:56:26 -04:00
Родитель b45a70b936 f7b34367c5
Коммит 0d4f1b2483
3 изменённых файлов: 85 добавлений и 5 удалений

Просмотреть файл

@ -255,6 +255,74 @@
%WER 18.23 [ 9288 / 50948, 782 ins, 1585 del, 6921 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgsmall_utt/wer_15
%WER 17.54 [ 8936 / 50948, 813 ins, 1425 del, 6698 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgsmall_utt_offline/wer_14
## Multi-splice version of online recipe (5/16/2016).
# for x in exp/nnet2_online/nnet_ms_a/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 4.46 [ 2429 / 54402, 311 ins, 284 del, 1834 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_clean_fglarge/wer_13_1.0
%WER 4.64 [ 2522 / 54402, 362 ins, 251 del, 1909 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_clean_tglarge/wer_12_0.5
%WER 5.86 [ 3187 / 54402, 400 ins, 357 del, 2430 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_clean_tgmed/wer_13_0.0
%WER 6.60 [ 3592 / 54402, 450 ins, 403 del, 2739 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_clean_tgsmall/wer_12_0.0
%WER 12.31 [ 6274 / 50948, 742 ins, 784 del, 4748 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_other_fglarge/wer_16_0.5
%WER 12.87 [ 6557 / 50948, 774 ins, 850 del, 4933 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_other_tglarge/wer_15_0.5
%WER 15.25 [ 7770 / 50948, 871 ins, 1074 del, 5825 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_other_tgmed/wer_16_0.0
%WER 16.55 [ 8434 / 50948, 832 ins, 1280 del, 6322 sub ] exp/nnet2_online/nnet_ms_a/decode_dev_other_tgsmall/wer_16_0.0
%WER 4.99 [ 2624 / 52576, 388 ins, 256 del, 1980 sub ] exp/nnet2_online/nnet_ms_a/decode_test_clean_fglarge/wer_13_0.5
%WER 5.15 [ 2709 / 52576, 386 ins, 284 del, 2039 sub ] exp/nnet2_online/nnet_ms_a/decode_test_clean_tglarge/wer_13_0.5
%WER 6.25 [ 3285 / 52576, 422 ins, 357 del, 2506 sub ] exp/nnet2_online/nnet_ms_a/decode_test_clean_tgmed/wer_13_0.0
%WER 7.07 [ 3717 / 52576, 455 ins, 456 del, 2806 sub ] exp/nnet2_online/nnet_ms_a/decode_test_clean_tgsmall/wer_13_0.0
%WER 12.89 [ 6748 / 52343, 878 ins, 769 del, 5101 sub ] exp/nnet2_online/nnet_ms_a/decode_test_other_fglarge/wer_16_0.0
%WER 13.32 [ 6972 / 52343, 940 ins, 770 del, 5262 sub ] exp/nnet2_online/nnet_ms_a/decode_test_other_tglarge/wer_14_0.0
%WER 15.82 [ 8281 / 52343, 886 ins, 1197 del, 6198 sub ] exp/nnet2_online/nnet_ms_a/decode_test_other_tgmed/wer_15_0.0
%WER 17.09 [ 8948 / 52343, 863 ins, 1383 del, 6702 sub ] exp/nnet2_online/nnet_ms_a/decode_test_other_tgsmall/wer_15_0.0
# for x in exp/nnet2_online/nnet_ms_a_online/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 4.53 [ 2466 / 54402, 318 ins, 295 del, 1853 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_fglarge/wer_14_1.0
%WER 4.76 [ 2592 / 54402, 338 ins, 286 del, 1968 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_fglarge_utt/wer_13_1.0
%WER 4.57 [ 2488 / 54402, 330 ins, 285 del, 1873 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_fglarge_utt_offline/wer_13_1.0
%WER 4.71 [ 2562 / 54402, 392 ins, 236 del, 1934 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tglarge/wer_14_0.0
%WER 4.90 [ 2665 / 54402, 352 ins, 280 del, 2033 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tglarge_utt/wer_14_0.5
%WER 4.72 [ 2570 / 54402, 357 ins, 273 del, 1940 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tglarge_utt_offline/wer_14_0.5
%WER 5.87 [ 3196 / 54402, 419 ins, 340 del, 2437 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tgmed/wer_12_0.0
%WER 6.11 [ 3326 / 54402, 385 ins, 396 del, 2545 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tgmed_utt/wer_12_0.5
%WER 5.99 [ 3258 / 54402, 382 ins, 392 del, 2484 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tgmed_utt_offline/wer_12_0.5
%WER 6.58 [ 3581 / 54402, 472 ins, 379 del, 2730 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tgsmall/wer_11_0.0
%WER 6.89 [ 3746 / 54402, 475 ins, 405 del, 2866 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tgsmall_utt/wer_12_0.0
%WER 6.69 [ 3637 / 54402, 480 ins, 383 del, 2774 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_clean_tgsmall_utt_offline/wer_11_0.0
%WER 12.67 [ 6456 / 50948, 774 ins, 771 del, 4911 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_fglarge/wer_16_0.5
%WER 13.73 [ 6993 / 50948, 785 ins, 922 del, 5286 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_fglarge_utt/wer_14_1.0
%WER 12.97 [ 6609 / 50948, 797 ins, 801 del, 5011 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_fglarge_utt_offline/wer_16_0.5
%WER 13.09 [ 6670 / 50948, 800 ins, 826 del, 5044 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tglarge/wer_15_0.5
%WER 14.27 [ 7270 / 50948, 909 ins, 869 del, 5492 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tglarge_utt/wer_14_0.5
%WER 13.46 [ 6859 / 50948, 828 ins, 845 del, 5186 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tglarge_utt_offline/wer_15_0.5
%WER 15.27 [ 7782 / 50948, 874 ins, 1051 del, 5857 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgmed/wer_16_0.0
%WER 16.41 [ 8359 / 50948, 949 ins, 1135 del, 6275 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgmed_utt/wer_16_0.0
%WER 15.56 [ 7926 / 50948, 893 ins, 1051 del, 5982 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgmed_utt_offline/wer_16_0.0
%WER 16.49 [ 8402 / 50948, 855 ins, 1210 del, 6337 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgsmall/wer_15_0.0
%WER 17.80 [ 9068 / 50948, 969 ins, 1260 del, 6839 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgsmall_utt/wer_15_0.0
%WER 16.97 [ 8647 / 50948, 845 ins, 1324 del, 6478 sub ] exp/nnet2_online/nnet_ms_a_online/decode_dev_other_tgsmall_utt_offline/wer_17_0.0
%WER 5.05 [ 2654 / 52576, 411 ins, 239 del, 2004 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_fglarge/wer_12_0.5
%WER 5.24 [ 2755 / 52576, 365 ins, 312 del, 2078 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_fglarge_utt/wer_13_1.0
%WER 5.09 [ 2676 / 52576, 405 ins, 241 del, 2030 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_fglarge_utt_offline/wer_13_0.5
%WER 5.22 [ 2744 / 52576, 393 ins, 282 del, 2069 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tglarge/wer_13_0.5
%WER 5.38 [ 2826 / 52576, 413 ins, 284 del, 2129 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tglarge_utt/wer_13_0.5
%WER 5.24 [ 2757 / 52576, 453 ins, 229 del, 2075 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tglarge_utt_offline/wer_13_0.0
%WER 6.26 [ 3289 / 52576, 436 ins, 345 del, 2508 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tgmed/wer_13_0.0
%WER 6.54 [ 3441 / 52576, 435 ins, 381 del, 2625 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tgmed_utt/wer_14_0.0
%WER 6.28 [ 3303 / 52576, 426 ins, 359 del, 2518 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tgmed_utt_offline/wer_14_0.0
%WER 7.06 [ 3711 / 52576, 446 ins, 474 del, 2791 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tgsmall/wer_14_0.0
%WER 7.31 [ 3845 / 52576, 510 ins, 426 del, 2909 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tgsmall_utt/wer_12_0.0
%WER 7.08 [ 3723 / 52576, 460 ins, 445 del, 2818 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_clean_tgsmall_utt_offline/wer_13_0.0
%WER 13.17 [ 6891 / 52343, 936 ins, 713 del, 5242 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_fglarge/wer_14_0.0
%WER 14.20 [ 7432 / 52343, 832 ins, 983 del, 5617 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_fglarge_utt/wer_15_0.5
%WER 13.26 [ 6939 / 52343, 837 ins, 860 del, 5242 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_fglarge_utt_offline/wer_14_0.5
%WER 13.53 [ 7080 / 52343, 952 ins, 779 del, 5349 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tglarge/wer_14_0.0
%WER 14.77 [ 7730 / 52343, 877 ins, 1056 del, 5797 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tglarge_utt/wer_15_0.5
%WER 13.74 [ 7192 / 52343, 871 ins, 920 del, 5401 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tglarge_utt_offline/wer_14_0.5
%WER 15.78 [ 8259 / 52343, 898 ins, 1170 del, 6191 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tgmed/wer_15_0.0
%WER 16.97 [ 8884 / 52343, 939 ins, 1304 del, 6641 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tgmed_utt/wer_16_0.0
%WER 16.01 [ 8380 / 52343, 877 ins, 1210 del, 6293 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tgmed_utt_offline/wer_16_0.0
%WER 16.98 [ 8889 / 52343, 900 ins, 1283 del, 6706 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tgsmall/wer_14_0.0
%WER 18.21 [ 9533 / 52343, 966 ins, 1398 del, 7169 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tgsmall_utt/wer_14_0.0
%WER 17.29 [ 9050 / 52343, 894 ins, 1391 del, 6765 sub ] exp/nnet2_online/nnet_ms_a_online/decode_test_other_tgsmall_utt_offline/wer_15_0.0
## Note: this learning rate is the effective learning rate; it gets multiplied by the num-jobs.
# for x in exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch*{clean,other}*; do grep WER $x/wer_* | utils/best_wer.sh ; done

Просмотреть файл

@ -24,7 +24,7 @@ if [ $stage -le 1 ]; then
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
fi
for datadir in train_960 dev_clean dev_other; do
for datadir in train_960 test_clean test_other dev_clean dev_other; do
utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1;

Просмотреть файл

@ -82,7 +82,7 @@ fi
if [ $stage -le 8 ]; then
# dump iVectors for the testing data.
for test in dev_clean dev_other; do
for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \
data/${test}_hires exp/nnet2_online/extractor exp/nnet2_online/ivectors_$test || exit 1;
done
@ -92,7 +92,8 @@ fi
if [ $stage -le 9 ]; then
# this does offline decoding that should give about the same results as the
# real online decoding (the one with --per-utt true)
for test in dev_clean dev_other; do
for test in test_clean test_other dev_clean dev_other; do
(
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/nnet2_online/ivectors_${test} \
exp/tri6b/graph_tgsmall data/${test}_hires $dir/decode_${test}_tgsmall || exit 1;
@ -104,7 +105,9 @@ if [ $stage -le 9 ]; then
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_${test}_{tgsmall,fglarge} || exit 1;
) &
done
wait
fi
@ -118,7 +121,8 @@ fi
if [ $stage -le 11 ]; then
# do the actual online decoding with iVectors, carrying info forward from
# previous utterances of the same speaker.
for test in dev_clean dev_other; do
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -129,13 +133,16 @@ if [ $stage -le 11 ]; then
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
) &
done
wait
fi
if [ $stage -le 12 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information.
for test in dev_clean dev_other; do
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall_utt || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -146,7 +153,9 @@ if [ $stage -le 12 ]; then
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt || exit 1;
) &
done
wait
fi
if [ $stage -le 13 ]; then
@ -154,6 +163,7 @@ if [ $stage -le 13 ]; then
# without carrying forward speaker information, but looks to the end
# of the utterance while computing the iVector (--online false)
for test in test_clean test_other dev_clean dev_other; do
(
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
@ -165,7 +175,9 @@ if [ $stage -le 13 ]; then
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
) &
done
wait
fi
if [ $stage -le 14 ]; then