trunk: adding decoding for test_clean and test_other in librispeech setup; adding fglarge LM rescoring for librispeech setup;

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4769 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Guoguo Chen 2015-01-11 03:30:32 +00:00
Родитель 614e3db131
Коммит 99f4b31308
8 изменённых файлов: 56 добавлений и 16 удалений

Просмотреть файл

@ -56,12 +56,18 @@ if [ ! -f $dir/final.mdl ]; then
fi fi
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \ steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri4b/decode_tgsmall_$test \ --transform-dir exp/tri4b/decode_tgsmall_$test \
exp/tri4b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1; exp/tri4b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1; data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done done
exit 0; exit 0;

Просмотреть файл

@ -62,12 +62,18 @@ if [ ! -f $dir/final.mdl ]; then
fi fi
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \ steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri5b/decode_tgsmall_$test \ --transform-dir exp/tri5b/decode_tgsmall_$test \
exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1; exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1; data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done done
exit 0; exit 0;

Просмотреть файл

@ -62,7 +62,7 @@ if [ ! -f $dir/final.mdl ]; then
fi fi
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \ steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri6b/decode_tgsmall_$test \ --transform-dir exp/tri6b/decode_tgsmall_$test \
exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1; exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
@ -70,7 +70,10 @@ for test in dev_clean dev_other; do
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1; data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1; data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done done
exit 0; exit 0;

Просмотреть файл

@ -103,7 +103,7 @@ fi
if [ $stage -le 11 ]; then if [ $stage -le 11 ]; then
# do the actual online decoding with iVectors, carrying info forward from # do the actual online decoding with iVectors, carrying info forward from
# previous utterances of the same speaker. # previous utterances of the same speaker.
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1; exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -111,6 +111,9 @@ if [ $stage -le 11 ]; then
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1; data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
done done
fi fi

Просмотреть файл

@ -132,7 +132,7 @@ if [ $stage -le 5 ]; then
ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory
for epoch in $(seq $decode_start_epoch $num_epochs); do for epoch in $(seq $decode_start_epoch $num_epochs); do
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
( (
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \ steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1 --iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
@ -141,6 +141,9 @@ if [ $stage -le 5 ]; then
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1; data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
) & ) &
done done
done done

Просмотреть файл

@ -144,7 +144,7 @@ if [ $stage -le 13 ]; then
# this version of the decoding treats each utterance separately # this version of the decoding treats each utterance separately
# without carrying forward speaker information, but looks to the end # without carrying forward speaker information, but looks to the end
# of the utterance while computing the iVector (--online false) # of the utterance while computing the iVector (--online false)
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \ steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \ --per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1; ${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
@ -153,6 +153,9 @@ if [ $stage -le 13 ]; then
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1; data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
done done
fi fi

Просмотреть файл

@ -133,7 +133,7 @@ if [ $stage -le 5 ]; then
ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory
for epoch in $(seq $decode_start_epoch $num_epochs); do for epoch in $(seq $decode_start_epoch $num_epochs); do
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
( (
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \ steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1 --iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
@ -142,6 +142,10 @@ if [ $stage -le 5 ]; then
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1; data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
) &
) & ) &
done done
done done

Просмотреть файл

@ -55,9 +55,12 @@ utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/
local/format_lms.sh data/local/lm || exit 1 local/format_lms.sh data/local/lm || exit 1
# Create ConstArpaLm format language model for full trigram language model. # Create ConstArpaLm format language model for full trigram and 4gram language
# model.
utils/build_const_arpa_lm.sh \ utils/build_const_arpa_lm.sh \
data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1; data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
utils/build_const_arpa_lm.sh \
data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1;
mfccdir=mfcc mfccdir=mfcc
# spread the mfccs over various machines, as this data-set is quite large. # spread the mfccs over various machines, as this data-set is quite large.
@ -89,7 +92,7 @@ steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \
# decode using the monophone model # decode using the monophone model
( (
utils/mkgraph.sh --mono data/lang_test_tgsmall exp/mono exp/mono/graph_tgsmall || exit 1 utils/mkgraph.sh --mono data/lang_test_tgsmall exp/mono exp/mono/graph_tgsmall || exit 1
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode.sh --nj 20 --cmd "$decode_cmd" \ steps/decode.sh --nj 20 --cmd "$decode_cmd" \
exp/mono/graph_tgsmall data/$test exp/mono/decode_tgsmall_$test exp/mono/graph_tgsmall data/$test exp/mono/decode_tgsmall_$test
done done
@ -105,7 +108,7 @@ steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
# decode using the tri1 model # decode using the tri1 model
( (
utils/mkgraph.sh data/lang_test_tgsmall exp/tri1 exp/tri1/graph_tgsmall || exit 1; utils/mkgraph.sh data/lang_test_tgsmall exp/tri1 exp/tri1/graph_tgsmall || exit 1;
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode.sh --nj 20 --cmd "$decode_cmd" \ steps/decode.sh --nj 20 --cmd "$decode_cmd" \
exp/tri1/graph_tgsmall data/$test exp/tri1/decode_tgsmall_$test || exit 1; exp/tri1/graph_tgsmall data/$test exp/tri1/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -128,7 +131,7 @@ steps/train_lda_mllt.sh --cmd "$train_cmd" \
# decode using the LDA+MLLT model # decode using the LDA+MLLT model
( (
utils/mkgraph.sh data/lang_test_tgsmall exp/tri2b exp/tri2b/graph_tgsmall || exit 1; utils/mkgraph.sh data/lang_test_tgsmall exp/tri2b exp/tri2b/graph_tgsmall || exit 1;
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode.sh --nj 20 --cmd "$decode_cmd" \ steps/decode.sh --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph_tgsmall data/$test exp/tri2b/decode_tgsmall_$test || exit 1; exp/tri2b/graph_tgsmall data/$test exp/tri2b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -150,7 +153,7 @@ steps/train_sat.sh --cmd "$train_cmd" \
# decode using the tri3b model # decode using the tri3b model
( (
utils/mkgraph.sh data/lang_test_tgsmall exp/tri3b exp/tri3b/graph_tgsmall || exit 1; utils/mkgraph.sh data/lang_test_tgsmall exp/tri3b exp/tri3b/graph_tgsmall || exit 1;
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri3b/graph_tgsmall data/$test exp/tri3b/decode_tgsmall_$test || exit 1; exp/tri3b/graph_tgsmall data/$test exp/tri3b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -172,7 +175,7 @@ steps/train_sat.sh --cmd "$train_cmd" \
# decode using the tri4b model # decode using the tri4b model
( (
utils/mkgraph.sh data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1; utils/mkgraph.sh data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1;
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri4b/graph_tgsmall data/$test exp/tri4b/decode_tgsmall_$test || exit 1; exp/tri4b/graph_tgsmall data/$test exp/tri4b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -180,6 +183,9 @@ steps/train_sat.sh --cmd "$train_cmd" \
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri4b/decode_{tgsmall,tglarge}_$test || exit 1; data/$test exp/tri4b/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri4b/decode_{tgsmall,fglarge}_$test || exit 1;
done done
)& )&
@ -213,7 +219,7 @@ steps/train_sat.sh --cmd "$train_cmd" \
# decode using the tri5b model # decode using the tri5b model
( (
utils/mkgraph.sh data/lang_test_tgsmall exp/tri5b exp/tri5b/graph_tgsmall || exit 1; utils/mkgraph.sh data/lang_test_tgsmall exp/tri5b exp/tri5b/graph_tgsmall || exit 1;
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri5b/graph_tgsmall data/$test exp/tri5b/decode_tgsmall_$test || exit 1; exp/tri5b/graph_tgsmall data/$test exp/tri5b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -221,6 +227,9 @@ steps/train_sat.sh --cmd "$train_cmd" \
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1; data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri5b/decode_{tgsmall,fglarge}_$test || exit 1;
done done
)& )&
@ -249,7 +258,7 @@ steps/train_quick.sh --cmd "$train_cmd" \
# decode using the tri6b model # decode using the tri6b model
( (
utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b exp/tri6b/graph_tgsmall || exit 1; utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b exp/tri6b/graph_tgsmall || exit 1;
for test in dev_clean dev_other; do for test in test_clean test_other dev_clean dev_other; do
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1; exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
@ -257,6 +266,9 @@ steps/train_quick.sh --cmd "$train_cmd" \
steps/lmrescore_const_arpa.sh \ steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1; data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1;
steps/lmrescore_const_arpa.sh \
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
data/$test exp/tri6b/decode_{tgsmall,fglarge}_$test || exit 1;
done done
)& )&