зеркало из https://github.com/mozilla/kaldi.git
trunk: adding decoding for test_clean and test_other in librispeech setup; adding fglarge LM rescoring for librispeech setup;
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4769 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
614e3db131
Коммит
99f4b31308
|
@ -56,12 +56,18 @@ if [ ! -f $dir/final.mdl ]; then
|
|||
fi
|
||||
|
||||
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
|
||||
--transform-dir exp/tri4b/decode_tgsmall_$test \
|
||||
exp/tri4b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
|
||||
done
|
||||
|
||||
exit 0;
|
||||
|
|
|
@ -62,12 +62,18 @@ if [ ! -f $dir/final.mdl ]; then
|
|||
fi
|
||||
|
||||
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
|
||||
--transform-dir exp/tri5b/decode_tgsmall_$test \
|
||||
exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
|
||||
done
|
||||
|
||||
exit 0;
|
||||
|
|
|
@ -62,7 +62,7 @@ if [ ! -f $dir/final.mdl ]; then
|
|||
fi
|
||||
|
||||
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
|
||||
--transform-dir exp/tri6b/decode_tgsmall_$test \
|
||||
exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
|
||||
|
@ -70,7 +70,10 @@ for test in dev_clean dev_other; do
|
|||
data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
|
||||
done
|
||||
|
||||
exit 0;
|
||||
|
|
|
@ -103,7 +103,7 @@ fi
|
|||
if [ $stage -le 11 ]; then
|
||||
# do the actual online decoding with iVectors, carrying info forward from
|
||||
# previous utterances of the same speaker.
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
|
||||
exp/tri6b/graph_tgsmall data/$test ${dir}_online/decode_${test}_tgsmall || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -111,6 +111,9 @@ if [ $stage -le 11 ]; then
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge} || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge} || exit 1;
|
||||
done
|
||||
fi
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ if [ $stage -le 5 ]; then
|
|||
ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory
|
||||
|
||||
for epoch in $(seq $decode_start_epoch $num_epochs); do
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
(
|
||||
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
|
||||
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
|
||||
|
@ -141,6 +141,9 @@ if [ $stage -le 5 ]; then
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
|
||||
) &
|
||||
done
|
||||
done
|
||||
|
|
|
@ -144,7 +144,7 @@ if [ $stage -le 13 ]; then
|
|||
# this version of the decoding treats each utterance separately
|
||||
# without carrying forward speaker information, but looks to the end
|
||||
# of the utterance while computing the iVector (--online false)
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
|
||||
--per-utt true --online false exp/tri6b/graph_tgsmall data/$test \
|
||||
${dir}_online/decode_${test}_tgsmall_utt_offline || exit 1;
|
||||
|
@ -153,6 +153,9 @@ if [ $stage -le 13 ]; then
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test ${dir}_online/decode_${test}_{tgsmall,tglarge}_utt_offline || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test ${dir}_online/decode_${test}_{tgsmall,fglarge}_utt_offline || exit 1;
|
||||
done
|
||||
fi
|
||||
|
||||
|
|
|
@ -133,7 +133,7 @@ if [ $stage -le 5 ]; then
|
|||
ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory
|
||||
|
||||
for epoch in $(seq $decode_start_epoch $num_epochs); do
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
(
|
||||
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
|
||||
--iter epoch$epoch exp/tri6b/graph_tgsmall data/${test} $dir/decode_epoch${epoch}_${test}_tgsmall || exit 1
|
||||
|
@ -142,6 +142,10 @@ if [ $stage -le 5 ]; then
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,tglarge} || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test $dir/decode_epoch${epoch}_${test}_{tgsmall,fglarge} || exit 1;
|
||||
) &
|
||||
) &
|
||||
done
|
||||
done
|
||||
|
|
|
@ -55,9 +55,12 @@ utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/
|
|||
|
||||
local/format_lms.sh data/local/lm || exit 1
|
||||
|
||||
# Create ConstArpaLm format language model for full trigram language model.
|
||||
# Create ConstArpaLm format language model for full trigram and 4gram language
|
||||
# model.
|
||||
utils/build_const_arpa_lm.sh \
|
||||
data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge || exit 1;
|
||||
utils/build_const_arpa_lm.sh \
|
||||
data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge || exit 1;
|
||||
|
||||
mfccdir=mfcc
|
||||
# spread the mfccs over various machines, as this data-set is quite large.
|
||||
|
@ -89,7 +92,7 @@ steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \
|
|||
# decode using the monophone model
|
||||
(
|
||||
utils/mkgraph.sh --mono data/lang_test_tgsmall exp/mono exp/mono/graph_tgsmall || exit 1
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/mono/graph_tgsmall data/$test exp/mono/decode_tgsmall_$test
|
||||
done
|
||||
|
@ -105,7 +108,7 @@ steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
|
|||
# decode using the tri1 model
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test_tgsmall exp/tri1 exp/tri1/graph_tgsmall || exit 1;
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/tri1/graph_tgsmall data/$test exp/tri1/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -128,7 +131,7 @@ steps/train_lda_mllt.sh --cmd "$train_cmd" \
|
|||
# decode using the LDA+MLLT model
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test_tgsmall exp/tri2b exp/tri2b/graph_tgsmall || exit 1;
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/tri2b/graph_tgsmall data/$test exp/tri2b/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -150,7 +153,7 @@ steps/train_sat.sh --cmd "$train_cmd" \
|
|||
# decode using the tri3b model
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test_tgsmall exp/tri3b exp/tri3b/graph_tgsmall || exit 1;
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/tri3b/graph_tgsmall data/$test exp/tri3b/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -172,7 +175,7 @@ steps/train_sat.sh --cmd "$train_cmd" \
|
|||
# decode using the tri4b model
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test_tgsmall exp/tri4b exp/tri4b/graph_tgsmall || exit 1;
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/tri4b/graph_tgsmall data/$test exp/tri4b/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -180,6 +183,9 @@ steps/train_sat.sh --cmd "$train_cmd" \
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test exp/tri4b/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test exp/tri4b/decode_{tgsmall,fglarge}_$test || exit 1;
|
||||
done
|
||||
)&
|
||||
|
||||
|
@ -213,7 +219,7 @@ steps/train_sat.sh --cmd "$train_cmd" \
|
|||
# decode using the tri5b model
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test_tgsmall exp/tri5b exp/tri5b/graph_tgsmall || exit 1;
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/tri5b/graph_tgsmall data/$test exp/tri5b/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -221,6 +227,9 @@ steps/train_sat.sh --cmd "$train_cmd" \
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test exp/tri5b/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test exp/tri5b/decode_{tgsmall,fglarge}_$test || exit 1;
|
||||
done
|
||||
)&
|
||||
|
||||
|
@ -249,7 +258,7 @@ steps/train_quick.sh --cmd "$train_cmd" \
|
|||
# decode using the tri6b model
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test_tgsmall exp/tri6b exp/tri6b/graph_tgsmall || exit 1;
|
||||
for test in dev_clean dev_other; do
|
||||
for test in test_clean test_other dev_clean dev_other; do
|
||||
steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
|
||||
exp/tri6b/graph_tgsmall data/$test exp/tri6b/decode_tgsmall_$test || exit 1;
|
||||
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
|
||||
|
@ -257,6 +266,9 @@ steps/train_quick.sh --cmd "$train_cmd" \
|
|||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
|
||||
data/$test exp/tri6b/decode_{tgsmall,tglarge}_$test || exit 1;
|
||||
steps/lmrescore_const_arpa.sh \
|
||||
--cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
|
||||
data/$test exp/tri6b/decode_{tgsmall,fglarge}_$test || exit 1;
|
||||
done
|
||||
)&
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче