зеркало из https://github.com/mozilla/kaldi.git
trunk: semisupervised BNF+sgmm_mmi decoding script
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3694 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
77456f307b
Коммит
0f722a1d83
|
@ -0,0 +1,213 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2014 Pegah Ghahremani
|
||||
# Apache 2.0
|
||||
|
||||
# decode BNF + sgmm_mmi system
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
. conf/common_vars.sh || exit 1;
|
||||
. ./lang.conf || exit 1;
|
||||
|
||||
|
||||
type=dev10h
|
||||
data_only=false
|
||||
fast_path=true
|
||||
skip_kws=false
|
||||
extra_kws=false
|
||||
skip_stt=false
|
||||
skip_scoring=false
|
||||
tmpdir=`pwd`
|
||||
semisupervised=true
|
||||
|
||||
. utils/parse_options.sh
|
||||
if [ $# -ne 0 ]; then
|
||||
echo "Usage: $(basename $0) --type (dev10h|dev2h|eval|shadow)"
|
||||
echo "--semisupervised<true> #set to false to skip unsupervised training."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $babel_type == "full" ] && $semisupervised; then
|
||||
echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false "
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if $semisupervised ; then
|
||||
unsup_string="_semi_supervised"
|
||||
else
|
||||
unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training
|
||||
fi
|
||||
|
||||
if ! echo {dev10h,dev2h,eval,unsup}{,.uem,.seg} | grep -w "$type" >/dev/null; then
|
||||
# note: echo dev10.uem | grep -w dev10h will produce a match, but this
|
||||
# doesn't matter because dev10h is also a valid value.
|
||||
echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
dirid=${type}
|
||||
exp_dir=exp_bnf${unsup_string}
|
||||
data_bnf_dir=data_bnf${unsup_string}
|
||||
param_bnf_dir=param_bnf${unsup_string}
|
||||
datadir=$data_bnf_dir/${dirid}
|
||||
|
||||
[ ! -d data/${dirid} ] && echo "No such directory data/${dirid}" && exit 1;
|
||||
[ ! -d exp/tri5/decode_${dirid} ] && echo "No such directory exp/tri5/decode_${dirid}" && exit 1;
|
||||
|
||||
# Set my_nj; typically 64.
|
||||
my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1;
|
||||
|
||||
|
||||
if [ ! $data_bnf_dir/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \
|
||||
[ ! $data_bnf_dir/${dirid}_bnf/.done -nt $exp_dir/tri6_bnf/.done ]; then
|
||||
# put the archives in $param_bnf_dir/.
|
||||
local/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \
|
||||
--transform-dir exp/tri5/decode_${dirid} data/${dirid} $data_bnf_dir/${dirid}_bnf $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf
|
||||
touch $data_bnf_dir/${dirid}_bnf/.done
|
||||
fi
|
||||
|
||||
if [ ! $data_bnf_dir/${dirid}/.done -nt $data_bnf_dir/${dirid}_bnf/.done ]; then
|
||||
steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
|
||||
--nj $train_nj --transform-dir exp/tri5/decode_${dirid} $data_bnf_dir/${dirid}_sat data/${dirid} \
|
||||
exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir/
|
||||
|
||||
steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
|
||||
$data_bnf_dir/${dirid}_bnf $data_bnf_dir/${dirid}_sat $data_bnf_dir/${dirid} \
|
||||
$exp_dir/append_feats/log $param_bnf_dir/
|
||||
steps/compute_cmvn_stats.sh --fake $data_bnf_dir/${dirid} $exp_dir/make_fmllr_feats $param_bnf_dir
|
||||
rm -r $data_bnf_dir/${dirid}_sat
|
||||
if ! $skip_kws ; then
|
||||
cp -r data/${dirid}/kws* $data_bnf_dir/${dirid}/
|
||||
fi
|
||||
touch $data_bnf_dir/${dirid}/.done
|
||||
fi
|
||||
|
||||
if $data_only ; then
|
||||
echo "Exiting, as data-only was requested... "
|
||||
fi
|
||||
|
||||
####################################################################
|
||||
##
|
||||
## FMLLR decoding
|
||||
##
|
||||
####################################################################
|
||||
decode=$exp_dir/tri6/decode_${dirid}
|
||||
if [ ! -f ${decode}/.done ]; then
|
||||
echo ---------------------------------------------------------------------
|
||||
echo "Decoding with SAT models on top of bottleneck features on" `date`
|
||||
echo ---------------------------------------------------------------------
|
||||
utils/mkgraph.sh \
|
||||
data/lang $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log
|
||||
|
||||
mkdir -p $decode
|
||||
#By default, we do not care about the lattices for this step -- we just want the transforms
|
||||
#Therefore, we will reduce the beam sizes, to reduce the decoding times
|
||||
steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \
|
||||
--acwt $bnf_decode_acwt \
|
||||
--nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
|
||||
$exp_dir/tri6/graph ${datadir} ${decode} |tee ${decode}/decode.log
|
||||
touch ${decode}/.done
|
||||
fi
|
||||
|
||||
if ! $fast_path ; then
|
||||
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
|
||||
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip\
|
||||
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
|
||||
${datadir} data/lang ${decode}
|
||||
|
||||
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
|
||||
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
|
||||
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
|
||||
${datadir} data/lang ${decode}.si
|
||||
fi
|
||||
|
||||
####################################################################
|
||||
## SGMM2 decoding
|
||||
####################################################################
|
||||
decode=$exp_dir/sgmm7/decode_fmllr_${dirid}
|
||||
if [ ! -f $decode/.done ]; then
|
||||
echo ---------------------------------------------------------------------
|
||||
echo "Spawning $decode on" `date`
|
||||
echo ---------------------------------------------------------------------
|
||||
utils/mkgraph.sh \
|
||||
data/lang $exp_dir/sgmm7 $exp_dir/sgmm7/graph |tee $exp_dir/sgmm7/mkgraph.log
|
||||
|
||||
mkdir -p $decode
|
||||
steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
|
||||
--acwt $bnf_decode_acwt \
|
||||
--cmd "$decode_cmd" --transform-dir $exp_dir/tri6/decode_${dirid} "${decode_extra_opts[@]}"\
|
||||
$exp_dir/sgmm7/graph ${datadir} $decode |tee $decode/decode.log
|
||||
touch $decode/.done
|
||||
fi
|
||||
|
||||
if ! $fast_path ; then
|
||||
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring \
|
||||
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
|
||||
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
|
||||
${datadir} data/lang $exp_dir/sgmm7/decode_fmllr_${dirid}
|
||||
fi
|
||||
|
||||
####################################################################
|
||||
##
|
||||
## SGMM_MMI rescoring
|
||||
##
|
||||
####################################################################
|
||||
|
||||
for iter in 1 2 3 4; do
|
||||
# Decode SGMM+MMI (via rescoring).
|
||||
decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter
|
||||
if [ ! -f $decode/.done ]; then
|
||||
|
||||
mkdir -p $decode
|
||||
steps/decode_sgmm2_rescore.sh --skip-scoring true \
|
||||
--cmd "$decode_cmd" --iter $iter --transform-dir $exp_dir/tri6/decode_${dirid} \
|
||||
data/lang ${datadir} $exp_dir/sgmm7/decode_fmllr_${dirid} $decode | tee ${decode}/decode.log
|
||||
|
||||
touch $decode/.done
|
||||
fi
|
||||
done
|
||||
|
||||
#We are done -- all lattices has been generated. We have to
|
||||
#a)Run MBR decoding
|
||||
#b)Run KW search
|
||||
for iter in 1 2 3 4; do
|
||||
# Decode SGMM+MMI (via rescoring).
|
||||
decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter
|
||||
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
|
||||
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
|
||||
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
|
||||
${datadir} data/lang $decode
|
||||
done
|
||||
|
||||
if [ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt data_bnf/${dirid}_bnf/.done ] || \
|
||||
[ ! exp_bnf/tri7_nnet/decode_${dirid}/.done -nt exp_bnf/tri7_nnet/.done ]; then
|
||||
|
||||
echo ---------------------------------------------------------------------
|
||||
echo "Decoding hybrid system on top of bottleneck features on" `date`
|
||||
echo ---------------------------------------------------------------------
|
||||
|
||||
# We use the graph from tri6.
|
||||
utils/mkgraph.sh \
|
||||
data/lang exp_bnf/tri6 exp_bnf/tri6/graph |tee exp_bnf/tri6/mkgraph.log
|
||||
|
||||
decode=exp_bnf/tri7_nnet/decode_${dirid}
|
||||
if [ ! -f $decode/.done ]; then
|
||||
mkdir -p $decode
|
||||
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj $my_nj \
|
||||
--acwt $bnf_decode_acwt \
|
||||
--beam $dnn_beam --lat-beam $dnn_lat_beam \
|
||||
--skip-scoring true "${decode_extra_opts[@]}" \
|
||||
--feat-type raw \
|
||||
exp_bnf/tri6/graph ${datadir} $decode | tee $decode/decode.log
|
||||
|
||||
touch $decode/.done
|
||||
fi
|
||||
|
||||
local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
|
||||
--cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
|
||||
"${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
|
||||
${datadir} data/lang $decode
|
||||
fi
|
||||
|
||||
echo "$0: Everything looking good...."
|
||||
exit 0
|
Загрузка…
Ссылка в новой задаче