зеркало из https://github.com/mozilla/kaldi.git
Fixes and improvements to scripts for lattice-rescoring and fMLLR with SGMMs.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@459 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
a281368f8a
Коммит
465305aa0c
|
@ -3,6 +3,10 @@
|
|||
# This script basically calls the supplied decoding script
|
||||
# once for each test set (in parallel on the same machine),
|
||||
# and then averages the resulting WERs.
|
||||
# The interpretation of the decode-dir-1, etc., as inputs,
|
||||
# outputs and so on, depends on the decoding script you call.
|
||||
|
||||
# It assumes the model directory is one level up from decode-dir-1.
|
||||
|
||||
mono_opt=
|
||||
|
||||
|
@ -12,44 +16,44 @@ if [ "$1" == "--mono" ]; then
|
|||
fi
|
||||
|
||||
script=$1
|
||||
decode_dir=$2 # e.g. exp/sgmm3b/decode
|
||||
dir=`dirname $decode_dir` # e.g. exp/sgmm3b
|
||||
decode_dir_1=$2 # e.g. exp/sgmm3b/decode
|
||||
decode_dir_2=$3
|
||||
decode_dir_3=$4
|
||||
dir=`dirname $decode_dir_1` # e.g. exp/sgmm3b
|
||||
|
||||
if [ $# -lt 2 -o $# -gt 3 ]; then
|
||||
echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
|
||||
if [ $# -lt 2 -o $# -gt 4 ]; then
|
||||
echo "Usage: scripts/decode.sh <decode-script> <decode-dir-1> [<decode-dir-2> [<decode-dir-3>] ]"
|
||||
exit 1;
|
||||
fi
|
||||
if [ ! -x $script -o ! -d $dir ]; then
|
||||
echo "scripts/decode.sh: Either no such script $script or not exebutable, or no such dir $dir"
|
||||
echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
mkdir -p $decode_dir
|
||||
scripts/mkgraph.sh $mono_opt data/lang_test $dir $dir/graph
|
||||
|
||||
if [ $# -eq 2 ]; then # normal case: 2 args.
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
$script $dir data/test_$test data/lang $decode_dir/$test &
|
||||
$script $dir data/test_$test data/lang $decode_dir_1/$test &
|
||||
done
|
||||
elif [ $# -eq 3 ]; then
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
$script $dir data/test_$test data/lang $decode_dir_1/$test $decode_dir_2/$test &
|
||||
done
|
||||
else
|
||||
olddir=$3
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
if [ ! -d $olddir/$test ]; then
|
||||
echo "decode.sh: no such directory $olddir/$test";
|
||||
exit 1;
|
||||
fi
|
||||
$script $dir data/test_$test data/lang $decode_dir/$test $olddir/$test &
|
||||
$script $dir data/test_$test data/lang $decode_dir_1/$test $decode_dir_2/$test $decode_dir_3/$test &
|
||||
done
|
||||
fi
|
||||
wait
|
||||
|
||||
|
||||
|
||||
# Average the WERs... there may be various wer files named e.g. wer, wer_10, etc.,
|
||||
# so do this for each one.
|
||||
for w in $decode_dir/mar87/wer*; do
|
||||
for w in $decode_dir_1/mar87/wer*; do
|
||||
wername=`basename $w`
|
||||
scripts/average_wer.sh $decode_dir/?????/$wername > $decode_dir/$wername
|
||||
scripts/average_wer.sh $decode_dir_1/?????/$wername > $decode_dir_1/$wername
|
||||
done
|
||||
grep WER $decode_dir/wer* || echo "Error decoding $decode_dir: no WER results found."
|
||||
|
||||
|
||||
|
||||
# Print the averaged WER lines; if none exist, report which decode directory failed.
# (The message must name $decode_dir_1: $decode_dir is no longer set in this script.)
grep WER $decode_dir_1/wer* || echo "Error decoding $decode_dir_1: no WER results found."
|
||||
|
||||
|
|
|
@ -113,12 +113,17 @@ steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f/final.u
|
|||
|
||||
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm4f/decode exp/tri3d/decode
|
||||
|
||||
# Decode with fMLLR
|
||||
sgmm-comp-prexform exp/sgmm4f/final.{mdl,occs,fmllr_mdl}
|
||||
local/decode.sh steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm4f/decode_fmllr exp/sgmm4f/decode exp/tri3d/decode
|
||||
|
||||
local/decode.sh steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm4f/decode_nofmllr exp/sgmm4f/decode exp/tri3d/decode
|
||||
|
||||
# Some system combination experiments (just compose lattices).
|
||||
local/decode_combine.sh steps/decode_combine.sh exp/tri1/decode exp/tri2a/decode exp/combine_1_2a/decode
|
||||
local/decode_combine.sh steps/decode_combine.sh exp/sgmm4f/decode/ exp/tri3d/decode exp/combine_sgmm4f_tri3d/decode
|
||||
|
||||
for x in exp/*/decode; do grep WER $x/wer_* | scripts/best_wer.sh; done
|
||||
for x in exp/*/decode*; do grep WER $x/wer_* | scripts/best_wer.sh; done
|
||||
|
||||
exp/combine_1_2a/decode/wer_7:%WER 3.399027 [ 426 / 12533, 55 ins, 94 del, 277 sub ]
|
||||
exp/combine_sgmm4f_tri3d/decode/wer_5:%WER 1.731429 [ 217 / 12533, 30 ins, 43 del, 144 sub ]
|
||||
|
|
|
@ -57,7 +57,7 @@ feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ar
|
|||
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
|
||||
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/test.tra ark,t:$dir/test.ali \
|
||||
2> $dir/decode.log || exit 1;
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmv
|
|||
# Second pass decoding... generate lattices and rescore with
|
||||
# various scales.
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali 2> $dir/decode_pass2.log || exit 1;
|
||||
|
||||
|
||||
|
|
|
@ -57,7 +57,7 @@ feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ar
|
|||
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
|
||||
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/test.tra ark,t:$dir/test.ali \
|
||||
2> $dir/decode.log || exit 1;
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@ if [ ! -z $olddir ]; then # i.e. if $olddir not empty string...
|
|||
feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$olddir/trans.ark ark:- ark:- |"
|
||||
fi
|
||||
|
||||
sgmm-gselect $srcdir/final.mdl "$feats" "ark,t:|gzip -c > $dir/gselect.gz" \
|
||||
sgmm-gselect $srcdir/final.mdl "$feats" "ark:|gzip -c > $dir/gselect.gz" \
|
||||
2>$dir/gselect.log || exit 1;
|
||||
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
|
||||
|
||||
|
@ -100,7 +100,7 @@ sgmm-decode-faster "$gselect_opt" --beam=20.0 --acoustic-scale=0.1 --word-symbol
|
|||
sgmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
|
||||
--spk-vecs=ark:$dir/vecs.ark --utt2spk=ark:$data/utt2spk \
|
||||
--word-symbol-table=$lang/words.txt $srcdir/final.mdl $graphdir/HCLG.fst \
|
||||
"$feats" "ark,t:|gzip -c >$dir/lat.gz" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
|
||||
"$feats" "ark:|gzip -c >$dir/lat.gz" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
|
||||
2> $dir/decode_pass2.log || exit 1;
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Decoding script that works with a SGMM model [w/ speaker vectors]
|
||||
# and cepstral mean subtraction plus splice-9-frames plus LDA+MLLT, or
|
||||
# LDA+MLLT+SAT or LDA+ET features. For the last two, which
|
||||
# are speaker adaptive, the script takes an extra argument
|
||||
# corresponding to the previous decoding directory where we can
|
||||
# find the transform trans.ark.
|
||||
|
||||
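# This script itself does no first-pass decoding: it estimates fMLLR transforms from the lattices of a previous SGMM decode, then rescores those lattices.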
|
||||
|
||||
if [ $# != 5 -a $# != 6 ]; then
|
||||
echo "Usage: steps/decode_sgmm_lda_etc_fmllr.sh <model-dir> <data-dir> <lang-dir> <decode-dir> <old-sgmm-decode-dir> [<old-decode-dir-for-transforms>]"
|
||||
echo " e.g.: steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm3d data/test_feb89 data/lang_test exp/sgmm3d/decode/feb89 exp/sgmm3d/decode_fmllr/feb89"
|
||||
echo " or: steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm3e data/test_feb89 data/lang_test exp/sgmm3e/decode/feb89 exp/sgmm3e/decode_fmllr/feb89 exp/tri2c/decode/feb89"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
srcdir=$1
|
||||
data=$2
|
||||
lang=$3
|
||||
dir=$4
|
||||
firstpassdir=$5
|
||||
olddir=$6 # old decoding dir where there are transforms [possibly]
|
||||
graphdir=$srcdir/graph
|
||||
|
||||
silphonelist=`cat $lang/silphones.csl`
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
# -f means file exists; -o means or.
|
||||
requirements="$srcdir/final.mdl $srcdir/final.fmllr_mdl $srcdir/final.mat $firstpassdir/cmvn.ark $firstpassdir/lat.gz $firstpassdir/gselect.gz $firstpassdir/vecs.ark"
|
||||
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_lda_etc.sh: input file $f does not exist";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$firstpassdir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
|
||||
|
||||
if [ ! -z $olddir ]; then # i.e. if $olddir not empty string...
|
||||
if [ ! -f $olddir/trans.ark ]; then
|
||||
echo decode_sgmm_lda_etc.sh: error: no such file $olddir/trans.ark
|
||||
exit 1
|
||||
fi
|
||||
feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$olddir/trans.ark ark:- ark:- |"
|
||||
fi
|
||||
|
||||
gselect_opt="--gselect=ark:gunzip -c $firstpassdir/gselect.gz|"
|
||||
|
||||
|
||||
# Here we estimate the fMLLR transforms-- just one iteration should be sufficient,
|
||||
# as it's after many adaptation passes.
|
||||
|
||||
|
||||
( lattice-to-post --acoustic-scale=0.1 "ark:gunzip -c $firstpassdir/lat.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- | \
|
||||
sgmm-est-fmllr --fmllr-iters=10 --fmllr-min-count=1000 "$gselect_opt" \
|
||||
--spk-vecs=ark:$firstpassdir/vecs.ark --spk2utt=ark:$data/spk2utt $srcdir/final.fmllr_mdl \
|
||||
"$feats" ark,s,cs:- ark:$dir/trans.ark ) 2>$dir/est_fmllr.log || exit 1;
|
||||
|
||||
feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"
|
||||
|
||||
sgmm-rescore-lattice "$gselect_opt" --spk-vecs=ark:$firstpassdir/vecs.ark \
|
||||
--utt2spk=ark:$data/utt2spk $srcdir/final.mdl \
|
||||
"ark:gunzip -c $firstpassdir/lat.gz|" "$feats" "ark:|gzip -c >$dir/lat.gz" \
|
||||
2>$dir/acoustic_rescore.log || exit 1;
|
||||
|
||||
|
||||
# Now rescore lattices with various acoustic scales, and compute the WER.
|
||||
for inv_acwt in 4 5 6 7 8 9 10; do
|
||||
acwt=`perl -e "print (1.0/$inv_acwt);"`
|
||||
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
|
||||
"ark:gunzip -c $dir/lat.gz|" ark,t:$dir/${inv_acwt}.tra \
|
||||
2>$dir/rescore_${inv_acwt}.log
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
|
||||
>& $dir/wer_${inv_acwt}
|
||||
done
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// gmmbin/gmm-resocre-lattice.cc
|
||||
// gmmbin/gmm-rescore-lattice.cc
|
||||
|
||||
// Copyright 2009-2011 Saarland University
|
||||
// Author: Arnab Ghoshal
|
||||
|
@ -70,6 +70,24 @@ void LatticeAcousticRescore(const AmDiagGmm& am,
|
|||
}
|
||||
}
|
||||
}
|
||||
// Now make sure that epsilon-input arcs and final-probs don't have
|
||||
// any acoustic part in the weights.
|
||||
for (int32 s = 0; s < lat->NumStates(); s++) {
|
||||
for (fst::MutableArcIterator<Lattice> aiter(lat, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
LatticeArc arc = aiter.Value();
|
||||
int32 trans_id = arc.ilabel;
|
||||
if (trans_id == 0) {
|
||||
arc.weight.SetValue2(0); // make sure acoustic part of weight is zero.
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
LatticeWeight w = lat->Final(s);
|
||||
if (w != LatticeWeight::Zero()) {
|
||||
w.SetValue2(0); // make sure acoustic part of weight is zero.
|
||||
lat->SetFinal(s, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kaldi
|
||||
|
|
|
@ -85,6 +85,26 @@ void LatticeAcousticRescore(const AmSgmm& am,
|
|||
}
|
||||
}
|
||||
}
|
||||
// Now make sure that epsilon-input arcs and final-probs don't have
|
||||
// any acoustic part in the weights. We didn't do this as part of the
|
||||
// previous loop as it skipped over final-states, and these also may
|
||||
// have epsilon arcs out.
|
||||
for (int32 s = 0; s < lat->NumStates(); s++) {
|
||||
for (fst::MutableArcIterator<Lattice> aiter(lat, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
LatticeArc arc = aiter.Value();
|
||||
int32 trans_id = arc.ilabel;
|
||||
if (trans_id == 0) {
|
||||
arc.weight.SetValue2(0); // make sure acoustic part of weight is zero.
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
LatticeWeight w = lat->Final(s);
|
||||
if (w != LatticeWeight::Zero()) {
|
||||
w.SetValue2(0); // make sure acoustic part of weight is zero.
|
||||
lat->SetFinal(s, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kaldi
|
||||
|
|
Загрузка…
Ссылка в новой задаче