зеркало из https://github.com/mozilla/kaldi.git
Added scripts and results for wsj/s1 lattice oracle error rate.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@410 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
fa573297c2
Коммит
b1dded704c
|
@ -110,7 +110,13 @@ Test set: | Eval92 | Eval93 | Eval92 | Eval93 | Eval92 | Eval93 |
|
|||
latrescore| 12.6 18.7 14.3 21.0 12.1 18.2
|
||||
[beam=15]| 12.5 18.7 14.3 20.9 11.7 18.2
|
||||
|
||||
|
||||
# Oracle lattice results:
|
||||
On Eval92 lattices generated with a bigram LM (from exp/decode_tri2a_bg_latgen_eval92)
|
||||
Lattices pruned with varying beam values:
|
||||
beam=0.01 %WER 16.13 [ 910 / 5641, 213 ins, 59 del, 638 sub ]
|
||||
beam=1 %WER 13.07 [ 737 / 5641, 171 ins, 46 del, 520 sub ]
|
||||
beam=5 %WER 6.38 [ 360 / 5641, 86 ins, 20 del, 254 sub ]
|
||||
beam=10 %WER 4.11 [ 232 / 5641, 53 ins, 9 del, 170 sub ]
|
||||
|
||||
# Raw results:
|
||||
exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
|
||||
|
|
|
@ -216,6 +216,7 @@ steps/train_tri2a.sh || exit 1;
|
|||
for year in 92 93; do
|
||||
scripts/decode.sh exp/decode_tri2a_bg_eval${year} exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_tri2a_bg_latgen_eval${year} exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a_latgen.sh data/eval_nov${year}.scp
|
||||
scripts/latoracle.sh exp/decode_tri2a_bg_latgen_eval${year} data/eval_nov${year}.txt exp/decode_tri2a_bg_latoracle_eval${year}
|
||||
scripts/latrescore.sh exp/decode_tri2a_bg_latgen_eval${year} data/G_bg.fst data/G_tg.fst data/eval_nov${year}.txt exp/decode_tri2a_bg_rescore_tg_eval${year}
|
||||
scripts/latrescore.sh exp/decode_tri2a_bg_latgen_eval${year} data/G_bg.fst data/G_tg_pruned.fst data/eval_nov${year}.txt exp/decode_tri2a_bg_rescore_tg_pruned_eval${year}
|
||||
scripts/latrescore.sh exp/decode_tri2a_bg_latgen_eval${year} data/G_bg.fst data/G_bg.fst data/eval_nov${year}.txt exp/decode_tri2a_bg_rescore_bg_eval${year}
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2011 Microsoft Corporation1 Gilles Boulianne
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
|
||||
# Exactly three positional arguments are required; anything else prints the
# usage line and aborts with a non-zero status.
if [ "$#" -ne 3 ]; then
  echo "Usage: scripts/latoracle.sh <input-decode-dir> <transcript-text-file> <output-decode-dir>"
  exit 1
fi
|
||||
|
||||
# Source the local environment file. The explicit ./ matters: for a name
# without a slash, bash looks through $PATH first and only then falls back to
# the current directory, so an unrelated path.sh on $PATH could be picked up.
# NOTE(review): path.sh presumably puts the Kaldi binaries on PATH -- confirm.
. ./path.sh || exit 1

inputdir=$1    # directory holding the *.lats.gz lattices, e.g. /pub/tmp/kaldi2011/dpovey/decode_tri1_latgen/
transcript=$2  # reference transcript text file, e.g. data_prep/test_sep92_trans.txt
dir=$3         # output directory, e.g. exp/decode_tri1_latgen

# Everything below writes into $dir, so stop right away if it cannot be created.
mkdir -p "$dir" || exit 1
|
||||
|
||||
# Create reference transcriptions and lattices.
# Step 1: drop the <NOISE> and <SPOKEN_NOISE> markers from the transcript.
sed -e 's:<NOISE>::g' -e 's:<SPOKEN_NOISE>::g' \
  < "$transcript" > "$dir/test_trans.filt" || exit 1

# Step 2: convert the filtered text to integer symbols using data/words.txt
# and pipe it to string-to-lattice, which writes $dir/test_trans.lats.
# Only string-to-lattice's stderr goes to the log file.
scripts/sym2int.pl --ignore-first-field data/words.txt \
  < "$dir/test_trans.filt" \
  | string-to-lattice "ark:$dir/test_trans.lats" 2> "$dir/string-to-lattice.log"
# Copy PIPESTATUS immediately: it is overwritten by the very next command.
ref_status=("${PIPESTATUS[@]}")
if [ "${ref_status[0]}" -ne 0 ] || [ "${ref_status[1]}" -ne 0 ]; then
  echo "Creating reference lattices failed; check the messages above and $dir/string-to-lattice.log" >&2
  exit 1
fi
|
||||
|
||||
# Symbols that don't count as errors, one per line. The file is handed to
# lattice-oracle via --wildcard-symbols-list further down in this script.
printf '%s\n' "<s>" "</s>" "<UNK>" > "$dir/ignore.txt" || exit 1
|
||||
|
||||
# Loop over pruning beams.
# For every beam value: prune the input lattices, compute the oracle
# transcription against the reference lattices, and score it with compute-wer.
# Reads $inputdir, $transcript and $dir set earlier in the script.
inv_acwt=10
# Acoustic scale is the reciprocal of inv_acwt (0.1 for inv_acwt=10).
acwt=$(perl -e "print (1.0/$inv_acwt);")
for beam in 0.01 1 5 10; do

  echo "Pruning $inputdir"'/*.lats.gz'" with invacwt=$inv_acwt and beam=$beam"
  # $dir/lats.pruned.gz is overwritten on every iteration; only the per-beam
  # logs, oracle transcripts and WER files are kept.
  # NOTE(review): the paths inside the "ark:...|" specifiers are embedded in a
  # command string (the *.lats.gz glob is presumably expanded by the shell that
  # Kaldi spawns for the pipe), so paths containing whitespace are still
  # unsupported here -- confirm against Kaldi's I/O documentation.
  if ! lattice-prune --acoustic-scale="$acwt" --beam="$beam" \
      "ark:gunzip -c $inputdir/*.lats.gz|" "ark,t:|gzip -c>$dir/lats.pruned.gz" \
      2> "$dir/prune.$beam.log"; then
    # Without this check the oracle step would score a truncated lattice file.
    echo "lattice-prune failed for beam=$beam, see $dir/prune.$beam.log" >&2
    exit 1
  fi

  echo "Computing oracle error rate w/r $transcript"
  if ! lattice-oracle --word-symbol-table=data/words.txt \
      --wildcard-symbols-list="$dir/ignore.txt" \
      "ark:$dir/test_trans.lats" "ark:gunzip -c $dir/lats.pruned.gz|" \
      "ark,t:$dir/oracle${beam}.tra" \
      2> "$dir/oracle.$beam.log"; then
    echo "lattice-oracle failed for beam=$beam, see $dir/oracle.$beam.log" >&2
    exit 1
  fi

  # Map the oracle transcript back to words, strip the symbols listed in
  # ignore.txt, and report the WER on stdout while also saving it.
  scripts/int2sym.pl --ignore-first-field data/words.txt \
    < "$dir/oracle${beam}.tra" \
    | sed -e 's:<s>::' -e 's:</s>::' -e 's:<UNK>::g' \
    | compute-wer --text --mode=present "ark:$dir/test_trans.filt" ark,p:- \
    | tee "$dir/wer_${beam}"
done
|
||||
|
Загрузка…
Ссылка в новой задаче