Committing improvements to s3 and some misc additions.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@449 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2011-09-12 16:53:52 +00:00
Родитель 4dc0bb01b9
Коммит a043cf56bb
12 изменённых файлов: 277 добавлений и 83 удалений

Просмотреть файл

@ -1,30 +1,55 @@
#!/bin/bash
script=$1
dir=$2
# This script basically calls the supplied decoding script
# once for each test set (in parallel on the same machine),
# and then averages the resulting WERs.
if [ $# -lt 2 -o $# -gt 3 -o ! -x $script -o ! -d $dir ]; then
echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
echo "[check your command line arguments]"
mono_opt=
if [ "$1" == "--mono" ]; then
mono_opt=$1;
shift;
fi
scripts/mkgraph.sh data/lang_test $dir $dir/graph
script=$1
decode_dir=$2 # e.g. exp/sgmm3b/decode
dir=`dirname $decode_dir` # e.g. exp/sgmm3b
if [ $# -lt 2 -o $# -gt 3 ]; then
echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
exit 1;
fi
# Fail early if the decoding script is missing or not executable, or if
# the directory $dir does not exist.  The variables are quoted so that
# empty values or paths containing spaces do not break the test.
if [ ! -x "$script" -o ! -d "$dir" ]; then
  echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
  exit 1;
fi
mkdir -p $decode_dir
scripts/mkgraph.sh $mono_opt data/lang_test $dir $dir/graph
if [ $# -eq 2 ]; then # normal case: 2 args.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
$script $dir data/test_$test data/lang $dir/decode_$test &
$script $dir data/test_$test data/lang $decode_dir/$test &
done
else
olddir=$3
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
if [ ! -d $olddir/decode_$test ]; then
echo "decode.sh: no such directory $oldir/decode_$test";
if [ ! -d $olddir/$test ]; then
echo "decode.sh: no such directory $olddir/$test";
exit 1;
fi
$script $dir data/test_$test data/lang $dir/decode_$test $olddir/decode_$test &
$script $dir data/test_$test data/lang $decode_dir/$test $olddir/$test &
done
fi
wait
scripts/average_wer.sh $dir/decode_?????/wer > $dir/wer
cat $dir/wer
# Average the WERs... there may be various wer files named e.g. wer, wer_10, etc.,
# so do this for each one.
for w in $decode_dir/mar87/wer*; do
wername=`basename $w`
scripts/average_wer.sh $decode_dir/?????/$wername > $decode_dir/$wername
done
grep WER $decode_dir/wer* || echo "Error decoding $decode_dir: no WER results found."

Просмотреть файл

@ -41,15 +41,9 @@ scripts/subset_data_dir.sh data/train 1000 data/train.1k
# train monophone system.
steps/train_mono.sh data/train.1k data/lang exp/mono
# decode mono [do this "manually" in the next few lines of
# script; generally this stuff gets called by "local/decode.sh"
# but here we want to pass the --mono option to mkgraph.sh.
scripts/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
steps/decode_deltas.sh exp/mono data/test_$test data/lang exp/mono/decode_$test &
done
wait
scripts/average_wer.sh exp/mono/decode_?????/wer > exp/mono/wer
local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode
# Get alignments from monophone system.
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali
@ -57,7 +51,7 @@ steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali
# train tri1 [first triphone pass]
steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1
# decode tri1
local/decode.sh steps/decode_deltas.sh exp/tri1
local/decode.sh steps/decode_deltas.sh exp/tri1/decode
# align tri1
steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
@ -66,24 +60,24 @@ steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
# train tri2a [delta+delta-deltas]
steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a
# decode tri2a
local/decode.sh steps/decode_deltas.sh exp/tri2a
local/decode.sh steps/decode_deltas.sh exp/tri2a/decode
# train tri2b [LDA+MLLT]
steps/train_lda_mllt.sh data/train data/lang exp/tri1_ali exp/tri2b
# decode tri2b
local/decode.sh steps/decode_lda_mllt.sh exp/tri2b
local/decode.sh steps/decode_lda_mllt.sh exp/tri2b/decode
# Train and test ET.
steps/train_lda_et.sh data/train data/lang exp/tri1_ali exp/tri2c
scripts/mkgraph.sh data/lang_test exp/tri2c exp/tri2c/graph
local/decode.sh steps/decode_lda_et.sh exp/tri2c
local/decode.sh steps/decode_lda_et.sh exp/tri2c/decode
# Align all data with LDA+MLLT system (tri2b) and do LDA+MLLT+SAT
steps/align_lda_mllt.sh --graphs "ark,s,cs:gunzip -c exp/tri2b/graphs.fsts.gz|" \
data/train data/lang exp/tri2b exp/tri2b_ali
steps/train_lda_mllt_sat.sh data/train data/lang exp/tri2b_ali exp/tri3d
scripts/mkgraph.sh data/lang_test exp/tri3d exp/tri3d/graph
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri3d
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri3d/decode
# Align all data with LDA+MLLT+SAT system (tri3d)
steps/align_lda_mllt_sat.sh --graphs "ark,s,cs:gunzip -c exp/tri3d/graphs.fsts.gz|" \
@ -92,7 +86,7 @@ steps/align_lda_mllt_sat.sh --graphs "ark,s,cs:gunzip -c exp/tri3d/graphs.fsts.g
# Try another pass on top of that.
steps/train_lda_mllt_sat.sh data/train data/lang exp/tri3d_ali exp/tri4d
scripts/mkgraph.sh data/lang_test exp/tri4d exp/tri4d/graph
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri4d
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri4d/decode
# Next, SGMM system-- train SGMM system with speaker vectors, on top
# of LDA+MLLT features.
@ -101,7 +95,7 @@ steps/train_ubm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3d
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3d/final.ubm exp/sgmm3d
scripts/mkgraph.sh data/lang_test exp/sgmm3d exp/sgmm3d/graph
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3d
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3d/decode
# Align LDA+ET system prior to training corresponding SGMM system.
steps/align_lda_et.sh --graphs "ark,s,cs:gunzip -c exp/tri2c/graphs.fsts.gz|" \
@ -111,12 +105,28 @@ steps/align_lda_et.sh --graphs "ark,s,cs:gunzip -c exp/tri2c/graphs.fsts.gz|" \
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri2c_ali exp/ubm3e
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2c_ali exp/ubm3e/final.ubm exp/sgmm3e
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3e
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3e/decode exp/tri2c/decode
# Now train SGMM system on top of LDA+MLLT+SAT
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f/final.ubm exp/sgmm4f
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm4f/decode exp/tri3d/decode
# Print the best WER line found among the wer_* files of each decode directory.
for x in exp/*/decode; do grep WER $x/wer_* | scripts/best_wer.sh; done
# Results from one run of the command above (kept as comments so that the
# script does not try to execute them):
# exp/mono/decode/wer_6:%WER 10.340701 [ 1296 / 12533, 95 ins, 391 del, 810 sub ]
# exp/sgmm3d/decode/wer_5:%WER 2.267284 [ 284 / 12526, 38 ins, 51 del, 195 sub ]
# exp/sgmm3e/decode/wer_6:%WER 2.122397 [ 266 / 12533, 37 ins, 51 del, 178 sub ]
# exp/sgmm4f/decode/wer_4:%WER 1.795261 [ 225 / 12533, 45 ins, 37 del, 143 sub ]
# exp/tri1/decode/wer_6:%WER 3.566584 [ 447 / 12533, 74 ins, 88 del, 285 sub ]
# exp/tri2a/decode/wer_7:%WER 3.518711 [ 441 / 12533, 57 ins, 91 del, 293 sub ]
# exp/tri2b/decode/wer_9:%WER 3.614458 [ 453 / 12533, 59 ins, 111 del, 283 sub ]
# exp/tri2c/decode/wer_6:%WER 2.833653 [ 355 / 12528, 54 ins, 71 del, 230 sub ]
# exp/tri3d/decode/wer_7:%WER 2.489428 [ 312 / 12533, 43 ins, 63 del, 206 sub ]
# exp/tri4d/decode/wer_7:%WER 2.649007 [ 332 / 12533, 53 ins, 67 del, 212 sub ]
##### Below here is trash. ######
#steps/train_lda_mllt.sh.bak data/train data/train.1k data/lang exp/tri1 exp/tri2b_tmp

23
egs/rm/s3/scripts/best_wer.sh Executable file
Просмотреть файл

@ -0,0 +1,23 @@
#!/bin/bash
#
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from one directory above this script.
# Reads lines on standard input (e.g. the output of "grep WER .../wer_*")
# and prints the one line with the lowest WER value; prints nothing if no
# input line contains "WER <value>".
# The comparison against the best value so far is parenthesised so that it
# is only evaluated for lines that matched the WER pattern (&& binds more
# tightly than ||, so without the parentheses a non-matching line could be
# compared using a stale or undefined capture).
perl -e 'while(<>){
  if (m/WER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; }}
  if (defined $bestline){ print $bestline; } '

Просмотреть файл

@ -18,10 +18,13 @@
# Decoding script that works with a GMM model and delta-delta plus
# cepstral mean subtraction features. Used, for example, to decode
# mono/ and tri1/
# This script generates lattices and rescores them with different
# acoustic weights, in order to explore a range of different
# weights.
if [ $# != 4 ]; then
echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode_feb89"
echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
exit 1;
fi
@ -49,20 +52,26 @@ fi
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# For Resource Management, we use beam of 30 and acwt of 1/7.
# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/test.tra ark,t:$dir/test.ali \
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
ark,t:$dir/test.tra ark,t:$dir/test.ali \
2> $dir/decode.log || exit 1;
# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/test.tra >& $dir/wer
# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
2>$dir/rescore_${inv_acwt}.log
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
done

Просмотреть файл

@ -58,33 +58,38 @@ compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,t:$dir/cm
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
# For Resource Management, we use beam of 30 and acwt of 1/7.
# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
gmm-decode-faster --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.alimdl $graphdir/HCLG.fst "$sifeats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
2> $dir/decode_pass1.log || exit 1;
2> $dir/decode_pass1.log || exit 1;
( ali-to-post ark:$dir/pass1.ali ark:- | \
weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- | \
gmm-post-to-gpost $srcdir/final.alimdl "$sifeats" ark:- ark:- | \
gmm-est-et --spk2utt=ark:$data/spk2utt $srcdir/final.mdl $srcdir/final.et "$sifeats" \
ark,s,cs:- ark:$dir/trans.ark ark,t:$dir/warp ) \
2> $dir/trans.log || exit 1;
2> $dir/trans.log || exit 1;
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- | transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"
# Second pass decoding...
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
2> $dir/decode_pass2.log || exit 1;
# Second pass decoding... generate lattices and rescore with
# various scales.
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali 2> $dir/decode_pass2.log || exit 1;
# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer
# Now rescore lattices with various acoustic scales, and compute the WERs.
for inv_acwt in 4 5 6 7 8 9 10; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
2>$dir/rescore_${inv_acwt}.log
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
done

Просмотреть файл

@ -15,13 +15,16 @@
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and cepstral
# mean subtraction plus splice-9-frames plus LDA + mllt features.
# Used, for example, to decode tri2b/.
# Decoding script that works with a GMM model and cepstral mean
# subtraction plus spliced frames plus LDA + MLLT features (final.mat).
# Used, for example, to decode tri2b/
# This script generates lattices and rescores them with different
# acoustic weights, in order to explore a range of different
# weights.
if [ $# != 4 ]; then
echo "Usage: steps/decode_ldamllt.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
echo " e.g.: steps/decode_ldamllt.sh exp/tri2b data/test_feb89 data/lang_test exp/tri2b/decode_feb89"
echo "Usage: steps/decode_lda_mllt.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
echo " e.g.: steps/decode_lda_mllt.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
exit 1;
fi
@ -35,7 +38,6 @@ mkdir -p $dir
if [ -f path.sh ]; then . path.sh; fi
# -f means file exists; -o means or.
if [ ! -f $srcdir/final.mdl -o ! -f $srcdir/final.mat ]; then
echo Input files $srcdir/final.mdl and/or $srcdir/final.mat do not exist.
exit 1;
@ -50,18 +52,26 @@ fi
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
# For Resource Management, we use beam of 30 and acwt of 1/7.
# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/test.tra ark,t:$dir/test.ali \
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
ark,t:$dir/test.tra ark,t:$dir/test.ali \
2> $dir/decode.log || exit 1;
# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/test.tra >& $dir/wer
# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
2>$dir/rescore_${inv_acwt}.log
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
done

Просмотреть файл

@ -63,7 +63,7 @@ sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/c
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
gmm-decode-faster --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.alimdl $graphdir/HCLG.fst "$sifeats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
2> $dir/decode_pass1.log || exit 1;
@ -77,15 +77,21 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- | transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"
# Second pass decoding...
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
2> $dir/decode_pass2.log || exit 1;
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali 2> $dir/decode_pass2.log || exit 1;
# In this setup there are no non-scored words, so
# scoring is simple.
# Now rescore lattices with various acoustic scales, and compute the WERs.
for inv_acwt in 4 5 6 7 8 9 10; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
2>$dir/rescore_${inv_acwt}.log
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
done
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer

Просмотреть файл

@ -78,7 +78,7 @@ sgmm-gselect $srcdir/final.mdl "$feats" "ark,t:|gzip -c > $dir/gselect.gz" \
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
# Using smaller beam for first decoding pass.
sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
sgmm-decode-faster "$gselect_opt" --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.alimdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
2> $dir/decode_pass1.log || exit 1;
@ -97,17 +97,23 @@ sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol
# Second pass decoding...
sgmm-decode-faster --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
sgmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
--spk-vecs=ark:$dir/vecs.ark --utt2spk=ark:$data/utt2spk \
--word-symbol-table=$lang/words.txt $srcdir/final.mdl $graphdir/HCLG.fst \
"$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
2> $dir/decode_pass2.log || exit 1;
"$feats" "ark,t:|gzip -c >$dir/lat.gz" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
2> $dir/decode_pass2.log || exit 1;
# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer
# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
2>$dir/rescore_${inv_acwt}.log
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
done

Просмотреть файл

@ -269,6 +269,29 @@ void ScaleLattice(
}
}
/// Replaces the string component of every arc weight, and of every
/// non-Zero final weight, with an empty vector, keeping the Weight
/// component as it was.  The lattice is modified in place.
template<class Weight, class Int>
void RemoveAlignmentsFromCompactLattice(
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst) {
  typedef CompactLatticeWeightTpl<Weight, Int> CompactWeight;
  typedef ArcTpl<CompactWeight> CompactArc;
  typedef MutableFst<CompactArc> LatticeFst;
  typedef typename CompactArc::StateId StateId;

  // The empty string that replaces each alignment.
  const std::vector<Int> no_alignment;

  const StateId state_count = fst->NumStates();
  for (StateId state = 0; state < state_count; ++state) {
    // Rewrite each outgoing arc with the same weight but no string.
    for (MutableArcIterator<LatticeFst> arc_iter(fst, state);
         !arc_iter.Done();
         arc_iter.Next()) {
      CompactArc stripped = arc_iter.Value();
      stripped.weight = CompactWeight(stripped.weight.Weight(), no_alignment);
      arc_iter.SetValue(stripped);
    }
    // Final weights equal to Zero() are left untouched.
    const CompactWeight final_weight = fst->Final(state);
    if (final_weight != CompactWeight::Zero()) {
      fst->SetFinal(state,
                    CompactWeight(final_weight.Weight(), no_alignment));
    }
  }
}
template<class Weight, class Int>
void PruneCompactLattice(

Просмотреть файл

@ -156,6 +156,12 @@ void ScaleLattice(
const vector<vector<ScaleFloat> > &scale,
MutableFst<ArcTpl<Weight> > *fst);
/// Removes state-level alignments (the strings that are
/// part of the weights).
template<class Weight, class Int>
void RemoveAlignmentsFromCompactLattice(
MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst);
/// Class LatticeToStdMapper maps a normal arc (StdArc)
/// to a LatticeArc by putting the StdArc weight as the first
/// element of the LatticeWeight. Useful when doing LM

Просмотреть файл

@ -6,7 +6,7 @@ include ../kaldi.mk
BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-nbest \
lattice-lmrescore lattice-scale lattice-union lattice-to-post \
lattice-determinize lattice-oracle string-to-lattice
lattice-determinize lattice-oracle string-to-lattice lattice-rmali
OBJFILES =

Просмотреть файл

@ -0,0 +1,71 @@
// latbin/lattice-rmali.cc
// Copyright 2009-2011 Microsoft Corporation
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fstext/fstext-lib.h"
#include "lat/kaldi-lattice.h"
/// lattice-rmali: reads compact lattices from the rspecifier given as the
/// first argument, removes the state-sequence strings from their weights,
/// and writes them to the wspecifier given as the second argument.
/// Returns 0 if at least one lattice was processed, 1 if none were, and
/// -1 if an exception was caught.
int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    const char *usage =
        "Remove state-sequences from lattice weights\n"
        "Usage: lattice-rmali [options] lattice-rspecifier lattice-wspecifier\n"
        " e.g.: lattice-rmali ark:1.lats ark:proj.lats\n";

    ParseOptions po(usage);
    po.Read(argc, argv);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string lats_rspecifier = po.GetArg(1),
        lats_wspecifier = po.GetArg(2);

    SequentialCompactLatticeReader lattice_reader(lats_rspecifier);

    // Write as compact lattice.
    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);

    int32 n_done = 0;  // there is no failure mode, barring a crash.

    for (; !lattice_reader.Done(); lattice_reader.Next()) {
      std::string key = lattice_reader.Key();
      // Take a copy so the reader's own copy can be released before the
      // lattice is modified and written out.
      CompactLattice clat = lattice_reader.Value();
      lattice_reader.FreeCurrent();
      RemoveAlignmentsFromCompactLattice(&clat);
      compact_lattice_writer.Write(key, clat);
      n_done++;
    }
    KALDI_LOG << "Done removing alignments from " << n_done << " lattices.";
    return (n_done != 0 ? 0 : 1);
  } catch(const std::exception& e) {
    std::cerr << e.what();
    return -1;
  }
}