зеркало из https://github.com/mozilla/kaldi.git
Committing improvements to s3 and some misc additions.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@449 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
4dc0bb01b9
Коммит
a043cf56bb
|
@ -1,30 +1,55 @@
|
|||
#!/bin/bash
|
||||
|
||||
script=$1
|
||||
dir=$2
|
||||
# This script basically calls the supplied decoding script
|
||||
# once for each test set (in parallel on the same machine),
|
||||
# and then averages the resulting WERs.
|
||||
|
||||
if [ $# -lt 2 -o $# -gt 3 -o ! -x $script -o ! -d $dir ]; then
|
||||
echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
|
||||
echo "[check your command line arguments]"
|
||||
mono_opt=
|
||||
|
||||
if [ "$1" == "--mono" ]; then
|
||||
mono_opt=$1;
|
||||
shift;
|
||||
fi
|
||||
|
||||
scripts/mkgraph.sh data/lang_test $dir $dir/graph
|
||||
script=$1
|
||||
decode_dir=$2 # e.g. exp/sgmm3b/decode
|
||||
dir=`dirname $decode_dir` # e.g. exp/sgmm3b
|
||||
|
||||
if [ $# -lt 2 -o $# -gt 3 ]; then
|
||||
echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
|
||||
exit 1;
|
||||
fi
|
||||
# Fail early if the decoding script is missing or not executable, or if
# $dir (the dirname of the decode directory) does not exist.
# The variables are quoted so that an empty or space-containing value
# cannot break the test expression.
if [ ! -x "$script" ] || [ ! -d "$dir" ]; then
  echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
  exit 1;
fi
|
||||
|
||||
mkdir -p $decode_dir
|
||||
scripts/mkgraph.sh $mono_opt data/lang_test $dir $dir/graph
|
||||
|
||||
if [ $# -eq 2 ]; then # normal case: 2 args.
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
$script $dir data/test_$test data/lang $dir/decode_$test &
|
||||
$script $dir data/test_$test data/lang $decode_dir/$test &
|
||||
done
|
||||
else
|
||||
olddir=$3
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
if [ ! -d $olddir/decode_$test ]; then
|
||||
echo "decode.sh: no such directory $oldir/decode_$test";
|
||||
if [ ! -d $olddir/$test ]; then
|
||||
echo "decode.sh: no such directory $olddir/$test";
|
||||
exit 1;
|
||||
fi
|
||||
$script $dir data/test_$test data/lang $dir/decode_$test $olddir/decode_$test &
|
||||
$script $dir data/test_$test data/lang $decode_dir/$test $olddir/$test &
|
||||
done
|
||||
fi
|
||||
wait
|
||||
scripts/average_wer.sh $dir/decode_?????/wer > $dir/wer
|
||||
cat $dir/wer
|
||||
# Average the WERs... there may be various wer files named e.g. wer, wer_10, etc.,
|
||||
# so do this for each one.
|
||||
for w in $decode_dir/mar87/wer*; do
|
||||
wername=`basename $w`
|
||||
scripts/average_wer.sh $decode_dir/?????/$wername > $decode_dir/$wername
|
||||
done
|
||||
grep WER $decode_dir/wer* || echo "Error decoding $decode_dir: no WER results found."
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -41,15 +41,9 @@ scripts/subset_data_dir.sh data/train 1000 data/train.1k
|
|||
# train monophone system.
|
||||
steps/train_mono.sh data/train.1k data/lang exp/mono
|
||||
|
||||
# decode mono [do this "manually" in the next few lines of
|
||||
# script; generally this stuff gets called by "local/decode.sh"
|
||||
# but here we want to pass the --mono option to mkgraph.sh.
|
||||
scripts/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
steps/decode_deltas.sh exp/mono data/test_$test data/lang exp/mono/decode_$test &
|
||||
done
|
||||
wait
|
||||
scripts/average_wer.sh exp/mono/decode_?????/wer > exp/mono/wer
|
||||
|
||||
local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode
|
||||
|
||||
|
||||
# Get alignments from monophone system.
|
||||
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali
|
||||
|
@ -57,7 +51,7 @@ steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali
|
|||
# train tri1 [first triphone pass]
|
||||
steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1
|
||||
# decode tri1
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri1
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri1/decode
|
||||
|
||||
# align tri1
|
||||
steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
|
||||
|
@ -66,24 +60,24 @@ steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
|
|||
# train tri2a [delta+delta-deltas]
|
||||
steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a
|
||||
# decode tri2a
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri2a
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri2a/decode
|
||||
|
||||
# train tri2b [LDA+MLLT]
|
||||
steps/train_lda_mllt.sh data/train data/lang exp/tri1_ali exp/tri2b
|
||||
# decode tri2b
|
||||
local/decode.sh steps/decode_lda_mllt.sh exp/tri2b
|
||||
local/decode.sh steps/decode_lda_mllt.sh exp/tri2b/decode
|
||||
|
||||
# Train and test ET.
|
||||
steps/train_lda_et.sh data/train data/lang exp/tri1_ali exp/tri2c
|
||||
scripts/mkgraph.sh data/lang_test exp/tri2c exp/tri2c/graph
|
||||
local/decode.sh steps/decode_lda_et.sh exp/tri2c
|
||||
local/decode.sh steps/decode_lda_et.sh exp/tri2c/decode
|
||||
|
||||
# Align all data with LDA+MLLT system (tri2b) and do LDA+MLLT+SAT
|
||||
steps/align_lda_mllt.sh --graphs "ark,s,cs:gunzip -c exp/tri2b/graphs.fsts.gz|" \
|
||||
data/train data/lang exp/tri2b exp/tri2b_ali
|
||||
steps/train_lda_mllt_sat.sh data/train data/lang exp/tri2b_ali exp/tri3d
|
||||
scripts/mkgraph.sh data/lang_test exp/tri3d exp/tri3d/graph
|
||||
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri3d
|
||||
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri3d/decode
|
||||
|
||||
# Align all data with LDA+MLLT+SAT system (tri3d)
|
||||
steps/align_lda_mllt_sat.sh --graphs "ark,s,cs:gunzip -c exp/tri3d/graphs.fsts.gz|" \
|
||||
|
@ -92,7 +86,7 @@ steps/align_lda_mllt_sat.sh --graphs "ark,s,cs:gunzip -c exp/tri3d/graphs.fsts.g
|
|||
# Try another pass on top of that.
|
||||
steps/train_lda_mllt_sat.sh data/train data/lang exp/tri3d_ali exp/tri4d
|
||||
scripts/mkgraph.sh data/lang_test exp/tri4d exp/tri4d/graph
|
||||
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri4d
|
||||
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri4d/decode
|
||||
|
||||
# Next, SGMM system-- train SGMM system with speaker vectors, on top
|
||||
# of LDA+MLLT features.
|
||||
|
@ -101,7 +95,7 @@ steps/train_ubm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3d
|
|||
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3d/final.ubm exp/sgmm3d
|
||||
|
||||
scripts/mkgraph.sh data/lang_test exp/sgmm3d exp/sgmm3d/graph
|
||||
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3d
|
||||
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3d/decode
|
||||
|
||||
# Align LDA+ET system prior to training corresponding SGMM system.
|
||||
steps/align_lda_et.sh --graphs "ark,s,cs:gunzip -c exp/tri2c/graphs.fsts.gz|" \
|
||||
|
@ -111,12 +105,28 @@ steps/align_lda_et.sh --graphs "ark,s,cs:gunzip -c exp/tri2c/graphs.fsts.gz|" \
|
|||
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri2c_ali exp/ubm3e
|
||||
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2c_ali exp/ubm3e/final.ubm exp/sgmm3e
|
||||
|
||||
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3e
|
||||
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3e/decode exp/tri2c/decode
|
||||
|
||||
# Now train SGMM system on top of LDA+MLLT+SAT
|
||||
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f
|
||||
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f/final.ubm exp/sgmm4f
|
||||
|
||||
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm4f/decode exp/tri3d/decode
|
||||
|
||||
for x in exp/*/decode; do grep WER $x/wer_* | scripts/best_wer.sh; done
|
||||
|
||||
exp/mono/decode/wer_6:%WER 10.340701 [ 1296 / 12533, 95 ins, 391 del, 810 sub ]
|
||||
exp/sgmm3d/decode/wer_5:%WER 2.267284 [ 284 / 12526, 38 ins, 51 del, 195 sub ]
|
||||
exp/sgmm3e/decode/wer_6:%WER 2.122397 [ 266 / 12533, 37 ins, 51 del, 178 sub ]
|
||||
exp/sgmm4f/decode/wer_4:%WER 1.795261 [ 225 / 12533, 45 ins, 37 del, 143 sub ]
|
||||
exp/tri1/decode/wer_6:%WER 3.566584 [ 447 / 12533, 74 ins, 88 del, 285 sub ]
|
||||
exp/tri2a/decode/wer_7:%WER 3.518711 [ 441 / 12533, 57 ins, 91 del, 293 sub ]
|
||||
exp/tri2b/decode/wer_9:%WER 3.614458 [ 453 / 12533, 59 ins, 111 del, 283 sub ]
|
||||
exp/tri2c/decode/wer_6:%WER 2.833653 [ 355 / 12528, 54 ins, 71 del, 230 sub ]
|
||||
exp/tri3d/decode/wer_7:%WER 2.489428 [ 312 / 12533, 43 ins, 63 del, 206 sub ]
|
||||
exp/tri4d/decode/wer_7:%WER 2.649007 [ 332 / 12533, 53 ins, 67 del, 212 sub ]
|
||||
|
||||
|
||||
##### Below here is trash. ######
|
||||
|
||||
#steps/train_lda_mllt.sh.bak data/train data/train.1k data/lang exp/tri1 exp/tri2b_tmp
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
# Reads lines on stdin and prints the one whose "WER <number>" field is
# lowest; prints nothing if no input line has a WER field.
# The parentheses around the "||" are required: "&&" binds tighter than
# "||", so without them a line lacking a WER field would still be tested
# with "$bestwer > $1" and could replace the best line found so far.
perl -e 'while(<>){
  if (m/WER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; }}
  if (defined $bestline){ print $bestline; } '
|
||||
|
|
@ -18,10 +18,13 @@
|
|||
# Decoding script that works with a GMM model and delta-delta plus
|
||||
# cepstral mean subtraction features. Used, for example, to decode
|
||||
# mono/ and tri1/
|
||||
# This script generates lattices and rescores them with different
|
||||
# acoustic weights, in order to explore a range of different
|
||||
# weights.
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
|
||||
echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode_feb89"
|
||||
echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
@ -49,20 +52,26 @@ fi
|
|||
# CMVN stats-- we make them part of a pipe.
|
||||
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
# For Resource Management, we use beam of 30 and acwt of 1/7.
|
||||
# For Resource Management, we use beam of 20 and acwt of 1/10.
|
||||
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
|
||||
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
|
||||
|
||||
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/test.tra ark,t:$dir/test.ali \
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/test.tra ark,t:$dir/test.ali \
|
||||
2> $dir/decode.log || exit 1;
|
||||
|
||||
# In this setup there are no non-scored words, so
|
||||
# scoring is simple.
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test.tra >& $dir/wer
|
||||
|
||||
|
||||
# Now rescore lattices with various acoustic scales, and compute the WER.
|
||||
for inv_acwt in 4 5 6 7 8 9 10; do
|
||||
acwt=`perl -e "print (1.0/$inv_acwt);"`
|
||||
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
|
||||
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
|
||||
2>$dir/rescore_${inv_acwt}.log
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
|
||||
>& $dir/wer_${inv_acwt}
|
||||
done
|
||||
|
|
|
@ -58,33 +58,38 @@ compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,t:$dir/cm
|
|||
|
||||
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
|
||||
|
||||
# For Resource Management, we use beam of 30 and acwt of 1/7.
|
||||
# For Resource Management, we use beam of 20 and acwt of 1/10.
|
||||
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
|
||||
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
|
||||
|
||||
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.alimdl $graphdir/HCLG.fst "$sifeats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
|
||||
2> $dir/decode_pass1.log || exit 1;
|
||||
2> $dir/decode_pass1.log || exit 1;
|
||||
|
||||
( ali-to-post ark:$dir/pass1.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- | \
|
||||
gmm-post-to-gpost $srcdir/final.alimdl "$sifeats" ark:- ark:- | \
|
||||
gmm-est-et --spk2utt=ark:$data/spk2utt $srcdir/final.mdl $srcdir/final.et "$sifeats" \
|
||||
ark,s,cs:- ark:$dir/trans.ark ark,t:$dir/warp ) \
|
||||
2> $dir/trans.log || exit 1;
|
||||
2> $dir/trans.log || exit 1;
|
||||
|
||||
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- | transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"
|
||||
|
||||
# Second pass decoding...
|
||||
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
|
||||
2> $dir/decode_pass2.log || exit 1;
|
||||
# Second pass decoding... generate lattices and rescore with
|
||||
# various scales.
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali 2> $dir/decode_pass2.log || exit 1;
|
||||
|
||||
|
||||
# In this setup there are no non-scored words, so
|
||||
# scoring is simple.
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer
|
||||
# Now rescore lattices with various acoustic scales, and compute the WERs.
|
||||
for inv_acwt in 4 5 6 7 8 9 10; do
|
||||
acwt=`perl -e "print (1.0/$inv_acwt);"`
|
||||
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
|
||||
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
|
||||
2>$dir/rescore_${inv_acwt}.log
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
|
||||
>& $dir/wer_${inv_acwt}
|
||||
done
|
||||
|
|
|
@ -15,13 +15,16 @@
|
|||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Decoding script that works with a GMM model and cepstral
|
||||
# mean subtraction plus splice-9-frames plus LDA + mllt features.
|
||||
# Used, for example, to decode tri2b/.
|
||||
# Decoding script that works with a GMM model and cepstral
|
||||
# mean subtraction plus splice-9-frames plus LDA + MLLT features.
|
||||
# Used, for example, to decode tri2b/.
|
||||
# This script generates lattices and rescores them with different
|
||||
# acoustic weights, in order to explore a range of different
|
||||
# weights.
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/decode_ldamllt.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
|
||||
echo " e.g.: steps/decode_ldamllt.sh exp/tri2b data/test_feb89 data/lang_test exp/tri2b/decode_feb89"
|
||||
echo "Usage: steps/decode_lda_mllt.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
|
||||
echo " e.g.: steps/decode_lda_mllt.sh exp/tri2b data/test_feb89 data/lang_test exp/tri2b/decode/feb89"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
@ -35,7 +38,6 @@ mkdir -p $dir
|
|||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
# -f means file exists; -o means or.
|
||||
if [ ! -f $srcdir/final.mdl -o ! -f $srcdir/final.mat ]; then
|
||||
echo Input files $srcdir/final.mdl and/or $srcdir/final.mat do not exist.
|
||||
exit 1;
|
||||
|
@ -50,18 +52,26 @@ fi
|
|||
# CMVN stats-- we make them part of a pipe.
|
||||
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
|
||||
|
||||
# For Resource Management, we use beam of 30 and acwt of 1/7.
|
||||
# For Resource Management, we use beam of 20 and acwt of 1/10.
|
||||
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
|
||||
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
|
||||
|
||||
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/test.tra ark,t:$dir/test.ali \
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/test.tra ark,t:$dir/test.ali \
|
||||
2> $dir/decode.log || exit 1;
|
||||
|
||||
# In this setup there are no non-scored words, so
|
||||
# scoring is simple.
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test.tra >& $dir/wer
|
||||
# Now rescore lattices with various acoustic scales, and compute the WER.
|
||||
for inv_acwt in 4 5 6 7 8 9 10; do
|
||||
acwt=`perl -e "print (1.0/$inv_acwt);"`
|
||||
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
|
||||
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
|
||||
2>$dir/rescore_${inv_acwt}.log
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
|
||||
>& $dir/wer_${inv_acwt}
|
||||
done
|
||||
|
|
|
@ -63,7 +63,7 @@ sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/c
|
|||
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
|
||||
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
|
||||
|
||||
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.alimdl $graphdir/HCLG.fst "$sifeats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
|
||||
2> $dir/decode_pass1.log || exit 1;
|
||||
|
||||
|
@ -77,15 +77,21 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/
|
|||
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- | transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"
|
||||
|
||||
# Second pass decoding...
|
||||
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
|
||||
2> $dir/decode_pass2.log || exit 1;
|
||||
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
|
||||
ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali 2> $dir/decode_pass2.log || exit 1;
|
||||
|
||||
|
||||
# In this setup there are no non-scored words, so
|
||||
# scoring is simple.
|
||||
# Now rescore lattices with various acoustic scales, and compute the WERs.
|
||||
for inv_acwt in 4 5 6 7 8 9 10; do
|
||||
acwt=`perl -e "print (1.0/$inv_acwt);"`
|
||||
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
|
||||
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
|
||||
2>$dir/rescore_${inv_acwt}.log
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
|
||||
>& $dir/wer_${inv_acwt}
|
||||
done
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ sgmm-gselect $srcdir/final.mdl "$feats" "ark,t:|gzip -c > $dir/gselect.gz" \
|
|||
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
|
||||
|
||||
# First decoding pass (uses the same beam as the second pass).
|
||||
sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
sgmm-decode-faster "$gselect_opt" --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
|
||||
$srcdir/final.alimdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
|
||||
2> $dir/decode_pass1.log || exit 1;
|
||||
|
||||
|
@ -97,17 +97,23 @@ sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol
|
|||
|
||||
|
||||
# Second pass decoding...
|
||||
sgmm-decode-faster --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
|
||||
sgmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
|
||||
--spk-vecs=ark:$dir/vecs.ark --utt2spk=ark:$data/utt2spk \
|
||||
--word-symbol-table=$lang/words.txt $srcdir/final.mdl $graphdir/HCLG.fst \
|
||||
"$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
|
||||
2> $dir/decode_pass2.log || exit 1;
|
||||
"$feats" "ark,t:|gzip -c >$dir/lat.gz" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
|
||||
2> $dir/decode_pass2.log || exit 1;
|
||||
|
||||
|
||||
# In this setup there are no non-scored words, so
|
||||
# scoring is simple.
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer
|
||||
# Now rescore lattices with various acoustic scales, and compute the WER.
|
||||
for inv_acwt in 4 5 6 7 8 9 10; do
|
||||
acwt=`perl -e "print (1.0/$inv_acwt);"`
|
||||
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
|
||||
"ark:gunzip -c $dir/lat.gz|" ark:$dir/${inv_acwt}.tra \
|
||||
2>$dir/rescore_${inv_acwt}.log
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
|
||||
>& $dir/wer_${inv_acwt}
|
||||
done
|
||||
|
||||
|
|
|
@ -269,6 +269,29 @@ void ScaleLattice(
|
|||
}
|
||||
}
|
||||
|
||||
template<class Weight, class Int>
|
||||
void RemoveAlignmentsFromCompactLattice(
|
||||
MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst) {
|
||||
typedef CompactLatticeWeightTpl<Weight, Int> W;
|
||||
typedef ArcTpl<W> Arc;
|
||||
typedef MutableFst<Arc> Fst;
|
||||
typedef typename Arc::StateId StateId;
|
||||
typedef typename Arc::Label Label;
|
||||
StateId num_states = fst->NumStates();
|
||||
for (StateId s = 0; s < num_states; s++) {
|
||||
for (MutableArcIterator<Fst> aiter(fst, s);
|
||||
!aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
arc.weight = W(arc.weight.Weight(), std::vector<Int>());
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
W final_weight = fst->Final(s);
|
||||
if (final_weight != W::Zero())
|
||||
fst->SetFinal(s, W(final_weight.Weight(), std::vector<Int>()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Weight, class Int>
|
||||
void PruneCompactLattice(
|
||||
|
|
|
@ -156,6 +156,12 @@ void ScaleLattice(
|
|||
const vector<vector<ScaleFloat> > &scale,
|
||||
MutableFst<ArcTpl<Weight> > *fst);
|
||||
|
||||
/// Removes state-level alignments (the strings that are
|
||||
/// part of the weights).
|
||||
template<class Weight, class Int>
|
||||
void RemoveAlignmentsFromCompactLattice(
|
||||
MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst);
|
||||
|
||||
/// Class LatticeToStdMapper maps a normal arc (StdArc)
|
||||
/// to a LatticeArc by putting the StdArc weight as the first
|
||||
/// element of the LatticeWeight. Useful when doing LM
|
||||
|
|
|
@ -6,7 +6,7 @@ include ../kaldi.mk
|
|||
|
||||
BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-nbest \
|
||||
lattice-lmrescore lattice-scale lattice-union lattice-to-post \
|
||||
lattice-determinize lattice-oracle string-to-lattice
|
||||
lattice-determinize lattice-oracle string-to-lattice lattice-rmali
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
// latbin/lattice-rmali.cc
|
||||
|
||||
// Copyright 2009-2011 Microsoft Corporation
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "fstext/fstext-lib.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
typedef kaldi::int64 int64;
|
||||
using fst::SymbolTable;
|
||||
using fst::VectorFst;
|
||||
using fst::StdArc;
|
||||
|
||||
const char *usage =
|
||||
"Remove state-sequences from lattice weights\n"
|
||||
"Usage: lattice-rmali [options] lattice-rspecifier lattice-wspecifier\n"
|
||||
" e.g.: lattice-rmali ark:1.lats ark:proj.lats\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string lats_rspecifier = po.GetArg(1),
|
||||
lats_wspecifier = po.GetArg(2);
|
||||
|
||||
SequentialCompactLatticeReader lattice_reader(lats_rspecifier);
|
||||
|
||||
// Write as compact lattice.
|
||||
CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
|
||||
|
||||
int32 n_done = 0; // there is no failure mode, barring a crash.
|
||||
|
||||
for (; !lattice_reader.Done(); lattice_reader.Next()) {
|
||||
std::string key = lattice_reader.Key();
|
||||
CompactLattice clat = lattice_reader.Value();
|
||||
lattice_reader.FreeCurrent();
|
||||
RemoveAlignmentsFromCompactLattice(&clat);
|
||||
compact_lattice_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
KALDI_LOG << "Done removing alignments from" << n_done << " lattices.";
|
||||
return (n_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
Загрузка…
Ссылка в новой задаче