git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@65 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2011-06-06 02:56:08 +00:00
Родитель ac48d4d172
Коммит ce34fdb4f0
9 изменённых файлов: 616 добавлений и 13 удалений

Просмотреть файл

@ -74,7 +74,7 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
fi
if [ ! -f $dir/test_${test}.pass2.ali ]; then
sgmm-decode-faster $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.pass2.tra ark,t:$dir/test_${test}.pass2.ali 2> $dir/pass2_${test}.log
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.pass2.tra ark,t:$dir/test_${test}.pass2.ali 2> $dir/pass2_${test}.log
fi
if [ ! -f $dir/test_${test}.fmllr_xforms ]; then
@ -86,7 +86,7 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
fi
if [ ! -f $dir/test_${test}.tra ]; then
sgmm-decode-faster-fmllr $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $fmllr_model $graphdir/HCLG.fst "$feats" ark:$dir/test_${test}.fmllr_xforms ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
sgmm-decode-faster-fmllr "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $fmllr_model $graphdir/HCLG.fst "$feats" ark:$dir/test_${test}.fmllr_xforms ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
fi
# the ,p option lets it score partial output without dying..

Просмотреть файл

@ -54,7 +54,7 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
ark:$dir/test_${test}.vecs ) 2>$dir/vecs_${test}.log
sgmm-decode-faster $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \

Просмотреть файл

@ -55,6 +55,11 @@ system:
tri2h 13.4 20.2 | [ splice-9-frames + HLDA... worse than tri2a]
tri2i 12.4 18.4 | [ triple-deltas + HLDA... same as tri2a]
tri2j 12.8 18.3 | [ triple-deltas+LDA+MLLT... slightly worse than tri2a]
tri2k 11.1 15.1 | [ splice-9-frames + LDA + ET ]
[utt] 11.1 15.0 | [adaptation per utterance]
[spk,+fmllr] 10.1 14.0 | [per speaker, plus fMLLR]
tri2l 9.6 13.7 | train with SAT; test with fMLLR
[utt] 12.0 16.8 | [adaptation per utterance]
# TODO: tri2k, and tri3*?, and SGMM stuff.
@ -102,6 +107,16 @@ exp/decode_tri2i_tgpr_eval92/wer:%WER 12.39 [ 699 / 5641, 130 ins, 72 del, 497 s
exp/decode_tri2i_tgpr_eval93/wer:%WER 18.35 [ 631 / 3439, 58 ins, 102 del, 471 sub ]
exp/decode_tri2j_tgpr_eval92/wer:%WER 12.82 [ 723 / 5641, 127 ins, 70 del, 526 sub ]
exp/decode_tri2j_tgpr_eval93/wer:%WER 18.26 [ 628 / 3439, 59 ins, 99 del, 470 sub ]
exp/decode_tri2k_tgpr_eval92/wer:%WER 11.06 [ 624 / 5641, 133 ins, 42 del, 449 sub ]
exp/decode_tri2k_tgpr_eval93/wer:%WER 15.09 [ 519 / 3439, 72 ins, 72 del, 375 sub ]
exp/decode_tri2k_tgpr_fmllr_eval92/wer:%WER 10.07 [ 568 / 5641, 120 ins, 48 del, 400 sub ]
exp/decode_tri2k_tgpr_fmllr_eval93/wer:%WER 14.02 [ 482 / 3439, 77 ins, 67 del, 338 sub ]
exp/decode_tri2k_tgpr_utt_eval92/wer:%WER 11.13 [ 628 / 5641, 135 ins, 44 del, 449 sub ]
exp/decode_tri2k_tgpr_utt_eval93/wer:%WER 14.95 [ 514 / 3439, 70 ins, 74 del, 370 sub ]
exp/decode_tri2l_tgpr_eval92/wer:%WER 9.64 [ 544 / 5641, 121 ins, 44 del, 379 sub ]
exp/decode_tri2l_tgpr_utt_eval92/wer:%WER 12.00 [ 677 / 5641, 141 ins, 60 del, 476 sub ]
exp/decode_tri2l_tgpr_utt_eval93/wer:%WER 16.75 [ 576 / 3439, 59 ins, 93 del, 424 sub ]
exp/decode_tri3a_bg_eval92/wer:%WER 11.82 [ 667 / 5641, 132 ins, 63 del, 472 sub ]
exp/decode_tri3a_bg_eval93/wer:%WER 15.00 [ 516 / 3439, 62 ins, 75 del, 379 sub ]
exp/decode_tri3a_tgpr_dfmllr_eval92/wer:%WER 10.51 [ 593 / 5641, 111 ins, 51 del, 431 sub ]

Просмотреть файл

@ -324,15 +324,20 @@ steps/train_tri2j.sh
# LDA+ET
steps/train_tri2k.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2k/tree exp/tri2k/final.mdl exp/graph_tri2k_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2k_tgpr_utt_eval92 exp/graph_tri2k_tg_pruned/HCLG.fst steps/decode_tri2k.sh data/eval_nov92.scp
scripts/decode.sh --per-spk exp/decode_tri2k_tgpr_eval92 exp/graph_tri2k_tg_pruned/HCLG.fst steps/decode_tri2k.sh data/eval_nov92.scp
for year in 92 93; do
scripts/decode.sh exp/decode_tri2k_tgpr_utt_eval$year exp/graph_tri2k_tg_pruned/HCLG.fst steps/decode_tri2k.sh data/eval_nov$year.scp
scripts/decode.sh --per-spk exp/decode_tri2k_tgpr_eval$year exp/graph_tri2k_tg_pruned/HCLG.fst steps/decode_tri2k.sh data/eval_nov$year.scp
scripts/decode.sh --per-spk exp/decode_tri2k_tgpr_fmllr_eval$year exp/graph_tri2k_tg_pruned/HCLG.fst steps/decode_tri2k_fmllr.sh data/eval_nov$year.scp
done
)&
# LDA+MLLT+SAT
steps/train_tri2l.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2l/tree exp/tri2l/final.mdl exp/graph_tri2l_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2l_tgpr_utt_eval92 exp/graph_tri2l_tg_pruned/HCLG.fst steps/decode_tri2l.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2l_tgpr_utt_eval93 exp/graph_tri2l_tg_pruned/HCLG.fst steps/decode_tri2l.sh data/eval_nov93.scp
scripts/decode.sh --per-spk exp/decode_tri2l_tgpr_eval92 exp/graph_tri2l_tg_pruned/HCLG.fst steps/decode_tri2l.sh data/eval_nov92.scp
scripts/decode.sh --per-spk exp/decode_tri2l_tgpr_eval93 exp/graph_tri2l_tg_pruned/HCLG.fst steps/decode_tri2l.sh data/eval_nov93.scp
)&

Просмотреть файл

@ -71,11 +71,11 @@ echo running on `hostname` > $dir/predecode${job}.log
gmm-decode-faster --beam=$prebeam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $alimodel $graph "$defaultfeats" ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>>$dir/predecode${job}.log
# Estimate transforms
ali-to-post ark:$dir/$job.pre_ali ark:- | \
(ali-to-post ark:$dir/$job.pre_ali ark:- | \
weight-silence-post 0.0 $silphones $alimodel ark:- ark:- | \
gmm-post-to-gpost $alimodel "$defaultfeats" ark,o:- ark:- | \
gmm-est-et $spk2utt_opt $model $et "$sifeats" ark,o:- \
ark:$dir/$job.trans ark,t:$dir/$job.warp 2>$dir/et${job}.log
ark:$dir/$job.trans ark,t:$dir/$job.warp ) 2>$dir/et${job}.log
feats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.trans ark:- ark:- |"

Просмотреть файл

@ -0,0 +1,97 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# This script does the decoding of a single batch of test data (on one core).
# It requires arguments.  It takes the graphdir and decoding directory, and the
# job number.  It expects a file $decode_dir/test${job_number}.scp to exist, and
# puts its output in $decode_dir/${job_number}.tra
#
# If the files
# $decode_dir/${job_number}.utt2spk and $decode_dir/${job_number}.spk2utt exist,
# this script will assume you want to do per-speaker (not per-utterance) adaptation.
#
# Decoding is done in three passes:
#  (1) decode with the "alignment model" on default-transform features;
#  (2) estimate exponential-transform (ET) speaker transforms from the
#      pass-1 alignments and re-decode with ET-adapted features;
#  (3) estimate fMLLR on top of ET and do the final decode.

if [ $# != 3 ]; then
  echo "Usage: scripts/decode_tri2k_fmllr.sh <graph> <decode-dir> <job-number>"
  exit 1;
fi

. path.sh || exit 1;

acwt=0.0625  # acoustic scale used in all decoding passes.
beam=13.0    # beam for the final decoding pass.
mincount=300 # for fMLLR
prebeam=12.0 # first-pass decoding beam...
max_active=7000
alimodel=exp/tri2k/final.alimdl  # first-pass model...
model=exp/tri2k/final.mdl
et=exp/tri2k/final.et            # exponential-transform object.
defaultmat=exp/tri2k/default.mat # LDA composed with the "default" ET transform.
ldamat=exp/tri2k/lda.mat         # plain LDA matrix (speaker-independent feats).
silphones=`cat data/silphones.csl`

graph=$1
dir=$2
job=$3
scp=$dir/$job.scp

# Speaker-independent (LDA-only) features, and "default-transform" features
# used with the alignment model.
sifeats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $ldamat ark:- ark:- |"
defaultfeats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $defaultmat ark:- ark:- |"

# Per-speaker adaptation if the spk2utt/utt2spk pair is present; otherwise the
# *_opt variables stay unset and adaptation is per-utterance.
if [ -f $dir/$job.spk2utt ]; then
  if [ ! -f $dir/$job.utt2spk ]; then
    echo "spk2utt but not utt2spk file present!"
    exit 1
  fi
  spk2utt_opt=--spk2utt=ark:$dir/$job.spk2utt
  utt2spk_opt=--utt2spk=ark:$dir/$job.utt2spk
fi

# Fail early if any required input is missing.  (Fix: $defaultmat and $ldamat
# were previously not checked, so a missing matrix only surfaced as an obscure
# failure deep inside a feature pipeline.)
filenames="$scp $model $alimodel $et $defaultmat $ldamat $graph data/words.txt"
for file in $filenames; do
  if [ ! -f $file ] ; then
    echo "No such file $file";
    exit 1;
  fi
done

echo running on `hostname` > $dir/predecode${job}.log

# First-pass decoding with the alignment model on default-transform features.
gmm-decode-faster --beam=$prebeam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $alimodel $graph "$defaultfeats" ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>>$dir/predecode${job}.log

# Estimate transforms
(ali-to-post ark:$dir/$job.pre_ali ark:- | \
  weight-silence-post 0.0 $silphones $alimodel ark:- ark:- | \
  gmm-post-to-gpost $alimodel "$defaultfeats" ark,o:- ark:- | \
  gmm-est-et $spk2utt_opt $model $et "$sifeats" ark,o:- \
    ark:$dir/$job.trans ark,t:$dir/$job.warp ) 2>$dir/et${job}.log

# ET-adapted features for the second pass.
feats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.trans ark:- ark:- |"

# Intermediate decoding
echo running on `hostname` > $dir/pre2decode$job.log
gmm-decode-faster --beam=$prebeam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $model $graph "$feats" ark,t:$dir/$job.pre2_tra ark,t:$dir/$job.pre2_ali 2>>$dir/pre2decode$job.log

# Estimate fMLLR transforms.
(ali-to-post ark:$dir/$job.pre2_ali ark:- | \
  weight-silence-post 0.0 $silphones $model ark:- ark:- | \
  gmm-est-fmllr $spk2utt_opt --fmllr-min-count=$mincount $model "$feats" ark,o:- \
    ark:$dir/$job.fmllr ) 2>$dir/fmllr${job}.log

# Final features: LDA, then the ET transform, then fMLLR on top.
feats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.trans ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.fmllr ark:- ark:- |"

# Final decoding pass, with the wider beam.
echo running final decoding pass on `hostname` > $dir/decode$job.log
gmm-decode-faster --beam=$beam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $model $graph "$feats" ark,t:$dir/$job.tra ark,t:$dir/$job.ali 2>>$dir/decode$job.log

280
egs/wsj/s1/steps/train_tri2k.sh Executable file
Просмотреть файл

@ -0,0 +1,280 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# tri2k is as tri2b ("exponential transform"), but using splice-9-frames + LDA
# features instead.
#
# Structure: align with the tri1 model, estimate LDA, build a new tree, then
# train, alternating GMM updates with exponential-transform (ET) estimation
# for the first $numiters_et iterations.  Work is split three ways
# (train1/2/3, split by speaker) and run as background jobs, with errors
# signalled through a $dir/.error marker file.

if [ -f path.sh ]; then . path.sh; fi

dir=exp/tri2k
srcdir=exp/tri1
srcmodel=$srcdir/final.mdl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

# The 3 following settings relate to ET.
dim=40 # the dim of our features.
normtype=mean  # ET normalization type.
numiters_et=15 # Before this, update et.

numiters=35
maxiterinc=20 # By this iter, we have all the Gaussians.
realign_iters="10 20 30";
numleaves=2000
numgauss=2000 # initial num-gauss smallish so that transform-training
              # code (when we modify this script) is a bit faster.
totgauss=10000 # Total num-gauss
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
silphonelist=`cat data/silphones.csl`

mkdir -p $dir
cp $srcdir/train.scp $dir
cp $srcdir/train.tra $dir

# Create utt2spk/spk2utt for this training subset, and split the scp/tra/
# utt2spk files 3 ways, keeping each speaker's utterances together.
scripts/filter_scp.pl $dir/train.scp data/train.utt2spk > $dir/train.utt2spk
scripts/utt2spk_to_spk2utt.pl $dir/train.utt2spk > $dir/train.spk2utt
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.scp
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.tra
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.utt2spk
for n in 1 2 3 ""; do # The "" handles the un-split one. Creating spk2utt files..
  scripts/utt2spk_to_spk2utt.pl $dir/train$n.utt2spk > $dir/train$n.spk2utt
done

# also see featspart below, used for sub-parts of the features;
# try to keep them in sync.
# srcfeats: delta features matching the source (tri1) model.
# feats/origfeats start out identical (spliced + LDA); once ET transforms
# exist, $feats is redefined to include them while origfeats stays
# un-adapted (ET estimation needs the pre-transform features).
srcfeats="ark,s,cs:add-deltas scp:$dir/train.scp ark:- |"
feats="ark,s,cs:splice-feats scp:$dir/train.scp ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
origfeats="ark,s,cs:splice-feats scp:$dir/train.scp ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
for n in 1 2 3; do
  srcfeatspart[$n]="ark,s,cs:add-deltas scp:$dir/train${n}.scp ark:- |"
  rawfeatspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- |"
  featspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
  origfeatspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
done

cp $srcdir/topo $dir

# Align all training data using old model (and old graphs, since we
# use the same data-subset as last time).
# Note: a few fail to get aligned here due to the difference between
# per-speaker and per-utterance splitting, but this doesn't really matter.
echo "Aligning all training data"
rm -f $dir/.error
for n in 1 2 3; do
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel \
    "ark:gunzip -c $srcdir/graphs${n}.fsts.gz|" "${srcfeatspart[$n]}" \
    "ark:|gzip -c >$dir/0.${n}.ali.gz" \
    2> $dir/align.0.${n}.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo alignment error RE old system && exit 1

# Now accumulate LDA stats.
for n in 1 2 3; do
  ( ali-to-post "ark:gunzip -c $dir/0.$n.ali.gz|" ark:- | \
    weight-silence-post 0.0 $silphonelist $srcmodel ark:- ark:- | \
    acc-lda $srcmodel "${rawfeatspart[$n]}" \
      ark:- $dir/lda$n.acc ) 2>$dir/lda_acc.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo lda-acc error && exit 1
est-lda --dim=$dim $dir/lda.mat $dir/lda?.acc 2>$dir/lda_est.log || exit 1

# Accumulate tree stats on the LDA features.
acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" "ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log || exit 1;

# The next few commands are involved with making the questions
# for tree clustering. The extra complexity vs. the RM recipe has
# to do with the desire to ask questions about the "real" phones
# ignoring things like stress and position-in-word, and ask questions
# separately about stress and position-in-word.
# Don't include silences as things to be clustered -> --nosil option.
scripts/make_shared_phones.sh --nosil | scripts/sym2int.pl data/phones.txt > $dir/phone_sets.list
cluster-phones $dir/treeacc $dir/phone_sets.list $dir/questions.txt 2> $dir/cluster_phones.log || exit 1;
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
scripts/make_extra_questions.sh | cat $dir/questions_syms.txt - > $dir/questions_syms_all.txt
scripts/sym2int.pl data/phones.txt < $dir/questions_syms_all.txt > $dir/questions_all.txt
compile-questions $dir/topo $dir/questions_all.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
scripts/make_roots.sh > $dir/roots_syms.txt
scripts/sym2int.pl --ignore-oov data/phones.txt < $dir/roots_syms.txt > $dir/roots.txt

build-tree --verbose=1 --max-leaves=$numleaves \
  $dir/treeacc $dir/roots.txt \
  $dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log || exit 1;

gmm-init-model --write-occs=$dir/1.occs \
  $dir/tree $dir/treeacc $dir/topo $dir/1.mdl 2> $dir/init_model.log || exit 1;
gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
  2>$dir/mixup.log || exit 1;
rm $dir/treeacc $dir/1.occs

# Convert alignments generated from previous model, to use as initial alignments.
for n in 1 2 3; do
  convert-ali $srcmodel $dir/1.mdl $dir/tree \
    "ark:gunzip -c $dir/0.$n.ali.gz|" \
    "ark:|gzip -c > $dir/cur$n.ali.gz" 2>$dir/convert.$n.log
done
rm $dir/0.?.ali.gz

# Make training graphs
echo "Compiling training graphs"
rm -f $dir/.error
for n in 1 2 3; do
  compile-train-graphs $dir/tree $dir/1.mdl data/L.fst ark:$dir/train${n}.tra \
    "ark:|gzip -c > $dir/graphs${n}.fsts.gz" \
    2>$dir/compile_graphs.${n}.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo compile-graphs error && exit 1

# Initialize the exponential-transform object.
gmm-init-et --normalize-type=$normtype --binary=false --dim=$dim $dir/1.et 2>$dir/init_et.log || exit 1

x=1
while [ $x -lt $numiters ]; do
  echo "Pass $x"
  if echo $realign_iters | grep -w $x >/dev/null; then
    echo "Aligning data"
    rm -f $dir/.error
    for n in 1 2 3; do
      gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
        "ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
        "ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
        || touch $dir/.error &
    done
    wait
    [ -f $dir/.error ] && echo error aligning data && exit 1
  fi
  if [ $x -lt $numiters_et ]; then
    # Work out current transforms (in parallel).
    echo "Computing ET transforms"
    rm -f $dir/.error
    for n in 1 2 3; do
      ( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
        weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
        gmm-post-to-gpost $dir/$x.mdl "${featspart[$n]}" ark,o:- ark:- | \
        gmm-est-et --spk2utt=ark:$dir/train$n.spk2utt --verbose=1 $dir/$x.mdl $dir/$x.et \
          "${origfeatspart[$n]}" ark,s,cs:- ark:$dir/$x.$n.trans ark,t:$dir/$x.$n.warp ) \
        2> $dir/trans.$x.$n.log || touch $dir/.error &
    done
    wait
    # Bug fix: this message used to say "error aligning data" (copy-paste from
    # the alignment block above), which was misleading when diagnosing logs.
    [ -f $dir/.error ] && echo error computing ET transforms && exit 1
    # Remove previous transforms, if present.
    if [ $x -gt 1 ]; then rm $dir/$[$x-1].?.trans; fi
    # Now change $feats and $featspart to correspond to the transformed features.
    feats="ark,s,cs:splice-feats scp:$dir/train.scp ark:- | transform-feats $dir/lda.mat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark,s,cs:cat $dir/$x.?.trans|\" ark:- ark:- |"
    for n in 1 2 3; do
      featspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- | transform-feats $dir/lda.mat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train$n.utt2spk ark:$dir/$x.$n.trans ark:- ark:- |"
    done
  fi
  # Accumulate GMM stats (on the possibly ET-adapted features) and update.
  gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" \
    "ark,s,cs:gunzip -c $dir/cur?.ali.gz|" $dir/$x.acc 2> $dir/acc.$x.log || exit 1;
  gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
  rm $dir/$x.mdl $dir/$x.acc $dir/$x.occs 2>/dev/null
  if [ $x -lt $numiters_et ]; then
    # Alternately estimate either A or B.
    x1=$[$x+1]
    if [ $[$x%2] == 0 ]; then # Estimate A:
      for n in 1 2 3; do
        ( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
          weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
          gmm-post-to-gpost $dir/$x1.mdl "${featspart[$n]}" ark:- ark:- | \
          gmm-et-acc-a --spk2utt=ark:$dir/train$n.spk2utt --verbose=1 $dir/$x1.mdl \
            $dir/$x.et "${origfeatspart[$n]}" ark,s,cs:- $dir/$x.$n.et_acc_a ) \
          2> $dir/acc_a.$x.$n.log || touch $dir/.error &
      done
      wait
      [ -f $dir/.error ] && echo error computing stats to accumulate A && exit 1
      gmm-et-est-a --verbose=1 $dir/$x.et $dir/$x1.et $dir/$x.?.et_acc_a 2> $dir/update_a.$x.log || exit 1;
      rm $dir/$x.?.et_acc_a
    else
      for n in 1 2 3; do
        ( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
          weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
          gmm-post-to-gpost $dir/$x1.mdl "${featspart[$n]}" ark:- ark:- | \
          gmm-et-acc-b --spk2utt=ark:$dir/train$n.spk2utt --verbose=1 $dir/$x1.mdl $dir/$x.et \
            "${origfeatspart[$n]}" ark,s,cs:- ark:$dir/$x.$n.trans ark:$dir/$x.$n.warp $dir/$x.$n.et_acc_b ) \
          2> $dir/acc_b.$x.$n.log || touch $dir/.error &
      done
      wait
      [ -f $dir/.error ] && echo error computing stats to accumulate B && exit 1
      gmm-et-est-b --verbose=1 $dir/$x.et $dir/$x1.et $dir/$x.mat $dir/$x.?.et_acc_b \
        2> $dir/update_b.$x.log || exit 1;
      rm $dir/$x.?.et_acc_b
      # Careful!: gmm-transform-means here changes $x1.mdl in-place.
      # It's important that the very next thing we do after this is to re-estimate
      # the ET transforms: any existing transforms would be invalid.
      gmm-transform-means $dir/$x.mat $dir/$x1.mdl $dir/$x1.mdl 2> $dir/transform_means.$x.log
    fi
  fi
  if [ $x -le $maxiterinc ]; then
    numgauss=$[$numgauss+$incgauss];
  fi
  x=$[$x+1];
done

# Accumulate stats for "alignment model" which is as the model but with
# the baseline features (shares Gaussian-level alignments).
gmm-et-get-b $dir/$numiters_et.et $dir/B.mat 2>$dir/get_b.log || exit 1
compose-transforms $dir/B.mat $dir/lda.mat $dir/default.mat 2>>$dir/get_b.log || exit 1
defaultfeats="ark,s,cs:splice-feats scp:$dir/train.scp ark:- | transform-feats $dir/default.mat ark:- ark:- |"
( ali-to-post "ark:gunzip -c $dir/cur?.ali.gz|" ark:- | \
  gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$defaultfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
# Update model.
gmm-est --remove-low-count-gaussians=false $dir/$x.mdl $dir/$x.acc2 $dir/$x.alimdl \
  2>$dir/est_alimdl.log || exit 1;
rm $dir/$x.acc2

# The following files may be be useful for display purposes.
y=1
while [ $y -lt $numiters_et ]; do
  cat $dir/$y.?.warp | scripts/process_warps.pl data/spk2gender.map > $dir/warps.$y
  y=$[$y+1]
done

( cd $dir; rm final.{mdl,alimdl,et} 2>/dev/null;
  ln -s $x.mdl final.mdl; ln -s $x.alimdl final.alimdl;
  ln -s $numiters_et.et final.et )

Просмотреть файл

@ -227,18 +227,19 @@ done
ln -s $x.mdl final.mdl;
ln -s `basename $cur_lda` final.mat )
defaultfeats="ark:splice-feats scp:$dir/train.scp ark:- | transform-feats $cur_lda ark:- ark:-|"
defaultfeats="ark:splice-feats scp:$dir/train.scp ark:- | transform-feats $dir/final.mat ark:- ark:-|"
feats="ark:splice-feats scp:$dir/train.scp ark:- | transform-feats $dir/final.mat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark:cat $dir/cur?.trans|\" ark:- ark:- |"
# Accumulate stats for "alignment model" which is as the model but with
# the unadapted, default features (shares Gaussian-level alignments).
( ali-to-post "ark:cat $dir/cur?.ali.gz|" ark:- | \
gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$defaultfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
( ali-to-post "ark:gunzip -c $dir/cur?.ali.gz|" ark:- | \
gmm-acc-stats-twofeats $dir/final.mdl "$feats" "$defaultfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
# Update model.
gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false \
$dir/$x.mdl $dir/$x.acc2 $dir/$x.alimdl \
2>$dir/est_alimdl.log || exit 1;
rm $dir/$x.acc2
( cd $dir; rm final.{mdl,alimdl,mat} 2>/dev/null;
ln -s $x.mdl final.mdl; ln -s $x.alimdl final.alimdl
ln -s `basename $cur_lda` final.mat )
( cd $dir; rm final.alimdl 2>/dev/null;
ln -s $x.alimdl final.alimdl )

205
egs/wsj/s1/steps/train_tri3k.sh Executable file
Просмотреть файл

@ -0,0 +1,205 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# tri3k is triphone training with splice-9-frames + LDA + ET (exponential
# transform), using the entire si-284 training set, starting from the
# model in tri2k.  Initializing the states from the previous model's states
# for faster training.  We use the previous model to get the speaker transforms,
# and do this only once (we use the alignment model).
# This script uses (about) 3 CPUs.

if [ -f path.sh ]; then . path.sh; fi

dir=exp/tri3k
srcdir=exp/tri2k
srcmodel=$srcdir/final.mdl
et=$srcdir/final.et
ldamat=$srcdir/lda.mat
defaultmat=$srcdir/default.mat # with the "default" exponential transform, used by alignment model.
srcalimodel=$srcdir/final.alimdl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

numiters=20
maxiterinc=10 # By this iter, we have all the Gaussians.
realign_iters="10 15";
numleaves=4200
numgauss=20000 # Initial num-gauss.  Initializing states using the tri2k model,
               # so can have a reasonably large number.
totgauss=40000 # Total num-gauss
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
silphonelist=`cat data/silphones.csl`

mkdir -p $dir
# Use all the SI-284 data.
cp data/train.{scp,tra,utt2spk} $dir

# Split up the scp and related files to 3 parts; create spk2utt files.
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train.scp $dir/train{1,2,3}.scp
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train.tra $dir/train{1,2,3}.tra
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.utt2spk
for n in 1 2 3 ""; do # The "" handles the un-split one. Creating spk2utt files..
  scripts/utt2spk_to_spk2utt.pl $dir/train$n.utt2spk > $dir/train$n.spk2utt
done

# also see featspart below, used for sub-parts of the features.
# origfeats is feats with just LDA, no exponential transform.
# defaultfeats is for the "default" speaker-- used with alignment model.
origfeats="ark:splice-feats scp:$dir/train.scp ark:- | transform-feats $ldamat ark:- ark:- |"
defaultfeats="ark:splice-feats scp:$dir/train.scp ark:- | transform-feats $defaultmat ark:- ark:- |"
feats="ark:splice-feats scp:$dir/train.scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark:cat $dir/?.trans|\" ark:- ark:- |"
for n in 1 2 3; do
  featspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train$n.utt2spk ark:$dir/$n.trans ark:- ark:- |"
  origfeatspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- | transform-feats $ldamat ark:- ark:- |"
  defaultfeatspart[$n]="ark,s,cs:splice-feats scp:$dir/train${n}.scp ark:- | transform-feats $defaultmat ark:- ark:- |"
done

cp $srcdir/topo $dir

echo "Aligning all training data with alignment model"
rm -f $dir/.error
for n in 1 2 3; do
  compile-train-graphs $srcdir/tree $srcalimodel data/L.fst ark:$dir/train${n}.tra ark:- 2>$dir/graphsold.${n}.log | \
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcalimodel ark:- "${defaultfeatspart[$n]}" "ark:|gzip -c > $dir/0.${n}.ali.gz" 2> $dir/align.0.${n}.log || touch $dir/.error &
done
wait;
# Bug fix: the message here used to say "compile-graphs error"; the step above
# is alignment with the old system's alignment model.
[ -f $dir/.error ] && echo alignment error RE old system && exit 1

# Estimate the ET speaker transforms once, using the alignment model for
# posteriors and the speaker-independent (LDA-only) features for estimation.
echo "Computing ET transforms."
for n in 1 2 3; do
  ( ali-to-post "ark:gunzip -c $dir/0.${n}.ali.gz|" ark:- | \
    weight-silence-post 0.0 $silphonelist $srcalimodel ark:- ark:- | \
    gmm-post-to-gpost $srcalimodel "${defaultfeatspart[$n]}" ark,o:- ark:- | \
    gmm-est-et --spk2utt=ark:$dir/train$n.spk2utt $srcmodel $et "${origfeatspart[$n]}" ark,o:- \
      ark:$dir/$n.trans ark,t:$dir/$n.warp ) 2>$dir/est_et.$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo Error computing ET transforms && exit 1

# debug info for warping factors.
cat $dir/?.warp | scripts/process_warps.pl data/spk2gender.map > $dir/warps

acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" "ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log || exit 1;

# The next few commands are involved with making the questions
# for tree clustering. The extra complexity vs. the RM recipe has
# to do with the desire to ask questions about the "real" phones
# ignoring things like stress and position-in-word, and ask questions
# separately about stress and position-in-word.
# Don't include silences as things to be clustered -> --nosil option.
scripts/make_shared_phones.sh --nosil | scripts/sym2int.pl data/phones.txt > $dir/phone_sets.list
cluster-phones $dir/treeacc $dir/phone_sets.list $dir/questions.txt 2> $dir/cluster_phones.log || exit 1;
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
scripts/make_extra_questions.sh | cat $dir/questions_syms.txt - > $dir/questions_syms_all.txt
scripts/sym2int.pl data/phones.txt < $dir/questions_syms_all.txt > $dir/questions_all.txt
compile-questions $dir/topo $dir/questions_all.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
scripts/make_roots.sh > $dir/roots_syms.txt
scripts/sym2int.pl --ignore-oov data/phones.txt < $dir/roots_syms.txt > $dir/roots.txt

build-tree --verbose=1 --max-leaves=$numleaves \
  $dir/treeacc $dir/roots.txt \
  $dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log || exit 1;

# Initialize the new model's states from the previous (tri2k) model.
gmm-init-model --write-occs=$dir/1.occs \
  $dir/tree $dir/treeacc $dir/topo $dir/1.mdl \
  $srcdir/tree $srcmodel 2> $dir/init_model.log || exit 1;

# Mix down and mix up to get exactly the targets #Gauss
# (note: the tool does mix-down first regardless of option order.)
gmm-mixup --mix-down=$numgauss --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
  2>$dir/mixup.log || exit 1;
rm $dir/treeacc $dir/1.occs

# Convert alignments generated from the previous (tri2k) model, to use as
# initial alignments.
for n in 1 2 3; do
  convert-ali $srcmodel $dir/1.mdl $dir/tree \
    "ark:gunzip -c $dir/0.$n.ali.gz|" \
    "ark:|gzip -c > $dir/cur$n.ali.gz" 2>$dir/convert.$n.log
done
rm $dir/0.?.ali.gz

# Make training graphs
echo "Compiling training graphs"
rm -f $dir/.error
for n in 1 2 3; do
  compile-train-graphs $dir/tree $dir/1.mdl data/L.fst ark:$dir/train${n}.tra \
    "ark:|gzip -c > $dir/graphs${n}.fsts.gz" \
    2>$dir/compile_graphs.${n}.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo compile-graphs error && exit 1

x=1
while [ $x -lt $numiters ]; do
  echo "Pass $x"
  if echo $realign_iters | grep -w $x >/dev/null; then
    echo "Aligning data"
    rm -f $dir/.error
    for n in 1 2 3; do
      gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
        "ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
        "ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
        || touch $dir/.error &
    done
    wait
    # Bug fix: this message used to say "compile-graphs error" (copy-paste
    # from the graph-compilation block above).
    [ -f $dir/.error ] && echo error aligning data && exit 1
  fi
  # Accumulate stats in parallel over the 3 parts, then sum and update.
  for n in 1 2 3; do
    gmm-acc-stats-ali --binary=false $dir/$x.mdl "${featspart[$n]}" \
      "ark:gunzip -c $dir/cur${n}.ali.gz|" $dir/$x.$n.acc 2> $dir/acc.$x.$n.log || touch $dir/.error &
  done
  wait
  [ -f $dir/.error ] && echo accumulation error && exit 1
  gmm-sum-accs $dir/$x.acc $dir/$x.?.acc || exit 1;
  rm $dir/$x.?.acc
  gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
  rm $dir/$x.mdl $dir/$x.acc $dir/$x.occs 2>/dev/null
  if [ $x -le $maxiterinc ]; then
    numgauss=$[$numgauss+$incgauss];
  fi
  x=$[$x+1];
done

# Accumulate stats for the "alignment model", which is as the model but with
# the default-transform features (shares Gaussian-level alignments).
( ali-to-post "ark:gunzip -c $dir/cur?.ali.gz|" ark:- | \
  gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$defaultfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
# Update model.
gmm-est --remove-low-count-gaussians=false $dir/$x.mdl $dir/$x.acc2 $dir/$x.alimdl \
  2>$dir/est_alimdl.log || exit 1;
rm $dir/$x.acc2

( cd $dir; rm final.{mdl,alimdl,et} 2>/dev/null;
  ln -s $x.mdl final.mdl; ln -s $x.alimdl final.alimdl )
# Bug fix: this link used to be created inside the subshell after "cd $dir",
# where `pwd` and the relative paths $et and $dir no longer resolved correctly
# (it produced a dangling link like $dir/$dir/final.et -> $dir/$et).  Creating
# it from the top-level directory makes the target an absolute path.
ln -s `pwd`/$et $dir/final.et