git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@540 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2011-09-29 21:13:41 +00:00
Parent b30395ad00
Commit 669479471e
9 changed files with 537 additions and 9 deletions

View File

@@ -111,7 +111,6 @@ exp/decode_sgmme_fmllr/wer:Average WER is 2.266018 (284 / 12533)
#### Note: stuff below this line may be out of date / not computed
# with most recent version of toolkit.
# note: when changing (phn,spk) dimensions from (40,39) -> (30,30),

View File

@@ -205,8 +205,8 @@ defaultfeats="$basefeats transform-feats $dir/B.mat ark:- ark:- |"
rm $dir/$x.acc2
# The following files may be useful for display purposes.
for n in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
cat $dir/warps/$n.warp | scripts/process_warps.pl $data/spk2gender > $dir/warps/$n.warp_info
for y in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
cat $dir/warps/$y.warp | scripts/process_warps.pl $data/spk2gender > $dir/warps/$y.warp_info
done
( cd $dir; rm final.mdl 2>/dev/null;

View File

@@ -87,6 +87,14 @@ scripts/decode.sh steps/decode_deltas.sh exp/tri2a/graph_tgpr data/dev_nov93 exp
# Train tri2b, which is LDA+MLLT, on si84 data.
steps/train_lda_mllt.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b
scripts/mkgraph.sh data/lang_test_tgpr exp/tri2b exp/tri2b/graph_tgpr
scripts/decode.sh steps/decode_lda_mllt.sh exp/tri2b/graph_tgpr data/eval_nov92 exp/tri2b/decode_tgpr_eval92
scripts/decode.sh steps/decode_lda_mllt.sh exp/tri2b/graph_tgpr data/dev_nov93 exp/tri2b/decode_tgpr_dev93
# Align tri2b system with si84 data.
steps/align_lda_mllt.sh data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84
steps/train_lda_et.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2c
# exp/decode_mono_tgpr_eval92 exp/graph_mono_tg_pruned/HCLG.fst steps/decode_mono.sh data/eval_nov92.scp
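Once tri2c (LDA+ET) is trained, it could be graphed and decoded following the same pattern as tri2b above. A hedged sketch, not part of the committed run.sh, assuming the new ET decoding script is installed as steps/decode_lda_et.sh and using illustrative tri2c output paths:
scripts/mkgraph.sh data/lang_test_tgpr exp/tri2c exp/tri2c/graph_tgpr
scripts/decode.sh steps/decode_lda_et.sh exp/tri2c/graph_tgpr data/eval_nov92 exp/tri2c/decode_tgpr_eval92
scripts/decode.sh steps/decode_lda_et.sh exp/tri2c/graph_tgpr data/dev_nov93 exp/tri2c/decode_tgpr_dev93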

View File

@@ -0,0 +1,101 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# This script does training-data alignment given a model built using
# CMVN + splicing + LDA (+ MLLT or a similar transform) features.
# It splits the data into four chunks and does everything in parallel
# on the same machine. Its output, all in its own experimental
# directory, is {0,1,2,3}.cmvn, {0,1,2,3}.ali.gz, tree, final.mdl,
# final.mat and final.occs (the last four are just copied from the
# source directory).
# There is an option to use precompiled training graphs from the
# previous phase, if these are available (i.e. if they were built
# with the same data); they must be split into four pieces.
oldgraphs=false
if [ "$1" == --use-graphs ]; then
shift;
oldgraphs=true
fi
if [ $# != 4 ]; then
echo "Usage: steps/align_lda_mllt.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
echo " e.g.: steps/align_lda_mllt.sh data/train data/lang exp/tri1 exp/tri1_ali"
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov_sym="<SPOKEN_NOISE>" # Map OOVs to this in training.
grep SPOKEN_NOISE $lang/words.txt >/dev/null || echo "Warning: SPOKEN_NOISE not in dictionary"
mkdir -p $dir
cp $srcdir/{tree,final.mdl,final.mat,final.occs} $dir || exit 1; # Create copy of the tree and model and occs...
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
if [ ! -d $data/split4 -o $data/split4 -ot $data/feats.scp ]; then
scripts/split_data.sh $data 4
fi
echo "Computing cepstral mean and variance statistics"
for n in 0 1 2 3; do
compute-cmvn-stats --spk2utt=ark:$data/split4/$n/spk2utt scp:$data/split4/$n/feats.scp \
ark:$dir/$n.cmvn 2>$dir/cmvn$n.log || exit 1;
done
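# Note: each $dir/$n.cmvn archive holds per-speaker cepstral mean/variance
# statistics for one split of the data; apply-cmvn below uses them with
# --norm-vars=false (mean subtraction only), mapping each utterance to its
# speaker's stats via the utt2spk map.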
# Align all training data using the supplied model.
rm $dir/.error 2>/dev/null
echo "Aligning data from $data"
if $oldgraphs; then
for n in 0 1 2 3; do
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/$n.cmvn scp:$data/split4/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat |"
if [ ! -f $srcdir/$n.fsts.gz ]; then
echo You specified --use-graphs but no such file $srcdir/$n.fsts.gz
exit 1;
fi
gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/final.mdl \
"ark:gunzip -c $srcdir/$n.fsts.gz|" "$feats" "ark:|gzip -c >$dir/$n.ali.gz" \
2> $dir/align$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo error doing alignment && exit 1;
else
for n in 0 1 2 3; do
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/$n.cmvn scp:$data/split4/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat |"
# compute integer form of transcripts.
tra="ark:scripts/sym2int.pl --map-oov \"$oov_sym\" --ignore-first-field $lang/words.txt $data/split4/$n/text|";
gmm-align $scale_opts --beam=10 --retry-beam=40 $dir/tree $dir/final.mdl $lang/L.fst \
"$feats" "$tra" "ark:|gzip -c >$dir/$n.ali.gz" 2> $dir/align$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo error doing alignment && exit 1;
fi
echo "Done aligning data."

View File

@@ -21,10 +21,6 @@
# This script just generates lattices for a single broken-up
# piece of the data.
# and rescores them with different
# acoustic weights, in order to explore a range of different
# weights.
if [ -f ./path.sh ]; then . ./path.sh; fi
numjobs=1

View File

@@ -0,0 +1,79 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and the baseline
# [e.g. MFCC] features plus cepstral mean subtraction plus
# LDA + ET (exponential transform) features. This script first
# generates a pruned state-level lattice without adaptation,
# then does acoustic rescoring on this lattice to generate
# a new lattice; it determinizes and prunes this ready for
# further rescoring (e.g. with new LMs, or varying the acoustic
# scale).
if [ -f ./path.sh ]; then . ./path.sh; fi
numjobs=1
jobid=0
if [ "$1" == "-j" ]; then
shift;
numjobs=$1;
jobid=$2;
shift; shift;
if [ $jobid -ge $numjobs ]; then
echo "Invalid job number, $jobid >= $numjobs";
exit 1;
fi
fi
if [ $# != 3 ]; then
echo "Usage: steps/decode_lda_mllt.sh [-j num-jobs job-number] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_lda_mllt.sh -j 8 0 exp/mono/graph_tgpr data/dev_nov93 exp/mono/decode_dev93_tgpr"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
mkdir -p $dir
if [ $numjobs -gt 1 ]; then
mydata=$data/split$numjobs/$jobid
else
mydata=$data
fi
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.mat $graphdir/HCLG.fst"
for f in $requirements; do
if [ ! -f $f ]; then
echo "decode_lda_mllt.sh: no such file $f";
exit 1;
fi
done
# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
gmm-latgen-faster --max-active=7000 --beam=13.0 --lattice-beam=6.0 --acoustic-scale=0.083333 \
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.$jobid.gz" \
2> $dir/decode$jobid.log || exit 1;
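In the recipe this decoder would normally be driven through scripts/decode.sh, as run.sh does for the other decode scripts; run by hand over four parallel jobs it would look roughly like the sketch below (the script name steps/decode_lda_et.sh and the tri2c directories are assumptions, not shown in this diff):
scripts/split_data.sh data/dev_nov93 4   # creates data/dev_nov93/split4/{0,1,2,3}
for jobid in 0 1 2 3; do
  steps/decode_lda_et.sh -j 4 $jobid exp/tri2c/graph_tgpr data/dev_nov93 exp/tri2c/decode_tgpr_dev93 &
done
wait   # each job writes lat.$jobid.gz and decode$jobid.log into the decode dir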

View File

@@ -0,0 +1,76 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and the baseline
# [e.g. MFCC] features plus cepstral mean subtraction plus
# LDA+MLLT or similar transform.
# This script just generates lattices for a single broken-up
# piece of the data.
if [ -f ./path.sh ]; then . ./path.sh; fi
numjobs=1
jobid=0
if [ "$1" == "-j" ]; then
shift;
numjobs=$1;
jobid=$2;
shift; shift;
if [ $jobid -ge $numjobs ]; then
echo "Invalid job number, $jobid >= $numjobs";
exit 1;
fi
fi
if [ $# != 3 ]; then
echo "Usage: steps/decode_lda_mllt.sh [-j num-jobs job-number] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_lda_mllt.sh -j 8 0 exp/mono/graph_tgpr data/dev_nov93 exp/mono/decode_dev93_tgpr"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
mkdir -p $dir
if [ $numjobs -gt 1 ]; then
mydata=$data/split$numjobs/$jobid
else
mydata=$data
fi
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.mat $graphdir/HCLG.fst"
for f in $requirements; do
if [ ! -f $f ]; then
echo "decode_lda_mllt.sh: no such file $f";
exit 1;
fi
done
# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
gmm-latgen-faster --max-active=7000 --beam=13.0 --lattice-beam=6.0 --acoustic-scale=0.083333 \
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.$jobid.gz" \
2> $dir/decode$jobid.log || exit 1;
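For clarity, the single feature pipe above does the same work as first caching the CMVN stats to a file and then reading them back; a hedged two-step equivalent (cmvn.ark is an illustrative temporary path):
compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:$dir/cmvn.ark
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:$dir/cmvn.ark scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
The in-line version is used because the stats are only needed for this one decoding pass, so there is nothing gained by keeping them on disk.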

egs/wsj/s3/steps/train_lda_et.sh (executable file, 269 additions)
View File

@@ -0,0 +1,269 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Triphone model training with LDA + ET (exponential transform)
# features, on top of cepstral mean normalization and frame splicing.
# It starts from an existing alignment directory (e.g.
# exp/tri1_ali_si84), supplied as an argument, which provides the
# initial alignments and the per-split CMVN statistics.
if [ $# != 6 ]; then
echo "Usage: steps/train_lda_et.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
echo " e.g.: steps/train_lda_et.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2c"
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
numleaves=$1
totgauss=$2
data=$3
lang=$4
alidir=$5
dir=$6
if [ ! -f $alidir/final.mdl -o ! -f $alidir/0.ali.gz -o ! -f $alidir/3.ali.gz ]; then
echo "Error: alignment dir $alidir does not contain final.mdl and {0,1,2,3}.ali.gz"
exit 1;
fi
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="10 20 30";
numiters_et=15
normtype=offset # et option; could be offset [recommended], or none
oov_sym="<SPOKEN_NOISE>" # Map OOVs to this in training.
grep SPOKEN_NOISE $lang/words.txt >/dev/null || echo "Warning: SPOKEN_NOISE not in dictionary"
silphonelist=`cat $lang/silphones.csl`
numiters=35 # Number of iterations of training
maxiterinc=25 # Last iter to increase #Gauss on.
numgauss=$numleaves
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
randprune=4.0
mkdir -p $dir/log $dir/warps
if [ ! -d $data/split4 -o $data/split4 -ot $data/feats.scp ]; then
scripts/split_data.sh $data 4
fi
# basefeats is all the feats, transformed with lda.mat-- just needed for tree accumulation.
basefeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $alidir/*.cmvn|\" scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
for n in 0 1 2 3; do
splicedfeatspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split4/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split4/$n/feats.scp ark:- | splice-feats ark:- ark:- |"
basefeatspart[$n]="${splicedfeatspart[$n]} transform-feats $dir/lda.mat ark:- ark:- |"
featspart[$n]="${basefeatspart[$n]}" # This gets overwritten later in the script.
done
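# Note: these pipeline strings are only evaluated when a program later
# reads a "$feats"-style argument, so it is fine that $dir/lda.mat does
# not exist yet at this point (it is written by est-lda below).
#   splicedfeatspart[n] = CMVN + frame splicing
#   basefeatspart[n]    = the above + the LDA transform (lda.mat)
#   featspart[n]        = the above, later extended with the per-speaker
#                         ET transforms ($n.trans) once they are estimated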
echo "Accumulating LDA statistics."
rm $dir/.error 2>/dev/null
for n in 0 1 2 3; do
( ali-to-post "ark:gunzip -c $alidir/$n.ali.gz|" ark:- | \
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- | \
acc-lda --rand-prune=$randprune $alidir/final.mdl "${splicedfeatspart[$n]}" ark,s,cs:- \
$dir/lda.$n.acc ) 2>$dir/log/lda_acc.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "Error accumulating LDA stats" && exit 1;
est-lda $dir/lda.mat $dir/lda.*.acc 2>$dir/log/lda_est.log || exit 1; # defaults to dim=40
rm $dir/lda.*.acc
cur_lda=$dir/0.mat
# The next stage assumes we won't need the context of silence, which
# assumes something about $lang/roots.txt, but it seems pretty safe.
echo "Accumulating tree stats"
acc-tree-stats --ci-phones=$silphonelist $alidir/final.mdl "$basefeats" \
"ark:gunzip -c $alidir/?.ali.gz|" $dir/treeacc 2> $dir/log/acc_tree.log || exit 1;
echo "Computing questions for tree clustering"
# preparing questions, roots file...
scripts/sym2int.pl $lang/phones.txt $lang/phonesets_cluster.txt > $dir/phonesets.txt || exit 1;
cluster-phones $dir/treeacc $dir/phonesets.txt $dir/questions.txt 2> $dir/log/questions.log || exit 1;
scripts/sym2int.pl $lang/phones.txt $lang/extra_questions.txt >> $dir/questions.txt
compile-questions $lang/topo $dir/questions.txt $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
scripts/sym2int.pl --ignore-oov $lang/phones.txt $lang/roots.txt > $dir/roots.txt
echo "Building tree"
build-tree --verbose=1 --max-leaves=$numleaves \
$dir/treeacc $dir/roots.txt \
$dir/questions.qst $lang/topo $dir/tree 2> $dir/log/train_tree.log || exit 1;
gmm-init-model --write-occs=$dir/1.occs \
$dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1;
gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
2>$dir/log/mixup.log || exit 1;
gmm-init-et --normalize-type=$normtype --binary=false --dim=40 $dir/1.et 2>$dir/init_et.log || exit 1
rm $dir/treeacc
# Convert alignments in $alidir, to use as initial alignments.
# This assumes that $alidir was split in 4 pieces, just like the
# current dir.
echo "Converting old alignments"
for n in 0 1 2 3; do
convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
"ark:gunzip -c $alidir/$n.ali.gz|" "ark:|gzip -c >$dir/$n.ali.gz" \
2>$dir/log/convert$n.log || exit 1;
done
# Make training graphs (this is split in 4 parts).
echo "Compiling training graphs"
rm $dir/.error 2>/dev/null
for n in 0 1 2 3; do
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
"ark:scripts/sym2int.pl --map-oov \"$oov_sym\" --ignore-first-field $lang/words.txt < $data/split4/$n/text |" \
"ark:|gzip -c >$dir/$n.fsts.gz" 2>$dir/log/compile_graphs$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo "Error compiling training graphs" && exit 1;
x=1
while [ $x -lt $numiters ]; do
echo Pass $x
if echo $realign_iters | grep -w $x >/dev/null; then
echo "Aligning data"
for n in 0 1 2 3; do
gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/$x.mdl \
"ark:gunzip -c $dir/$n.fsts.gz|" "${featspart[$n]}" \
"ark:|gzip -c >$dir/$n.ali.gz" 2> $dir/log/align.$x.$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo "Error aligning data on iteration $x" && exit 1;
fi
if [ $x -lt $numiters_et ]; then
echo "Re-estimating ET transforms"
for n in 0 1 2 3; do
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
rand-prune-post $randprune ark:- ark:- | \
gmm-post-to-gpost $dir/$x.mdl "${featspart[$n]}" ark:- ark:- | \
gmm-est-et --spk2utt=ark:$data/split4/$n/spk2utt $dir/$x.mdl $dir/$x.et "${basefeatspart[$n]}" \
ark,s,cs:- ark:$dir/$n.trans.tmp ark,t:$dir/warps/$x.$n.warp ) \
2> $dir/log/trans.$x.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "Error computing ET transforms on iteration $x" && exit 1;
for n in 0 1 2 3; do
mv $dir/$n.trans.tmp $dir/$n.trans || exit 1;
featspart[$n]="${basefeatspart[$n]} transform-feats --utt2spk=ark:$data/split4/$n/utt2spk ark:$dir/$n.trans ark:- ark:- |"
done
fi
for n in 0 1 2 3; do
gmm-acc-stats-ali --binary=false $dir/$x.mdl "${featspart[$n]}" \
"ark:gunzip -c $dir/$n.ali.gz|" $dir/$x.$n.acc \
2>$dir/log/acc.$x.$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1;
gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl \
"gmm-sum-accs - $dir/$x.{0,1,2,3}.acc |" $dir/$[$x+1].mdl 2> $dir/log/update.$x.log || exit 1;
rm $dir/$x.mdl $dir/$x.{0,1,2,3}.acc
rm $dir/$x.occs
x1=$[$x+1];
if [ $x -lt $numiters_et ]; then
# Alternately estimate either A or B.
if [ $[$x%2] == 0 ]; then # Estimate A:
for n in 0 1 2 3; do
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
rand-prune-post $randprune ark:- ark:- | \
gmm-post-to-gpost $dir/$x1.mdl "${featspart[$n]}" ark:- ark:- | \
gmm-et-acc-a --spk2utt=ark:$data/split4/$n/spk2utt --verbose=1 $dir/$x1.mdl $dir/$x.et "${basefeatspart[$n]}" \
ark,s,cs:- $dir/$x.$n.et_acc_a ) 2> $dir/log/acc_a.$x.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "Error accumulating ET stats for A on iter $x" && exit 1;
gmm-et-est-a --verbose=1 $dir/$x.et $dir/$x1.et $dir/$x.*.et_acc_a 2> $dir/log/update_a.$x.log || exit 1;
rm $dir/$x.*.et_acc_a
else
for n in 0 1 2 3; do
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
gmm-acc-mllt --rand-prune=$randprune $dir/$x1.mdl "${featspart[$n]}" ark:- \
$dir/$x.$n.mllt_acc ) 2> $dir/log/acc_b.$x.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "Error accumulating ET stats for A on iter $x" && exit 1;
est-mllt $dir/$x.mat $dir/$x.*.mllt_acc 2> $dir/log/update_b.$x.log || exit 1;
gmm-et-apply-c $dir/$x.et $dir/$x.mat $dir/$x1.et 2>>$dir/log/update_b.$x.log || exit 1;
gmm-transform-means $dir/$x.mat $dir/$x1.mdl $dir/$x1.mdl 2>> $dir/log/update_b.$x.log || exit 1;
# Modify current transforms by premultiplying by C.
for n in 0 1 2 3; do
compose-transforms $dir/$x.mat ark:$dir/$n.trans ark:$dir/tmp.trans 2>> $dir/log/update_b.$x.log || exit 1;
mv $dir/tmp.trans $dir/$n.trans
done
rm $dir/$x.mat
rm $dir/$x.*.mllt_acc
fi
fi
if [[ $x -le $maxiterinc ]]; then
numgauss=$[$numgauss+$incgauss];
fi
x=$[$x+1];
done
# Write out the B matrix which we will combine with LDA to get
# final.mat; and write out final.et which is the current final et
# but with B set to unity (since it's now part of final.mat).
# This is just more convenient going forward, since the "default features"
# (i.e. when speaker factor equals zero) are now the same as the
# features that the ET acts on.
gmm-et-get-b $dir/$numiters_et.et $dir/B.mat $dir/final.et 2>$dir/get_b.log || exit 1
compose-transforms $dir/B.mat $dir/lda.mat $dir/final.mat 2>>$dir/get_b.log || exit 1
for n in 0 1 2 3; do
defaultfeatspart[$n]="${basefeatspart[$n]} transform-feats $dir/B.mat ark:- ark:- |"
done
# Accumulate stats for "alignment model" which is as the model but with
# the default features (shares Gaussian-level alignments).
for n in 0 1 2 3; do
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
gmm-acc-stats-twofeats $dir/$x.mdl "${featspart[$n]}" "${defaultfeatspart[$n]}" \
ark:- $dir/$x.$n.acc2 ) 2>$dir/acc_alimdl.$n.log || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo "Error accumulating alignment statistics." && exit 1;
# Update model.
gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl \
"gmm-sum-accs - $dir/$x.*.acc2|" $dir/$x.alimdl \
2>$dir/est_alimdl.log || exit 1;
rm $dir/$x.*.acc2
# The following files may be useful for display purposes.
for y in 2 3 4 5 6 7 8 9 10 11 12 13 14; do
cat $dir/warps/$y.*.warp | scripts/process_warps.pl $data/spk2gender > $dir/warps/$y.warp_info
done
( cd $dir;
ln -s $x.mdl final.mdl;
ln -s $x.occs final.occs;
ln -s $x.alimdl final.alimdl
ln -s $[$numiters_et-1].trans final.trans )
echo Done
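For reference, and as we understand the exponential transform (this is not stated in the script itself): the per-speaker transform estimated above has the form W_s = D_s exp(t_s A) B, where t_s is the scalar warp factor written under $dir/warps, A and B are global parameters re-estimated on alternating iterations (gmm-et-est-a, and est-mllt followed by gmm-et-apply-c, respectively), and D_s is the per-speaker offset component selected by --normalize-type=offset. The final gmm-et-get-b / compose-transforms step factors B out of the ET object and folds it into final.mat together with lda.mat, which is why final.et is described above as having B set to unity.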

View File

@@ -21,8 +21,8 @@
# the same type of features.
if [ $# != 6 ]; then
echo "Usage: steps/train_deltas.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
echo " e.g.: steps/train_deltas.sh 2000 10000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
echo "Usage: steps/train_lda_mllt.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
echo " e.g.: steps/train_lda_mllt.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b"
exit 1;
fi