зеркало из https://github.com/mozilla/kaldi.git
Committing some scripts.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@103 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
a21a43afe3
Коммит
d7a6d499aa
|
@ -77,6 +77,7 @@ cp $srcdir/topo $dir
|
|||
# Note: a small number of utterances don't have graphs at this stage because of differences
|
||||
# in how the data splitting is done when we switch to using speaker information.
|
||||
|
||||
|
||||
echo "Aligning all training data"
|
||||
|
||||
rm -f $dir/.error
|
||||
|
@ -89,7 +90,6 @@ done
|
|||
wait;
|
||||
[ -f $dir/.error ] && echo align error RE old system && exit 1
|
||||
|
||||
|
||||
acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" \
|
||||
"ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log || exit 1;
|
||||
|
||||
|
@ -166,7 +166,7 @@ while [ $x -lt $numiters ]; do
|
|||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
sgmm-align-compiled ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
|
||||
"--gselect=ark:gunzip -c $dir/gselect$n.gz|" \
|
||||
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
$scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
|
||||
"ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
|
||||
|
@ -181,9 +181,9 @@ while [ $x -lt $numiters ]; do
|
|||
( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
sgmm-est-spkvecs --spk2utt=ark:$dir/train$n.spk2utt ${spkvecs_opt[$n]} \
|
||||
"--gselect=ark:gunzip -c $dir/gselect$n.gz|" \
|
||||
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
--rand-prune=$randprune $dir/$x.mdl \
|
||||
"${featspart[$n]}" ark:- ark:$dir/tmp$n.vecs && mv $dir/tmp$n.vecs $cur/cur$n.vecs ) \
|
||||
"${featspart[$n]}" ark,s,cs:- ark:$dir/tmp$n.vecs && mv $dir/tmp$n.vecs $dir/cur$n.vecs ) \
|
||||
2>$dir/spkvecs.$x.$n.log \
|
||||
|| touch $dir/.error &
|
||||
spkvecs_opt[$n]="--spk-vecs=ark:$dir/cur$n.vecs"
|
||||
|
@ -202,7 +202,7 @@ while [ $x -lt $numiters ]; do
|
|||
|
||||
for n in 1 2 3; do
|
||||
sgmm-acc-stats-ali ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
|
||||
--update-flags=$flags "--gselect=ark:gunzip -c $dir/gselect$n.gz|" \
|
||||
--update-flags=$flags "--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
--rand-prune=$randprune --binary=true $dir/$x.mdl "${featspart[$n]}" \
|
||||
"ark:gunzip -c $dir/cur$n.ali.gz|" $dir/$x.$n.acc 2> $dir/acc.$x.$n.log \
|
||||
|| touch $dir/.error &
|
||||
|
@ -225,7 +225,7 @@ flags=MwcS
|
|||
for n in 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/cur$n.ali.gz|" ark:- | \
|
||||
sgmm-post-to-gpost ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
|
||||
"--gselect=ark:gunzip -c $dir/gselect$n.gz|" \
|
||||
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
$dir/$x.mdl "${featspart[$n]}" ark,s,cs:- ark:- | \
|
||||
sgmm-acc-stats-gpost --update-flags=$flags $dir/$x.mdl "${featspart[$n]}" \
|
||||
ark,s,cs:- $dir/$x.$n.aliacc ) 2> $dir/acc_ali.$x.$n.log || touch $dir/.error &
|
||||
|
|
|
@ -0,0 +1,248 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# 3b2 is with dim=50
|
||||
# sgmm3b is as sgmm2b (SGMM with speaker vectors), but using all
|
||||
# the training data.
|
||||
# Instead of starting from sgmm2b we start from tri3a. This means we can
|
||||
# essentially reuse the train_sgmm2b.sh script, and don't have to do
|
||||
# alignment of the model with speaker vectors (which requires multiple
|
||||
# passes to do properly and is a bit of a hassle).
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
dir=exp/sgmm3b2
|
||||
srcdir=exp/tri3a # more convenient as has graphs and alignments for this data already.
|
||||
ubm=exp/ubm3a/final.ubm # 600 UBM comps
|
||||
srcmodel=$srcdir/final.mdl
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
|
||||
numiters=35 # Total number of iterations.
|
||||
realign_iters="5 15 25"; # realign a bit earlier than we did in tri2a,
|
||||
# since SGMM system quite different
|
||||
# from normal triphone system.
|
||||
spkvec_iters="5 8 12 17 22 32"
|
||||
maxiterinc=20 # By this iter, we have all the substates.
|
||||
numleaves=6000 # was 4.2k for GMM system: incresaing it for SGMM system.
|
||||
numsubstates=6000 # initial #-substates
|
||||
totsubstates=35000 # a little less than #Gauss for baseline GMM system (40k)
|
||||
incsubstates=$[($totsubstates-$numsubstates)/$maxiterinc] # per-iter increment for #substates
|
||||
phn_dim=50
|
||||
phn_dim_iter=3 # iter to increase phn dim.
|
||||
|
||||
silphonelist=`cat data/silphones.csl`
|
||||
randprune=0.1
|
||||
|
||||
mkdir -p $dir
|
||||
cp $srcdir/train.scp $dir
|
||||
cp $srcdir/train.tra $dir
|
||||
scripts/filter_scp.pl $dir/train.scp data/train.utt2spk > $dir/train.utt2spk
|
||||
scripts/utt2spk_to_spk2utt.pl $dir/train.utt2spk > $dir/train.spk2utt
|
||||
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.scp
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.tra
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.utt2spk
|
||||
|
||||
for n in 1 2 3 ""; do # The "" handles the un-split one. Creating spk2utt files..
|
||||
scripts/utt2spk_to_spk2utt.pl $dir/train$n.utt2spk > $dir/train$n.spk2utt
|
||||
done
|
||||
|
||||
# also see featspart below, used for sub-parts of the features;
|
||||
# try to keep them in sync.
|
||||
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
|
||||
for n in 1 2 3; do
|
||||
featspart[$n]="ark:add-deltas --print-args=false scp:$dir/train${n}.scp ark:- |"
|
||||
done
|
||||
|
||||
if [ ! -f $ubm ]; then
|
||||
echo "No UBM in $ubm";
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cp $srcdir/topo $dir
|
||||
|
||||
# Align all training data using old model (and old graphs, since we
|
||||
# use the same data-subset as last time).
|
||||
# Note: a small number of utterances don't have graphs at this stage because of differences
|
||||
# in how the data splitting is done when we switch to using speaker information.
|
||||
|
||||
echo "Aligning all training data"
|
||||
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel \
|
||||
"ark:gunzip -c $srcdir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/0.${n}.ali.gz" \
|
||||
2> $dir/align.0.${n}.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo align error RE old system && exit 1
|
||||
|
||||
acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" \
|
||||
"ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log || exit 1;
|
||||
|
||||
|
||||
# The next few commands are involved with making the questions
|
||||
# for tree clustering. The extra complexity vs. the RM recipe has
|
||||
# to do with the desire to ask questions about the "real" phones
|
||||
# ignoring things like stress and position-in-word, and ask questions
|
||||
# separately about stress and position-in-word.
|
||||
|
||||
# Don't include silences as things to be clustered -> --nosil option.
|
||||
scripts/make_shared_phones.sh --nosil | scripts/sym2int.pl data/phones.txt > $dir/phone_sets.list
|
||||
cluster-phones $dir/treeacc $dir/phone_sets.list $dir/questions.txt 2> $dir/cluster_phones.log || exit 1;
|
||||
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
|
||||
scripts/make_extra_questions.sh | cat $dir/questions_syms.txt - > $dir/questions_syms_all.txt
|
||||
scripts/sym2int.pl data/phones.txt < $dir/questions_syms_all.txt > $dir/questions_all.txt
|
||||
|
||||
compile-questions $dir/topo $dir/questions_all.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
|
||||
|
||||
scripts/make_roots.sh > $dir/roots_syms.txt
|
||||
scripts/sym2int.pl --ignore-oov data/phones.txt < $dir/roots_syms.txt > $dir/roots.txt
|
||||
|
||||
build-tree --verbose=1 --max-leaves=$numleaves \
|
||||
$dir/treeacc $dir/roots.txt \
|
||||
$dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log || exit 1;
|
||||
|
||||
# the sgmm-init program accepts a GMM, so we just create a temporary GMM "0.gmm"
|
||||
|
||||
gmm-init-model --write-occs=$dir/0.occs \
|
||||
$dir/tree $dir/treeacc $dir/topo $dir/0.gmm 2> $dir/init_gmm.log || exit 1;
|
||||
|
||||
sgmm-init --spk-space-dim=39 $dir/0.gmm $ubm $dir/0.mdl 2> $dir/init_sgmm.log || exit 1;
|
||||
|
||||
rm $dir/0.gmm
|
||||
|
||||
rm $dir/treeacc
|
||||
|
||||
for n in 1 2 3; do
|
||||
sgmm-gselect $dir/0.mdl "${featspart[$n]}" ark,t:- 2>$dir/gselect$n.log | \
|
||||
gzip -c > $dir/gselect${n}.gz || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error in gselect phase" && exit 1;
|
||||
|
||||
# Convert alignments generated from previous model, to use as
|
||||
# initial alignments.
|
||||
|
||||
for n in 1 2 3; do
|
||||
convert-ali $srcmodel $dir/0.mdl $dir/tree \
|
||||
"ark:gunzip -c $dir/0.$n.ali.gz|" \
|
||||
"ark:|gzip -c > $dir/cur$n.ali.gz" \
|
||||
2>$dir/convert.$n.log || exit 1; # don't parallelize: mostly I/O.
|
||||
done
|
||||
rm $dir/0.?.ali.gz
|
||||
|
||||
# Make training graphs
|
||||
echo "Compiling training graphs"
|
||||
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
compile-train-graphs $dir/tree $dir/0.mdl data/L.fst ark:$dir/train${n}.tra \
|
||||
"ark:|gzip -c > $dir/graphs${n}.fsts.gz" \
|
||||
2>$dir/compile_graphs.${n}.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo compile-graphs error && exit 1
|
||||
|
||||
|
||||
x=0
|
||||
while [ $x -lt $numiters ]; do
|
||||
echo "Pass $x"
|
||||
if echo $realign_iters | grep -w $x >/dev/null; then
|
||||
echo "Aligning data"
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
sgmm-align-compiled ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
|
||||
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
$scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
|
||||
"ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
|
||||
|| touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo error aligning data && exit 1
|
||||
fi
|
||||
if echo $spkvec_iters | grep -w $x >/dev/null; then
|
||||
echo "Computing speaker vectors"
|
||||
for n in 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
sgmm-est-spkvecs --spk2utt=ark:$dir/train$n.spk2utt ${spkvecs_opt[$n]} \
|
||||
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
--rand-prune=$randprune $dir/$x.mdl \
|
||||
"${featspart[$n]}" ark:- ark:$dir/tmp$n.vecs && mv $dir/tmp$n.vecs $dir/cur$n.vecs ) \
|
||||
2>$dir/spkvecs.$x.$n.log \
|
||||
|| touch $dir/.error &
|
||||
spkvecs_opt[$n]="--spk-vecs=ark:$dir/cur$n.vecs"
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo error computing speaker vectors && exit 1
|
||||
fi
|
||||
|
||||
if [ $x -eq 0 ]; then
|
||||
flags=vwcS
|
||||
elif [ $[$x%2] -eq 1 -a $x -gt 4 ]; then # even iters after 4 (i.e. starting from 6)...
|
||||
flags=vNwcS
|
||||
else
|
||||
flags=vMwcS
|
||||
fi
|
||||
|
||||
for n in 1 2 3; do
|
||||
sgmm-acc-stats-ali ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
|
||||
--update-flags=$flags "--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
--rand-prune=$randprune --binary=true $dir/$x.mdl "${featspart[$n]}" \
|
||||
"ark:gunzip -c $dir/cur$n.ali.gz|" $dir/$x.$n.acc 2> $dir/acc.$x.$n.log \
|
||||
|| touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo error accumulating stats on iter $x && exit 1
|
||||
if [ $x == $phn_dim_iter ]; then
|
||||
phn_dim_opt=--increase-phn-dim=$phn_dim
|
||||
else
|
||||
phn_dim_opt=
|
||||
fi
|
||||
sgmm-est $phn_dim_opt --update-flags=$flags --split-substates=$numsubstates \
|
||||
--write-occs=$dir/$[$x+1].occs $dir/$x.mdl "sgmm-sum-accs - $dir/$x.?.acc|" \
|
||||
$dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
|
||||
rm $dir/$x.mdl $dir/$x.?.acc $dir/$x.occs 2>/dev/null
|
||||
if [ $x -lt $maxiterinc ]; then
|
||||
numsubstates=$[$numsubstates+$incsubstates]
|
||||
fi
|
||||
x=$[$x+1];
|
||||
done
|
||||
|
||||
( cd $dir; rm final.mdl final.occs 2>/dev/null; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )
|
||||
|
||||
# Create "alignment model"
|
||||
flags=MwcS
|
||||
for n in 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/cur$n.ali.gz|" ark:- | \
|
||||
sgmm-post-to-gpost ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
|
||||
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
|
||||
$dir/$x.mdl "${featspart[$n]}" ark,s,cs:- ark:- | \
|
||||
sgmm-acc-stats-gpost --update-flags=$flags $dir/$x.mdl "${featspart[$n]}" \
|
||||
ark,s,cs:- $dir/$x.$n.aliacc ) 2> $dir/acc_ali.$x.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo error accumulating stats for alignment model && exit 1
|
||||
|
||||
sgmm-est --update-flags=$flags --remove-speaker-space=true $dir/$x.mdl \
|
||||
"sgmm-sum-accs - $dir/$x.?.aliacc|" $dir/$x.alimdl 2>$dir/update_ali.$x.log || exit 1;
|
||||
rm $dir/$x.?.aliacc
|
||||
|
||||
( cd $dir; rm final.alimdl 2>/dev/null; ln -s $x.alimdl final.alimdl; )
|
||||
|
Загрузка…
Ссылка в новой задаче