зеркало из https://github.com/mozilla/kaldi.git
Modifications to s3 scripts.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@540 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
b30395ad00
Коммит
669479471e
|
@ -111,7 +111,6 @@ exp/decode_sgmme_fmllr/wer:Average WER is 2.266018 (284 / 12533)
|
|||
|
||||
|
||||
|
||||
|
||||
#### Note: stuff below this line may be out of date / not computed
|
||||
# with most recent version of toolkit.
|
||||
# note: when changing (phn,spk) dimensions from (40,39) -> (30,30),
|
||||
|
|
|
@ -205,8 +205,8 @@ defaultfeats="$basefeats transform-feats $dir/B.mat ark:- ark:- |"
|
|||
rm $dir/$x.acc2
|
||||
|
||||
# The following files may be useful for display purposes.
|
||||
for n in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
|
||||
cat $dir/warps/$n.warp | scripts/process_warps.pl $data/spk2gender > $dir/warps/$n.warp_info
|
||||
for y in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
|
||||
cat $dir/warps/$y.warp | scripts/process_warps.pl $data/spk2gender > $dir/warps/$y.warp_info
|
||||
done
|
||||
|
||||
( cd $dir; rm final.mdl 2>/dev/null;
|
||||
|
|
|
@ -87,6 +87,14 @@ scripts/decode.sh steps/decode_deltas.sh exp/tri2a/graph_tgpr data/dev_nov93 exp
|
|||
# Train tri2b, which is LDA+MLLT, on si84 data.
|
||||
steps/train_lda_mllt.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b
|
||||
scripts/mkgraph.sh data/lang_test_tgpr exp/tri2a exp/tri2a/graph_tgpr
|
||||
scripts/decode.sh steps/decode_lda_mllt.sh exp/tri2b/graph_tgpr data/eval_nov92 exp/tri2b/decode_tgpr_eval92
|
||||
scripts/decode.sh steps/decode_lda_mllt.sh exp/tri2b/graph_tgpr data/dev_nov93 exp/tri2b/decode_tgpr_dev93
|
||||
|
||||
# Align tri2b system with si84 data.
|
||||
steps/align_lda_mllt.sh data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84
|
||||
|
||||
|
||||
steps/train_lda_et.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2c
|
||||
|
||||
# exp/decode_mono_tgpr_eval92 exp/graph_mono_tg_pruned/HCLG.fst steps/decode_mono.sh data/eval_nov92.scp
|
||||
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABILITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
|
||||
# This script does training-data alignment given a model built using
|
||||
# CMN + spliced frames + an LDA+MLLT transform (final.mat). It splits the data into
|
||||
# four chunks and does everything in parallel on the same machine.
|
||||
# Its output, all in its own
|
||||
# experimental directory, is {0,1,2,3}.cmvn, {0,1,2,3}.ali.gz, tree, final.mdl,
|
||||
# final.mat and final.occs (the last four are just copied from the source directory).
|
||||
|
||||
|
||||
# Option to use precompiled graphs from last phase, if these
|
||||
# are available (i.e. if they were built with the same data).
|
||||
# These must be split into four pieces.
|
||||
|
||||
oldgraphs=false
|
||||
if [ "$1" == --use-graphs ]; then
|
||||
shift;
|
||||
oldgraphs=true
|
||||
fi
|
||||
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/align_lda_mllt.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
|
||||
echo " e.g.: steps/align_lda_mllt.sh data/train data/lang exp/tri1 exp/tri1_ali"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
srcdir=$3
|
||||
dir=$4
|
||||
|
||||
oov_sym="<SPOKEN_NOISE>" # Map OOVs to this in training.
|
||||
grep SPOKEN_NOISE $lang/words.txt >/dev/null || echo "Warning: SPOKEN_NOISE not in dictionary"
|
||||
|
||||
|
||||
mkdir -p $dir
|
||||
cp $srcdir/{tree,final.mdl,final.mat,final.occs} $dir || exit 1; # Create copy of the tree and model and occs...
|
||||
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
|
||||
if [ ! -f $data/split4 -o $data/split4 -ot $data/feats.scp ]; then
|
||||
scripts/split_data.sh $data 4
|
||||
fi
|
||||
|
||||
echo "Computing cepstral mean and variance statistics"
|
||||
for n in 0 1 2 3; do
|
||||
compute-cmvn-stats --spk2utt=ark:$data/split4/$n/spk2utt scp:$data/split4/$n/feats.scp \
|
||||
ark:$dir/$n.cmvn 2>$dir/cmvn$n.log || exit 1;
|
||||
done
|
||||
|
||||
|
||||
# Align all training data using the supplied model.
|
||||
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
echo "Aligning data from $data"
|
||||
if $oldgraphs; then
|
||||
for n in 0 1 2 3; do
|
||||
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/$n.cmvn scp:$data/split4/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat |"
|
||||
if [ ! -f $srcdir/$n.fsts.gz ]; then
|
||||
echo You specified --use-graphs but no such file $srcdir/$n.fsts.gz
|
||||
exit 1;
|
||||
fi
|
||||
gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/final.mdl \
|
||||
"ark:gunzip -c $srcdir/$n.fsts.gz|" "$feats" "ark:|gzip -c >$dir/$n.ali.gz" \
|
||||
2> $dir/align$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo error doing alignment && exit 1;
|
||||
else
|
||||
for n in 0 1 2 3; do
|
||||
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/$n.cmvn scp:$data/split4/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat |"
|
||||
# compute integer form of transcripts.
|
||||
tra="ark:scripts/sym2int.pl --map-oov \"$oov_sym\" --ignore-first-field $lang/words.txt $data/split4/$n/text|";
|
||||
gmm-align $scale_opts --beam=10 --retry-beam=40 $dir/tree $dir/final.mdl $lang/L.fst \
|
||||
"$feats" "$tra" "ark:|gzip -c >$dir/$n.ali.gz" 2> $dir/align$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo error doing alignment && exit 1;
|
||||
fi
|
||||
|
||||
echo "Done aligning data."
|
|
@ -21,10 +21,6 @@
|
|||
# This script just generates lattices for a single broken-up
|
||||
# piece of the data.
|
||||
|
||||
#nd rescores them with different
|
||||
# acoustic weights, in order to explore a range of different
|
||||
# weights.
|
||||
|
||||
if [ -f ./path.sh ]; then . ./path.sh; fi
|
||||
|
||||
numjobs=1
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABILITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Decoding script that works with a GMM model and the baseline
|
||||
# [e.g. MFCC] features plus cepstral mean subtraction plus
|
||||
# LDA + ET (exponential transform) features. This script first
|
||||
# generates a pruned state-level lattice without adaptation,
|
||||
# then does acoustic rescoring on this lattice to generate
|
||||
# a new lattice; it determinizes and prunes this ready for
|
||||
# further rescoring (e.g. with new LMs, or varying the acoustic
|
||||
# scale).
|
||||
|
||||
if [ -f ./path.sh ]; then . ./path.sh; fi
|
||||
|
||||
numjobs=1
|
||||
jobid=0
|
||||
if [ "$1" == "-j" ]; then
|
||||
shift;
|
||||
numjobs=$1;
|
||||
jobid=$2;
|
||||
shift; shift;
|
||||
if [ $jobid -ge $numjobs ]; then
|
||||
echo "Invalid job number, $jobid >= $numjobs";
|
||||
exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: steps/decode_lda_mllt.sh [-j num-jobs job-number] <graph-dir> <data-dir> <decode-dir>"
|
||||
echo " e.g.: steps/decode_lda_mllt.sh -j 8 0 exp/mono/graph_tgpr data/dev_nov93 exp/mono/decode_dev93_tgpr"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
||||
graphdir=$1
|
||||
data=$2
|
||||
dir=$3
|
||||
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
if [ $numjobs -gt 1 ]; then
|
||||
mydata=$data/split$numjobs/$jobid
|
||||
else
|
||||
mydata=$data
|
||||
fi
|
||||
|
||||
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.mat $graphdir/HCLG.fst"
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_lda_mllt.sh: no such file $f";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
# We only do one decoding pass, so there is no point caching the
|
||||
# CMVN stats-- we make them part of a pipe.
|
||||
feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
|
||||
|
||||
gmm-latgen-faster --max-active=7000 --beam=13.0 --lattice-beam=6.0 --acoustic-scale=0.083333 \
|
||||
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.$jobid.gz" \
|
||||
2> $dir/decode$jobid.log || exit 1;
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABILITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Decoding script that works with a GMM model and the baseline
|
||||
# [e.g. MFCC] features plus cepstral mean subtraction plus
|
||||
# LDA+MLLT or similar transform.
|
||||
# This script just generates lattices for a single broken-up
|
||||
# piece of the data.
|
||||
|
||||
if [ -f ./path.sh ]; then . ./path.sh; fi
|
||||
|
||||
numjobs=1
|
||||
jobid=0
|
||||
if [ "$1" == "-j" ]; then
|
||||
shift;
|
||||
numjobs=$1;
|
||||
jobid=$2;
|
||||
shift; shift;
|
||||
if [ $jobid -ge $numjobs ]; then
|
||||
echo "Invalid job number, $jobid >= $numjobs";
|
||||
exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: steps/decode_lda_mllt.sh [-j num-jobs job-number] <graph-dir> <data-dir> <decode-dir>"
|
||||
echo " e.g.: steps/decode_lda_mllt.sh -j 8 0 exp/mono/graph_tgpr data/dev_nov93 exp/mono/decode_dev93_tgpr"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
||||
graphdir=$1
|
||||
data=$2
|
||||
dir=$3
|
||||
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
if [ $numjobs -gt 1 ]; then
|
||||
mydata=$data/split$numjobs/$jobid
|
||||
else
|
||||
mydata=$data
|
||||
fi
|
||||
|
||||
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.mat $graphdir/HCLG.fst"
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_lda_mllt.sh: no such file $f";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
# We only do one decoding pass, so there is no point caching the
|
||||
# CMVN stats-- we make them part of a pipe.
|
||||
feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
|
||||
|
||||
gmm-latgen-faster --max-active=7000 --beam=13.0 --lattice-beam=6.0 --acoustic-scale=0.083333 \
|
||||
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.$jobid.gz" \
|
||||
2> $dir/decode$jobid.log || exit 1;
|
||||
|
|
@ -0,0 +1,269 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABILITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
# Triphone model training, using spliced features with LDA + ET (exponential transform) and cepstral
|
||||
# mean normalization. It starts from an existing directory (e.g.
|
||||
# exp/mono), supplied as an argument, which is assumed to be built using
|
||||
# the same type of features.
|
||||
|
||||
if [ $# != 6 ]; then
|
||||
echo "Usage: steps/train_lda_et.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
|
||||
echo " e.g.: steps/train_lda_et.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2c"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
numleaves=$1
|
||||
totgauss=$2
|
||||
data=$3
|
||||
lang=$4
|
||||
alidir=$5
|
||||
dir=$6
|
||||
|
||||
if [ ! -f $alidir/final.mdl -o ! -f $alidir/0.ali.gz -o ! -f $alidir/3.ali.gz ]; then
|
||||
echo "Error: alignment dir $alidir does not contain final.mdl and {0,1,2,3}.ali.gz"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
realign_iters="10 20 30";
|
||||
numiters_et=15
|
||||
normtype=offset # et option; could be offset [recommended], or none
|
||||
oov_sym="<SPOKEN_NOISE>" # Map OOVs to this in training.
|
||||
grep SPOKEN_NOISE $lang/words.txt >/dev/null || echo "Warning: SPOKEN_NOISE not in dictionary"
|
||||
silphonelist=`cat $lang/silphones.csl`
|
||||
numiters=35 # Number of iterations of training
|
||||
maxiterinc=25 # Last iter to increase #Gauss on.
|
||||
numgauss=$numleaves
|
||||
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
|
||||
randprune=4.0
|
||||
|
||||
mkdir -p $dir/log $dir/warps
|
||||
|
||||
if [ ! -f $data/split4 -o $data/split4 -ot $data/feats.scp ]; then
|
||||
scripts/split_data.sh $data 4
|
||||
fi
|
||||
|
||||
# basefeats is all the feats, transformed with lda.mat-- just needed for tree accumulation.
|
||||
basefeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $alidir/*.cmvn|\" scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
|
||||
|
||||
for n in 0 1 2 3; do
|
||||
splicedfeatspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split4/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split4/$n/feats.scp ark:- | splice-feats ark:- ark:- |"
|
||||
basefeatspart[$n]="${splicedfeatspart[$n]} transform-feats $dir/lda.mat ark:- ark:- |"
|
||||
featspart[$n]="${basefeatspart[$n]}" # This gets overwritten later in the script.
|
||||
done
|
||||
|
||||
echo "Accumulating LDA statistics."
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
|
||||
for n in 0 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $alidir/$n.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- | \
|
||||
acc-lda --rand-prune=$randprune $alidir/final.mdl "${splicedfeatspart[$n]}" ark,s,cs:- \
|
||||
$dir/lda.$n.acc ) 2>$dir/log/lda_acc.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error accumulating LDA stats" && exit 1;
|
||||
est-lda $dir/lda.mat $dir/lda.*.acc 2>$dir/log/lda_est.log || exit 1; # defaults to dim=40
|
||||
rm $dir/lda.*.acc
|
||||
cur_lda=$dir/0.mat
|
||||
|
||||
# The next stage assumes we won't need the context of silence, which
|
||||
# assumes something about $lang/roots.txt, but it seems pretty safe.
|
||||
echo "Accumulating tree stats"
|
||||
acc-tree-stats --ci-phones=$silphonelist $alidir/final.mdl "$basefeats" \
|
||||
"ark:gunzip -c $alidir/?.ali.gz|" $dir/treeacc 2> $dir/log/acc_tree.log || exit 1;
|
||||
|
||||
echo "Computing questions for tree clustering"
|
||||
# preparing questions, roots file...
|
||||
scripts/sym2int.pl $lang/phones.txt $lang/phonesets_cluster.txt > $dir/phonesets.txt || exit 1;
|
||||
cluster-phones $dir/treeacc $dir/phonesets.txt $dir/questions.txt 2> $dir/log/questions.log || exit 1;
|
||||
scripts/sym2int.pl $lang/phones.txt $lang/extra_questions.txt >> $dir/questions.txt
|
||||
compile-questions $lang/topo $dir/questions.txt $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
|
||||
scripts/sym2int.pl --ignore-oov $lang/phones.txt $lang/roots.txt > $dir/roots.txt
|
||||
|
||||
echo "Building tree"
|
||||
build-tree --verbose=1 --max-leaves=$numleaves \
|
||||
$dir/treeacc $dir/roots.txt \
|
||||
$dir/questions.qst $lang/topo $dir/tree 2> $dir/log/train_tree.log || exit 1;
|
||||
|
||||
gmm-init-model --write-occs=$dir/1.occs \
|
||||
$dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1;
|
||||
|
||||
gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
|
||||
2>$dir/log/mixup.log || exit 1;
|
||||
|
||||
gmm-init-et --normalize-type=$normtype --binary=false --dim=40 $dir/1.et 2>$dir/init_et.log || exit 1
|
||||
|
||||
rm $dir/treeacc
|
||||
|
||||
# Convert alignments in $alidir, to use as initial alignments.
|
||||
# This assumes that $alidir was split in 4 pieces, just like the
|
||||
# current dir.
|
||||
|
||||
echo "Converting old alignments"
|
||||
for n in 0 1 2 3; do
|
||||
convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
|
||||
"ark:gunzip -c $alidir/$n.ali.gz|" "ark:|gzip -c >$dir/$n.ali.gz" \
|
||||
2>$dir/log/convert$n.log || exit 1;
|
||||
done
|
||||
|
||||
# Make training graphs (this is split in 4 parts).
|
||||
echo "Compiling training graphs"
|
||||
rm $dir/.error 2>/dev/null
|
||||
for n in 0 1 2 3; do
|
||||
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
|
||||
"ark:scripts/sym2int.pl --map-oov \"$oov_sym\" --ignore-first-field $lang/words.txt < $data/split4/$n/text |" \
|
||||
"ark:|gzip -c >$dir/$n.fsts.gz" 2>$dir/log/compile_graphs$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo "Error compiling training graphs" && exit 1;
|
||||
|
||||
x=1
|
||||
while [ $x -lt $numiters ]; do
|
||||
echo Pass $x
|
||||
if echo $realign_iters | grep -w $x >/dev/null; then
|
||||
echo "Aligning data"
|
||||
for n in 0 1 2 3; do
|
||||
gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/$x.mdl \
|
||||
"ark:gunzip -c $dir/$n.fsts.gz|" "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/$n.ali.gz" 2> $dir/log/align.$x.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo "Error aligning data on iteration $x" && exit 1;
|
||||
fi
|
||||
|
||||
if [ $x -lt $numiters_et ]; then
|
||||
echo "Re-estimating ET transforms"
|
||||
for n in 0 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
rand-prune-post $randprune ark:- ark:- | \
|
||||
gmm-post-to-gpost $dir/$x.mdl "${featspart[$n]}" ark:- ark:- | \
|
||||
gmm-est-et --spk2utt=ark:$data/split4/$n/spk2utt $dir/$x.mdl $dir/$x.et "${basefeatspart[$n]}" \
|
||||
ark,s,cs:- ark:$dir/$n.trans.tmp ark,t:$dir/warps/$x.$n.warp ) \
|
||||
2> $dir/log/trans.$x.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error computing ET transforms on iteration $x" && exit 1;
|
||||
for n in 0 1 2 3; do
|
||||
mv $dir/$n.trans.tmp $dir/$n.trans || exit 1;
|
||||
featspart[$n]="${basefeatspart[$n]} transform-feats --utt2spk=ark:$data/split4/$n/utt2spk ark:$dir/$n.trans ark:- ark:- |"
|
||||
done
|
||||
fi
|
||||
|
||||
for n in 0 1 2 3; do
|
||||
gmm-acc-stats-ali --binary=false $dir/$x.mdl "${featspart[$n]}" \
|
||||
"ark:gunzip -c $dir/$n.ali.gz|" $dir/$x.$n.acc \
|
||||
2>$dir/log/acc.$x.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1;
|
||||
gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl \
|
||||
"gmm-sum-accs - $dir/$x.{0,1,2,3}.acc |" $dir/$[$x+1].mdl 2> $dir/log/update.$x.log || exit 1;
|
||||
rm $dir/$x.mdl $dir/$x.{0,1,2,3}.acc
|
||||
rm $dir/$x.occs
|
||||
|
||||
x1=$[$x+1];
|
||||
if [ $x -lt $numiters_et ]; then
|
||||
# Alternately estimate either A or B.
|
||||
if [ $[$x%2] == 0 ]; then # Estimate A:
|
||||
for n in 0 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
|
||||
rand-prune-post $randprune ark:- ark:- | \
|
||||
gmm-post-to-gpost $dir/$x1.mdl "${featspart[$n]}" ark:- ark:- | \
|
||||
gmm-et-acc-a --spk2utt=ark:$data/split4/$n/spk2utt --verbose=1 $dir/$x1.mdl $dir/$x.et "${basefeatspart[$n]}" \
|
||||
ark,s,cs:- $dir/$x.$n.et_acc_a ) 2> $dir/log/acc_a.$x.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error accumulating ET stats for A on iter $x" && exit 1;
|
||||
gmm-et-est-a --verbose=1 $dir/$x.et $dir/$x1.et $dir/$x.*.et_acc_a 2> $dir/log/update_a.$x.log || exit 1;
|
||||
rm $dir/$x.*.et_acc_a
|
||||
else
|
||||
for n in 0 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
|
||||
gmm-acc-mllt --rand-prune=$randprune $dir/$x1.mdl "${featspart[$n]}" ark:- \
|
||||
$dir/$x.$n.mllt_acc ) 2> $dir/log/acc_b.$x.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error accumulating ET stats for A on iter $x" && exit 1;
|
||||
est-mllt $dir/$x.mat $dir/$x.*.mllt_acc 2> $dir/log/update_b.$x.log || exit 1;
|
||||
gmm-et-apply-c $dir/$x.et $dir/$x.mat $dir/$x1.et 2>>$dir/log/update_b.$x.log || exit 1;
|
||||
gmm-transform-means $dir/$x.mat $dir/$x1.mdl $dir/$x1.mdl 2>> $dir/log/update_b.$x.log || exit 1;
|
||||
# Modify current transforms by premultiplying by C.
|
||||
for n in 0 1 2 3; do
|
||||
compose-transforms $dir/$x.mat ark:$dir/$n.trans ark:$dir/tmp.trans 2>> $dir/update_b.$x.log || exit 1;
|
||||
mv $dir/tmp.trans $dir/$n.trans
|
||||
done
|
||||
rm $dir/$x.mat
|
||||
rm $dir/$x.*.mllt_acc
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ $x -le $maxiterinc ]]; then
|
||||
numgauss=$[$numgauss+$incgauss];
|
||||
fi
|
||||
x=$[$x+1];
|
||||
done
|
||||
|
||||
|
||||
# Write out the B matrix which we will combine with LDA to get
|
||||
# final.mat; and write out final.et which is the current final et
|
||||
# but with B set to unity (since it's now part of final.mat).
|
||||
# This is just more convenient going forward, since the "default features"
|
||||
# (i.e. when speaker factor equals zero) are now the same as the
|
||||
# features that the ET acts on.
|
||||
|
||||
gmm-et-get-b $dir/$numiters_et.et $dir/B.mat $dir/final.et 2>$dir/get_b.log || exit 1
|
||||
|
||||
compose-transforms $dir/B.mat $dir/lda.mat $dir/final.mat 2>>$dir/get_b.log || exit 1
|
||||
|
||||
for n in 0 1 2 3; do
|
||||
defaultfeatspart[$n]="${basefeatspart[$n]} transform-feats $dir/B.mat ark:- ark:- |"
|
||||
done
|
||||
|
||||
# Accumulate stats for the "alignment model", which is the same as the final model but uses
|
||||
# the default features (shares Gaussian-level alignments).
|
||||
for n in 0 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/$n.ali.gz|" ark:- | \
|
||||
gmm-acc-stats-twofeats $dir/$x.mdl "${featspart[$n]}" "${defaultfeatspart[$n]}" \
|
||||
ark:- $dir/$x.$n.acc2 ) 2>$dir/acc_alimdl.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo "Error accumulating alignment statistics." && exit 1;
|
||||
# Update model.
|
||||
gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl \
|
||||
"gmm-sum-accs - $dir/$x.*.acc2|" $dir/$x.alimdl \
|
||||
2>$dir/est_alimdl.log || exit 1;
|
||||
rm $dir/$x.*.acc2
|
||||
|
||||
# The following files may be useful for display purposes.
|
||||
for y in 2 3 4 5 6 7 8 9 10 11 12 13 14; do
|
||||
cat $dir/warps/$y.*.warp | scripts/process_warps.pl $data/spk2gender > $dir/warps/$y.warp_info
|
||||
done
|
||||
|
||||
( cd $dir;
|
||||
ln -s $x.mdl final.mdl;
|
||||
ln -s $x.occs final.occs;
|
||||
ln -s $x.alimdl final.alimdl
|
||||
ln -s $[$numiters_et-1].trans final.trans )
|
||||
|
||||
echo Done
|
|
@ -21,8 +21,8 @@
|
|||
# the same type of features.
|
||||
|
||||
if [ $# != 6 ]; then
|
||||
echo "Usage: steps/train_deltas.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
|
||||
echo " e.g.: steps/train_deltas.sh 2000 10000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
|
||||
echo "Usage: steps/train_lda_mllt.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
|
||||
echo " e.g.: steps/train_lda_mllt.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче