зеркало из https://github.com/mozilla/kaldi.git
Multilingual SGMM training scripts for GlobalPhone
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1070 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
55cfe71d79
Коммит
bf91124841
|
@ -0,0 +1,359 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is Subspace Gaussian Mixture Model (SGMM) training--
|
||||
# see "The subspace Gaussian mixture model--A structured model for speech recognition"
|
||||
# by D. Povey et al, Computer Speech and Language, 2011.
|
||||
|
||||
function error_exit () {
|
||||
echo -e "$@" >&2; exit 1;
|
||||
}
|
||||
|
||||
function readint () {
|
||||
local retval=${1/#*=/}; # In case --switch=ARG format was used
|
||||
# retval=${retval#0*} # Strip any leading 0's
|
||||
[[ "$retval" =~ ^-?[0-9][0-9]*$ ]] \
|
||||
|| error_exit "Argument \"$retval\" not an integer."
|
||||
echo $retval
|
||||
}
|
||||
|
||||
function est_alimodel () {
|
||||
# If we have speaker vectors, we need an alignment model. This function gets
|
||||
# the Gaussian-level alignments with the speaker vectors but accumulates stats
|
||||
# without any speaker vectors; we re-estimate M, w, c and S to get a model
|
||||
# that's compatible with not having speaker vectors. Note that the transitions
|
||||
# are not updated since the decoding graph will be shared with the normal model.
|
||||
local lx=$1
|
||||
for L in $LANGUAGES; do
|
||||
wdir=$dir/$L
|
||||
local lspkdim=`sgmm-info $wdir/$lx.mdl | grep speaker | awk '{print $NF}'`
|
||||
if [ "$lspkdim" -le 0 ]; then
|
||||
echo "est_alimodel: No speaker space in model '$wdir/$lx.mdl'. Returning."
|
||||
return
|
||||
fi
|
||||
done
|
||||
|
||||
local y=0;
|
||||
local lflags=MwcS # First time don't update v
|
||||
while [ $y -lt $numiters_alimdl ]; do
|
||||
[ $y -gt 0 ] && lflags=vMwcS
|
||||
echo "Pass $y of building alignment model, flags = '$lflags'"
|
||||
local lmulti_est_opts='' # model, acc, model-out, occs-out tuples
|
||||
for L in $LANGUAGES; do
|
||||
(
|
||||
data=data/$L/train
|
||||
lang=data/$L/lang
|
||||
wdir=$dir/$L
|
||||
local cur_alimdl=$wdir/tmp$y.alimdl
|
||||
[ $y -eq 0 ] && cur_alimdl=$wdir/$lx.mdl
|
||||
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
gselect_opt="--gselect=ark,s,cs:gunzip -c $wdir/TASK_ID.gselect.gz|"
|
||||
spkvecs_opt="--spk-vecs=ark:$wdir/TASK_ID.vecs"
|
||||
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/acc_ali${lx}_$y.TASK_ID.log \
|
||||
$sjopts ali-to-post "ark:gunzip -c $wdir/TASK_ID.ali.gz|" ark:- \| \
|
||||
sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \
|
||||
--utt2spk=ark:$data/split$nj/TASK_ID/utt2spk $wdir/$lx.mdl \
|
||||
"$feats" ark,s,cs:- ark:- \| \
|
||||
sgmm-acc-stats-gpost --update-flags=$lflags $cur_alimdl "$feats" \
|
||||
ark,s,cs:- $wdir/$y.TASK_ID.aliacc \
|
||||
|| { touch $dir/err; \
|
||||
error_exit "$L; Align model iter $y: Error accumulating stats"; }
|
||||
|
||||
# Summing accs is quite fast; run locally
|
||||
sgmm-sum-accs $wdir/sum.aliacc $wdir/$y.*.aliacc || \
|
||||
{ touch $dir/err; \
|
||||
error_exit "$L; Align model iter $y: Error summing stats"; }
|
||||
)& # Accumulate in parallel for different languages
|
||||
wdir=$dir/$L
|
||||
local cur_alimdl=$wdir/tmp$y.alimdl
|
||||
[ $y -eq 0 ] && cur_alimdl=$wdir/$lx.mdl
|
||||
lmulti_est_opts="$lmulti_est_opts $cur_alimdl $wdir/sum.aliacc $wdir/tmp$[$y+1].alimdl $wdir/tmp$[$y+1].occs"
|
||||
done
|
||||
wait
|
||||
|
||||
submit_jobs.sh "$qcmd" --log=$dir/log/update_ali.$y.log $sjopts \
|
||||
sgmm-est-multi --update-flags=$lflags --remove-speaker-space=true \
|
||||
$lmulti_est_opts \
|
||||
|| error_exit "Error estimating alignment models on iter $y";
|
||||
|
||||
rm -f $dir/??/$y.*.aliacc $dir/??/sum.aliacc || exit 1;
|
||||
[ $y -gt 0 ] && rm $dir/??/tmp$y.{alimdl,occs}
|
||||
y=$[$y+1]
|
||||
done
|
||||
|
||||
for L in $LANGUAGES; do
|
||||
mv $dir/$L/tmp$y.alimdl $dir/$L/$lx.alimdl
|
||||
done
|
||||
}
|
||||
|
||||
nj=4 # Default number of jobs
|
||||
stage=-5 # Default starting stage (start with tree building)
|
||||
qcmd="" # Options for the submit_jobs.sh script
|
||||
sjopts="" # Options for the submit_jobs.sh script
|
||||
LANGUAGES='GE PO SP SW' # Languages processed
|
||||
|
||||
PROG=`basename $0`;
|
||||
usage="Usage: $PROG [options] <phone-dim> <spk-dim> <ubm> <out-dir>\n
|
||||
e.g.: $PROG 40 39 exp/ubm3c/final.ubm exp/sgmm3c\n\n
|
||||
Options:\n
|
||||
--help\t\tPrint this message and exit\n
|
||||
--lang STR\tList of languages to process (default = '$LANGUAGES')\n
|
||||
--num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n
|
||||
--qcmd STR\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
|
||||
--sjopts STR\tOptions for the 'submit_jobs.sh' script\n
|
||||
--stage INT\tStarting stage (e.g. -4 for SGMM init; 2 for iter 2; default=$stage)\n
|
||||
";
|
||||
|
||||
echo "$PROG $@"
|
||||
while [ $# -gt 0 ]; do
|
||||
case "${1# *}" in # ${1# *} strips any leading spaces from the arguments
|
||||
--help) echo -e $usage; exit 0 ;;
|
||||
--lang) LANGUAGES="$2"; shift 2 ;;
|
||||
--num-jobs)
|
||||
shift; nj=`readint $1`;
|
||||
[ $nj -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive.";
|
||||
shift ;;
|
||||
--qcmd)
|
||||
shift; qcmd=" --qcmd=${1}"; shift ;;
|
||||
--sjopts)
|
||||
shift; sjopts="$1"; shift ;;
|
||||
--stage)
|
||||
shift; stage=`readint $1`; shift ;;
|
||||
-*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
|
||||
*) break ;; # end of options: interpreted as num-leaves
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
error_exit $usage;
|
||||
fi
|
||||
|
||||
[ -f path.sh ] && . path.sh
|
||||
|
||||
# This is SGMM with speaker vectors, on top of LDA+[something] features.
|
||||
# Any speaker-specific transforms are obtained from the alignment directory.
|
||||
# To be run from ..
|
||||
|
||||
phndim=$1
|
||||
spkdim=$2
|
||||
ubm=$3
|
||||
dir=$4
|
||||
|
||||
[ -f $ubm ] || error_exit "UBM file '$ubm' does not exist"
|
||||
mkdir -p $dir/log || error_exit "Cannot create '$dir/log'"
|
||||
|
||||
# (1): Model initialization; training graph and initial alignment generation.
|
||||
for L in $LANGUAGES; do
|
||||
(
|
||||
data=data/$L/train
|
||||
lang=data/$L/lang
|
||||
alidir=exp/$L/tri2a_ali
|
||||
wdir=$dir/$L
|
||||
oov_sym=`cat $lang/oov.txt`
|
||||
mkdir -p $wdir/log || error_exit "Cannot create working directory '$wdir'"
|
||||
|
||||
# Initialize the model (removed the --spk-space-dim option)
|
||||
if [ $stage -le -5 ]; then
|
||||
echo "$L: Initializing model"
|
||||
submit_jobs.sh "$qcmd" --log=$wdir/log/init_sgmm.log $sjopts \
|
||||
sgmm-init --phn-space-dim=$phndim $lang/topo $wdir/tree $ubm \
|
||||
$wdir/0.mdl || { touch $dir/err; error_exit "$L: SGMM init failed."; }
|
||||
fi
|
||||
|
||||
# Make training graphs
|
||||
if [ $stage -le -4 ]; then
|
||||
echo "$L: Compiling training graphs"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/mkgraphs.TASK_ID.log \
|
||||
$sjopts compile-train-graphs $wdir/tree $wdir/0.mdl $lang/L.fst \
|
||||
"ark:sym2int.pl --map-oov '$oov_sym' --ignore-first-field $lang/words.txt < $data/split$nj/TASK_ID/text |" \
|
||||
"ark:|gzip -c >$wdir/TASK_ID.fsts.gz" \
|
||||
|| { touch $dir/err; error_exit "$L: Error compiling training graphs"; }
|
||||
fi
|
||||
|
||||
if [ $stage -le -3 ]; then
|
||||
echo "$L: Converting alignments"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/convert.TASK_ID.log \
|
||||
$sjopts convert-ali $alidir/final.mdl $wdir/0.mdl $wdir/tree \
|
||||
"ark:gunzip -c $alidir/TASK_ID.ali.gz|" \
|
||||
"ark:|gzip -c >$wdir/TASK_ID.ali.gz" \
|
||||
|| { touch $dir/err; error_exit "$L: Convert alignment failed."; }
|
||||
fi
|
||||
|
||||
if [ $stage -le -2 ]; then
|
||||
echo "$L: Computing cepstral mean and variance statistics"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj $sjopts --log=$wdir/log/cmvn.TASK_ID.log \
|
||||
compute-cmvn-stats --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \
|
||||
scp:$data/split$nj/TASK_ID/feats.scp ark:$wdir/TASK_ID.cmvn \
|
||||
|| { touch $dir/err; error_exit "$L: Computing CMN/CVN stats failed."; }
|
||||
fi
|
||||
|
||||
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
if [ $stage -le -1 ]; then
|
||||
echo "$L: Doing Gaussian selection"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/gselectTASK_ID.log \
|
||||
$sjopts sgmm-gselect $wdir/0.mdl "$feats" "ark,t:|gzip -c > $wdir/TASK_ID.gselect.gz" \
|
||||
|| { touch $dir/err; error_exit "$L: Error doing Gaussian selection"; }
|
||||
fi
|
||||
)& # Run the language-specific initializations in parallel
|
||||
done
|
||||
wait
|
||||
[ -f $dir/err ] && { rm $dir/err; error_exit "Error initializing models."; }
|
||||
|
||||
# Language independent constants
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
numiters_alimdl=3 # Number of iterations for estimating alignment model.
|
||||
incsub_interval=8 # increase substates every 8 iterations
|
||||
# total substates after each such increment
|
||||
total_substates=( 5000 7000 9000 12000 16000 20000 25000 30000 35000 40000 )
|
||||
# For a given number of substates, iterate for $incsub_interval iterations
|
||||
numiters=$[(${#total_substates[@]}+1)*$incsub_interval]
|
||||
realign_interval=4 # realign every 4 iterations
|
||||
spkvec_start=8 # use speaker subspace *after* 8 iterations
|
||||
spkvec_interval=2 # reestimate the speaker vectors every 2 iterations
|
||||
randprune=0.1
|
||||
|
||||
# Initially don't have speaker vectors, but change this after we estimate them.
|
||||
spkvecs_gen=0
|
||||
|
||||
x=0
|
||||
while [ $x -lt $numiters ]; do
|
||||
if [ $x -eq 0 ]; then
|
||||
flags=v # On first iter, don't update M or N.
|
||||
elif [ $spkdim -gt 0 -a $[$x%2] -eq 0 -a $x -gt $spkvec_start ]; then
|
||||
# Update N on odd iterations after 1st spkvec iter, if we have spk-space.
|
||||
flags=NwSvct
|
||||
else # Else update M but not N.
|
||||
flags=MwSvct
|
||||
fi
|
||||
|
||||
if [ $stage -le $x ]; then
|
||||
echo "Pass $x: update flags = '$flags' "
|
||||
multi_est_opts='' # Will contain model, acc, model-out, occs-out tuples
|
||||
for L in $LANGUAGES; do
|
||||
(
|
||||
data=data/$L/train
|
||||
lang=data/$L/lang
|
||||
wdir=$dir/$L
|
||||
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
gselect_opt="--gselect=ark,s,cs:gunzip -c $wdir/TASK_ID.gselect.gz|"
|
||||
if [ $spkdim -gt 0 -a $spkvecs_gen -eq 1 ]; then
|
||||
spkvecs_opt="--spk-vecs=ark:$wdir/TASK_ID.vecs"
|
||||
else
|
||||
spkvecs_opt=''
|
||||
fi
|
||||
silphonelist=`cat $lang/silphones.csl`
|
||||
# numsubstates=`cat $wdir/numleaves` # Initial #-substates.
|
||||
|
||||
if [ $[$x%$realign_interval] -eq 0 -a $x -gt 0 ]; then
|
||||
echo "$L; iter $x: Aligning data"
|
||||
submit_jobs.sh "$qcmd" $sjopts --log=$wdir/log/align.$x.TASK_ID.log \
|
||||
--njobs=$nj sgmm-align-compiled $spkvecs_opt $scale_opts \
|
||||
"$gselect_opt" --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \
|
||||
--beam=8 --retry-beam=40 $wdir/$x.mdl \
|
||||
"ark:gunzip -c $wdir/TASK_ID.fsts.gz|" "$feats" \
|
||||
"ark:|gzip -c >$wdir/TASK_ID.ali.gz" || \
|
||||
{ touch $dir/err; error_exit "$L, it $x: Error realigning data"; }
|
||||
fi
|
||||
|
||||
if [ $spkdim -gt 0 -a $x -gt $spkvec_start \
|
||||
-a $[$x%$spkvec_interval] -eq 0 ]; then
|
||||
echo "$L; iter $x: Computing speaker vectors"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/spkvecs.$x.TASK_ID.log \
|
||||
$sjopts ali-to-post "ark:gunzip -c $wdir/TASK_ID.ali.gz|" ark:- \| \
|
||||
weight-silence-post 0.01 $silphonelist $wdir/$x.mdl ark:- ark:- \| \
|
||||
sgmm-est-spkvecs --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \
|
||||
$spkvecs_opt "$gselect_opt" --rand-prune=$randprune $wdir/$x.mdl \
|
||||
"$feats" ark,s,cs:- ark:$wdir/tmpTASK_ID.vecs || \
|
||||
{ touch $dir/err; error_exit "$L, it $x: Error computing spkvecs"; }
|
||||
for n in `seq 1 $nj`; do
|
||||
mv $wdir/tmp${n}.vecs $wdir/${n}.vecs;
|
||||
done
|
||||
spkvecs_gen=1
|
||||
fi
|
||||
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/acc.$x.TASK_ID.log \
|
||||
$sjopts sgmm-acc-stats --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \
|
||||
--update-flags=$flags --rand-prune=$randprune $spkvecs_opt \
|
||||
"$gselect_opt" $wdir/$x.mdl "$feats" \
|
||||
"ark,s,cs:ali-to-post 'ark:gunzip -c $wdir/TASK_ID.ali.gz|' ark:-|" \
|
||||
$wdir/$x.TASK_ID.acc || \
|
||||
{ touch $dir/err; error_exit "$L, it $x: Error accumulating stats"; }
|
||||
|
||||
# Summing accs is quite fast; run locally
|
||||
sgmm-sum-accs $wdir/sum.acc $wdir/$x.*.acc || \
|
||||
{ touch $dir/err; error_exit "$L, it $x: Error summing stats"; }
|
||||
) & # Accumulate in parallel for different languages
|
||||
wdir=$dir/$L
|
||||
multi_est_opts="$multi_est_opts $wdir/$x.mdl $wdir/sum.acc $wdir/$[$x+1].mdl $wdir/$[$x+1].occs"
|
||||
done
|
||||
wait
|
||||
[ -f $dir/err ] && \
|
||||
{ rm $dir/err; error_exit "Iter $x: Error in accumulation"; }
|
||||
|
||||
add_dim_opts=''
|
||||
if [ $x -eq $spkvec_start ]; then
|
||||
add_dim_opts="--increase-spk-dim=$spkdim --increase-phn-dim=$phndim"
|
||||
elif [ $x -eq $[$spkvec_start*2] ]; then
|
||||
add_dim_opts="--increase-spk-dim=$spkdim --increase-phn-dim=$phndim"
|
||||
fi
|
||||
split_opts=''
|
||||
if [ $[$x%$incsub_interval] -eq 1 -a $x -gt 1 ]; then
|
||||
index=$[($x/$incsub_interval)-1]
|
||||
numsubstates=${total_substates[$index]}
|
||||
split_opts="--split-substates=$numsubstates"
|
||||
fi
|
||||
|
||||
submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log $sjopts \
|
||||
sgmm-est-multi --update-flags=$flags $split_opts $add_dim_opts \
|
||||
$multi_est_opts || error_exit "Error in pass $x estimation."
|
||||
|
||||
# If using speaker vectors, estimate alignment model without spkvecs
|
||||
if [ $[$x%$incsub_interval] -eq 0 -a $x -gt 0 ]; then
|
||||
chmod -w $dir/??/$x.mdl $dir/??/$x.occs # Preserve for scoring
|
||||
[ $spkdim -gt 0 ] && est_alimodel $x;
|
||||
else
|
||||
rm -f $dir/??/$x.mdl $dir/??/$x.occs
|
||||
fi
|
||||
rm -f $dir/??/$x.*.acc $dir/??/sum.acc
|
||||
fi # End of current stage
|
||||
x=$[$x+1];
|
||||
done
|
||||
|
||||
for L in $LANGUAGES; do
|
||||
(
|
||||
wdir=$dir/$L
|
||||
rm -f $wdir/final.mdl $wdir/final.occs;
|
||||
chmod -w $wdir/$x.mdl $wdir/$x.occs # Preserve for scoring
|
||||
ln -s $wdir/$x.mdl $wdir/final.mdl;
|
||||
ln -s $wdir/$x.occs $wdir/final.occs;
|
||||
# If using speaker vectors, estimate alignment model without spkvecs
|
||||
[ $spkdim -gt 0 ] && est_alimodel $wdir/$x.mdl;
|
||||
rm -f $wdir/final.alimdl;
|
||||
ln -sf $wdir/$x.alimdl $wdir/final.alimdl;
|
||||
|
||||
# Print out summary of the warning messages.
|
||||
for x in $wdir/log/*.log; do
|
||||
n=`grep WARNING $x | wc -l`;
|
||||
if [ $n -ne 0 ]; then echo "$n warnings in $x"; fi;
|
||||
done
|
||||
)
|
||||
done
|
||||
|
||||
echo Done
|
|
@ -0,0 +1,148 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Train UBM from a trained HMM/GMM system using (e.g. MFCC) + delta +
|
||||
# acceleration features and cepstral mean normalization.
|
||||
# Alignment directory is used for the CMN and transforms.
|
||||
# A UBM is just a single mixture of Gaussians (full-covariance, in our case),
|
||||
# that's trained on all the data. This will later be used in SGMM training.
|
||||
|
||||
function error_exit () {
|
||||
echo -e "$@" >&2; exit 1;
|
||||
}
|
||||
|
||||
function readint () {
|
||||
local retval=${1/#*=/}; # In case --switch=ARG format was used
|
||||
retval=${retval#0*} # Strip any leading 0's
|
||||
[[ "$retval" =~ ^-?[1-9][0-9]*$ ]] \
|
||||
|| error_exit "Argument \"$retval\" not an integer."
|
||||
echo $retval
|
||||
}
|
||||
|
||||
nj=4 # Default number of jobs
|
||||
qcmd="" # Options for the submit_jobs.sh script
|
||||
sjopts="" # Options for the submit_jobs.sh script
|
||||
|
||||
PROG=`basename $0`;
|
||||
usage="Usage: $PROG [options] <num-comp> <out-dir>\n
|
||||
e.g.: $PROG exp/ubm3a\n\n
|
||||
Options:\n
|
||||
--help\t\tPrint this message and exit\n
|
||||
--num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n
|
||||
--qcmd STR\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
|
||||
--sjopts STR\tOptions for the 'submit_jobs.sh' script\n
|
||||
";
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "${1# *}" in # ${1# *} strips any leading spaces from the arguments
|
||||
--help) echo -e $usage; exit 0 ;;
|
||||
--num-jobs)
|
||||
shift; nj=`readint $1`;
|
||||
[ $nj -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive.";
|
||||
shift ;;
|
||||
--qcmd)
|
||||
shift; qcmd=" --qcmd=${1}"; shift ;;
|
||||
--sjopts)
|
||||
shift; sjopts="$1"; shift ;;
|
||||
-*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
|
||||
*) break ;; # end of options: interpreted as number of components
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $# != 2 ]; then
|
||||
error_exit $usage;
|
||||
fi
|
||||
|
||||
numcomps=$1
|
||||
dir=$2
|
||||
|
||||
LANGUAGES='GE PO SP SW' # Languages processed
|
||||
[ -f path.sh ] && . path.sh
|
||||
mkdir -p $dir/{data,log}
|
||||
for f in feats.scp spk2utt utt2spk text wav.scp; do
|
||||
for L in $LANGUAGES; do
|
||||
cat data/$L/train/$f
|
||||
done \
|
||||
| sort -k1,1 > $dir/data/$f
|
||||
done
|
||||
data=$dir/data
|
||||
split_data.sh $data $nj
|
||||
|
||||
# typically: --intermediate-numcomps=2000 --ubm-numcomps=400
|
||||
intermediate=$[$numcomps*5]
|
||||
|
||||
merge_ubms=
|
||||
for L in $LANGUAGES; do
|
||||
alidir=exp/$L/tri2a_ali
|
||||
merge_ubms=$merge_ubms" $dir/${L}.ubm"
|
||||
echo "Language '$L': Clustering model $alidir/final.mdl to get initial UBM"
|
||||
(
|
||||
submit_jobs.sh "$qcmd" --log=$dir/log/cluster_$L.log $sjopts \
|
||||
init-ubm --intermediate-numcomps=$intermediate --ubm-numcomps=$numcomps \
|
||||
--verbose=2 --fullcov-ubm=true $alidir/final.mdl $alidir/final.occs \
|
||||
$dir/${L}.ubm || touch $dir/.error
|
||||
) & # Run the language-specific clusterings in parallel
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && \
|
||||
{ rm $dir/.error; error_exit "UBM initialization failed."; }
|
||||
|
||||
echo "Merging language-specific UBMs to a global UBM."
|
||||
fgmm-global-merge $dir/0.ubm $dir/ubm_sizes $merge_ubms
|
||||
|
||||
echo "Computing cepstral mean and variance statistics"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj $sjopts --log=$dir/log/cmvn.TASK_ID.log \
|
||||
compute-cmvn-stats --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \
|
||||
scp:$data/split$nj/TASK_ID/feats.scp ark:$dir/TASK_ID.cmvn \
|
||||
|| error_exit "Computing CMN/CVN stats failed.";
|
||||
|
||||
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
# First do Gaussian selection to 100 components, which will be used
|
||||
# as the initial screen for all further passes.
|
||||
ngselect=100
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/gselect_diag.TASK_ID.log \
|
||||
$sjopts gmm-gselect --n=$ngselect "fgmm-global-to-gmm $dir/0.ubm - |" \
|
||||
"$feats" "ark:|gzip -c >$dir/gselect_diag.TASK_ID.gz" \
|
||||
|| error_exit "Error doing GMM selection";
|
||||
gs_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect_diag.TASK_ID.gz|"
|
||||
|
||||
ngselect=50 # During iterations select 50 components
|
||||
for x in 0 1 2 3; do
|
||||
echo "Pass $x"
|
||||
submit_jobs.sh "$qcmd" --njobs=$nj $sjopts --log=$dir/log/acc.$x.TASK_ID.log \
|
||||
gmm-gselect --n=$ngselect "$gs_opt" "fgmm-global-to-gmm $dir/$x.ubm - |" \
|
||||
"$feats" ark:- \| \
|
||||
fgmm-global-acc-stats --gselect=ark,s,cs:- $dir/$x.ubm "$feats" \
|
||||
$dir/$x.TASK_ID.acc \
|
||||
|| error_exit "Error accumulating stats for UBM estimation on pass $x."
|
||||
|
||||
# Only remove low-count Gaussians on last iter-- keeps gselect info valid.
|
||||
lowcount_opt="--remove-low-count-gaussians=false"
|
||||
[ $x -eq 3 ] && lowcount_opt=
|
||||
|
||||
submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log $sjopts \
|
||||
fgmm-global-est $lowcount_opt --verbose=2 $dir/$x.ubm \
|
||||
"fgmm-global-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].ubm \
|
||||
|| error_exit "Error estimating UBM on pass $x.";
|
||||
rm $dir/$x.*.acc $dir/$x.ubm
|
||||
done
|
||||
|
||||
rm $dir/gselect_diag.*.gz
|
||||
rm -f $dir/final.ubm
|
||||
mv $dir/4.ubm $dir/final.ubm || exit 1;
|
Загрузка…
Ссылка в новой задаче