sandbox/language_id: script changes for applying VTLN in language-id; not yet tested.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@4174 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2014-07-19 21:15:24 +00:00
Родитель 4fd9c20c6a
Коммит 6a91edb723
4 изменённых файлов: 172 добавлений и 13 удалений

141
egs/lre/v1/lid/get_vtln_warps.sh Executable file
Просмотреть файл

@ -0,0 +1,141 @@
#!/bin/bash
# Copyright 2014  Daniel Povey
# Apache 2.0
#
# This script takes a data directory and a directory computed by
# ./train_lvtln_model.sh, and it computes speaker warp-factors spk2warp. It
# expects vad.scp to exist in the data directory. Note: like
# train_lvtln_model.sh, it uses features of the speaker-id type, i.e. double
# delta features with sliding window cepstral mean normalization.

# Begin configuration.
stage=0
config=
cmd=run.pl
logdet_scale=0.0
subsample=5 # We use every 5th frame by default; this is more
            # CPU-efficient.
nj=4
cleanup=true
num_gselect=15
refine_transforms=true # if true, do a second pass of transform estimation.
# End configuration.

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 <data-dir> <vtln-dir> <exp-dir>"
  echo "e.g.: $0 data/train_novtln exp/vtln exp/train_warps"
  echo "where <vtln-dir> is produced by train_lvtln_model.sh"
  echo "Output is <exp-dir>/spk2warp"
  echo "main options (for others, see top of script file)"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --nj <num-jobs>                                  # number of jobs to use (default 4)"
  echo " --config <config-file>                           # config containing options"
  echo " --stage <stage>                                  # stage to do partial re-run from."
  exit 1;
fi

data=$1
vtlndir=$2
dir=$3

# Check for the inputs we need: features and speaker map in the data dir,
# and the LVTLN transform matrices plus the two diagonal UBMs (adapted and
# alignment versions) from train_lvtln_model.sh.
for f in $data/feats.scp $data/spk2utt $vtlndir/final.lvtln $vtlndir/final.dubm $vtlndir/final.ali_dubm; do
  # Fixed: the message used to say "train_deltas.sh" (copy-paste from
  # another script); report this script's own name instead.
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

if [ -f $data/utt2warp ]; then
  echo "$0: source data directory $data appears to already have VTLN.";
  exit 1;
fi

mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj;
split_data.sh $data $nj || exit 1;
# Options for sliding-window cepstral mean normalization.
cmvn_sliding_opts="--norm-vars=false --center=true --cmn-window=300"
# don't change $cmvn_sliding_opts, it should probably match the
# options used in ../sid/train_diag_ubm.sh and ./train_lvtln_model.sh
# Speaker-independent features: deltas + sliding CMVN, restricted to voiced
# frames via vad.scp, then subsampled by $subsample for CPU efficiency.
sifeats="ark,s,cs:add-deltas scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding $cmvn_sliding_opts ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- | subsample-feats --n=$subsample ark:- ark:- |"
# Adapted features: the same pipeline with the first-pass per-speaker LVTLN
# transforms (trans.0.JOB, created below) applied on top.
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk $dir/trans.0.JOB ark:- ark:- |"
# Stage 0a: pre-compute per-frame Gaussian selection against the
# alignment-model UBM; the indices are reused by both transform-estimation
# passes below, saving computation.
if [ $stage -le 0 ]; then
echo "$0: computing Gaussian selection info."
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
gmm-gselect --n=$num_gselect $vtlndir/final.ali_dubm "$sifeats" \
"ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
# Stage 0b: first pass of LVTLN estimation.  Posteriors are computed with
# the alignment-model UBM over the speaker-independent features, then
# per-speaker transforms (trans.0.*) and warp factors (warp.0.*) are
# estimated.
if [ $stage -le 0 ]; then
  echo "$0: computing initial LVTLN transforms"
  $cmd JOB=1:$nj $dir/log/lvtln.0.JOB.log \
    gmm-global-gselect-to-post $vtlndir/final.ali_dubm "$sifeats" \
      "ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" ark:- \| \
    gmm-global-est-lvtln-trans --spk2utt=$sdata/JOB/spk2utt \
      --logdet-scale=$logdet_scale --verbose=1 \
      $vtlndir/final.dubm $vtlndir/final.lvtln "$sifeats" ark,s,cs:- \
      ark:$dir/trans.0.JOB ark,t:$dir/warp.0.JOB || exit 1
  # consolidate the warps into one file.
  for j in $(seq $nj); do cat $dir/warp.0.$j; done > $dir/warp.0
  rm $dir/warp.0.*
fi

# If no refinement pass was requested, the first-pass warps are the final
# output.  Note: the condition used to be "if $refine_transforms", which was
# inverted -- it exited early exactly when a second pass WAS requested.
if ! $refine_transforms; then
  ln -sf warp.0 $dir/spk2warp
  $cleanup && rm $dir/gselect.*.gz $dir/trans.0.*
  echo "$0: --refine-transforms=false so exiting with current warps."
  echo "$0: Distribution of classes for one job is below."
  grep 'Distribution of classes' $dir/log/lvtln.0.1.log
  exit 0;
fi
# Stage 1: refined LVTLN estimation.  Posteriors are now computed over the
# first-pass-adapted features ("$feats", which applies trans.0.*); the
# refined transforms themselves are discarded (ark:/dev/null) and only the
# warp factors (warp.1.*) are kept.
if [ $stage -le 1 ]; then
  echo "$0: computing refined LVTLN transforms"
  # Fixed: the UBM is read from $vtlndir (where train_lvtln_model.sh put
  # it), not from the output directory $dir.
  $cmd JOB=1:$nj $dir/log/lvtln.1.JOB.log \
    gmm-global-gselect-to-post $vtlndir/final.dubm "$feats" \
      "ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" ark:- \| \
    gmm-global-est-lvtln-trans --spk2utt=$sdata/JOB/spk2utt \
      --logdet-scale=$logdet_scale --verbose=1 \
      $vtlndir/final.dubm $vtlndir/final.lvtln "$sifeats" ark,s,cs:- \
      ark:/dev/null ark,t:$dir/warp.1.JOB || exit 1
  # consolidate the warps into one file.
  for j in $(seq $nj); do cat $dir/warp.1.$j; done > $dir/warp.1
  rm $dir/warp.1.*
  # Compare pass-1 vs pass-2 warps.  Fixed: the files are named warp.0 and
  # warp.1 (as written above), not 0.warp / 1.warp.
  ns1=$(cat $dir/warp.0 | wc -l)
  ns2=$(cat $dir/warp.1 | wc -l)
  ! [ "$ns1" == "$ns2" ] && echo "$0: Number of speakers differ pass1 vs pass2, $ns1 != $ns2" && exit 1;
  # Each warp file has lines "<spk> <warp>"; after paste, $2/$4 are the two
  # warp values.  Report speakers whose warp moved by more than 0.01.
  paste $dir/warp.0 $dir/warp.1 | awk '{x=$2 - $4; if ((x>0?x:-x) > 0.010001) { print $1, $2, $4; }}' > $dir/warp_changed
  nc=$(cat $dir/warp_changed | wc -l)
  echo "$0: For $nc speakers out of $ns1, warp changed pass1 vs pass2 by >0.01, see $dir/warp_changed for details"
fi

$cleanup && rm $dir/gselect.*.gz $dir/trans.0.*

ln -sf warp.1 $dir/spk2warp
echo "$0: created warp factors in $dir/spk2warp"
echo "$0: Distribution of classes for one job is below."
grep 'Distribution of classes' $dir/log/lvtln.1.1.log

# Summarize warning messages...
utils/summarize_warnings.pl $dir/log

echo "$0: Done training LVTLN model in $dir"

Просмотреть файл

@ -172,8 +172,6 @@ if [ $stage -le -3 ]; then
exit 1;
fi
done
rm $dir/final.lvtln 2>/dev/null
ln -s 0.lvtln $dir/final.lvtln
fi
cp $ubmdir/final.dubm $dir/0.dubm

Просмотреть файл

@ -54,16 +54,25 @@ local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train
# max_voiced=3000
# local/vad_split_utts.sh --max-voiced $max_voiced data/train_unsplit $mfccdir data/train
# Vtln-related things:
# We'll use a subset of utterances to train the GMM we'll use for VTLN
# warping.
utils/subset_data_dir.sh data/train 5000 data/train_5k_novtln
use_vtln=true
if $use_vtln; then
for t in train lre07; do
cp -rt data/${t} data/${t}_novtln
rm -r data/${t}_novtln/{split,.backup} 2>/dev/null
steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 100 --cmd "$train_cmd" \
data/${t}_novtln exp/make_mfcc $mfccdir
lid/compute_vad_decision.sh data/${t}_novtln exp/make_mfcc $mfccdir
done
# Vtln-related things:
# We'll use a subset of utterances to train the GMM we'll use for VTLN
# warping.
utils/subset_data_dir.sh data/train_novtln 5000 data/train_novtln_5k
# for the features we use to estimate VTLN warp factors, we use more cepstra
# (13 instead of just 7); this needs to be tuned.
steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 50 --cmd "$train_cmd" \
data/train_5k_novtln exp/make_mfcc $mfccdir
lid/compute_vad_decision.sh data/train_5k_novtln exp/make_mfcc $mfccdir
# note, we're using the speaker-id version of the train_diag_ubm.sh script, which
# uses double-delta instead of SDC features. We train a 256-Gaussian UBM; this
# has to be tuned.
@ -72,7 +81,14 @@ local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train
lid/train_lvtln_model.sh --mfcc-config conf/mfcc_vtln.conf --nj 30 --cmd "$train_cmd" \
data/train_5k_novtln exp/diag_ubm_vtln exp/vtln
)
# Estimate per-speaker VTLN warp factors for each dataset from the
# un-warped (novtln) features, then install them in the corresponding
# data directory so the MFCC extraction below applies the warps.
for t in train lre07; do
  lid/get_vtln_warps.sh --nj 30 --cmd "$train_cmd" \
    data/${t}_novtln exp/vtln exp/${t}_warps
  # Fixed: destination was "${t}/" (a nonexistent cwd-relative directory);
  # the warps belong in the data directory data/${t}.
  cp exp/${t}_warps/spk2warp data/${t}/
done
lid/get_vtln_warps.sh --nj 30 --cmd "$train_cmd" \
data/lre07 exp/vtln exp/train_warps
fi
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
data/train exp/make_mfcc $mfccdir

Просмотреть файл

@ -71,14 +71,18 @@ else
postprocess_config_opt=
fi
# note: in general, the double-parenthesis construct in bash "((" is "C-style
# syntax" where we can get rid of the $ for variable names, and omit spaces.
# The "for" loop in this style is a special construct.
# Build the VTLN options for compute-mfcc-feats.  Initialize to empty so a
# stray $vtln_opts inherited from the environment cannot leak into the
# feature-extraction command when no spk2warp file exists.
vtln_opts=
if [ -f $data/spk2warp ]; then
  echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
  vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
fi
if [ -f $data/segments ]; then
echo "$0 [info]: segments file exists: using that."
split_segments=""
# note: in general, the double-parenthesis construct in bash "((" is "C-style
# syntax" where we can get rid of the $ for variable names, and omit spaces.
# The "for" loop in this style is a special construct.
for ((n=1; n<=nj; n++)); do
split_segments="$split_segments $logdir/segments.$n"
done
@ -86,7 +90,7 @@ if [ -f $data/segments ]; then
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats --verbose=2 --config=$mfcc_config ark:- ark:- |"
mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
@ -104,7 +108,7 @@ else
utils/split_scp.pl $scp $split_scps || exit 1;
mfcc_feats="ark:compute-mfcc-feats --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \