зеркало из https://github.com/mozilla/kaldi.git
sandbox/language_id: script changes for applying VTLN in language-id; not yet tested.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@4174 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
4fd9c20c6a
Коммит
6a91edb723
|
@ -0,0 +1,141 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2014 Daniel Povey
|
||||
# Apache 2.0
|
||||
|
||||
#
|
||||
# This script takes a data directory and a directory computed by
|
||||
# ./train_lvtln_model.sh, and it computes speaker warp-factors spk2warp. It
|
||||
# expects vad.scp to exist in the data directory. Note: like
|
||||
# train_lvtln_model.sh, it uses features of the speaker-id type, i.e. double
|
||||
# delta features with sliding window cepstral mean normalization.
|
||||
|
||||
# Begin configuration.
|
||||
stage=0
|
||||
config=
|
||||
cmd=run.pl
|
||||
logdet_scale=0.0
|
||||
subsample=5 # We use every 5th frame by default; this is more
|
||||
# CPU-efficient.
|
||||
nj=4
|
||||
cleanup=true
|
||||
num_gselect=15
|
||||
refine_transforms=true # if true, do a second pass of transform estimation.
|
||||
# End configuration.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
[ -f path.sh ] && . ./path.sh;
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: $0 <data-dir> <vtln-dir> <exp-dir>"
|
||||
echo "e.g.: $0 data/train_novtln exp/vtln exp/train_warps"
|
||||
echo "where <vtln-dir> is produced by train_lvtln_model.sh"
|
||||
echo "Output is <exp-dir>/spk2warp"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --nj <num-jobs> # number of jobs to use (default 4)"
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --stage <stage> # stage to do partial re-run from."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
vtlndir=$2
|
||||
dir=$3
|
||||
|
||||
# Fail early if any input read later in the script is missing.
# vad.scp is included because the feature pipeline ($sifeats) reads the
# per-job split of it via select-voiced-frames, and the header of this script
# documents it as a required file in the data directory.
for f in $data/feats.scp $data/vad.scp $data/spk2utt \
         $vtlndir/final.lvtln $vtlndir/final.dubm $vtlndir/final.ali_dubm; do
  # Report the error under this script's own name ($0).
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
|
||||
|
||||
if [ -f $data/utt2warp ]; then
|
||||
echo "$0: source data directory $data appears to already have VTLN.";
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
mkdir -p $dir/log
|
||||
echo $nj > $dir/num_jobs
|
||||
|
||||
sdata=$data/split$nj;
|
||||
split_data.sh $data $nj || exit 1;
|
||||
|
||||
cmvn_sliding_opts="--norm-vars=false --center=true --cmn-window=300"
|
||||
# don't change $cmvn_sliding_opts, it should probably match the
|
||||
# options used in ../sid/train_diag_ubm.sh and ./train_lvtln_model.sh
|
||||
|
||||
sifeats="ark,s,cs:add-deltas scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding $cmvn_sliding_opts ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- | subsample-feats --n=$subsample ark:- ark:- |"
|
||||
|
||||
# Second-pass features: the speaker-independent pipeline followed by the
# per-speaker first-pass transforms.  The transforms are written by the first
# pass as a table ("ark:$dir/trans.0.JOB"), so they must be read back with the
# same "ark:" prefix; --utt2spk maps each utterance to its speaker's transform.
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.0.JOB ark:- ark:- |"
|
||||
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
echo "$0: computing Gaussian selection info."
|
||||
|
||||
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
|
||||
gmm-gselect --n=$num_gselect $vtlndir/final.ali_dubm "$sifeats" \
|
||||
"ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
|
||||
fi
|
||||
|
||||
|
||||
if [ $stage -le 0 ]; then
  echo "$0: computing initial LVTLN transforms"

  # First pass, on the speaker-independent features ($sifeats).
  # Posteriors are computed from the gselect info written by the previous step,
  # using the same model that produced it: $vtlndir/final.ali_dubm.  This
  # script never writes a final.ali_dubm into $dir, so it must come from
  # $vtlndir.  --spk2utt is given with an explicit "ark:" prefix so that it is
  # a table rspecifier.
  # Outputs per job: transforms in $dir/trans.0.JOB and text-format warp
  # factors in $dir/warp.0.JOB.
  $cmd JOB=1:$nj $dir/log/lvtln.0.JOB.log \
    gmm-global-gselect-to-post $vtlndir/final.ali_dubm "$sifeats" \
      "ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" ark:- \| \
    gmm-global-est-lvtln-trans --spk2utt=ark:$sdata/JOB/spk2utt \
      --logdet-scale=$logdet_scale --verbose=1 \
      $vtlndir/final.dubm $vtlndir/final.lvtln "$sifeats" ark,s,cs:- \
      ark:$dir/trans.0.JOB ark,t:$dir/warp.0.JOB || exit 1

  # consolidate the warps into one file.
  for j in $(seq $nj); do cat $dir/warp.0.$j; done > $dir/warp.0
  rm $dir/warp.0.*
fi
|
||||
|
||||
# Single-pass mode: when --refine-transforms is false, publish the first-pass
# warps as spk2warp and stop.  The test must be negated; otherwise the default
# (refine_transforms=true) would exit here and the second pass would only run
# when refinement was explicitly disabled.
if ! $refine_transforms; then
  ln -sf warp.0 $dir/spk2warp
  $cleanup && rm $dir/gselect.*.gz $dir/trans.0.*
  echo "$0: --refine-transforms=false so exiting with current warps."
  echo "$0: Distribution of classes for one job is below."
  grep 'Distribution of classes' $dir/log/lvtln.0.1.log
  exit 0;
fi
|
||||
|
||||
if [ $stage -le 1 ]; then
  echo "$0: computing refined LVTLN transforms"

  # Second pass: posteriors are computed on the first-pass-transformed
  # features ($feats), while the warp itself is re-estimated on the
  # untransformed features ($sifeats).  The model lives in $vtlndir (this
  # script never writes a final.dubm into $dir).  Only the warp factors are
  # kept; the transforms go to /dev/null.
  $cmd JOB=1:$nj $dir/log/lvtln.1.JOB.log \
    gmm-global-gselect-to-post $vtlndir/final.dubm "$feats" \
      "ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" ark:- \| \
    gmm-global-est-lvtln-trans --spk2utt=ark:$sdata/JOB/spk2utt \
      --logdet-scale=$logdet_scale --verbose=1 \
      $vtlndir/final.dubm $vtlndir/final.lvtln "$sifeats" ark,s,cs:- \
      ark:/dev/null ark,t:$dir/warp.1.JOB || exit 1

  # consolidate the warps into one file.
  for j in $(seq $nj); do cat $dir/warp.1.$j; done > $dir/warp.1
  rm $dir/warp.1.*

  # Compare the two passes.  The consolidated files are named warp.0 and
  # warp.1 (as written above and by the first pass).
  ns1=$(wc -l < $dir/warp.0)
  ns2=$(wc -l < $dir/warp.1)
  ! [ "$ns1" == "$ns2" ] && echo "$0: Number of speakers differ pass1 vs pass2, $ns1 != $ns2" && exit 1;

  # After paste, columns are: spk warp-pass1 spk warp-pass2.  List speakers
  # whose warp moved by more than 0.01 (with a little slack for rounding).
  paste $dir/warp.0 $dir/warp.1 | awk '{x=$2 - $4; if ((x>0?x:-x) > 0.010001) { print $1, $2, $4; }}' > $dir/warp_changed
  nc=$(wc -l < $dir/warp_changed)
  echo "$0: For $nc speakers out of $ns1, warp changed pass1 vs pass2 by >0.01, see $dir/warp_changed for details"
fi
|
||||
|
||||
$cleanup && rm $dir/gselect.*.gz $dir/trans.0.*
|
||||
|
||||
ln -sf warp.1 $dir/spk2warp
|
||||
|
||||
echo "$0: created warp factors in $dir/spk2warp"
|
||||
|
||||
echo "$0: Distribution of classes for one job is below."
|
||||
grep 'Distribution of classes' $dir/log/lvtln.1.1.log
|
||||
|
||||
# Summarize warning messages...
|
||||
utils/summarize_warnings.pl $dir/log
|
||||
|
||||
# Final status line.  This script computes warp factors (spk2warp) from an
# existing LVTLN model; it does not train one.
echo "$0: Done computing VTLN warp factors in $dir"
|
|
@ -172,8 +172,6 @@ if [ $stage -le -3 ]; then
|
|||
exit 1;
|
||||
fi
|
||||
done
|
||||
rm $dir/final.lvtln 2>/dev/null
|
||||
ln -s 0.lvtln $dir/final.lvtln
|
||||
fi
|
||||
|
||||
cp $ubmdir/final.dubm $dir/0.dubm
|
||||
|
|
|
@ -54,16 +54,25 @@ local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train
|
|||
# max_voiced=3000
|
||||
# local/vad_split_utts.sh --max-voiced $max_voiced data/train_unsplit $mfccdir data/train
|
||||
|
||||
# Vtln-related things:
|
||||
# We'll use a subset of utterances to train the GMM we'll use for VTLN
|
||||
# warping.
|
||||
utils/subset_data_dir.sh data/train 5000 data/train_5k_novtln
|
||||
use_vtln=true
|
||||
if $use_vtln; then
|
||||
# For each data set, make a separate "_novtln" copy of the data directory and
# compute the VTLN-estimation features (conf/mfcc_vtln.conf) and VAD on it.
for t in train lre07; do
  # Source first, destination second.  With "cp -rt A B" the -t option makes A
  # the target directory, which would copy B into A instead.
  cp -r data/${t} data/${t}_novtln
  # Drop leftovers copied from the source directory; errors are silenced
  # because these entries may not exist.
  rm -r data/${t}_novtln/{split,.backup} 2>/dev/null
  steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 100 --cmd "$train_cmd" \
    data/${t}_novtln exp/make_mfcc $mfccdir
  lid/compute_vad_decision.sh data/${t}_novtln exp/make_mfcc $mfccdir
done
|
||||
# Vtln-related things:
|
||||
# We'll use a subset of utterances to train the GMM we'll use for VTLN
|
||||
# warping.
|
||||
utils/subset_data_dir.sh data/train_novtln 5000 data/train_novtln_5k
|
||||
|
||||
# for the features we use to estimate VTLN warp factors, we use more cepstra
|
||||
# (13 instead of just 7); this needs to be tuned.
|
||||
steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 50 --cmd "$train_cmd" \
|
||||
data/train_5k_novtln exp/make_mfcc $mfccdir
|
||||
lid/compute_vad_decision.sh data/train_5k_novtln exp/make_mfcc $mfccdir
|
||||
|
||||
# note, we're using the speaker-id version of the train_diag_ubm.sh script, which
|
||||
# uses double-delta instead of SDC features. We train a 256-Gaussian UBM; this
|
||||
# has to be tuned.
|
||||
|
@ -72,7 +81,14 @@ local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train
|
|||
lid/train_lvtln_model.sh --mfcc-config conf/mfcc_vtln.conf --nj 30 --cmd "$train_cmd" \
|
||||
data/train_5k_novtln exp/diag_ubm_vtln exp/vtln
|
||||
|
||||
)
|
||||
# Estimate per-speaker warp factors on the "_novtln" copy of each data set,
# then install them into the real data directory.
for t in train lre07; do
  lid/get_vtln_warps.sh --nj 30 --cmd "$train_cmd" \
    data/${t}_novtln exp/vtln exp/${t}_warps
  # Data directories live under data/, so the destination needs that prefix.
  cp exp/${t}_warps/spk2warp data/${t}/
done
|
||||
lid/get_vtln_warps.sh --nj 30 --cmd "$train_cmd" \
|
||||
data/lre07 exp/vtln exp/train_warps
|
||||
fi
|
||||
|
||||
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
|
||||
data/train exp/make_mfcc $mfccdir
|
||||
|
|
|
@ -71,14 +71,18 @@ else
|
|||
postprocess_config_opt=
|
||||
fi
|
||||
|
||||
# note: in general, the double-parenthesis construct in bash "((" is "C-style
|
||||
# syntax" where we can get rid of the $ for variable names, and omit spaces.
|
||||
# The "for" loop in this style is a special construct.
|
||||
if [ -f $data/spk2warp ]; then
|
||||
echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
|
||||
vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
|
||||
fi
|
||||
|
||||
|
||||
if [ -f $data/segments ]; then
|
||||
echo "$0 [info]: segments file exists: using that."
|
||||
split_segments=""
|
||||
# note: in general, the double-parenthesis construct in bash "((" is "C-style
|
||||
# syntax" where we can get rid of the $ for variable names, and omit spaces.
|
||||
# The "for" loop in this style is a special construct.
|
||||
for ((n=1; n<=nj; n++)); do
|
||||
split_segments="$split_segments $logdir/segments.$n"
|
||||
done
|
||||
|
@ -86,7 +90,7 @@ if [ -f $data/segments ]; then
|
|||
utils/split_scp.pl $data/segments $split_segments || exit 1;
|
||||
rm $logdir/.error 2>/dev/null
|
||||
|
||||
mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats --verbose=2 --config=$mfcc_config ark:- ark:- |"
|
||||
mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
|
||||
pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
|
||||
|
||||
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
|
||||
|
@ -104,7 +108,7 @@ else
|
|||
|
||||
utils/split_scp.pl $scp $split_scps || exit 1;
|
||||
|
||||
mfcc_feats="ark:compute-mfcc-feats --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
|
||||
mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
|
||||
pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
|
||||
|
||||
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
|
||||
|
|
Загрузка…
Ссылка в новой задаче