зеркало из https://github.com/mozilla/kaldi.git
sandbox/language_id: Adding vad-based utterance splitting scripts in lid setup
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3826 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
f39b8993ab
Коммит
7954900219
|
@ -0,0 +1,57 @@
|
|||
#!/bin/bash
#
# Splits the utterances of a data directory into shorter pieces using the
# voice activity detection (VAD) decisions in <in-data-dir>/vad.scp.
#
# Usage: vad_split_utts.sh [options] <in-data-dir> <split-mfcc-out-dir> <out-data-dir>
#
# Inputs  : <in-data-dir>/{utt2spk,spk2utt,wav.scp,utt2lang,feats.scp,vad.scp}
# Outputs : <out-data-dir>/{frame_indexed_segments,feats.scp,vad.scp,utt2lang,
#           utt2spk,spk2utt,segments,wav.scp}, plus the archives
#           <split-mfcc-out-dir>/{raw_mfcc_split.ark,vad_split.ark}.
#
# Must be run from the recipe directory (it calls utils/ and local/ scripts by
# relative path) with the Kaldi binaries used below on the PATH.

# Options, overridable on the command line via utils/parse_options.sh.
max_voiced=3000  # forwarded to create-split-from-vad as --max-voiced
                 # (presumably the max number of voiced frames per piece --
                 # TODO confirm against the binary's usage message).
stage=0          # stages with a lower number than this are skipped.
cleanup=true     # if true, $dir/temp is removed at the end.

. utils/parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <in-data-dir> <split-mfcc-out-dir> <out-data-dir>"
  echo "e.g.: $0 --max-voiced 3000 data/train mfcc data/train_split"
  echo "This script splits up long utterances into smaller pieces."
  echo "It assumes the wav.scp has a certain form, with .sph"
  echo "files in it (so the script is not completely general)."
  exit 1;
fi

in_dir=$1
mfccdir=$2
dir=$3

for f in "$in_dir"/{utt2spk,spk2utt,wav.scp,utt2lang,feats.scp,vad.scp}; do
  if [ ! -f "$f" ]; then
    echo "$0: expected input file $f to exist";
    exit 1;
  fi
done

# Create the output directories regardless of --stage, so that resuming from
# a later stage does not fail on a missing directory.
mkdir -p "$dir/temp" "$mfccdir" || exit 1;

if [ $stage -le 0 ]; then
  utils/validate_data_dir.sh --no-text "$in_dir" || exit 1;
fi

if [ $stage -le 1 ]; then
  # Work out the split points from the VAD decisions.  The resulting file has
  # lines of the form "<split-utt> <utt> <start-frame> <end-frame>", which is
  # how local/vad_split_utts_fix_data.pl parses it.
  create-split-from-vad --max-voiced=$max_voiced "scp:$in_dir/vad.scp" \
    "$dir/frame_indexed_segments" || exit 1;

  # Cut the features and the VAD vectors according to those split points.
  extract-rows "$dir/frame_indexed_segments" "scp:$in_dir/feats.scp" \
    "ark,scp:$mfccdir/raw_mfcc_split.ark,$dir/feats.scp" || exit 1;

  copy-vector-segments "$dir/frame_indexed_segments" "scp:$in_dir/vad.scp" \
    "ark,scp:$mfccdir/vad_split.ark,$dir/temp/vad.scp" || exit 1;

  sort "$dir/temp/vad.scp" > "$dir/vad.scp" || exit 1;
fi

if [ $stage -le 2 ]; then
  # Writes utt2lang, utt2spk and segments into $dir.
  local/vad_split_utts_fix_data.pl "$in_dir" "$dir" || exit 1;
fi

# Keep only the wav.scp entries whose id appears in the second column of the
# new segments file.
utils/filter_scp.pl -f 0 \
  <(awk '{ print $2 }' "$dir/segments") "$in_dir/wav.scp" \
  > "$dir/wav.scp" || exit 1;

utils/utt2spk_to_spk2utt.pl "$dir/utt2spk" > "$dir/spk2utt" || exit 1;
utils/validate_data_dir.sh --no-text --no-feats "$dir" || exit 1;

if $cleanup; then
  rm -r "$dir/temp"
fi

exit 0;
|
|
@ -0,0 +1,50 @@
|
|||
#! /usr/bin/perl
#
# Copyright 2014  David Snyder
# Apache 2.0.
#
# Writes the utt2lang, utt2spk and segments files of a split data directory
# from <out-data-dir>/frame_indexed_segments, then runs utils/fix_data_dir.sh
# on <out-data-dir>.
#
# Each line of frame_indexed_segments is parsed as
#   <split-utt-id> <orig-utt-id> <start-frame> <end-frame>
# and produces one line in each output file:
#   utt2lang : <split-utt-id> <language of orig-utt-id>
#   utt2spk  : <split-utt-id> <orig-utt-id>
#   segments : <split-utt-id> <orig-utt-id> <start-time> <end-time>

use strict;
use warnings;

if (@ARGV != 2) {
  print STDERR "Usage: $0 <in-data-dir> <out-data-dir>\n";
  print STDERR "e.g. $0 data/train_unsplit data/train\n";
  exit(1);
}

my ($in_dir, $out_dir) = @ARGV;

# Multiplier that converts a frame index into a time for the segments file.
# NOTE(review): assumes a 10ms frame shift -- confirm against the feature
# configuration used to produce feats.scp.
my $frame_shift = 0.01;

# Reads a whitespace-separated "<key> <value>" file and returns a reference
# to a hash mapping each key to its value.  Blank lines are skipped.
sub read_map {
  my ($path) = @_;
  my %map;
  open(my $fh, '<', $path) or die "Cannot open $path: $!";
  while (my $line = <$fh>) {
    my ($key, $value) = split(" ", $line);
    next unless defined $key;
    $map{$key} = $value;
  }
  close($fh) or die "Error closing $path: $!";
  return \%map;
}

# Opens $path for writing and returns the filehandle; dies on failure.
sub open_output {
  my ($path) = @_;
  open(my $fh, '>', $path) or die "Cannot open $path for writing: $!";
  return $fh;
}

my $utt2lang = read_map("$in_dir/utt2lang");

# NOTE(review): utt2spk is still read (so a missing file remains fatal), but
# its contents are not consulted: the output utt2spk uses the original
# utterance id as the speaker of every piece.  Confirm this is intended.
my $utt2spk = read_map("$in_dir/utt2spk");

my $featseg_path = "$out_dir/frame_indexed_segments";
open(my $featseg, '<', $featseg_path)
  or die "Unable to open $featseg_path: $!";

my $utt2lang_out = open_output("$out_dir/utt2lang");
my $utt2spk_out  = open_output("$out_dir/utt2spk");
my $segments_out = open_output("$out_dir/segments");

while (my $seg = <$featseg>) {
  my ($split_utt, $utt, $start, $end) = split(" ", $seg);
  next unless defined $split_utt;   # blank line
  defined $end
    or die "Malformed line $. in $featseg_path: $seg";
  # Fail loudly rather than emit an utt2lang line with an empty language.
  defined $utt2lang->{$utt}
    or die "Utterance $utt (line $. of $featseg_path) has no entry in "
         . "$in_dir/utt2lang";

  print $utt2lang_out "$split_utt $utt2lang->{$utt}\n";
  print $utt2spk_out "$split_utt $utt\n";

  my $start_t = $start * $frame_shift;
  my $end_t = $end * $frame_shift;
  print $segments_out "$split_utt $utt $start_t $end_t\n";
}

close($featseg) or die "Error closing $featseg_path: $!";
# Buffered write errors only surface at close, so check every output handle.
close($utt2lang_out) or die "Error closing $out_dir/utt2lang: $!";
close($utt2spk_out) or die "Error closing $out_dir/utt2spk: $!";
close($segments_out) or die "Error closing $out_dir/segments: $!";

# List form avoids passing $out_dir through a shell; a failure of the fix-up
# step is now reported instead of being silently ignored.
system("utils/fix_data_dir.sh", $out_dir) == 0
  or die "utils/fix_data_dir.sh $out_dir failed (status $?)";
|
|
@ -2,8 +2,7 @@
|
|||
# Copyright 2014 David Snyder
|
||||
# Apache 2.0.
|
||||
#
|
||||
# An incomplete run.sh for this example. Currently this only trains up a gender
|
||||
# independent UBM and ivector with the SRE08 training data.
|
||||
# An incomplete run.sh for this example.
|
||||
|
||||
. cmd.sh
|
||||
. path.sh
|
||||
|
@ -49,9 +48,11 @@ rm foo
|
|||
|
||||
local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train
|
||||
|
||||
##
|
||||
## HERE
|
||||
##
|
||||
# This commented script is an alternative to the above utterance
|
||||
# splitting method. Here we split the utterance based on the number of
|
||||
# frames which are voiced, rather than the total number of frames.
|
||||
# max_voiced=3000
|
||||
# local/vad_split_utts.sh --max-voiced $max_voiced data/train_unsplit $mfccdir data/train
|
||||
|
||||
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
|
||||
data/train exp/make_mfcc $mfccdir
|
||||
|
|
Загрузка…
Ссылка в новой задаче