git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3291 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Chao Weng 2013-12-10 19:29:00 +00:00
Parent 57fe6cc8ee
Commit 94c4646aba
236 changed files with 33644 additions and 0 deletions

egs/chime_wsj0/s5/cmd.sh Normal file
@@ -0,0 +1,29 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.)
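# Example of how these variables are used by the steps/ and local/ scripts
# (a sketch; the log path and command below are illustrative only):
#   $train_cmd JOB=1:4 exp/mono/log/acc.JOB.log some-kaldi-command ...
# Both run.pl and queue.pl follow this calling convention: an optional JOB range,
# then the log file, then the command to run.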
#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
#export cuda_cmd="..."
#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"
#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl

@@ -0,0 +1,2 @@
beam=18.0 # beam for decoding. Was 13.0 in the scripts.
latbeam=10.0 # this has most effect on size of the lattices.

@@ -0,0 +1,11 @@
# Filterbank feature options (the non-default settings are below).
--window-type=hamming # disable Dan's window, use the standard Hamming window
--use-energy=false # only fbank outputs
--sample-frequency=16000 # WSJ data is sampled at 16kHz
--low-freq=64 # typical setup from Frantisek Grezl
--high-freq=8000
--dither=1
--num-mel-bins=40 # use 40 mel bins
--htk-compat=true # try to make it compatible with HTK

@@ -0,0 +1 @@
--use-energy=false # only non-default option.

@@ -0,0 +1,117 @@
#!/bin/bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# This is modified from the script in the standard Kaldi recipe to account
# for the way the WSJ data is structured on the Edinburgh systems.
# - Arnab Ghoshal, 29/05/12
if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0`
echo "The argument should be a the top-level WSJ corpus directory."
echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory"
echo "within the top-level corpus directory."
exit 1;
fi
CORPUS=$1
dir=`pwd`/data/local/data
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1;
fi
cd $dir
# binmask file list for SI-84
find $1/si_tr_s -name '*.wav' | sort -u > train_si84_binmask.flist
# Dev-set Hub 1,2 (503, 913 utterances)
# Sometimes this gets copied from the CD's with upcasing, don't know
# why (could be older versions of the disks).
find $1/si_dt_05 -name '*.wav' | sort -u > dev_dt_05_binmask.flist
find $1/si_et_05 -name '*.wav' | sort -u > test_eval92_5k_binmask.flist
# Finding the transcript files:
#find -L $CORPUS -iname '*.dot' > dot_files.flist
if [ ! -e $dir/dot_files.flist ]; then
echo "Could not find $dir/dot_files.flist files, first run clean_data_prep.sh";
exit 1;
fi
# Convert the transcripts into our format (no normalization yet)
# adding a suffix to utt_id
# (8, 9, a, b, c or d, depending on the SNR condition; see below)
for x in train_si84_binmask dev_dt_05_binmask test_eval92_5k_binmask; do
cat $x.flist | perl -e '
while(<>) {
m:^\S+/(\w+)\.wav$: || die "Bad line $_";
$id = $1;
$id =~ tr/A-Z/a-z/;
print "$id $_";
}
' | sort > ${x}_wav_tmp.scp
#cat ${x}_wav_tmp.scp | awk '{print $1}' \
# | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1
cat ${x}_wav_tmp.scp | perl -e '
while(<STDIN>) {
@A=split(" ", $_);
@B=split("/", $_);
$abs_path_len=@B;
$condition=$B[$abs_path_len-3];
if ($condition eq "9dB") {$key_suffix="8";}
elsif ($condition eq "6dB") {$key_suffix="9";}
elsif ($condition eq "3dB") {$key_suffix="a";}
elsif ($condition eq "0dB") {$key_suffix="b";}
elsif ($condition eq "m3dB") {$key_suffix="c";}
elsif ($condition eq "m6dB") {$key_suffix="d";}
else {print STDERR "error: unknown condition $condition\n";}
print $A[0].$key_suffix." ".$A[1]."\n";
}
' | sort -k1 > ${x}_wav.scp
cat ${x}_wav.scp | awk '{print $1}' \
| $local/find_noisy_transcripts.pl dot_files.flist > ${x}.trans1
done
# Do some basic normalization steps. At this point we don't remove OOVs--
# that will be done inside the training scripts, as we'd like to make the
# data-preparation stage independent of the specific lexicon used.
noiseword="<NOISE>";
for x in train_si84_binmask dev_dt_05_binmask test_eval92_5k_binmask; do
cat $x.trans1 | $local/normalize_transcript.pl $noiseword \
| sort > $x.txt || exit 1;
done
# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.)
#for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
# awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \
# > ${x}_wav.scp
#done
# Make the utt2spk and spk2utt files.
for x in train_si84_binmask dev_dt_05_binmask test_eval92_5k_binmask; do
cat ${x}_wav.scp | awk '{print $1}' \
| perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk
cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;
done
echo "Data preparation succeeded"

@@ -0,0 +1,86 @@
#!/bin/bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# This script takes data prepared in a corpus-dependent way
# in data/local/, and converts it into the "canonical" form,
# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug,
# data/train_si284, data/train_si84, etc.
# Don't bother doing train_si84 separately (although we have the file lists
# in data/local/) because it's just the first 7138 utterances in train_si284.
# We'll create train_si84 after doing the feature extraction.
. ./path.sh || exit 1;
echo "Preparing train and test data"
srcdir=data/local/data
lmdir=data/local/nist_lm
tmpdir=data/local/lm_tmp
lexicon=data/local/lang_tmp/lexiconp.txt
mkdir -p $tmpdir
for x in test_eval92_clean test_eval92_noisy test_eval92_5k_clean test_eval92_5k_noisy dev_dt_05_clean dev_dt_05_reverb dev_dt_05_noisy dev_dt_20_clean dev_dt_20_reverb dev_dt_20_noisy train_si84_clean train_si84_reverb train_si84_noisy; do
mkdir -p data/$x
cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
cp $srcdir/$x.txt data/$x/text || exit 1;
cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
utils/filter_scp.pl data/$x/spk2utt $srcdir/spk2gender > data/$x/spk2gender || exit 1;
done
# Next, for each type of language model, create the corresponding FST
# and the corresponding lang_test_* directory.
echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
mkdir -p $test
for f in phones.txt words.txt L.fst L_disambig.fst \
phones/; do
cp -r data/lang/$f $test
done
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt
# grep -v '<s> <s>' because the LM seems to have some strange and useless
# stuff in it with multiple <s>'s in the history. Encountered some other similar
# things in a LM from Geoff. Removing all "illegal" combinations of <s> and </s>,
# which are supposed to occur only at begin/end of utt. These can cause
# determinization failures of CLG [ends up being epsilon cycles].
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
grep -v '<s> <s>' | \
grep -v '</s> <s>' | \
grep -v '</s> </s>' | \
arpa2fst - | fstprint | \
utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
--osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > $test/G.fst
fstisstochastic $test/G.fst
# The output is like:
# 9.14233e-05 -0.259833
# we do expect the first of these 2 numbers to be close to zero (the second is
# nonzero because the backoff weights make the states sum to >1).
# Because of the <s> fiasco for these particular LMs, the first number is not
# as close to zero as it could be.
# Everything below is only for diagnostics.
# Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
# this might cause determinization failure of CLG.
# #0 is treated as an empty word.
mkdir -p $tmpdir/g
awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
< "$lexicon" >$tmpdir/g/select_empty.fst.txt
fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt $tmpdir/g/select_empty.fst.txt | \
fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst
fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' &&
echo "Language model has cycles with empty words" && exit 1
rm -r $tmpdir/g
done
echo "Succeeded in formatting data."
rm -r $tmpdir

@@ -0,0 +1,190 @@
#!/bin/bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# This is modified from the script in the standard Kaldi recipe to account
# for the way the WSJ data is structured on the Edinburgh systems.
# - Arnab Ghoshal, 29/05/12
if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0`
echo "The argument should be a the top-level WSJ corpus directory."
echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory"
echo "within the top-level corpus directory."
exit 1;
fi
CORPUS=$1
dir=`pwd`/data/local/data
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1;
fi
cd $dir
# This version for SI-84
cat $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \
| $local/cstr_ndx2flist.pl $CORPUS | sort -u > train_si84_clean.flist
# This version for SI-284
#cat $CORPUS/wsj1/doc/indices/si_tr_s.ndx \
# $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \
# | $local/cstr_ndx2flist.pl $CORPUS | sort \
# | grep -v wsj0/si_tr_s/401 > train_si284.flist
# Now for the test sets.
# $CORPUS/wsj1/doc/indices/readme.doc
# describes all the different test sets.
# Note: each test-set seems to come in multiple versions depending
# on different vocabulary sizes, verbalized vs. non-verbalized
# pronunciations, etc. We use the largest vocab and non-verbalized
# pronunciations.
# The most normal one seems to be the "baseline 60k test set", which
# is h1_p0.
# Nov'92 (333 utts)
# These index files have a slightly different format;
# have to add .wv1, which is done in cstr_ndx2flist.pl
cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_20.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92_clean.flist
# Nov'92 (330 utts, 5k vocab)
cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_05.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92_5k_clean.flist
# Nov'93: (213 utts)
# Have to replace a wrong disk-id.
#cat $CORPUS/wsj1/doc/indices/wsj1/eval/h1_p0.ndx | \
# $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93.flist
# Nov'93: (215 utts, 5k)
#cat $CORPUS/wsj1/doc/indices/wsj1/eval/h2_p0.ndx | \
# $local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93_5k.flist
# Dev-set for Nov'93 (503 utts)
#cat $CORPUS/wsj1/doc/indices/h1_p0.ndx | \
# $local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93.flist
# Dev-set for Nov'93 (513 utts, 5k vocab)
#cat $CORPUS/wsj1/doc/indices/h2_p0.ndx | \
# $local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93_5k.flist
# Dev-set Hub 1,2 (503, 913 utterances)
# Sometimes this gets copied from the CD's with upcasing, don't know
# why (could be older versions of the disks).
find $CORPUS/wsj0/si_dt_20 -print | grep -i ".wv1" | sort > dev_dt_20_clean.flist
find $CORPUS/wsj0/si_dt_05 -print | grep -i ".wv1" | sort > dev_dt_05_clean.flist
# Finding the transcript files:
find -L $CORPUS -iname '*.dot' > dot_files.flist
# Convert the transcripts into our format (no normalization yet)
# adding suffix to utt_id
# 0 for clean condition
for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
$local/flist2scp.pl $x.flist | sort > ${x}_sph_tmp.scp
cat ${x}_sph_tmp.scp | awk '{print $1}' \
| $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1
cat ${x}_sph_tmp.scp | awk '{printf("%s0 %s\n", $1, $2);}' > ${x}_sph.scp
cat ${x}_tmp.trans1 | awk '{printf("%s0 ", $1); for(i=2;i<=NF;i++) printf("%s ", $i); printf("\n");}' > ${x}.trans1
done
# Do some basic normalization steps. At this point we don't remove OOVs--
# that will be done inside the training scripts, as we'd like to make the
# data-preparation stage independent of the specific lexicon used.
noiseword="<NOISE>";
for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
cat $x.trans1 | $local/normalize_transcript.pl $noiseword \
| sort > $x.txt || exit 1;
done
# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.)
for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \
> ${x}_wav.scp
done
# Make the utt2spk and spk2utt files.
for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
cat ${x}_sph.scp | awk '{print $1}' \
| perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk
cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;
done
# In case we want to limit the LMs to the most frequent words, copy the LM training word-frequency list.
cp $CORPUS/wsj0/doc/lng_modl/vocab/wfl_64.lst $lmdir
chmod u+w $lmdir/*.lst # had weird permissions on source.
# The 20K vocab, open-vocabulary language model (i.e. the one with UNK), without
# verbalized pronunciations. This is the most common test setup, I understand.
cp $CORPUS/wsj0/doc/lng_modl/base_lm/bcb20onp.z $lmdir/lm_bg.arpa.gz || exit 1;
chmod u+w $lmdir/lm_bg.arpa.gz
# trigram would be:
cat $CORPUS/wsj0/doc/lng_modl/base_lm/tcb20onp.z | \
perl -e 'while(<>){ if(m/^\\data\\/){ print; last; } } while(<>){ print; }' \
| gzip -c -f > $lmdir/lm_tg.arpa.gz || exit 1;
prune-lm --threshold=1e-7 $lmdir/lm_tg.arpa.gz $lmdir/lm_tgpr.arpa || exit 1;
gzip -f $lmdir/lm_tgpr.arpa || exit 1;
# repeat for 5k language models
cp $CORPUS/wsj0/doc/lng_modl/base_lm/bcb05onp.z $lmdir/lm_bg_5k.arpa.gz || exit 1;
chmod u+w $lmdir/lm_bg_5k.arpa.gz
# trigram would be: !only closed vocabulary here!
cp $CORPUS/wsj0/doc/lng_modl/base_lm/tcb05cnp.z $lmdir/lm_tg_5k.arpa.gz || exit 1;
chmod u+w $lmdir/lm_tg_5k.arpa.gz
gunzip $lmdir/lm_tg_5k.arpa.gz
tail -n 4328839 $lmdir/lm_tg_5k.arpa | gzip -c -f > $lmdir/lm_tg_5k.arpa.gz
rm $lmdir/lm_tg_5k.arpa
prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit 1;
gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1;
if [ ! -f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then
rm -f wsj0-train-spkrinfo.txt
wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt \
|| ( echo "Getting wsj0-train-spkrinfo.txt from backup location" && \
wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt );
fi
if [ ! -f wsj0-train-spkrinfo.txt ]; then
echo "Could not get the spkrinfo.txt file from LDC website (moved)?"
echo "This is possibly omitted from the training disks; couldn't find it."
echo "Everything else may have worked; we just may be missing gender info"
echo "which is only needed for VTLN-related diagnostics anyway."
exit 1
fi
# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the
# LDC put it on the web. Perhaps it was accidentally omitted from the
# disks.
cat $CORPUS/wsj0/doc/spkrinfo.txt \
./wsj0-train-spkrinfo.txt | \
perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \
awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender
echo "Data preparation succeeded"

@@ -0,0 +1,13 @@
#!/bin/bash
. path.sh
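# Usage sketch (argument names inferred from the variables below):
#   <this-script> <data-dir> <old-ali-dir> <mix-ali-dir>
# i.e. a data directory containing feats.scp, an existing alignment directory,
# and an output directory for the copied alignments.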
data=$1
old_ali_dir=$2
mix_ali_dir=$3
mkdir -p $mix_ali_dir
cp $old_ali_dir/{final.mdl,num_jobs,tree} $mix_ali_dir/
gunzip -c $old_ali_dir/ali.*.gz | gzip -c > $old_ali_dir/ali.gz
feats="ark,s,cs:copy-feats scp:$data/feats.scp ark:- |"
copy-clean-ali "$feats" "ark:gunzip -c $old_ali_dir/ali.gz |" "ark:| gzip -c > $mix_ali_dir/ali.1.gz"

@@ -0,0 +1,54 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This is modified from the script in the standard Kaldi recipe to account
# for the way the WSJ data is structured on the Edinburgh systems.
# - Arnab Ghoshal, 12/1/12
# This program takes as its standard input an .ndx file from the WSJ corpus that looks
# like this:
#;; File: tr_s_wv1.ndx, updated 04/26/94
#;;
#;; Index for WSJ0 SI-short Sennheiser training data
#;; Data is read WSJ sentences, Sennheiser mic.
#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts
#;; per speaker TI) = 7236 utts
#;;
#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1
# and as command-line argument it takes the names of the WSJ disk locations, e.g.:
# /group/corpora/public/wsjcam0/data on DICE machines.
# It outputs a list of absolute pathnames.
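# For example (assuming the corpus root given above), the index line
#   11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1
# would be printed as
#   /group/corpora/public/wsjcam0/data/wsj0/si_tr_s/01i/01ic0201.wv1
# provided that file exists on disk.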
$wsj_dir = $ARGV[0];
while(<STDIN>){
if(m/^;/){ next; } # Comment. Ignore it.
else {
m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_";
$filename = $2; # as a subdirectory of the distributed disk.
if ($filename !~ m/\.wv1$/) { $filename .= ".wv1"; }
$filename = "$wsj_dir/$filename";
if (-e $filename) {
print "$filename\n";
} else {
print STDERR "File $filename found in the index but not on disk\n";
}
}
}

@@ -0,0 +1,187 @@
#!/bin/bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# This is modified from the script in the standard Kaldi recipe to account
# for the way the WSJ data is structured on the Edinburgh systems.
# - Arnab Ghoshal, 29/05/12
if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0`
echo "The argument should be a the top-level WSJ corpus directory."
echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory"
echo "within the top-level corpus directory."
exit 1;
fi
CORPUS=$1
dir=`pwd`/data/local/data
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1;
fi
cd $dir
# This version for SI-84
cat $CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \
| $local/cstr_ndx2flist.pl $CORPUS | sort \
| grep -v wsj0/si_tr_s/401 > train_si84.flist
# This version for SI-284
cat $CORPUS/wsj1/doc/indices/si_tr_s.ndx \
$CORPUS/wsj0/doc/indices/train/tr_s_wv1.ndx \
| $local/cstr_ndx2flist.pl $CORPUS | sort \
| grep -v wsj0/si_tr_s/401 > train_si284.flist
# Now for the test sets.
# $CORPUS/wsj1/doc/indices/readme.doc
# describes all the different test sets.
# Note: each test-set seems to come in multiple versions depending
# on different vocabulary sizes, verbalized vs. non-verbalized
# pronunciations, etc. We use the largest vocab and non-verbalized
# pronunciations.
# The most normal one seems to be the "baseline 60k test set", which
# is h1_p0.
# Nov'92 (333 utts)
# These index files have a slightly different format;
# have to add .wv1, which is done in cstr_ndx2flist.pl
cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_20.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92.flist
# Nov'92 (330 utts, 5k vocab)
cat $CORPUS/wsj0/doc/indices/test/nvp/si_et_05.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_eval92_5k.flist
# Nov'93: (213 utts)
# Have to replace a wrong disk-id.
cat $CORPUS/wsj1/doc/indices/wsj1/eval/h1_p0.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93.flist
# Nov'93: (215 utts, 5k)
cat $CORPUS/wsj1/doc/indices/wsj1/eval/h2_p0.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_eval93_5k.flist
# Dev-set for Nov'93 (503 utts)
cat $CORPUS/wsj1/doc/indices/h1_p0.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93.flist
# Dev-set for Nov'93 (513 utts, 5k vocab)
cat $CORPUS/wsj1/doc/indices/h2_p0.ndx | \
$local/cstr_ndx2flist.pl $CORPUS | sort > test_dev93_5k.flist
# Dev-set Hub 1,2 (503, 913 utterances)
# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt.
# Sometimes this gets copied from the CD's with upcasing, don't know
# why (could be older versions of the disks).
find $CORPUS/???1/??_??_20 -print | grep -i ".wv1" | sort > dev_dt_20.flist
find $CORPUS/???1/??_??_05 -print | grep -i ".wv1" | sort > dev_dt_05.flist
# Finding the transcript files:
find -L $CORPUS -iname '*.dot' > dot_files.flist
# Convert the transcripts into our format (no normalization yet)
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
$local/flist2scp.pl $x.flist | sort > ${x}_sph.scp
cat ${x}_sph.scp | awk '{print $1}' \
| $local/find_transcripts.pl dot_files.flist > $x.trans1
done
# Do some basic normalization steps. At this point we don't remove OOVs--
# that will be done inside the training scripts, as we'd like to make the
# data-preparation stage independent of the specific lexicon used.
noiseword="<NOISE>";
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
cat $x.trans1 | $local/normalize_transcript.pl $noiseword \
| sort > $x.txt || exit 1;
done
# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.)
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \
> ${x}_wav.scp
done
# Make the utt2spk and spk2utt files.
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
cat ${x}_sph.scp | awk '{print $1}' \
| perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk
cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;
done
# In case we want to limit the LMs to the most frequent words, copy the LM training word-frequency list.
cp $CORPUS/wsj1/doc/lng_modl/vocab/wfl_64.lst $lmdir
chmod u+w $lmdir/*.lst # had weird permissions on source.
# The 20K vocab, open-vocabulary language model (i.e. the one with UNK), without
# verbalized pronunciations. This is the most common test setup, I understand.
cp $CORPUS/wsj1/doc/lng_modl/base_lm/bcb20onp.z $lmdir/lm_bg.arpa.gz || exit 1;
chmod u+w $lmdir/lm_bg.arpa.gz
# trigram would be:
cat $CORPUS/wsj1/doc/lng_modl/base_lm/tcb20onp.z | \
perl -e 'while(<>){ if(m/^\\data\\/){ print; last; } } while(<>){ print; }' \
| gzip -c -f > $lmdir/lm_tg.arpa.gz || exit 1;
prune-lm --threshold=1e-7 $lmdir/lm_tg.arpa.gz $lmdir/lm_tgpr.arpa || exit 1;
gzip -f $lmdir/lm_tgpr.arpa || exit 1;
# repeat for 5k language models
cp $CORPUS/wsj1/doc/lng_modl/base_lm/bcb05onp.z $lmdir/lm_bg_5k.arpa.gz || exit 1;
chmod u+w $lmdir/lm_bg_5k.arpa.gz
# trigram would be: !only closed vocabulary here!
cp $CORPUS/wsj1/doc/lng_modl/base_lm/tcb05cnp.z $lmdir/lm_tg_5k.arpa.gz || exit 1;
chmod u+w $lmdir/lm_tg_5k.arpa.gz
gunzip $lmdir/lm_tg_5k.arpa.gz
tail -n 4328839 $lmdir/lm_tg_5k.arpa | gzip -c -f > $lmdir/lm_tg_5k.arpa.gz
rm $lmdir/lm_tg_5k.arpa
prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit 1;
gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1;
if [ ! -f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then
rm -f wsj0-train-spkrinfo.txt
wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt \
|| ( echo "Getting wsj0-train-spkrinfo.txt from backup location" && \
wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt );
fi
if [ ! -f wsj0-train-spkrinfo.txt ]; then
echo "Could not get the spkrinfo.txt file from LDC website (moved)?"
echo "This is possibly omitted from the training disks; couldn't find it."
echo "Everything else may have worked; we just may be missing gender info"
echo "which is only needed for VTLN-related diagnostics anyway."
exit 1
fi
# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the
# LDC put it on the web. Perhaps it was accidentally omitted from the
# disks.
cat $CORPUS/wsj0/doc/spkrinfo.txt \
$CORPUS/wsj1/doc/evl_spok/spkrinfo.txt \
$CORPUS/wsj1/doc/dev_spok/spkrinfo.txt \
$CORPUS/wsj1/doc/train/spkrinfo.txt \
./wsj0-train-spkrinfo.txt | \
perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \
awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender
echo "Data preparation succeeded"

@@ -0,0 +1,172 @@
#!/bin/bash
# This script builds a larger word-list and dictionary
# than used for the LMs supplied with the WSJ corpus.
# It uses a couple of strategies to fill in prons for words that are in
# the LM training data but not in CMUdict. One is
# to generate special prons for possible acronyms, which
# just consist of the prons of the constituent letters. The other
# is designed to handle derivatives of known words
# (e.g. deriving the pron of a plural from the pron of
# the base-word), but in a more general, learned-from-data
# way.
# It makes use of scripts in local/dict/
if [ $# -ne 1 ]; then
echo "Usage: local/cstr_wsj_train_lms.sh WSJ1_doc_dir"
exit 1
fi
export PATH=$PATH:`pwd`/local/dict/
srcdir=$1
if [ ! -d $srcdir/lng_modl ]; then
echo "Expecting 'lng_modl' under WSJ doc directory '$srcdir'"
exit 1
fi
mkdir -p data/local/dict_larger
dir=data/local/dict_larger
cp data/local/dict/* data/local/dict_larger # Various files describing phones etc.
# are there; we just want to copy them as the phoneset is the same.
rm data/local/dict_larger/lexicon.txt # we don't want this.
mincount=2 # Minimum count of an OOV we will try to generate a pron for.
[ ! -f data/local/dict/cmudict/cmudict.0.7a ] && echo "CMU dict not in expected place" && exit 1;
# Remove comments from cmudict; print first field; remove
# words like FOO(1) which are alternate prons: our dict format won't
# include these markers.
grep -v ';;;' data/local/dict/cmudict/cmudict.0.7a |
perl -ane 's/^(\S+)\(\d+\)/$1/; print; ' | sort | uniq > $dir/dict.cmu
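# For example, a hypothetical cmudict line "FOO(1)  F UW1" would appear in
# dict.cmu as "FOO  F UW1" (the alternate-pron marker "(1)" is stripped).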
cat $dir/dict.cmu | awk '{print $1}' | sort | uniq > $dir/wordlist.cmu
echo "Getting training data [this should take at least a few seconds; if not, there's a problem]"
# Convert to uppercase, remove XML-like markings.
# For words ending in "." that are not in CMUdict, we assume that these
# are periods that somehow remained in the data during data preparation,
# and we replace the "." with "\n". Note: we found this by looking at
# oov.counts below (before adding this rule).
touch $dir/cleaned.gz
if [ `du -m $dir/cleaned.gz | cut -f 1` -eq 73 ]; then
echo "Not getting cleaned data in $dir/cleaned.gz again [already exists]";
else
gunzip -c $srcdir/lng_modl/lm_train/np_data/{87,88,89}/*.z \
| awk '/^</{next}{print toupper($0)}' | perl -e '
open(F, "<$ARGV[0]")||die;
while(<F>){ chop; $isword{$_} = 1; }
while(<STDIN>) {
@A = split(" ", $_);
for ($n = 0; $n < @A; $n++) {
$a = $A[$n];
if (! $isword{$a} && $a =~ s/^([^\.]+)\.$/$1/) { # nonwords that end in "."
# and have no other "." in them: treat as period.
print "$a";
if ($n+1 < @A) { print "\n"; }
} else { print "$a "; }
}
print "\n";
}
' $dir/wordlist.cmu | gzip -c > $dir/cleaned.gz
fi
# get unigram counts
echo "Getting unigram counts"
gunzip -c $dir/cleaned.gz | tr -s ' ' '\n' | \
awk '{count[$1]++} END{for (w in count) { print count[w], w; }}' | sort -nr > $dir/unigrams
cat $dir/unigrams | awk -v dict=$dir/dict.cmu \
'BEGIN{while(getline<dict) seen[$1]=1;} {if(!seen[$2]){print;}}' \
> $dir/oov.counts
echo "Most frequent unseen unigrams are: "
head $dir/oov.counts
# Prune away singleton counts, and remove things with numbers in
# (which should have been normalized) and with no letters at all.
cat $dir/oov.counts | awk -v thresh=$mincount '{if ($1 >= thresh) { print $2; }}' \
| awk '/[0-9]/{next;} /[A-Z]/{print;}' > $dir/oovlist
# Automatic rule-finding...
# First make some prons for possible acronyms.
# Note: we don't do this for things like U.K or U.N,
# or A.B. (which doesn't exist anyway),
# as we consider this normalization/spelling errors.
cat $dir/oovlist | local/dict/get_acronym_prons.pl $dir/dict.cmu > $dir/dict.acronyms
mkdir $dir/f $dir/b # forward, backward directions of rules...
# forward is normal suffix
# rules, backward is reversed (prefix rules). These
# dirs contain stuff we create while making the rule-based
# extensions to the dictionary.
# Remove ; and , from words, if they are present; these
# might crash our scripts, as they are used as separators there.
filter_dict.pl $dir/dict.cmu > $dir/f/dict
cat $dir/oovlist | filter_dict.pl > $dir/f/oovs
reverse_dict.pl $dir/f/dict > $dir/b/dict
reverse_dict.pl $dir/f/oovs > $dir/b/oovs
# The next stage takes a few minutes.
# Note: the forward stage takes longer, as English is
# mostly a suffix-based language, and there are more rules
# that it finds.
for d in $dir/f $dir/b; do
(
cd $d
cat dict | get_rules.pl 2>get_rules.log >rules
get_rule_hierarchy.pl rules >hierarchy
awk '{print $1}' dict | get_candidate_prons.pl rules dict | \
limit_candidate_prons.pl hierarchy | \
score_prons.pl dict | \
count_rules.pl >rule.counts
# the sort command below is just for convenience of reading.
score_rules.pl <rule.counts | sort -t';' -k3,3 -n -r >rules.with_scores
get_candidate_prons.pl rules.with_scores dict oovs | \
limit_candidate_prons.pl hierarchy > oovs.candidates
) &
done
wait
# Merge the candidates.
reverse_candidates.pl $dir/b/oovs.candidates | cat - $dir/f/oovs.candidates | sort > $dir/oovs.candidates
select_candidate_prons.pl <$dir/oovs.candidates | awk -F';' '{printf("%s %s\n", $1, $2);}' \
> $dir/dict.oovs
cat $dir/dict.acronyms $dir/dict.oovs | sort | uniq > $dir/dict.oovs_merged
awk '{print $1}' $dir/dict.oovs_merged | uniq > $dir/oovlist.handled
sort $dir/oovlist | diff - $dir/oovlist.handled | grep -v 'd' | sed 's:< ::' > $dir/oovlist.not_handled
# add_counts.pl attaches the original counts to the list of handled/not-handled OOVs
add_counts.pl $dir/oov.counts $dir/oovlist.handled | sort -nr > $dir/oovlist.handled.counts
add_counts.pl $dir/oov.counts $dir/oovlist.not_handled | sort -nr > $dir/oovlist.not_handled.counts
echo "**Top OOVs we handled are:**";
head $dir/oovlist.handled.counts
echo "**Top OOVs we didn't handle are as follows (note: they are mostly misspellings):**";
head $dir/oovlist.not_handled.counts
echo "Count of OOVs we handled is `awk '{x+=$1} END{print x}' $dir/oovlist.handled.counts`"
echo "Count of OOVs we couldn't handle is `awk '{x+=$1} END{print x}' $dir/oovlist.not_handled.counts`"
echo "Count of OOVs we didn't handle due to low count is" \
`awk -v thresh=$mincount '{if ($1 < thresh) x+=$1; } END{print x;}' $dir/oov.counts`
# The two files created above are for humans to look at, as diagnostics.
cat <<EOF | cat - $dir/dict.cmu $dir/dict.oovs_merged | sort | uniq > $dir/lexicon.txt
!SIL SIL
<SPOKEN_NOISE> SPN
<UNK> SPN
<NOISE> NSN
EOF
echo "Created $dir/lexicon.txt"

@@ -0,0 +1,31 @@
#!/usr/bin/perl
# Add counts to an oovlist.
# Reads in counts as output by uniq -c, and
# an oovlist, and prints out the counts of the oovlist.
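# Hypothetical example: if the counts file contains "13 FOO" and "2 BAR",
# and the oovlist contains "FOO", the output is the line "\t13\tFOO".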
(@ARGV == 1 || @ARGV == 2) || die "Usage: add_counts.pl count_file [oovlist]\n";
$counts = shift @ARGV;
open(C, "<$counts") || die "Opening counts file $counts";
while(<C>) {
@A = split(" ", $_);
@A == 2 || die "Bad line in counts file: $_";
($count, $word) = @A;
$count =~ m:^\d+$: || die "Bad count $A[0]\n";
$counts{$word} = $count;
}
while(<>) {
chop;
$w = $_;
$w =~ m:\S+: || die "Bad word $w";
defined $counts{$w} || die "Word $w not present in counts file";
print "\t$counts{$w}\t$w\n";
}

@@ -0,0 +1,44 @@
#!/usr/bin/perl
# This program takes the output of score_prons.pl and collates
# it for each (rule, destress) pair so that we get the
# counts of right/partial/wrong for each pair.
# The input is a 7-tuple on each line, like:
# word;pron;base-word;base-pron;rule-name;de-stress;right|partial|wrong
#
# The output format is a 5-tuple like:
#
# rule;destress;right-count;partial-count;wrong-count
#
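# Hypothetical example: input lines such as
#   WASTED;W EY1 S T AH0 D;WASTING;W EY1 S T IH0 NG;STED,STING,D,NG;no;right
# are tallied into one output line per (rule, destress) pair, e.g.
#   STED,STING,D,NG;no;25;3;12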
if (@ARGV != 0 && @ARGV != 1) {
die "Usage: count_rules.pl < scored_candidate_prons > rule_counts";
}
while(<>) {
chop;
$line = $_;
my ($word, $pron, $baseword, $basepron, $rulename, $destress, $score) = split(";", $line);
my $key = $rulename . ";" . $destress;
if (!defined $counts{$key}) {
$counts{$key} = [ 0, 0, 0 ]; # new anonymous array.
}
$ref = $counts{$key};
if ($score eq "right") {
$$ref[0]++;
} elsif ($score eq "partial") {
$$ref[1]++;
} elsif ($score eq "wrong") {
$$ref[2]++;
} else {
die "Bad score $score\n";
}
}
while ( my ($key, $value) = each(%counts)) {
print $key . ";" . join(";", @$value) . "\n";
}

@@ -0,0 +1,19 @@
#!/usr/bin/perl
# This program reads and writes either a dictionary or just a list
# of words, and it removes any words containing ";" or "," as these
# are used in these programs. It will warn about these.
# It will die if the pronunciations have these symbols in.
while(<>) {
chop;
@A = split(" ", $_);
$word = shift @A;
if ($word =~ m:[;,]:) {
print STDERR "Omitting line $_ since it has one of the banned characters ; or ,\n" ;
} else {
$_ =~ m:[;,]: && die "Phones cannot have ; or , in them.";
print $_ . "\n";
}
}

@@ -0,0 +1,95 @@
#!/usr/bin/perl
# Reads a dictionary, and prints out a list of words that seem to be pronounced
# as acronyms (not including plurals of acronyms, just acronyms). Uses
# the prons of the individual letters (A., B. and so on) to judge this.
# Note: this is somewhat dependent on the convention used in CMUdict, that
# the individual letters are spelled this way (e.g. "A.").
$max_length = 6; # Max length of words that might be
# acronyms.
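# Hypothetical example: if the dict contains "IBM  AY1 B IY1 EH1 M" as well as the
# letter entries "I.", "B." and "M.", then IBM is judged to be an acronym and the
# line "IBM AY1 B IY1 EH1 M" is printed.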
while(<>) { # Read the dict.
chop;
@A = split(" ", $_);
$word = shift @A;
$pron = join(" ", @A);
if ($word =~ m/^([A-Z])\.$/ ) {
chop $word; # Remove trailing "." to get just the letter
$letter = $1;
if (!defined $letter_prons{$letter} ) {
$letter_prons{$letter} = [ ]; # new anonymous array
}
$arrayref = $letter_prons{$letter};
push @$arrayref, $pron;
} elsif( length($word) <= $max_length ) {
$pronof{$word . "," . $pron} = 1;
$isword{$word} = 1;
#if (!defined $prons{$word} ) {
# $prons{$word} = [ ];
#}
# push @{$prons{$word}}, $pron;
}
}
sub get_letter_prons;
foreach $word (keys %isword) {
my @letter_prons = get_letter_prons($word);
foreach $pron (@letter_prons) {
if (defined $pronof{$word.",".$pron}) {
print "$word $pron\n";
}
}
}
sub get_letter_prons {
@acronym = split("", shift); # The letters in the word.
my @prons = ( "" );
while (@acronym > 0) {
$l = shift @acronym;
$n = 1; # num-repeats of letter $l.
while (@acronym > 0 && $acronym[0] eq $l) {
$n++;
shift @acronym;
}
my $arrayref = $letter_prons{$l};
my @prons_of_block = ();
if ($n == 1) { # Just one repeat.
foreach $lpron ( @$arrayref ) {
push @prons_of_block, $lpron; # typically (always?) just one pron of a letter.
}
} elsif ($n == 2) { # Two repeats. Can be "double a" or "a a"
foreach $lpron ( @$arrayref ) {
push @prons_of_block, "D AH1 B AH0 L " . $lpron;
push @prons_of_block, $lpron . " " . $lpron; # join with a space so it matches dict prons
}
} elsif ($n == 3) { # can be "triple a" or "a a a"
foreach $lpron ( @$arrayref ) {
push @prons_of_block, "T R IH1 P AH0 L " . $lpron;
push @prons_of_block, $lpron . " " . $lpron . " " . $lpron;
}
} elsif ($n >= 4) { # let's say it can only be that letter repeated $n times..
# not sure really.
foreach $lpron ( @$arrayref ) {
$nlpron = "";
for ($m = 0; $m < $n; $m++) { $nlpron = $nlpron . $lpron; }
push @prons_of_block, $nlpron;
}
}
my @new_prons = ();
foreach $pron (@prons) {
foreach $pron_of_block(@prons_of_block) {
if ($pron eq "") {
push @new_prons, $pron_of_block;
} else {
push @new_prons, $pron . " " . $pron_of_block;
}
}
}
@prons = @new_prons;
}
return @prons;
}

@@ -0,0 +1,123 @@
#!/usr/bin/perl
# Reads a dictionary (for prons of letters), and an OOV list,
# and puts out candidate pronunciations of words in that list
# that could plausibly be acronyms.
# We judge that a word can plausibly be an acronym if it is
# a sequence of just letters (no non-letter characters such
# as "'"), or something like U.K.,
# and the number of letters is four or less.
#
# If the text were not already pre-normalized, there would
# be other hints such as capitalization.
# This program appends
# the prons of the individual letters (A., B. and so on) to work out
# the pron of the acronym.
# Note: this is somewhat dependent on the convention used in CMUdict, that
# the individual letters are spelled this way (e.g. "A."). [it seems
# to also have the separated versions.]
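# Hypothetical example: assuming the dict has "U.  Y UW1" and "K.  K EY1",
# an OOV entry "U.K." would produce the candidate line:
#   U.K. Y UW1 K EY1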
if (!(@ARGV == 1 || @ARGV == 2)) {
print "Usage: get_acronym_prons.pl dict [oovlist]";
}
$max_length = 4; # Max #letters in an acronym. (Longer
# acronyms tend to have "pseudo-pronunciations", e.g. think about UNICEF.)
$dict = shift @ARGV;
open(D, "<$dict") || die "Opening dictionary $dict";
while(<D>) { # Read the dict, to get the prons of the letters.
chop;
@A = split(" ", $_);
$word = shift @A;
$pron = join(" ", @A);
if ($word =~ m/^([A-Z])\.$/ ) {
chop $word; # Remove trailing "." to get just the letter
$letter = $1;
if (!defined $letter_prons{$letter} ) {
$letter_prons{$letter} = [ ]; # new anonymous array
}
$arrayref = $letter_prons{$letter};
push @$arrayref, $pron;
} elsif( length($word) <= $max_length ) {
$pronof{$word . "," . $pron} = 1;
$isword{$word} = 1;
#if (!defined $prons{$word} ) {
# $prons{$word} = [ ];
#}
# push @{$prons{$word}}, $pron;
}
}
sub get_letter_prons;
while(<>) { # Read OOVs.
# For now, just do the simple cases without "." in
# between... things with "." in the OOV list seem to
# be mostly errors.
chop;
$word = $_;
if ($word =~ m/^[A-Z]{1,5}$/) {
foreach $pron ( get_letter_prons($word) ) { # E.g. UNPO
print "$word $pron\n";
}
} elsif ($word =~ m:^(\w\.){1,4}\w\.?$:) { # E.g. U.K. Make the final "." optional.
$letters = $word;
$letters =~ s:\.::g;
foreach $pron ( get_letter_prons($letters) ) {
print "$word $pron\n";
}
}
}
sub get_letter_prons {
@acronym = split("", shift); # The letters in the word.
my @prons = ( "" );
while (@acronym > 0) {
$l = shift @acronym;
$n = 1; # num-repeats of letter $l.
while (@acronym > 0 && $acronym[0] eq $l) {
$n++;
shift @acronym;
}
my $arrayref = $letter_prons{$l};
my @prons_of_block = ();
if ($n == 1) { # Just one repeat.
foreach $lpron ( @$arrayref ) {
push @prons_of_block, $lpron; # typically (always?) just one pron of a letter.
}
} elsif ($n == 2) { # Two repeats. Can be "double a" or "a a"
foreach $lpron ( @$arrayref ) {
push @prons_of_block, "D AH1 B AH0 L " . $lpron;
push @prons_of_block, $lpron . " " . $lpron;
}
} elsif ($n == 3) { # can be "triple a" or "a a a"
foreach $lpron ( @$arrayref ) {
push @prons_of_block, "T R IH1 P AH0 L " . $lpron;
push @prons_of_block, "$lpron $lpron $lpron";
}
} elsif ($n >= 4) { # let's say it can only be that letter repeated $n times..
# not sure really.
foreach $lpron ( @$arrayref ) {
$nlpron = $lpron;
for ($m = 1; $m < $n; $m++) { $nlpron = $nlpron . " " . $lpron; }
push @prons_of_block, $nlpron;
}
}
my @new_prons = ();
foreach $pron (@prons) {
foreach $pron_of_block(@prons_of_block) {
if ($pron eq "") {
push @new_prons, $pron_of_block;
} else {
push @new_prons, $pron . " " . $pron_of_block;
}
}
}
@prons = @new_prons;
}
return @prons;
}

@@ -0,0 +1,187 @@
#!/usr/bin/perl
# This script takes three command-line arguments (typically files, or "-"):
# the suffix rules (as output by get_rules.pl), the rule-hierarchy
# (from get_rule_hierarchy.pl), and the words that we want prons to be
# generated for (one per line).
# The output consists of candidate generated pronunciations for those words,
# together with information about how we generated those pronunciations.
# This does not do pruning of the candidates using the restriction
# "you can't use a more general rule when a more specific one is applicable".
# That is done by limit_candidate_prons.pl.
# Each line of the output consists of a 6-tuple (7-tuple if rule scores are supplied), separated by ";", of the
# form:
# word;pron;base-word;base-pron;rule-name;destress[;rule-score]
# [the last field is only present if you supplied rules with score information].
# where:
# - "word" is the input word that we queried for, e.g. WASTED
# - "pron" is the generated pronunciation, e.g. "W EY1 S T AH0 D"
# - rule-name is a 4-tuple separated by commas that describes the rule, e.g.
# "STED,STING,D,NG",
# - "base-word" is the base-word we're getting the pron from,
# e.g. WASTING
# - "base-pron" is the pron of the base-word, e.g. "W EY1 S T IH0 NG"
# - "destress" is either "yes" or "no" and corresponds to whether we destressed the
# base-word or not [de-stressing just corresponds to just taking any 2's down to 1's,
# although we may extend this in future]...
# - "rule-score" is a numeric score of the rule (this field is only present
# if there was score information in your rules).
(@ARGV == 2 || @ARGV == 3) || die "Usage: get_candidate_prons.pl rules base-dict [ words ]";
$min_prefix_len = 3; # this should probably match with get_rules.pl
$rules = shift @ARGV; # Note: rules may be with destress "yes/no" indicators or without...
# if without, it's treated as if both "yes" and "no" are present.
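# So a rules line is either just e.g. "STED,STING,D,NG", or (hypothetically, with a
# destress marking and a score) "STED,STING,D,NG;yes;0.9".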
$dict = shift @ARGV;
open(R, "<$rules") || die "Opening rules file: $rules";
sub process_word;
while(<R>) {
chop $_;
my ($rule, $destress, $rule_score) = split(";", $_); # We may have "destress" markings (yes|no),
# and scores, or we may have just rule, in which case
# $destress and $rule_score will be undefined.
my @R = split(",", $rule, 4); # "my" means new instance of @R each
# time we do this loop -> important because we'll be creating
# a reference to @R below.
# Note: the last arg to SPLIT tells it how many fields max to get.
# This stops it from omitting empty trailing fields.
@R == 4 || die "Bad rule $_";
$suffix = $R[0]; # Suffix of word we want pron for.
if (!defined $isrule{$rule}) {
$isrule{$rule} = 1; # make sure we do this only once for each rule
# (don't repeat for different stresses).
if (!defined $suffix2rule{$suffix}) {
# The syntax [ $x, $y, ... ] means a reference to a newly created array
# containing $x, $y, etc. \@R creates an array reference to R.
# so suffix2rule is a hash from suffix to ref to array of refs to
# 4-dimensional arrays.
$suffix2rule{$suffix} = [ \@R ];
} else {
# Below, the syntax @{$suffix2rule{$suffix}} dereferences the array
# reference inside the hash; \@R pushes onto that array a new array
# reference pointing to @R.
push @{$suffix2rule{$suffix}}, \@R;
}
}
if (!defined $rule_score) { $rule_score = -1; } # -1 means we don't have the score info.
# Now store information on which destress markings (yes|no) this rule
# is valid for, and the associated scores (if supplied)
# If just the rule is given (i.e. no destress marking specified),
# assume valid for both.
if (!defined $destress) { # treat as if both "yes" and "no" are valid.
$rule_and_destress_to_rule_score{$rule.";yes"} = $rule_score;
$rule_and_destress_to_rule_score{$rule.";no"} = $rule_score;
} else {
$rule_and_destress_to_rule_score{$rule.";".$destress} = $rule_score;
}
}
open(D, "<$dict") || die "Opening base dictionary: $dict";
while(<D>) {
@A = split(" ", $_);
$word = shift @A;
$pron = join(" ", @A);
if (!defined $word2prons{$word}) {
$word2prons{$word} = [ $pron ]; # Ref to new anonymous array containing just "pron".
} else {
push @{$word2prons{$word}}, $pron; # Push $pron onto array referred to (@$ref derefs array).
}
}
foreach $word (keys %word2prons) { # iterate over the words (hash keys) only.
# Set up the hash "prefixcount", which says how many times a char-sequence
# is a prefix (not necessarily a strict prefix) of a word in the dict.
$len = length($word);
for ($l = 0; $l <= $len; $l++) {
$prefixcount{substr($word, 0, $l)}++;
}
}
open(R, "<$rules") || die "Opening rules file: $rules";
while(<>) {
chop;
m/^\S+$/ || die;
process_word($_);
}
sub process_word {
my $word = shift @_;
$len = length($word);
# $owncount is used in evaluating whether a particular prefix is a prefix
# of some other word in the dict... if a word itself may be in the dict
# (usually because we're running this on the dict itself), we need to
# correct for this.
if (defined $word2prons{$word}) { $owncount = 1; } else { $owncount = 0; }
for ($prefix_len = $min_prefix_len; $prefix_len <= $len; $prefix_len++) {
my $prefix = substr($word, 0, $prefix_len);
my $suffix = substr($word, $prefix_len);
if ($prefixcount{$prefix} - $owncount == 0) {
# This prefix is not a prefix of any word in the dict, so no point
# checking the rules below-- none of them can match.
next;
}
$rules_array_ref = $suffix2rule{$suffix};
if (defined $rules_array_ref) {
foreach $R (@$rules_array_ref) { # @$rules_array_ref dereferences the array.
# $R is a reference to a 4-dimensional array, whose elements we access with
# $$R[0], etc.
my $base_suffix = $$R[1];
my $base_word = $prefix . $base_suffix;
my $base_prons_ref = $word2prons{$base_word};
if (defined $base_prons_ref) {
my $psuffix = $$R[2];
my $base_psuffix = $$R[3];
if ($base_psuffix ne "") {
$base_psuffix = " " . $base_psuffix;
# Include " ", the space between phones, to prevent
# matching partial phones below.
}
my $base_psuffix_len = length($base_psuffix);
foreach $base_pron (@$base_prons_ref) { # @$base_prons_ref derefs
# that reference to an array.
my $base_pron_prefix_len = length($base_pron) - $base_psuffix_len;
# Note: these lengths are in characters, not phones.
if ($base_pron_prefix_len >= 0 &&
substr($base_pron, $base_pron_prefix_len) eq $base_psuffix) {
# The suffix of the base_pron is what it should be.
my $pron_prefix = substr($base_pron, 0, $base_pron_prefix_len);
my $rule = join(",", @$R); # we'll output this..
my $len = @R;
for ($destress = 0; $destress <= 1; $destress++) { # Two versions
# of each rule: with destressing and without.
# pron is the generated pron.
if ($destress) { $pron_prefix =~ s/2/1/g; }
my $pron;
if ($psuffix ne "") { $pron = $pron_prefix . " " . $psuffix; }
else { $pron = $pron_prefix; }
# Now print out the info about the generated pron.
my $destress_mark = ($destress ? "yes" : "no");
my $rule_score = $rule_and_destress_to_rule_score{$rule.";".$destress_mark};
if (defined $rule_score) { # Means that the (rule,destress) combination was
# seen [note: this if-statement may be pointless, as currently we don't
# do any pruning of rules].
my @output = ($word, $pron, $base_word, $base_pron, $rule, $destress_mark);
if ($rule_score != -1) { push @output, $rule_score; } # If scores were supplied,
# we also output the score info.
print join(";", @output) . "\n";
}
}
}
}
}
}
}
}
}

@@ -0,0 +1,73 @@
#!/usr/bin/perl
#This reads in rules, of the form put out by get_rules.pl, e.g.:
# ERT,,ER0 T,
# MENT,ING,M AH0 N T,IH0 NG
# S,TON,Z,T AH0 N
# ,ER,IH0 NG,IH0 NG ER0
# ,'S,M AH0 N,M AH0 N Z
#TIONS,TIVE,SH AH0 N Z,T IH0 V
# and it works out a hierarchy that says which rules are sub-cases
# of which rules: it outputs on each line a pair separated by ";", where
# each member of the pair is a rule, first one is the specialization, the
# second one being more general.
# E.g.:
# RED,RE,D,;ED,E,D,
# RED,RE,D,;D,,D,
# GING,GE,IH0 NG,;ING,I,IH0 NG,
# TOR,TING,T ER0,T IH0 NG;OR,OR,T ER0,T ER0
# ERED,ER,D,;RED,R,D,
# ERED,ER,D,;ED,,D,
while(<>) {
chop;
$rule = $_;
$isrule{$rule} = 1;
push @rules, $rule;
}
foreach my $rule (@rules) {
# Truncate the letters and phones in the rule, while we
# can, to get more general rules; if the more general rule
# exists, put out the pair.
@A = split(",", $rule);
@suffixa = split("", $A[0]);
@suffixb = split("", $A[1]);
@psuffixa = split(" ", $A[2]);
@psuffixb = split(" ", $A[3]);
for ($common_suffix_len = 0; $common_suffix_len < @suffixa && $common_suffix_len < @suffixb;) {
if ($suffixa[$common_suffix_len] eq $suffixb[$common_suffix_len]) {
$common_suffix_len++;
} else {
last;
}
}
for ($common_psuffix_len = 0; $common_psuffix_len < @psuffixa && $common_psuffix_len < @psuffixb;) {
if ($psuffixa[$common_psuffix_len] eq $psuffixb[$common_psuffix_len]) {
$common_psuffix_len++;
} else {
last;
}
}
# Get all combinations of pairs of integers <= (common_suffix_len, common_psuffix_len),
# except (0,0), and print out this rule together with the corresponding rule (if it exists).
for ($m = 0; $m <= $common_suffix_len; $m++) {
$sa = join("", @suffixa[$m...$#suffixa]); # @x[a..b] is array slice notation.
$sb = join("", @suffixb[$m...$#suffixb]);
for ($n = 0; $n <= $common_psuffix_len; $n++) {
if (!($m == 0 && $n == 0)) {
$psa = join(" ", @psuffixa[$n...$#psuffixa]);
$psb = join(" ", @psuffixb[$n...$#psuffixb]);
$more_general_rule = join(",", ($sa, $sb, $psa, $psb));
if (defined $isrule{$more_general_rule}) {
print $rule . ";" . $more_general_rule . "\n";
}
}
}
}
}

@@ -0,0 +1,204 @@
#!/usr/bin/perl
# This program creates suggested suffix rules from a dictionary.
# It outputs quadruples of the form:
# suffix,base-suffix,psuffix,base-psuffix
# where "suffix" is the suffix of the letters of a word, "base-suffix" is
# the suffix of the letters of the base-word, "psuffix" is the suffix of the
# pronunciation of the word (a space-separated list of phonemes), and
# "base-psuffix" is the suffix of the pronunciation of the baseword.
# As far as this program is concerned, there is no distinction between
# "word" and "base-word". To simplify things slightly, what it does
# is return all tuples (a,b,c,d) [with a != b] such that there are
# at least $min_suffix_count instances in the dictionary of
# a (word-prefix, pron-prefix) pair where there exists (word,pron)
# pairs of the form
# ( word-prefix . a, pron-prefix . c)
# and
# ( word-prefix . b, pron-prefix . d)
# For example if (a,b,c,d) equals (USLY,US,S L IY0,S)
# then this quadruple will be output as long as there at least
# e.g. 30 instances of prefixes like (FAM, F EY1 M AH0)
# where there exist (word, pron) pairs like:
# FAMOUS, F EY1 M AH0 S
# FAMOUSLY F EY1 M AH0 S L IY0
#
# There are some modifications to the picture above, for efficiency.
# If $disallow_empty_suffix != 0, this program will not output 4-tuples where
# the first element (the own-word suffix) is empty, as this would cause
# efficiency problems in get_candidate_prons.pl. If
# $ignore_prefix_stress != 0, this program will ignore stress markings
# while evaluating whether prefixes are the same.
# The minimum count for a quadruple to be output is $min_suffix_count
# (e.g. 30).
#
# The function of this program is not to evaluate the accuracy of these rules;
# it is mostly a pruning step, where we suggest rules that have large enough
# counts to be suitable for our later procedure where we evaluate their
# accuracy in predicting prons.
$disallow_empty_suffix = 1; # Disallow rules where the suffix of the "own-word" is
# empty. This is for efficiency in later stages (e.g. get_candidate_prons.pl).
$min_prefix_len = 3; # this must match with get_candidate_prons.pl
$ignore_prefix_stress = 1; # or 0 to take account of stress in prefix.
$min_suffix_count = 20;
# Takes in dictionary.
print STDERR "Reading dict\n";
while(<>) {
@A = split(" ", $_);
my $word = shift @A;
my $pron = join(" ", @A);
if (!defined $prons{$word}) {
$prons{$word} = $pron;
push @words, $word;
} else {
$prons{$word} = $prons{$word} . ";" . $pron;
}
}
# Get common suffixes (e.g., count >100). Include empty suffix.
print STDERR "Getting common suffix counts.\n";
{
foreach $word (@words) {
$len = length($word);
for ($x = $min_prefix_len; $x <= $len; $x++) {
$suffix_count{substr($word, $x)}++;
}
}
foreach $suffix (keys %suffix_count) {
if ($suffix_count{$suffix} >= $min_suffix_count) {
$newsuffix_count{$suffix} = $suffix_count{$suffix};
}
}
%suffix_count = %newsuffix_count;
undef %newsuffix_count;
foreach $suffix ( sort { $suffix_count{$b} <=> $suffix_count{$a} } keys %suffix_count ) {
print STDERR "$suffix_count{$suffix} $suffix\n";
}
}
print STDERR "Getting common suffix pairs.\n";
{
print STDERR " Getting map from prefix -> suffix-set.\n";
# Create map from prefix -> suffix-set.
foreach $word (@words) {
$len = length($word);
for ($x = $min_prefix_len; $x <= $len; $x++) {
$prefix = substr($word, 0, $x);
$suffix = substr($word, $x);
if (defined $suffix_count{$suffix}) { # Suffix is common...
if (!defined $suffixes_of{$prefix}) {
$suffixes_of{$prefix} = [ $suffix ]; # Create a reference to a new array with
# one element.
} else {
push @{$suffixes_of{$prefix}}, $suffix; # Push $suffix onto array that the
# hash member is a reference to.
}
}
}
}
my %suffix_set_count;
print STDERR " Getting map from suffix-set -> count.\n";
while ( my ($key, $value) = each(%suffixes_of) ) {
my @suffixes = sort ( @$value );
$suffix_set_count{join(";", @suffixes)}++;
}
print STDERR " Getting counts for suffix pairs.\n";
while ( my ($suffix_set, $count) = each (%suffix_set_count) ) {
my @suffixes = split(";", $suffix_set);
# Consider pairs to be ordered. This is more convenient
# later on.
foreach $suffix_a (@suffixes) {
foreach $suffix_b (@suffixes) {
if ($suffix_a ne $suffix_b) {
$suffix_pair = $suffix_a . "," . $suffix_b;
$suffix_pair_count{$suffix_pair} += $count;
}
}
}
}
# To save memory, only keep pairs above threshold in the hash.
while ( my ($suffix_pair, $count) = each (%suffix_pair_count) ) {
if ($count >= $min_suffix_count) {
$new_hash{$suffix_pair} = $count;
}
}
%suffix_pair_count = %new_hash;
undef %new_hash;
# Print out the suffix pairs so the user can see.
foreach $suffix_pair (
sort { $suffix_pair_count{$b} <=> $suffix_pair_count{$a} } keys %suffix_pair_count ) {
print STDERR "$suffix_pair_count{$suffix_pair} $suffix_pair\n";
}
}
print STDERR "Getting common suffix/suffix/psuffix/psuffix quadruples\n";
{
while ( my ($prefix, $suffixes_ref) = each(%suffixes_of) ) {
# Note: suffixes_ref is a reference to an array. We dereference with
# @$suffixes_ref.
# Consider each pair of suffixes (in each order).
foreach my $suffix_a ( @$suffixes_ref ) {
foreach my $suffix_b ( @$suffixes_ref ) {
# could have just used "defined" in the next line, but this is for clarity.
$suffix_pair = $suffix_a.",".$suffix_b;
if ( $suffix_pair_count{$suffix_pair} >= $min_suffix_count ) {
foreach $pron_a_str (split(";", $prons{$prefix.$suffix_a})) {
@pron_a = split(" ", $pron_a_str);
foreach $pron_b_str (split(";", $prons{$prefix.$suffix_b})) {
@pron_b = split(" ", $pron_b_str);
$len_a = @pron_a; # evaluating array as scalar automatically gives length.
$len_b = @pron_b;
for (my $pos = 0; $pos <= $len_a && $pos <= $len_b; $pos++) {
# $pos is starting-pos of psuffix-pair.
$psuffix_a = join(" ", @pron_a[$pos...$#pron_a]);
$psuffix_b = join(" ", @pron_b[$pos...$#pron_b]);
$quadruple = $suffix_pair . "," . $psuffix_a . "," . $psuffix_b;
$quadruple_count{$quadruple}++;
my $pron_a_pos = $pron_a[$pos]; my $pron_b_pos = $pron_b[$pos];
if ($ignore_prefix_stress) {
$pron_a_pos =~ s/\d//; # e.g convert IH0 to IH. Only affects
$pron_b_pos =~ s/\d//; # whether we exit the loop below.
}
if ($pron_a_pos ne $pron_b_pos) {
# This is important: we don't consider a pron suffix-pair to be
# valid unless the pron prefix is the same.
last;
}
}
}
}
}
}
}
}
# To save memory, only keep pairs above threshold in the hash.
while ( my ($quadruple, $count) = each (%quadruple_count) ) {
if ($count >= $min_suffix_count) {
$new_hash{$quadruple} = $count;
}
}
%quadruple_count = %new_hash;
undef %new_hash;
# Print out the quadruples for diagnostics.
foreach $quadruple (
sort { $quadruple_count{$b} <=> $quadruple_count{$a} } keys %quadruple_count ) {
print STDERR "$quadruple_count{$quadruple} $quadruple\n";
}
}
# Now print out the quadruples; these are the output of this program.
foreach $quadruple (keys %quadruple_count) {
print $quadruple."\n";
}


@ -0,0 +1,103 @@
#!/usr/bin/perl
# This program enforces the rule that
# if a "more specific" rule applies, we cannot use the more general rule.
# It takes in tuples generated by get_candidate_prons (one per line, separated
# by ";"), of the form:
# word;pron;base-word;base-pron;rule-name;de-stress[;rule-score]
# [note: we mean that the last element, the numeric score of the rule, is optional]
# and it outputs a (generally shorter) list
# of the same form.
# For each word:
# For each (base-word,base-pron):
# Eliminate "more-general" rules as follows:
# For each pair of rules applying to this (base-word, base-pron):
# If pair is in more-general hash, disallow more general one.
# Let the output be: for each (base-word, base-pron, rule):
# for (destress-prefix) in [yes, no], do:
# print out the word input, the rule-name, [destressed:yes|no], and the new pron.
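# A purely hypothetical illustration: if the rule-hierarchy file contains the line
#   TION,T,SH AH0 N,T;ION,,AH0 N,
# (meaning "TION,T,SH AH0 N,T" is a more specific form of "ION,,AH0 N,"), and both rules
# produced candidate lines for some word from the same (base-word, base-pron) with the
# same de-stress value, then only the lines from the more specific rule are printed.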
if (@ARGV != 1 && @ARGV != 2) {
die "Usage: limit_candidate_prons.pl rule_hierarchy [candidate_prons] > limited_candidate_prons";
}
$hierarchy = shift @ARGV;
open(H, "<$hierarchy") || die "Opening rule hierarchy $hierarchy";
while(<H>) {
chop;
m:.+;.+: || die "Bad rule-hierarchy line $_";
$hierarchy{$_} = 1; # Format is: if $rule1 is the string form of the more specific rule
# and $rule2 is the string form of the more general rule, then $hierarchy{$rule1.";".$rule2}
# is defined, else undefined.
}
sub process_word;
undef $cur_word;
@cur_lines = ();
while(<>) {
# input, output is:
# word;pron;base-word;base-pron;rule-name;destress;score
chop;
m:^([^;]+);: || die "Unexpected input: $_";
$word = $1;
if (!defined $cur_word || $word eq $cur_word) {
if (!defined $cur_word) { $cur_word = $word; }
push @cur_lines, $_;
} else {
process_word(@cur_lines); # Process a series of suggested prons
# for a particular word.
$cur_word = $word;
@cur_lines = ( $_ );
}
}
process_word(@cur_lines);
sub process_word {
my %pair2rule_list; # hash from $baseword.";".$baseword to ref
# to array of [ line1, line2, ... ].
my @cur_lines = @_;
foreach my $line (@cur_lines) {
my ($word, $pron, $baseword, $basepron, $rulename, $destress, $rule_score) = split(";", $line);
my $key = $baseword.";".$basepron;
if (defined $pair2rule_list{$key}) {
push @{$pair2rule_list{$key}}, $line; # @{...} derefs the array pointed to
# by the array ref inside {}.
} else {
$pair2rule_list{$key} = [ $line ]; # [ $x ] is new anonymous array with 1 elem ($x)
}
}
while ( my ($key, $value) = each(%pair2rule_list) ) {
my @lines = @$value; # array of lines that are for this (baseword,basepron).
my (@stress, @rules); # Arrays of stress markers and rule names, indexed by
# same index that indexes @lines.
for (my $n = 0; $n < @lines; $n++) {
my $line = $lines[$n];
my ($word, $pron, $baseword, $basepron, $rulename, $destress, $rule_score) = split(";", $line);
$stress[$n] = $destress;
$rules[$n] = $rulename;
}
for (my $m = 0; $m < @lines; $m++) {
my $ok = 1; # if stays 1, this line is OK.
for (my $n = 0; $n < @lines; $n++) {
if ($m != $n && $stress[$m] eq $stress[$n]) {
if (defined $hierarchy{$rules[$n].";".$rules[$m]}) {
# Note: this "hierarchy" variable is defined if $rules[$n] is a more
# specific instance of $rules[$m], thus invalidating $rules[$m].
$ok = 0;
last; # no point iterating further.
}
}
}
if ($ok != 0) {
print $lines[$m] . "\n";
}
}
}
}


@ -0,0 +1,50 @@
#!/usr/bin/perl
# This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl,
# which is 7-tuples, one per line, of the form:
# word;pron;base-word;base-pron;rule-name;de-stress;rule-score
# (where rule-score is sometimes listed as optional, but this
# program does expect it, since we don't anticipate it being used
# without it).
# This program assumes that all the words and prons and rules have
# come from a reversed dictionary (reverse_dict.pl) where the order
# of the characters in the words, and the phones in the prons, have
# been reversed, and it un-reverses them. That is, the characters
# in "word" and "base-word", and the phones in "pron" and "base-pron"
# are reversed; and the rule ("rule-name") is parsed as a 4-tuple,
# like:
# suffix,base-suffix,psuffix,base-psuffix
# so this program reverses the characters in "suffix" and "base-suffix"
# and the phones (separated by spaces) in "psuffix" and "base-psuffix".
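# A hypothetical illustration (made-up entry, CMU-style phones): the reversed input line
#   GNIKLAW;NG IH0 K AO1 W;KLAW;K AO1 W;GNI,,NG IH0,;no;0.9
# would be written out as
#   WALKING;W AO1 K IH0 NG;WALK;W AO1 K;ING,,IH0 NG,;no;0.9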
sub reverse_str {
$str = shift;
return join("", reverse(split("", $str)));
}
sub reverse_pron {
$str = shift;
return join(" ", reverse(split(" ", $str)));
}
while(<>){
chop;
@A = split(";", $_);
@A == 7 || die "Bad input line $_: found " . scalar(@A) . " fields, expected 7.";
($word,$pron,$baseword,$basepron,$rule,$destress,$score) = @A;
$word = reverse_str($word);
$pron = reverse_pron($pron);
$baseword = reverse_str($baseword);
$basepron = reverse_pron($basepron);
@R = split(",", $rule, 4);
@R == 4 || die "Bad rule $rule";
$R[0] = reverse_str($R[0]); # suffix.
$R[1] = reverse_str($R[1]); # base-suffix.
$R[2] = reverse_pron($R[2]); # pron.
$R[3] = reverse_pron($R[3]); # base-pron.
$rule = join(",", @R);
@A = ($word,$pron,$baseword,$basepron,$rule,$destress,$score);
print join(";", @A) . "\n";
}


@ -0,0 +1,14 @@
#!/usr/bin/perl
# Used in conjunction with get_rules.pl
# example input line: XANTHE Z AE1 N DH
# example output line: EHTNAX DH N AE1 Z
while(<>){
@A = split(" ", $_);
$word = shift @A;
$word = join("", reverse(split("", $word))); # Reverse letters of word.
@A = reverse(@A); # Reverse phones in pron.
unshift @A, $word;
print join(" ", @A) . "\n";
}


@ -0,0 +1,50 @@
#!/usr/bin/perl
# This program takes candidate prons from "get_candidate_prons.pl" or
# "limit_candidate_prons.pl", and a reference dictionary covering those words,
# and outputs the same format but with scoring information added (so we go from
# 6 to 7 fields). The scoring information says, for each generated pron,
# whether we have a match, a partial match, or no match, to some word in the
# dictionary. A partial match means it's correct except for stress.
# The input is a 6-tuple on each line, like:
# word;pron;base-word;base-pron;rule-name;de-stress
#
# The output is the same except with one more field, the score,
# which may be "right", "wrong", "partial".
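# A hypothetical illustration: if the reference dictionary contains the entry
#   WALKING  W AO1 K IH0 NG
# then a candidate line whose pron field is "W AO1 K IH0 NG" is scored "right",
# one whose pron is "W AO0 K IH0 NG" (same phones, different stress) is scored
# "partial", and anything else is scored "wrong".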
if (@ARGV != 1 && @ARGV != 2) {
die "Usage: score_prons.pl reference_dict [candidate_prons] > scored_candidate_prons";
}
$dict = shift @ARGV;
open(D, "<$dict") || die "Opening dictionary $dict";
while(<D>) { # Set up some hashes that tell us when
# a (word,pron) pair is correct (and the same for
# prons with stress information removed).
chop;
@A = split(" ", $_);
$word = shift @A;
$pron = join(" ", @A);
$pron_nostress = $pron;
$pron_nostress =~ s:\d::g;
$word_and_pron{$word.";".$pron} = 1;
$word_and_pron_nostress{$word.";".$pron_nostress} = 1;
}
while(<>) {
chop;
$line = $_;
my ($word, $pron, $baseword, $basepron, $rulename, $destress) = split(";", $line);
$pron_nostress = $pron;
$pron_nostress =~ s:\d::g;
if (defined $word_and_pron{$word.";".$pron}) {
$score = "right";
} elsif (defined $word_and_pron_nostress{$word.";".$pron_nostress}) {
$score = "partial";
} else {
$score = "wrong";
}
print $line.";".$score."\n";
}


@ -0,0 +1,52 @@
#!/usr/bin/perl
# This program takes the output of count_rules.pl, which is tuples
# of the form
#
# rule;destress;right-count;partial-count;wrong-count
#
# and outputs lines of the form
#
# rule;de-stress;score
#
# where the score, between 0 and 1 (higher is better), is computed as:
#
# ((#correct) + $partial_score * (#partial)) / (#correct + #partial + #wrong + $ballast)
#
# where $partial_score (e.g. 0.8) is the score we assign to a "partial" match,
# and $ballast is a small number, e.g. 1, that is treated like "extra" wrong scores, to penalize
# rules with few observations.
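# For instance (illustration only), with the default ballast=1 and partial_score=0.8,
# a rule observed with 8 "right", 2 "partial" and 0 "wrong" instances would score
# (8 + 0.8*2) / (8 + 2 + 0 + 1) = 9.6 / 11, i.e. about 0.873.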
#
# It outputs one scored line for every input rule.
$ballast = 1;
$partial_score = 0.8;
$destress_penalty = 1.0e-05; # Give destressed rules a small
# penalty vs. their no-destress counterparts, so if we
# have to choose arbitrarily we won't destress (seems safer).
for ($n = 1; $n <= 4; $n++) {
if ($ARGV[0] eq "--ballast") {
shift @ARGV;
$ballast = shift @ARGV;
}
if ($ARGV[0] eq "--partial-score") {
shift @ARGV;
$partial_score = shift @ARGV;
($partial_score >= 0.0 && $partial_score <= 1.0) || die "Invalid partial_score: $partial_score";
}
}
(@ARGV == 0 || @ARGV == 1) || die "Usage: score_rules.pl [--ballast ballast-count] [--partial-score partial-score] [input from count_rules.pl]";
while(<>) {
@A = split(";", $_);
@A == 5 || die "Bad input line; $_";
($rule,$destress,$right_count,$partial_count,$wrong_count) = @A;
$rule_score = ($right_count + $partial_score*$partial_count) /
($right_count+$partial_count+$wrong_count+$ballast);
if ($destress eq "yes") { $rule_score -= $destress_penalty; }
print join(";", $rule, $destress, sprintf("%.5f", $rule_score)) . "\n";
}


@ -0,0 +1,84 @@
#!/usr/bin/perl
# This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl
# or reverse_candidates.pl, which is 7-tuples, one per line, of the form:
#
# word;pron;base-word;base-pron;rule-name;de-stress;rule-score
#
# and selects the most likely prons for the words based on rule
# score. It outputs in the same format as the input (thus, it is
# similar to limit_candidates.pl in its input and output format,
# except it has a different way of selecting the prons to put out).
#
# This script will select the $max_prons best pronunciations for
# each candidate word, subject to the constraint that no pron should
# have a rule score worse than $min_rule_score.
# It first merges the candidates by, if there are multiple candidates
# generating the same pron, selecting the candidate that had the
# best associated score. It then sorts the prons on score and
# selects the n best prons (but doesn't print out candidates with
# score beneath the threshold).
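# A hypothetical illustration with the defaults (max_prons=4, min_rule_score=0.35):
# if a word has candidates with rule scores 0.9, 0.9 (both generating the same pron),
# 0.6 and 0.3, the duplicate pron is merged (keeping the 0.9 line) and only the prons
# scored 0.9 and 0.6 are printed, since 0.3 falls below the threshold.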
$max_prons = 4;
$min_rule_score = 0.35;
for ($n = 1; $n <= 3; $n++) {
if ($ARGV[0] eq "--max-prons") {
shift @ARGV;
$max_prons = shift @ARGV;
}
if ($ARGV[0] eq "--min-rule-score") {
shift @ARGV;
$min_rule_score = shift @ARGV;
}
}
if (@ARGV != 0 && @ARGV != 1) {
die "Usage: select_candidates_prons.pl [candidate_prons] > selected_candidate_prons";
}
sub process_word;
undef $cur_word;
@cur_lines = ();
while(<>) {
# input, output is:
# word;pron;base-word;base-pron;rule-name;destress;score
chop;
m:^([^;]+);: || die "Unexpected input: $_";
$word = $1;
if (!defined $cur_word || $word eq $cur_word) {
if (!defined $cur_word) { $cur_word = $word; }
push @cur_lines, $_;
} else {
process_word(@cur_lines); # Process a series of suggested prons
# for a particular word.
$cur_word = $word;
@cur_lines = ( $_ );
}
}
process_word(@cur_lines);
sub process_word {
my %pron2rule_score; # hash from generated pron to rule score for that pron.
my %pron2line; # hash from generated pron to best line for that pron.
my @cur_lines = @_;
foreach my $line (@cur_lines) {
my ($word, $pron, $baseword, $basepron, $rulename, $destress, $rule_score) = split(";", $line);
if (!defined $pron2rule_score{$pron} ||
$rule_score > $pron2rule_score{$pron}) {
$pron2rule_score{$pron} = $rule_score;
$pron2line{$pron} = $line;
}
}
my @prons = sort { $pron2rule_score{$b} <=> $pron2rule_score{$a} } keys %pron2rule_score;
for (my $n = 0; $n < @prons && $n < $max_prons &&
$pron2rule_score{$prons[$n]} >= $min_rule_score; $n++) {
print $pron2line{$prons[$n]} . "\n";
}
}


@ -0,0 +1,65 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This program takes on its standard input a list of utterance
# id's, one for each line (e.g. 4k0c030a is an utterance id).
# It takes as its argument a list of "dot" files, and extracts from
# the dot files the transcripts for a given dataset (represented by
# a file list).
#
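# Note: in the noisy/reverberated setups the utterance ids carry an extra
# single-character condition suffix (e.g. a hypothetical id 4k0c030a2); only the
# first 8 characters (4k0c030a) are used to look up the transcript in the dot
# files, while the full id is kept in the printed output.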
@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts";
$dot_flist = shift @ARGV;
open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n";
while(<L>){
chop;
m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_";
$spk = $1;
$spk2dot{$spk} = $_;
}
while(<STDIN>){
chop;
$uttid_orig = $_;
$uttid = substr $uttid_orig, 0, 8;
$uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_";
$spk = $1;
if($spk ne $curspk) {
%utt2trans = ( ); # Don't keep all the transcripts in memory...
$curspk = $spk;
$dotfile = $spk2dot{$spk};
defined $dotfile || die "No dot file for speaker $spk\n";
open(F, "<$dotfile") || die "Error opening dot file $dotfile\n";
while(<F>) {
$_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n";
$trans = $1;
$utt = $2;
$utt2trans{$utt} = $trans;
}
}
if(!defined $utt2trans{$uttid}) {
print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n";
} else {
print "$uttid_orig $utt2trans{$uttid}\n";
}
}


@ -0,0 +1,64 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This program takes on its standard input a list of utterance
# id's, one for each line (e.g. 4k0c030a is an utterance id).
# It takes as its argument a list of "dot" files, and extracts from
# the dot files the transcripts for a given dataset (represented by
# a file list).
#
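# For example: given the utterance id 4k0c030a, the speaker id is taken to be
# 4k0c03 (the first six characters), the transcript is looked up in the dot file
# matching 4k0c0300.dot from the file list, and a line "4k0c030a <transcript>"
# is printed.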
@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts";
$dot_flist = shift @ARGV;
open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n";
while(<L>){
chop;
m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_";
$spk = $1;
$spk2dot{$spk} = $_;
}
while(<STDIN>){
chop;
$uttid = $_;
$uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_";
$spk = $1;
if($spk ne $curspk) {
%utt2trans = ( ); # Don't keep all the transcripts in memory...
$curspk = $spk;
$dotfile = $spk2dot{$spk};
defined $dotfile || die "No dot file for speaker $spk\n";
open(F, "<$dotfile") || die "Error opening dot file $dotfile\n";
while(<F>) {
$_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n";
$trans = $1;
$utt = $2;
$utt2trans{$utt} = $trans;
}
}
if(!defined $utt2trans{$uttid}) {
print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n";
} else {
print "$uttid $utt2trans{$uttid}\n";
}
}


@ -0,0 +1,31 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# takes in a file list with lines like
# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# and outputs an scp in kaldi format with lines like
# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# (the first thing is the utterance-id, which is the same as the basename of the file.)
while(<>){
m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_";
$id = $1;
$id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames)
print "$id $_";
}


@ -0,0 +1,110 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
if [ $# -ne 2 ]; then
echo "Usage: local/generate_example_kws.sh <data-dir> <kws-data-dir>"
echo " e.g.: local/generate_example_kws.sh data/test_eval92/ <data/kws>"
exit 1;
fi
datadir=$1;
kwsdatadir=$2;
text=$datadir/text;
mkdir -p $kwsdatadir;
# Generate keywords; we generate 20 unigram keywords with at least 20 counts,
# 20 bigram keywords with at least 4 counts and 10 trigram keywords with at
# least 3 counts.
cat $text | perl -e '
%unigram = ();
%bigram = ();
%trigram = ();
while(<>) {
chomp;
@col=split(" ", $_);
shift @col;
for($i = 0; $i < @col; $i++) {
# unigram case
if (!defined($unigram{$col[$i]})) {
$unigram{$col[$i]} = 0;
}
$unigram{$col[$i]}++;
# bigram case
if ($i < @col-1) {
$word = $col[$i] . " " . $col[$i+1];
if (!defined($bigram{$word})) {
$bigram{$word} = 0;
}
$bigram{$word}++;
}
# trigram case
if ($i < @col-2) {
$word = $col[$i] . " " . $col[$i+1] . " " . $col[$i+2];
if (!defined($trigram{$word})) {
$trigram{$word} = 0;
}
$trigram{$word}++;
}
}
}
$max_count = 100;
$total = 20;
$current = 0;
$min_count = 20;
while ($current < $total && $min_count <= $max_count) {
foreach $x (keys %unigram) {
if ($unigram{$x} == $min_count) {
print "$x\n";
$unigram{$x} = 0;
$current++;
}
if ($current == $total) {
last;
}
}
$min_count++;
}
$total = 20;
$current = 0;
$min_count = 4;
while ($current < $total && $min_count <= $max_count) {
foreach $x (keys %bigram) {
if ($bigram{$x} == $min_count) {
print "$x\n";
$bigram{$x} = 0;
$current++;
}
if ($current == $total) {
last;
}
}
$min_count++;
}
$total = 10;
$current = 0;
$min_count = 3;
while ($current < $total && $min_count <= $max_count) {
foreach $x (keys %trigram) {
if ($trigram{$x} == $min_count) {
print "$x\n";
$trigram{$x} = 0;
$current++;
}
if ($current == $total) {
last;
}
}
$min_count++;
}
' > $kwsdatadir/raw_keywords.txt
echo "Keywords generation succeeded"


@ -0,0 +1,60 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
if [ $# -ne 3 ]; then
echo "Usage: local/kws_data_prep.sh <lang-dir> <data-dir> <kws-data-dir>"
echo " e.g.: local/kws_data_prep.sh data/lang_test_bd_tgpr/ data/test_eval92/ data/kws/"
exit 1;
fi
langdir=$1;
datadir=$2;
kwsdatadir=$3;
mkdir -p $kwsdatadir;
# Create keyword id for each keyword
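# For illustration (hypothetical keyword list): a first raw keyword line "HOT HAND"
# would come out as "WSJ-0001 HOT HAND", the second keyword as "WSJ-0002 ...", and so on.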
cat $kwsdatadir/raw_keywords.txt | perl -e '
$idx=1;
while(<>) {
chomp;
printf "WSJ-%04d $_\n", $idx;
$idx++;
}' > $kwsdatadir/keywords.txt
# Map the keywords to integers; note that we remove the keywords that
# are not in our $langdir/words.txt, as we won't find them anyway...
cat $kwsdatadir/keywords.txt | \
sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \
grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int
# Compile keywords into FSTs
transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:$kwsdatadir/keywords.fsts
# Create an utterance id for each utterance; note that by "utterance" here I mean
# the keys that will appear in the lattice archive. You may have to modify this
# for your own setup.
cat $datadir/wav.scp | \
awk '{print $1}' | \
sort | uniq | perl -e '
$idx=1;
while(<>) {
chomp;
print "$_ $idx\n";
$idx++;
}' > $kwsdatadir/utter_id
# Map utterance to the names that will appear in the rttm file. You have
# to modify the commands below according to your rttm file. In the WSJ case
# since each file is an utterance, we assume that the actual file names will
# be the "names" in the rttm, so the utterance names map to themselves.
cat $datadir/wav.scp | \
awk '{print $1}' | \
sort | uniq | perl -e '
while(<>) {
chomp;
print "$_ $_\n";
}' > $kwsdatadir/utter_map;
echo "Kws data preparation succeeded"


@ -0,0 +1,62 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This program takes as its standard input an .ndx file from the WSJ corpus that looks
# like this:
#;; File: tr_s_wv1.ndx, updated 04/26/94
#;;
#;; Index for WSJ0 SI-short Sennheiser training data
#;; Data is read WSJ sentences, Sennheiser mic.
#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts
#;; per speaker TI) = 7236 utts
#;;
#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1
#and as command-line arguments it takes the names of the WSJ disk locations, e.g.:
#/mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 ... etc.
# It outputs a list of absolute pathnames (it does this by replacing e.g. 11_1_1 with
# /mnt/matylda2/data/WSJ0/11-1.1.
# It also does a slight fix because one of the WSJ disks (WSJ1/13-16.1) was distributed with
# uppercase rather than lower case filenames.
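# For example, with /mnt/matylda2/data/WSJ0/11-1.1 given on the command line, the index
# line "11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1" shown above would be printed as
# /mnt/matylda2/data/WSJ0/11-1.1/wsj0/si_tr_s/01i/01ic0201.wv1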
foreach $fn (@ARGV) {
$fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n";
$disk_id=$1;
$disk_id =~ tr/-\./__/; # replace - and . with _ so 11-10.1 becomes 11_10_1
$fn =~ s:/$::; # Remove final slash, just in case it is present.
$disk2fn{$disk_id} = $fn;
}
while(<STDIN>){
if(m/^;/){ next; } # Comment. Ignore it.
else {
m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_";
$disk=$1;
if(!defined $disk2fn{$disk}) {
die "Disk id $disk not found";
}
$filename = $2; # as a subdirectory of the distributed disk.
if($disk eq "13_16_1" && `hostname` =~ m/fit.vutbr.cz/) {
# The disk 13-16.1 has been uppercased for some reason, on the
# BUT system. This is a fix specifically for that case.
$filename =~ tr/a-z/A-Z/; # This disk contains all uppercase filenames. Why?
}
print "$disk2fn{$disk}/$filename\n";
}
}


@ -0,0 +1,69 @@
#!/bin/bash
stage=0
train_stage=-100
# This trains only unadapted (just cepstral mean normalized) features,
# and uses various combinations of VTLN warping factor and time-warping
# factor to artificially expand the amount of data.
. cmd.sh
. utils/parse_options.sh # to parse the --stage option, if given
[ $# != 0 ] && echo "Usage: local/run_4b.sh [--stage <stage> --train-stage <train-stage>]" && exit 1;
set -e
if [ $stage -le 0 ]; then
# Create the training data.
featdir=`pwd`/mfcc/nnet5b; mkdir -p $featdir
fbank_conf=conf/fbank_40.conf
echo "--num-mel-bins=40" > $fbank_conf
steps/nnet2/get_perturbed_feats.sh --cmd "$train_cmd" \
$fbank_conf $featdir exp/perturbed_fbanks_si284 data/train_si284 data/train_si284_perturbed_fbank &
steps/nnet2/get_perturbed_feats.sh --cmd "$train_cmd" --feature-type mfcc \
conf/mfcc.conf $featdir exp/perturbed_mfcc_si284 data/train_si284 data/train_si284_perturbed_mfcc &
wait
fi
if [ $stage -le 1 ]; then
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
data/train_si284_perturbed_mfcc data/lang exp/tri4b exp/tri4b_ali_si284_perturbed_mfcc
fi
if [ $stage -le 2 ]; then
steps/nnet2/train_block.sh --stage "$train_stage" \
--cleanup false \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-epochs 10 --num-epochs-extra 5 \
--cmd "$decode_cmd" \
--hidden-layer-dim 1536 \
--num-block-layers 3 --num-normal-layers 3 \
data/train_si284_perturbed_fbank data/lang exp/tri4b_ali_si284_perturbed_mfcc exp/nnet5b || exit 1
fi
if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r data/${x}_fbank/split* || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
done
fi
if [ $stage -le 4 ]; then
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 10 \
exp/tri4b/graph_bd_tgpr data/test_dev93_fbank exp/nnet5b/decode_bd_tgpr_dev93
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 8 \
exp/tri4b/graph_bd_tgpr data/test_eval92_fbank exp/nnet5b/decode_bd_tgpr_eval92
fi
exit 0;


@ -0,0 +1,24 @@
#!/bin/bash
# This is neural net training on top of adapted 40-dimensional features.
#
. ./cmd.sh
(
steps/nnet2/train_tanh.sh \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-hidden-layers 4 --hidden-layer-dim 1024 \
--cmd "$decode_cmd" \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/nnet5c || exit 1
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 10 \
--transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c/decode_bd_tgpr_dev93
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/tri4b/graph_bd_tgpr data/test_eval92 exp/nnet5c/decode_bd_tgpr_eval92
)


@ -0,0 +1,119 @@
#!/bin/bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# This is modified from the script in standard Kaldi recipe to account
# for the way the WSJ data is structured on the Edinburgh systems.
# - Arnab Ghoshal, 29/05/12
if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0`
echo "The argument should be a the top-level WSJ corpus directory."
echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory"
echo "within the top-level corpus directory."
exit 1;
fi
CORPUS=$1
dir=`pwd`/data/local/data
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1;
fi
cd $dir
# noisy list for SI-84
find $1/si_tr_s -name '*.wav' | sort -u > train_si84_noisy.flist
# Dev-set Hub 1,2 (503, 913 utterances)
# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt.
# Sometimes this gets copied from the CD's with upcasing, don't know
# why (could be older versions of the disks).
find $1/si_dt_20 -name '*.wav' | sort -u > dev_dt_20_noisy.flist
find $1/si_dt_05 -name '*.wav' | sort -u > dev_dt_05_noisy.flist
find $1/si_et_20 -name '*.wav' | sort -u > test_eval92_noisy.flist
find $1/si_et_05 -name '*.wav' | sort -u > test_eval92_5k_noisy.flist
# Finding the transcript files:
#find -L $CORPUS -iname '*.dot' > dot_files.flist
if [ ! -e $dir/dot_files.flist ]; then
echo "Could not find $dir/dot_files.flist files, first run clean_data_prep.sh";
exit 1;
fi
# Convert the transcripts into our format (no normalization yet)
# adding a suffix to each utt_id:
# 2-7 encode the noise condition (9dB, 6dB, 3dB, 0dB, m3dB, m6dB respectively)
for x in train_si84_noisy dev_dt_05_noisy dev_dt_20_noisy test_eval92_noisy test_eval92_5k_noisy; do
cat $x.flist | perl -e '
while(<>) {
m:^\S+/(\w+)\.wav$: || die "Bad line $_";
$id = $1;
$id =~ tr/A-Z/a-z/;
print "$id $_";
}
' | sort > ${x}_wav_tmp.scp
#cat ${x}_wav_tmp.scp | awk '{print $1}' \
# | $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1
cat ${x}_wav_tmp.scp | perl -e '
while(<STDIN>) {
@A=split(" ", $_);
@B=split("/", $_);
$abs_path_len=@B;
$condition=$B[$abs_path_len-5];
if ($condition eq "9dB") {$key_suffix=2;}
elsif ($condition eq "6dB") {$key_suffix=3;}
elsif ($condition eq "3dB") {$key_suffix=4;}
elsif ($condition eq "0dB") {$key_suffix=5;}
elsif ($condition eq "m3dB") {$key_suffix=6;}
elsif ($condition eq "m6dB") {$key_suffix=7;}
else {print STDERR "error condition $condition";}
print $A[0].$key_suffix." ".$A[1]."\n";
}
' | sort -k1 > ${x}_wav.scp
cat ${x}_wav.scp | awk '{print $1}' \
| $local/find_noisy_transcripts.pl dot_files.flist > ${x}.trans1
done
# Do some basic normalization steps. At this point we don't remove OOVs--
# that will be done inside the training scripts, as we'd like to make the
# data-preparation stage independent of the specific lexicon used.
noiseword="<NOISE>";
for x in train_si84_noisy dev_dt_05_noisy dev_dt_20_noisy test_eval92_noisy test_eval92_5k_noisy; do
cat $x.trans1 | $local/normalize_transcript.pl $noiseword \
| sort > $x.txt || exit 1;
done
# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.)
#for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
# awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \
# > ${x}_wav.scp
#done
# Make the utt2spk and spk2utt files.
for x in train_si84_noisy dev_dt_05_noisy dev_dt_20_noisy test_eval92_noisy test_eval92_5k_noisy; do
cat ${x}_wav.scp | awk '{print $1}' \
| perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk
cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;
done
echo "Data preparation succeeded"


@ -0,0 +1,59 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This takes data from the standard input that's unnormalized transcripts in the format
# 4k2c0308 Of course there isn\'t any guarantee the company will keep its hot hand [misc_noise]
# 4k2c030a [loud_breath] And new hardware such as the set of personal computers I\. B\. M\. introduced last week can lead to unexpected changes in the software business [door_slam]
# and outputs normalized transcripts.
# c.f. /mnt/matylda2/data/WSJ0/11-10.1/wsj0/transcrp/doc/dot_spec.doc
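# For example, assuming the noise word is "<NOISE>" (as in the calling scripts), the
# first line above would be normalized to:
# 4k2c0308 OF COURSE THERE ISN'T ANY GUARANTEE THE COMPANY WILL KEEP ITS HOT HAND <NOISE>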
@ARGV == 1 || die "usage: normalize_transcript.pl noise_word < transcript > transcript2";
$noise_word = shift @ARGV;
while(<STDIN>) {
$_ =~ m:^(\S+) (.+): || die "bad line $_";
$utt = $1;
$trans = $2;
print "$utt";
foreach $w (split (" ",$trans)) {
$w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. .
$w =~ s:\\::g; # Remove backslashes. We don't need the quoting.
$w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts.
$w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts.
if($w =~ m:^\[\<\w+\]$: || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete.
$w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete.
$w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon.
$w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon.
$w eq "~" || # This is used to indicate truncation of an utterance. Not a word.
$w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much
# point including this in the transcript.
next; # we won't print this word.
} elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath].
print " $noise_word";
} elsif($w =~ m:^\<([\w\']+)\>$:) {
# e.g. replace <and> with and. (the <> means verbal deletion of a word).. but it's pronounced.
print " $1";
} elsif($w eq "--DASH") {
print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH.
# } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... seems the DASH gets combined with previous word
# print " $1 -DASH";
} else {
print " $w";
}
}
print "\n";
}


@ -0,0 +1,100 @@
#!/bin/bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# This is modified from the script in standard Kaldi recipe to account
# for the way the WSJ data is structured on the Edinburgh systems.
# - Arnab Ghoshal, 29/05/12
if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0`
echo "The argument should be a the top-level WSJ corpus directory."
echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory"
echo "within the top-level corpus directory."
exit 1;
fi
CORPUS=$1
dir=`pwd`/data/local/data
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1;
fi
cd $dir
# reverb list for SI-84
find $1/si_tr_s -name '*.wav' | sort -u > train_si84_reverb.flist
# Dev-set Hub 1,2 (503, 913 utterances)
# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt.
# Sometimes this gets copied from the CD's with upcasing, don't know
# why (could be older versions of the disks).
find $1/si_dt_20 -name '*.wav' | sort -u > dev_dt_20_reverb.flist
find $1/si_dt_05 -name '*.wav' | sort -u > dev_dt_05_reverb.flist
# Finding the transcript files:
#find -L $CORPUS -iname '*.dot' > dot_files.flist
if [ ! -e $dir/dot_files.flist ]; then
echo "Could not find $dir/dot_files.flist files, first run clean_data_prep.sh";
exit 1;
fi
# Convert the transcripts into our format (no normalization yet)
# adding suffix to utt_id
# 1 for reverb condition
for x in train_si84_reverb dev_dt_05_reverb dev_dt_20_reverb; do
cat $x.flist | perl -e '
while(<>) {
m:^\S+/(\w+)\.wav$: || die "Bad line $_";
$id = $1;
$id =~ tr/A-Z/a-z/;
print "$id $_";
}
' | sort > ${x}_wav_tmp.scp
cat ${x}_wav_tmp.scp | awk '{print $1}' \
| $local/find_transcripts.pl dot_files.flist > ${x}_tmp.trans1
cat ${x}_wav_tmp.scp | awk '{printf("%s1 %s\n", $1, $2);}' > ${x}_wav.scp
cat ${x}_tmp.trans1 | awk '{printf("%s1 ", $1); for(i=2;i<=NF;i++) printf("%s ", $i); printf("\n");}' > ${x}.trans1
done
# Do some basic normalization steps. At this point we don't remove OOVs--
# that will be done inside the training scripts, as we'd like to make the
# data-preparation stage independent of the specific lexicon used.
noiseword="<NOISE>";
for x in train_si84_reverb dev_dt_05_reverb dev_dt_20_reverb; do
cat $x.trans1 | $local/normalize_transcript.pl $noiseword \
| sort > $x.txt || exit 1;
done
# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.)
#for x in train_si84_clean test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean; do
# awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp \
# > ${x}_wav.scp
#done
# Make the utt2spk and spk2utt files.
for x in train_si84_reverb dev_dt_05_reverb dev_dt_20_reverb; do
cat ${x}_wav.scp | awk '{print $1}' \
| perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk
cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;
done
echo "Data preparation succeeded"


@ -0,0 +1,42 @@
#!/bin/bash
. cmd.sh
mfccdir=mfcc
# Make "per-utterance" versions of the test sets where the speaker
# information corresponds to utterances-- to demonstrate adaptation on
# short utterances, particularly for basis fMLLR
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
rm -r data/$y
cp -r data/$x data/$y
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;
done
# basis fMLLR experiments.
# First a baseline: decode per-utterance with normal fMLLR.
steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_utt || exit 1;
steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_utt || exit 1;
# get the fMLLR basis.
steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train_si84 data/lang exp/tri3b
# decoding tri3b with basis fMLLR
steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \
exp/tri3b/graph_tgpr data/test_dev93 exp/tri3b/decode_tgpr_dev93_basis || exit 1;
steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \
exp/tri3b/graph_tgpr data/test_eval92 exp/tri3b/decode_tgpr_eval92_basis || exit 1;
# The same, per-utterance.
steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \
exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_basis_utt || exit 1;
steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \
exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_basis_utt || exit 1;


@ -0,0 +1,181 @@
#!/bin/bash
# Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
# In this recipe we build DNN in four stages:
# 1) Data preparations : the fMLLR features are stored to disk
# 2) RBM pre-training : in this unsupervised stage we train a stack of RBMs, a good starting point for Cross-entropy training
# 3) Frame-level cross-entropy training : in this stage the objective is to classify frames correctly.
# 4) Sequence-criterion training : in this stage the objective is to classify the whole sequence correctly,
# the idea is similar to the 'Discriminative training' in the context of GMM-HMMs.
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
. ./path.sh ## Source the tools/utils (import the queue.pl)
#false && \
{
gmmdir=exp/tri4b
###
### Generate the alignments of dev93
### (held-out set for Cross-entropy training)
###
steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
data/test_dev93 data/lang $gmmdir exp/tri4b_ali_dev93 || exit 1
###
### Store the fMLLR features, so we can train on them easily
###
# train si284
# generate the features
dir=data-fmllr-tri4b/train_si284
steps/make_fmllr_feats.sh --nj 20 --cmd "$train_cmd" \
--transform-dir exp/tri4b_ali_si284 \
$dir data/train_si284 $gmmdir $dir/_log $dir/_data || exit 1
# eval92
dir=data-fmllr-tri4b/test_eval92
steps/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \
--transform-dir exp/tri4b/decode_tgpr_eval92 \
$dir data/test_eval92 $gmmdir $dir/_log $dir/_data || exit 1
# dev93 (unsupervised fMLLR)
# held-out set of Cross-entropy training
dir=data-fmllr-tri4b/test_dev93
steps/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
--transform-dir exp/tri4b/decode_tgpr_dev93 \
$dir data/test_dev93 $gmmdir $dir/_log $dir/_data || exit 1
}
###
### Now we can pre-train stack of RBMs
###
#false && \
{ # Pre-train the DBN
dir=exp/tri4b_pretrain-dbn
(tail --pid=$$ -F $dir/_pretrain_dbn.log 2>/dev/null)&
$cuda_cmd $dir/_pretrain_dbn.log \
steps/pretrain_dbn.sh --rbm-iter 3 data-fmllr-tri4b/train_si284 $dir
}
###
### Now we train the DNN optimizing cross-entropy.
### This will take quite some time.
###
#false && \
{ # Train the MLP
dir=exp/tri4b_pretrain-dbn_dnn
ali=exp/tri4b_ali
feature_transform=exp/tri4b_pretrain-dbn/final.feature_transform
dbn=exp/tri4b_pretrain-dbn/6.dbn
(tail --pid=$$ -F $dir/_train_nnet.log 2>/dev/null)&
$cuda_cmd $dir/_train_nnet.log \
steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
data-fmllr-tri4b/train_si284 data-fmllr-tri4b/test_dev93 data/lang ${ali}_si284 ${ali}_dev93 $dir || exit 1;
# decode with 'big-dictionary' (reuse HCLG graph)
steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --acwt 0.10 --config conf/decode_dnn.config \
exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_dev93 $dir/decode_bd_tgpr_dev93 || exit 1;
steps/decode_nnet.sh --nj 8 --cmd "$decode_cmd" --acwt 0.10 --config conf/decode_dnn.config \
exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_eval92 $dir/decode_bd_tgpr_eval92 || exit 1;
}
###
### Finally we train using sMBR criterion.
### We do Stochastic-GD with per-utterance updates.
###
### To get faster convergence, we will re-generate
### the lattices after 1st epoch of sMBR.
###
dir=exp/tri4b_pretrain-dbn_dnn_smbr
srcdir=exp/tri4b_pretrain-dbn_dnn
acwt=0.10
# First we need to generate lattices and alignments:
#false && \
{
steps/align_nnet.sh --nj 100 --cmd "$train_cmd" \
data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_ali_si284 || exit 1;
steps/make_denlats_nnet.sh --nj 100 --cmd "$decode_cmd" \
--config conf/decode_dnn.config --acwt $acwt \
data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_denlats_si284 || exit 1;
}
# Now we re-train the hybrid by single iteration of sMBR
#false && \
{
steps/train_nnet_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt $acwt --do-smbr true \
data-fmllr-tri4b/train_si284 data/lang $srcdir \
${srcdir}_ali_si284 ${srcdir}_denlats_si284 $dir || exit 1
}
# Decode
#false && \
{
for ITER in 1; do
# decode dev93 with big dict graph_bd_tgpr
steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_dev93 $dir/decode_dev93_bd_tgpr_it${ITER} || exit 1
# decode eval92 with big dict graph_bd_tgpr
steps/decode_nnet.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_eval92 $dir/decode_eval92_bd_tgpr_it${ITER} || exit 1
done
}
###
### Re-generate lattices and run several more iterations of sMBR
###
dir=exp/tri4b_pretrain-dbn_dnn_smbr_iter1-lats
srcdir=exp/tri4b_pretrain-dbn_dnn_smbr
acwt=0.10
# First we need to generate lattices and alignments:
#false && \
{
steps/align_nnet.sh --nj 100 --cmd "$train_cmd" \
data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_ali_si284 || exit 1;
steps/make_denlats_nnet.sh --nj 100 --cmd "$decode_cmd" \
--config conf/decode_dnn.config --acwt $acwt \
data-fmllr-tri4b/train_si284 data/lang $srcdir ${srcdir}_denlats_si284 || exit 1;
}
# Now we re-train the hybrid by several iterations of sMBR
#false && \
{
steps/train_nnet_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
data-fmllr-tri4b/train_si284 data/lang $srcdir \
${srcdir}_ali_si284 ${srcdir}_denlats_si284 $dir
}
# Decode
#false && \
{
for ITER in 1 2 3 4; do
# decode dev93 with big dict graph_bd_tgpr
steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_dev93 $dir/decode_dev93_bd_tgpr_it${ITER} || exit 1
# decode eval92 with big dict graph_bd_tgpr
steps/decode_nnet.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4b/graph_bd_tgpr data-fmllr-tri4b/test_eval92 $dir/decode_eval92_bd_tgpr_it${ITER} || exit 1
done
}
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done


@ -0,0 +1,41 @@
# prepare reverse lexicon and language model for backwards decoding
utils/prepare_lang.sh --reverse true data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp.reverse data/lang.reverse || exit 1;
utils/reverse_lm.sh data/local/nist_lm/lm_bg_5k.arpa.gz data/lang.reverse data/lang_test_bg_5k.reverse || exit 1;
utils/reverse_lm_test.sh data/lang_test_bg_5k data/lang_test_bg_5k.reverse || exit 1;
# normal forward decoding
utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 8 --cmd "$decode_cmd" \
exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_10 || exit 1;
# backward decoding
utils/mkgraph.sh --reverse data/lang_test_bg_5k.reverse exp/tri2a exp/tri2a/graph_bg5k_r
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 8 --cmd "$decode_cmd" \
exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_reverse10 || exit 1;
# pingpong decoding
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 8 --cmd "$decode_cmd" \
--first_pass exp/tri2a/decode_eval92_bg5k_10 exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_pingpong10 || exit 1;
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 8 --cmd "$decode_cmd" \
--first_pass exp/tri2a/decode_eval92_bg5k_reverse10 exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_pongping10 || exit 1;
# same for bigger language models (on machine with 8GB RAM, you can run the whole decoding in 3-4 min without SGE)
utils/prepare_lang.sh --reverse true data/local/dict_larger "<SPOKEN_NOISE>" data/local/lang_larger.reverse data/lang_bd.reverse || exit;
utils/reverse_lm.sh --lexicon data/local/dict_larger/lexicon.txt data/local/local_lm/3gram-mincount/lm_pr6.0.gz data/lang_bd.reverse data/lang_test_bd_tgpr.reverse || exit 1;
utils/reverse_lm_test.sh data/lang_test_bd_tgpr data/lang_test_bd_tgpr.reverse || exit 1;
utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri2a exp/tri2a/graph_bd_tgpr
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 4 --cmd run.pl \
exp/tri2a/graph_bd_tgpr data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_10 || exit 1;
utils/mkgraph.sh --reverse data/lang_test_bd_tgpr.reverse exp/tri2a exp/tri2a/graph_bd_tgpr_r
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 4 --cmd run.pl \
exp/tri2a/graph_bd_tgpr_r data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_reverse10 || exit 1;
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 4 --cmd run.pl \
--first_pass exp/tri2a/decode_eval92_bdtgpr4_10 exp/tri2a/graph_bd_tgpr_r data/test_eval92 \
exp/tri2a/decode_eval92_bdtgpr4_pingpong10 || exit 1;
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 4 --cmd run.pl \
--first_pass exp/tri2a/decode_eval92_bdtgpr4_reverse10 exp/tri2a/graph_bd_tgpr data/test_eval92 \
exp/tri2a/decode_eval92_bdtgpr4_pongping10 || exit 1;


@ -0,0 +1,60 @@
#!/bin/bash
. ./cmd.sh
# Train and test MMI (and boosted MMI) on tri2b system.
steps/make_denlats.sh --sub-split 20 --nj 10 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri2b exp/tri2b_denlats_si84 || exit 1;
# train the basic MMI system.
steps/train_mmi.sh --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri2b_ali_si84 \
exp/tri2b_denlats_si84 exp/tri2b_mmi || exit 1;
for iter in 3 4; do
steps/decode_si.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_mmi/decode_tgpr_dev93_it$iter &
steps/decode_si.sh --nj 8 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_eval92 exp/tri2b_mmi/decode_tgpr_eval92_it$iter &
done
# MMI with 0.1 boosting factor.
steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 \
exp/tri2b_mmi_b0.1 || exit 1;
for iter in 3 4; do
steps/decode_si.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_mmi_b0.1/decode_tgpr_dev93_it$iter &
steps/decode_si.sh --nj 8 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_eval92 exp/tri2b_mmi_b0.1/decode_tgpr_eval92_it$iter &
done
# Train a UBM with 400 components, for fMMI.
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 10 --cmd "$train_cmd" \
400 data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b
steps/train_mmi_fmmi.sh --boost 0.1 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \
exp/tri2b_fmmi_b0.1
for iter in `seq 3 8`; do
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it$iter &
done
steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \
exp/tri2b_fmmi_b0.1_lr0.005 || exit 1;
for iter in `seq 3 8`; do
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it$iter &
done
steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \
exp/tri2b_fmmi_indirect_b0.1
for iter in `seq 3 8`; do
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it$iter &
done


@ -0,0 +1,50 @@
#!/bin/bash
. ./cmd.sh
steps/make_denlats.sh --nj 30 --sub-split 30 --cmd "$train_cmd" \
--transform-dir exp/tri4b_ali_si284 \
data/train_si284 data/lang exp/tri4b exp/tri4b_denlats_si284 || exit 1;
steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b_denlats_si284 \
exp/tri4b_mmi_b0.1 || exit 1;
steps/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_tgpr_dev93 \
exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi_b0.1/decode_tgpr_dev93
#first, train UBM for fMMI experiments.
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 30 --cmd "$train_cmd" \
600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b
# Next, fMMI+MMI.
steps/train_mmi_fmmi.sh \
--boost 0.1 --cmd "$train_cmd" data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b exp/tri4b_denlats_si284 \
exp/tri4b_fmmi_a || exit 1;
for iter in 3 4 5 6 7 8; do
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri3b/decode_tgpr_dev93 exp/tri4b/graph_tgpr data/test_dev93 \
exp/tri4b_fmmi_a/decode_tgpr_dev93_it$iter &
done
# decode the last iter with the bd model.
for iter in 8; do
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri3b/decode_bd_tgpr_dev93 exp/tri4b/graph_bd_tgpr data/test_dev93 \
exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it$iter &
steps/decode_fmmi.sh --nj 8 --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri3b/decode_bd_tgpr_eval92 exp/tri4b/graph_bd_tgpr data/test_eval92 \
exp/tri4b_fmmi_a/decode_tgpr_eval92_it$iter &
done
# fMMI + mmi with indirect differential.
steps/train_mmi_fmmi_indirect.sh \
--boost 0.1 --cmd "$train_cmd" data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b exp/tri4b_denlats_si284 \
exp/tri4b_fmmi_indirect || exit 1;
for iter in 3 4 5 6 7 8; do
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri3b/decode_tgpr_dev93 exp/tri4b/graph_tgpr data/test_dev93 \
exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it$iter &
done


@ -0,0 +1,9 @@
#!/bin/bash
. ./cmd.sh
# ...
local/nnet2/run_5c.sh


@ -0,0 +1,66 @@
#!/bin/bash
steps/align_raw_fmllr.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84_raw
steps/train_raw_sat.sh --cmd "$train_cmd" \
2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84_raw exp/tri3c || exit 1;
mfccdir=mfcc
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
cp -rT data/$x data/$y
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;
done
(
utils/mkgraph.sh data/lang_test_tgpr exp/tri3c exp/tri3c/graph_tgpr || exit 1;
steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \
exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93 || exit 1;
steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \
exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92 || exit 1;
steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \
exp/tri3c/graph_tgpr data/test_dev93_utt exp/tri3c/decode_tgpr_dev93_utt || exit 1;
steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \
exp/tri3c/graph_tgpr data/test_eval92_utt exp/tri3c/decode_tgpr_eval92_utt || exit 1;
steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 10 --cmd "$decode_cmd" \
exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93_2fmllr || exit 1;
steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 8 --cmd "$decode_cmd" \
exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92_2fmllr || exit 1;
)&
(
utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri3c exp/tri3c/graph_bd_tgpr || exit 1;
steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 8 exp/tri3c/graph_bd_tgpr \
data/test_eval92 exp/tri3c/decode_bd_tgpr_eval92
steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 10 exp/tri3c/graph_bd_tgpr \
data/test_dev93 exp/tri3c/decode_bd_tgpr_dev93
)&
steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
data/train_si284 data/lang exp/tri3c exp/tri3c_ali_si284 || exit 1;
steps/train_raw_sat.sh --cmd "$train_cmd" \
4200 40000 data/train_si284 data/lang exp/tri3c_ali_si284 exp/tri4d || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/tri4d exp/tri4d/graph_tgpr || exit 1;
steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \
exp/tri4d/graph_tgpr data/test_dev93 exp/tri4d/decode_tgpr_dev93 || exit 1;
steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \
exp/tri4d/graph_tgpr data/test_eval92 exp/tri4d/decode_tgpr_eval92 || exit 1;
) &
wait
#for x in exp/tri3{b,c}/decode_tgpr*; do grep WER $x/wer_* | utils/best_wer.sh ; done


@ -0,0 +1,42 @@
#!/bin/bash
for test in dev93 eval92; do
steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_bd_tgpr data/lang_test_bd_fg \
data/test_${test} exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 || exit 1;
# Note: for N-best-list generation, choosing the acoustic scale (12) that gave
# the best WER on this test set. Ideally we should do this on a dev set.
# This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM.
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 12 \
0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_${test} \
exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm30_0.25 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 12 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_${test} \
exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm100_0.5 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 12 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_${test} \
exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm200_0.5 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 12 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \
exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.5 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 12 \
0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \
exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.75 \
|| exit 1;
done


@ -0,0 +1,64 @@
#!/bin/bash
. cmd.sh
# This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM.
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm30_0.25 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm100_0.5 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm200_0.5 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 \
|| exit 1;
steps/rnnlmrescore.sh \
--N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000
dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75_N1000
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000 $dir
steps/rnnlmrescore.sh \
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg $dir
dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir
steps/rnnlmrescore.sh \
--stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg $dir
dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.25
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir
steps/rnnlmrescore.sh \
--stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
0.25 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg $dir
steps/rnnlmrescore.sh \
--N 10 --cmd "$decode_cmd" --inv-acwt 17 \
0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N10 \
|| exit 1;

View file

@ -0,0 +1,113 @@
#!/bin/bash
# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.
. cmd.sh
# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for
# training, but this shouldn't have much effect.
(
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1;
steps/train_ubm.sh --cmd "$train_cmd" \
400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1;
steps/train_sgmm.sh --cmd "$train_cmd" \
3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \
exp/ubm5b/final.ubm exp/sgmm5a || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr
steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93
) &
steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \
--use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1;
steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \
data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84
steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1
for iter in 1 2 3 4; do
steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter &
done
steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
--update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9
for iter in 1 2 3 4; do
steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter &
done
) &
(
# The next commands are the same thing on all the si284 data.
# SGMM system on the si284 data [sgmm5b]
steps/train_ubm.sh --cmd "$train_cmd" \
600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1;
steps/train_sgmm.sh --cmd "$train_cmd" \
5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
exp/ubm5b/final.ubm exp/sgmm5b || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr
steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93
steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92
utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1;
steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93
steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92
) &
steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \
--use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284
steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284
steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \
data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1
for iter in 1 2 3 4; do
for test in dev93 eval92; do
steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \
exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter &
steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \
exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter &
done
done
) &
# Train quinphone SGMM system.
steps/train_sgmm.sh --cmd "$train_cmd" \
--context-opts "--context-width=5 --central-position=2" \
5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
exp/ubm5b/final.ubm exp/sgmm5c || exit 1;
# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93.
steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93
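# A sketch for summarising the WERs of the MMI iterations decoded above, once the
# background jobs finish (assuming utils/best_wer.sh exists, as in the rest of this recipe):
# for d in exp/sgmm5?_mmi_b0.1*/decode_*_it?; do
#   grep WER $d/wer_* | utils/best_wer.sh
# done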

View file

@ -0,0 +1,148 @@
#!/bin/bash
# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.
. cmd.sh
# Note: you might want to try to give the option --spk-dep-weights=false to train_sgmm2.sh;
# this takes out the "symmetric SGMM" part which is not always helpful.
# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for
# training, but this shouldn't have much effect.
(
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1;
steps/train_ubm.sh --cmd "$train_cmd" \
400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1;
steps/train_sgmm2.sh --cmd "$train_cmd" \
7000 9000 data/train_si84 data/lang exp/tri4b_ali_si84 \
exp/ubm5a/final.ubm exp/sgmm2_5a || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5a exp/sgmm2_5a/graph_tgpr
steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
exp/sgmm2_5a/graph_tgpr data/test_dev93 exp/sgmm2_5a/decode_tgpr_dev93
) &
steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \
--use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm2_5a exp/sgmm2_5a_ali_si84 || exit 1;
steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \
data/train_si84 data/lang exp/sgmm2_5a_ali_si84 exp/sgmm2_5a_denlats_si84
steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
data/train_si84 data/lang exp/sgmm2_5a_ali_si84 exp/sgmm2_5a_denlats_si84 exp/sgmm2_5a_mmi_b0.1
for iter in 1 2 3 4; do
steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm2_5a/decode_tgpr_dev93 \
exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it$iter &
done
steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
--update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm2_5a_ali_si84 exp/sgmm2_5a_denlats_si84 exp/sgmm2_5a_mmi_b0.1_m0.9
for iter in 1 2 3 4; do
steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm2_5a/decode_tgpr_dev93 \
exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter &
done
) &
(
# The next commands are the same thing on all the si284 data.
# SGMM system on the si284 data [sgmm5b]
steps/train_ubm.sh --cmd "$train_cmd" \
600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1;
steps/train_sgmm2.sh --cmd "$train_cmd" \
11000 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
exp/ubm5b/final.ubm exp/sgmm2_5b || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5b exp/sgmm2_5b/graph_tgpr
steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
exp/sgmm2_5b/graph_tgpr data/test_dev93 exp/sgmm2_5b/decode_tgpr_dev93
steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
exp/sgmm2_5b/graph_tgpr data/test_eval92 exp/sgmm2_5b/decode_tgpr_eval92
utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm2_5b exp/sgmm2_5b/graph_bd_tgpr || exit 1;
steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/sgmm2_5b/graph_bd_tgpr data/test_dev93 exp/sgmm2_5b/decode_bd_tgpr_dev93
steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/sgmm2_5b/graph_bd_tgpr data/test_eval92 exp/sgmm2_5b/decode_bd_tgpr_eval92
) &
# This shows how you would build and test a quinphone SGMM2 system.
(
steps/train_sgmm2.sh --cmd "$train_cmd" \
--context-opts "--context-width=5 --central-position=2" \
11000 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
exp/ubm5b/final.ubm exp/sgmm2_5c || exit 1;
# Decode from lattices in exp/sgmm2_5b
steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
data/test_dev93 data/lang_test_tgpr exp/sgmm2_5b/decode_tgpr_dev93 exp/sgmm2_5c/decode_tgpr_dev93
steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
data/test_eval92 data/lang_test_tgpr exp/sgmm2_5b/decode_tgpr_eval92 exp/sgmm2_5c/decode_tgpr_eval92
) &
steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \
--use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm2_5b exp/sgmm2_5b_ali_si284
steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
data/train_si284 data/lang exp/sgmm2_5b_ali_si284 exp/sgmm2_5b_denlats_si284
steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \
data/train_si284 data/lang exp/sgmm2_5b_ali_si284 exp/sgmm2_5b_denlats_si284 exp/sgmm2_5b_mmi_b0.1
for iter in 1 2 3 4; do
for test in eval92; do # dev93
steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_fg data/test_${test} exp/sgmm2_5b/decode_bd_tgpr_${test} \
exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter &
done
done
steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \
--zero-if-disjoint true data/train_si284 data/lang exp/sgmm2_5b_ali_si284 exp/sgmm2_5b_denlats_si284 exp/sgmm2_5b_mmi_b0.1_z
for iter in 1 2 3 4; do
for test in eval92 dev93; do
steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_fg data/test_${test} exp/sgmm2_5b/decode_bd_tgpr_${test} \
exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_${test}_it$iter &
done
done
) &
wait
# Examples of combining some of the best decodings: SGMM+MMI with
# MMI+fMMI on a conventional system.
local/score_combine.sh data/test_eval92 \
data/lang_test_bd_tgpr \
exp/tri4b_fmmi_a/decode_tgpr_eval92_it8 \
exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3 \
exp/combine_tri4b_fmmi_a_sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it8_3
# %WER 4.43 [ 250 / 5643, 41 ins, 12 del, 197 sub ] exp/tri4b_fmmi_a/decode_tgpr_eval92_it8/wer_11
# %WER 3.85 [ 217 / 5643, 35 ins, 11 del, 171 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3/wer_10
# combined to:
# %WER 3.76 [ 212 / 5643, 32 ins, 12 del, 168 sub ] exp/combine_tri4b_fmmi_a_sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it8_3/wer_12
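# Note: local/score_combine.sh also accepts a --lat-weights option (colon-separated,
# one weight per input decode dir) if you want to weight the systems unequally.
# A sketch (the weights and output directory name are illustrative):
# local/score_combine.sh --lat-weights 0.4:0.6 data/test_eval92 data/lang_test_bd_tgpr \
#   exp/tri4b_fmmi_a/decode_tgpr_eval92_it8 exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3 \
#   exp/combine_tri4b_fmmi_a_sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it8_3_w0.4_0.6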
# Checking MBR decode of baseline:
cp -r -T exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3{,.mbr}
local/score_mbr.sh data/test_eval92 data/lang_test_bd_tgpr exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3.mbr
# MBR decoding did not seem to help (baseline was 3.85). I think this is normal at such low WERs.
# %WER 3.86 [ 218 / 5643, 35 ins, 11 del, 172 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3.mbr/wer_10

View file

@ -0,0 +1,67 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
[ -f ./path.sh ] && . ./path.sh
# begin configuration section.
cmd=run.pl
stage=0
decode_mbr=true
reverse=false
word_ins_penalty=0.0
min_lmwt=5
max_lmwt=20
#end configuration section.
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
echo " Options:"
echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
echo " --stage (0|1|2) # start scoring script from part-way through."
echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)."
echo " --min_lmwt <int> # minumum LM-weight for lattice rescoring "
echo " --max_lmwt <int> # maximum LM-weight for lattice rescoring "
echo " --reverse (true/false) # score with time reversed features "
exit 1;
fi
data=$1
lang_or_graph=$2
dir=$3
symtab=$lang_or_graph/words.txt
for f in $symtab $dir/lat.1.gz $data/text; do
[ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
done
mkdir -p $dir/scoring/log
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
lattice-best-path --word-symbol-table=$symtab \
ark:- ark,t:$dir/scoring/LMWT.tra || exit 1;
if $reverse; then
for lmwt in `seq $min_lmwt $max_lmwt`; do
mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig
awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
<$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra
done
fi
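# Illustrative effect of the reversal above: a .tra line like 'utt1 45 12 7'
# becomes 'utt1 7 12 45', i.e. the word-ids are reversed but the utterance-id stays first.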
# Note: the sed command below needs a double level of quoting because the whole pipeline is passed through $cmd.
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
cat $dir/scoring/LMWT.tra \| \
utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
compute-wer --text --mode=present \
ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
exit 0;

View file

@ -0,0 +1,95 @@
#!/bin/bash
# Copyright 2013 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Script for system combination using minimum Bayes risk decoding.
# This calls lattice-combine to create a union of lattices that have been
# normalized by removing the total forward cost from them. The resulting lattice
# is used as input to lattice-mbr-decode. This should not be put in steps/ or
# utils/ since the scores on the combined lattice must not be scaled.
# begin configuration section.
cmd=run.pl
min_lmwt=9
max_lmwt=20
lat_weights=
#end configuration section.
help_message="Usage: "$(basename $0)" [options] <data-dir> <graph-dir|lang-dir> <decode-dir1> <decode-dir2> [decode-dir3 ... ] <out-dir>
Options:
--cmd (run.pl|queue.pl...) # specify how to run the sub-processes.
--min-lmwt INT # minimum LM-weight for lattice rescoring
--max-lmwt INT # maximum LM-weight for lattice rescoring
--lat-weights STR # colon-separated string of lattice weights
";
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
if [ $# -lt 5 ]; then
printf "$help_message\n";
exit 1;
fi
data=$1
graphdir=$2
odir=${@: -1} # last argument to the script
shift 2;
decode_dirs=( $@ ) # read the remaining arguments into an array
unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir
num_sys=${#decode_dirs[@]} # number of systems to combine
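# For example (illustrative), 'local/score_combine.sh data/test_eval92 data/lang_test_bd_tgpr dirA dirB out'
# gives decode_dirs=(dirA dirB), num_sys=2 and odir=out.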
symtab=$graphdir/words.txt
[ ! -f $symtab ] && echo "$0: missing word symbol table '$symtab'" && exit 1;
[ ! -f $data/text ] && echo "$0: missing reference '$data/text'" && exit 1;
mkdir -p $odir/log
for i in `seq 0 $[num_sys-1]`; do
model=${decode_dirs[$i]}/../final.mdl # model one level up from decode dir
for f in $model ${decode_dirs[$i]}/lat.1.gz ; do
[ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done
lats[$i]="\"ark:gunzip -c ${decode_dirs[$i]}/lat.*.gz |\""
done
mkdir -p $odir/scoring/log
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' \
> $odir/scoring/test_filt.txt
if [ -z "$lat_weights" ]; then
$cmd LMWT=$min_lmwt:$max_lmwt $odir/log/combine_lats.LMWT.log \
lattice-combine --inv-acoustic-scale=LMWT ${lats[@]} ark:- \| \
lattice-mbr-decode --word-symbol-table=$symtab ark:- \
ark,t:$odir/scoring/LMWT.tra || exit 1;
else
$cmd LMWT=$min_lmwt:$max_lmwt $odir/log/combine_lats.LMWT.log \
lattice-combine --inv-acoustic-scale=LMWT --lat-weights=$lat_weights \
${lats[@]} ark:- \| \
lattice-mbr-decode --word-symbol-table=$symtab ark:- \
ark,t:$odir/scoring/LMWT.tra || exit 1;
fi
$cmd LMWT=$min_lmwt:$max_lmwt $odir/scoring/log/score.LMWT.log \
cat $odir/scoring/LMWT.tra \| \
utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
compute-wer --text --mode=present \
ark:$odir/scoring/test_filt.txt ark,p:- ">&" $odir/wer_LMWT || exit 1;
exit 0

View file

@ -0,0 +1,58 @@
#!/bin/bash
# Script for minimum bayes risk decoding.
[ -f ./path.sh ] && . ./path.sh;
# begin configuration section.
cmd=run.pl
min_lmwt=9
max_lmwt=20
#end configuration section.
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: local/score_mbr.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
echo " Options:"
echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
echo " --min_lmwt <int> # minumum LM-weight for lattice rescoring "
echo " --max_lmwt <int> # maximum LM-weight for lattice rescoring "
exit 1;
fi
data=$1
lang_or_graph=$2
dir=$3
symtab=$lang_or_graph/words.txt
for f in $symtab $dir/lat.1.gz $data/text; do
[ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1;
done
mkdir -p $dir/scoring/log
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
# We submit the jobs separately, not as an array, because it's hard
# to get the inverse of the LM scales.
rm $dir/.error 2>/dev/null
for inv_acwt in `seq $min_lmwt $max_lmwt`; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
$cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \
lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \
"ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \
|| touch $dir/.error &
done
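# (Each job above decodes with acoustic scale 1/LMWT, e.g. inv_acwt=12 gives acwt ~ 0.0833.)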
wait;
[ -f $dir/.error ] && echo "score_mbr.sh: error getting MBR output.";
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
cat $dir/scoring/LMWT.tra \| \
utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
compute-wer --text --mode=present \
ark:$dir/scoring/test_filt.txt ark,p:- ">" $dir/wer_LMWT || exit 1;

View file

@ -0,0 +1,201 @@
#!/bin/bash
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
if [ $# -le 3 ]; then
echo "Arguments should be a list of WSJ directories, see ../run.sh for example."
exit 1;
fi
dir=`pwd`/data/local/data
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1;
fi
cd $dir
# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command
# line arguments being absolute pathnames.
rm -r links/ 2>/dev/null
mkdir links/
ln -s $* links
# Do some basic checks that we have what we expected.
if [ ! -d links/11-13.1 -o ! -d links/13-34.1 -o ! -d links/11-2.1 ]; then
echo "wsj_data_prep.sh: Spot check of command line arguments failed"
echo "Command line arguments must be absolute pathnames to WSJ directories"
echo "with names like 11-13.1."
exit 1;
fi
# This version for SI-84
cat links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \
$local/ndx2flist.pl $* | sort | \
grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si84.flist
nl=`cat train_si84.flist | wc -l`
[ "$nl" -eq 7138 ] || echo "Warning: expected 7138 lines in train_si84.flist, got $nl"
# This version for SI-284
cat links/13-34.1/wsj1/doc/indices/si_tr_s.ndx \
links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \
$local/ndx2flist.pl $* | sort | \
grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si284.flist
nl=`cat train_si284.flist | wc -l`
[ "$nl" -eq 37416 ] || echo "Warning: expected 37416 lines in train_si284.flist, got $nl"
# Now for the test sets.
# links/13-34.1/wsj1/doc/indices/readme.doc
# describes all the different test sets.
# Note: each test-set seems to come in multiple versions depending
# on different vocabulary sizes, verbalized vs. non-verbalized
# pronunciations, etc. We use the largest vocab and non-verbalized
# pronunciations.
# The most normal one seems to be the "baseline 60k test set", which
# is h1_p0.
# Nov'92 (333 utts)
# These index files have a slightly different format;
# have to add .wv1
cat links/11-13.1/wsj0/doc/indices/test/nvp/si_et_20.ndx | \
$local/ndx2flist.pl $* | awk '{printf("%s.wv1\n", $1)}' | \
sort > test_eval92.flist
# Nov'92 (330 utts, 5k vocab)
cat links/11-13.1/wsj0/doc/indices/test/nvp/si_et_05.ndx | \
$local/ndx2flist.pl $* | awk '{printf("%s.wv1\n", $1)}' | \
sort > test_eval92_5k.flist
# Nov'93: (213 utts)
# Have to replace a wrong disk-id.
cat links/13-32.1/wsj1/doc/indices/wsj1/eval/h1_p0.ndx | \
sed s/13_32_1/13_33_1/ | \
$local/ndx2flist.pl $* | sort > test_eval93.flist
# Nov'93: (213 utts, 5k)
cat links/13-32.1/wsj1/doc/indices/wsj1/eval/h2_p0.ndx | \
sed s/13_32_1/13_33_1/ | \
$local/ndx2flist.pl $* | sort > test_eval93_5k.flist
# Dev-set for Nov'93 (503 utts)
cat links/13-34.1/wsj1/doc/indices/h1_p0.ndx | \
$local/ndx2flist.pl $* | sort > test_dev93.flist
# Dev-set for Nov'93 (513 utts, 5k vocab)
cat links/13-34.1/wsj1/doc/indices/h2_p0.ndx | \
$local/ndx2flist.pl $* | sort > test_dev93_5k.flist
# Dev-set Hub 1,2 (503, 913 utterances)
# Note: the ???'s below match WSJ and SI_DT, or wsj and si_dt.
# Sometimes this gets copied from the CD's with upcasing, don't know
# why (could be older versions of the disks).
find `readlink links/13-16.1`/???1/??_??_20 -print | grep -i ".wv1" | sort > dev_dt_20.flist
find `readlink links/13-16.1`/???1/??_??_05 -print | grep -i ".wv1" | sort > dev_dt_05.flist
# Finding the transcript files:
for x in $*; do find -L $x -iname '*.dot'; done > dot_files.flist
# Convert the transcripts into our format (no normalization yet)
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
$local/flist2scp.pl $x.flist | sort > ${x}_sph.scp
cat ${x}_sph.scp | awk '{print $1}' | $local/find_transcripts.pl dot_files.flist > $x.trans1
done
# Do some basic normalization steps. At this point we don't remove OOVs--
# that will be done inside the training scripts, as we'd like to make the
# data-preparation stage independent of the specific lexicon used.
noiseword="<NOISE>";
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
cat $x.trans1 | $local/normalize_transcript.pl $noiseword | sort > $x.txt || exit 1;
done
# Create scp's with wav's. (the wv1 in the distribution is not really wav, it is sph.)
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp > ${x}_wav.scp
done
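# Each resulting ${x}_wav.scp line pipes the sphere file through sph2pipe; an illustrative
# line (utterance-id and paths are hypothetical) looks like:
# 011c0201 /path/to/kaldi/tools/sph2pipe_v2.5/sph2pipe -f wav /path/to/wsj0/si_tr_s/011/011c0201.wv1 |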
# Make the utt2spk and spk2utt files.
for x in train_si84 train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
cat ${x}_sph.scp | awk '{print $1}' | perl -ane 'chop; m:^...:; print "$_ $&\n";' > $x.utt2spk
cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;
done
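# The speaker-id is taken as the first 3 characters of the utterance-id, so utt2spk lines
# look like '011c0201 011' (illustrative); spk2utt is the inverse mapping.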
#in case we want to limit lm's on most frequent words, copy lm training word frequency list
cp links/13-32.1/wsj1/doc/lng_modl/vocab/wfl_64.lst $lmdir
chmod u+w $lmdir/*.lst # had weird permissions on source.
# The 20K vocab, open-vocabulary language model (i.e. the one with UNK), without
# verbalized pronunciations. This is the most common test setup, I understand.
cp links/13-32.1/wsj1/doc/lng_modl/base_lm/bcb20onp.z $lmdir/lm_bg.arpa.gz || exit 1;
chmod u+w $lmdir/lm_bg.arpa.gz
# trigram would be:
cat links/13-32.1/wsj1/doc/lng_modl/base_lm/tcb20onp.z | \
perl -e 'while(<>){ if(m/^\\data\\/){ print; last; } } while(<>){ print; }' | \
gzip -c -f > $lmdir/lm_tg.arpa.gz || exit 1;
prune-lm --threshold=1e-7 $lmdir/lm_tg.arpa.gz $lmdir/lm_tgpr.arpa || exit 1;
gzip -f $lmdir/lm_tgpr.arpa || exit 1;
# repeat for 5k language models
cp links/13-32.1/wsj1/doc/lng_modl/base_lm/bcb05onp.z $lmdir/lm_bg_5k.arpa.gz || exit 1;
chmod u+w $lmdir/lm_bg_5k.arpa.gz
# trigram would be: !only closed vocabulary here!
cp links/13-32.1/wsj1/doc/lng_modl/base_lm/tcb05cnp.z $lmdir/lm_tg_5k.arpa.gz || exit 1;
chmod u+w $lmdir/lm_tg_5k.arpa.gz
gunzip $lmdir/lm_tg_5k.arpa.gz
tail -n 4328839 $lmdir/lm_tg_5k.arpa | gzip -c -f > $lmdir/lm_tg_5k.arpa.gz
rm $lmdir/lm_tg_5k.arpa
prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit 1;
gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1;
if [ ! -f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then
rm -f wsj0-train-spkrinfo.txt # -f: the file may not exist yet
! wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt && \
echo "Getting wsj0-train-spkrinfo.txt from backup location" && \
wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj0-train-spkrinfo.txt
fi
if [ ! -f wsj0-train-spkrinfo.txt ]; then
echo "Could not get the spkrinfo.txt file from LDC website (moved)?"
echo "This is possibly omitted from the training disks; couldn't find it."
echo "Everything else may have worked; we just may be missing gender info"
echo "which is only needed for VTLN-related diagnostics anyway."
exit 1
fi
# Note: wsj0-train-spkrinfo.txt doesn't seem to be on the disks but the
# LDC put it on the web. Perhaps it was accidentally omitted from the
# disks.
cat links/11-13.1/wsj0/doc/spkrinfo.txt \
links/13-32.1/wsj1/doc/evl_spok/spkrinfo.txt \
links/13-34.1/wsj1/doc/dev_spok/spkrinfo.txt \
links/13-34.1/wsj1/doc/train/spkrinfo.txt \
./wsj0-train-spkrinfo.txt | \
perl -ane 'tr/A-Z/a-z/; m/^;/ || print;' | \
awk '{print $1, $2}' | grep -v -- -- | sort | uniq > spk2gender
echo "Data preparation succeeded"

Просмотреть файл

@ -0,0 +1,173 @@
#!/bin/bash
# This script builds a larger word-list and dictionary
# than used for the LMs supplied with the WSJ corpus.
# It uses a couple of strategies to fill-in words in
# the LM training data but not in CMUdict. One is
# to generate special prons for possible acronyms, that
# just consist of the constituent letters. The other
# is designed to handle derivatives of known words
# (e.g. deriving the pron of a plural from the pron of
# the base-word), but in a more general, learned-from-data
# way.
# It makes use of scripts in local/dict/
if [ $# -ne 1 ]; then
echo "Usage: local/wsj_train_lms.sh /foo/bar/WSJ/13-32.1/"
exit 1
fi
if [ "`basename $1`" != 13-32.1 ]; then
echo "Expecting the argument to this script to end in 13-32.1"
exit 1
fi
# e.g.
#srcdir=/mnt/matylda2/data/WSJ1/13-32.1
export PATH=$PATH:`pwd`/local/dict/
srcdir=$1
mkdir -p data/local/dict_larger
dir=data/local/dict_larger
cp data/local/dict/* data/local/dict_larger # Various files describing phones etc.
# are there; we just want to copy them as the phoneset is the same.
rm data/local/dict_larger/lexicon.txt # we don't want this.
rm data/local/dict_larger/lexiconp.txt # we don't want this either.
mincount=2 # Minimum count of an OOV we will try to generate a pron for.
[ ! -f data/local/dict/cmudict/cmudict.0.7a ] && echo "CMU dict not in expected place" && exit 1;
# Remove comments from cmudict; print first field; remove
# words like FOO(1) which are alternate prons: our dict format won't
# include these markers.
grep -v ';;;' data/local/dict/cmudict/cmudict.0.7a |
perl -ane 's/^(\S+)\(\d+\)/$1/; print; ' | sort | uniq > $dir/dict.cmu
cat $dir/dict.cmu | awk '{print $1}' | sort | uniq > $dir/wordlist.cmu
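# Illustrative effect of the cleanup above: an alternate-pronunciation entry of the form
#   SOMEWORD(2)  <phones...>
# is rewritten as
#   SOMEWORD  <phones...>
# i.e. the (N) marker is dropped but the alternate pronunciation itself is kept in dict.cmu.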
echo "Getting training data [this should take at least a few seconds; if not, there's a problem]"
# Convert to uppercase, remove XML-like markings.
# For words ending in "." that are not in CMUdict, we assume that these
# are periods that somehow remained in the data during data preparation,
# and we replace the "." with "\n". Note: we found this by looking at
# oov.counts below (before adding this rule).
touch $dir/cleaned.gz
if [ `du -m $dir/cleaned.gz | cut -f 1` -eq 73 ]; then
echo "Not getting cleaned data in $dir/cleaned.gz again [already exists]";
else
gunzip -c $srcdir/wsj1/doc/lng_modl/lm_train/np_data/{87,88,89}/*.z \
| awk '/^</{next}{print toupper($0)}' | perl -e '
open(F, "<$ARGV[0]")||die;
while(<F>){ chop; $isword{$_} = 1; }
while(<STDIN>) {
@A = split(" ", $_);
for ($n = 0; $n < @A; $n++) {
$a = $A[$n];
if (! $isword{$a} && $a =~ s/^([^\.]+)\.$/$1/) { # nonwords that end in "."
# and have no other "." in them: treat as period.
print "$a";
if ($n+1 < @A) { print "\n"; }
} else { print "$a "; }
}
print "\n";
}
' $dir/wordlist.cmu | gzip -c > $dir/cleaned.gz
fi
# get unigram counts
echo "Getting unigram counts"
gunzip -c $dir/cleaned.gz | tr -s ' ' '\n' | \
awk '{count[$1]++} END{for (w in count) { print count[w], w; }}' | sort -nr > $dir/unigrams
cat $dir/unigrams | awk -v dict=$dir/dict.cmu \
'BEGIN{while(getline<dict) seen[$1]=1;} {if(!seen[$2]){print;}}' \
> $dir/oov.counts
echo "Most frequent unseen unigrams are: "
head $dir/oov.counts
# Prune away singleton counts, and remove things with numbers in
# (which should have been normalized) and with no letters at all.
cat $dir/oov.counts | awk -v thresh=$mincount '{if ($1 >= thresh) { print $2; }}' \
| awk '/[0-9]/{next;} /[A-Z]/{print;}' > $dir/oovlist
# Automatic rule-finding...
# First make some prons for possible acronyms.
# Note: we don't do this for things like U.K or U.N,
# or A.B. (which doesn't exist anyway),
# as we consider this normalization/spelling errors.
cat $dir/oovlist | local/dict/get_acronym_prons.pl $dir/dict.cmu > $dir/dict.acronyms
mkdir $dir/f $dir/b # forward, backward directions of rules...
# forward is normal suffix
# rules, backward is reversed (prefix rules). These
# dirs contain stuff we create while making the rule-based
# extensions to the dictionary.
# Remove ; and , from words, if they are present; these
# might crash our scripts, as they are used as separators there.
filter_dict.pl $dir/dict.cmu > $dir/f/dict
cat $dir/oovlist | filter_dict.pl > $dir/f/oovs
reverse_dict.pl $dir/f/dict > $dir/b/dict
reverse_dict.pl $dir/f/oovs > $dir/b/oovs
# The next stage takes a few minutes.
# Note: the forward stage takes longer, as English is
# mostly a suffix-based language, and there are more rules
# that it finds.
for d in $dir/f $dir/b; do
(
cd $d
cat dict | get_rules.pl 2>get_rules.log >rules
get_rule_hierarchy.pl rules >hierarchy
awk '{print $1}' dict | get_candidate_prons.pl rules dict | \
limit_candidate_prons.pl hierarchy | \
score_prons.pl dict | \
count_rules.pl >rule.counts
# the sort command below is just for convenience of reading.
score_rules.pl <rule.counts | sort -t';' -k3,3 -n -r >rules.with_scores
get_candidate_prons.pl rules.with_scores dict oovs | \
limit_candidate_prons.pl hierarchy > oovs.candidates
) &
done
wait
# Merge the candidates.
reverse_candidates.pl $dir/b/oovs.candidates | cat - $dir/f/oovs.candidates | sort > $dir/oovs.candidates
select_candidate_prons.pl <$dir/oovs.candidates | awk -F';' '{printf("%s %s\n", $1, $2);}' \
> $dir/dict.oovs
cat $dir/dict.acronyms $dir/dict.oovs | sort | uniq > $dir/dict.oovs_merged
awk '{print $1}' $dir/dict.oovs_merged | uniq > $dir/oovlist.handled
sort $dir/oovlist | diff - $dir/oovlist.handled | grep -v 'd' | sed 's:< ::' > $dir/oovlist.not_handled
# add_counts.pl attaches the original counts to the lists of handled/not-handled OOVs
add_counts.pl $dir/oov.counts $dir/oovlist.handled | sort -nr > $dir/oovlist.handled.counts
add_counts.pl $dir/oov.counts $dir/oovlist.not_handled | sort -nr > $dir/oovlist.not_handled.counts
echo "**Top OOVs we handled are:**";
head $dir/oovlist.handled.counts
echo "**Top OOVs we didn't handle are as follows (note: they are mostly misspellings):**";
head $dir/oovlist.not_handled.counts
echo "Count of OOVs we handled is `awk '{x+=$1} END{print x}' $dir/oovlist.handled.counts`"
echo "Count of OOVs we couldn't handle is `awk '{x+=$1} END{print x}' $dir/oovlist.not_handled.counts`"
echo "Count of OOVs we didn't handle due to low count is" \
`awk -v thresh=$mincount '{if ($1 < thresh) x+=$1; } END{print x;}' $dir/oov.counts`
# The two files created above are for humans to look at, as diagnostics.
cat <<EOF | cat - $dir/dict.cmu $dir/dict.oovs_merged | sort | uniq > $dir/lexicon.txt
!SIL SIL
<SPOKEN_NOISE> SPN
<UNK> SPN
<NOISE> NSN
EOF
echo "Created $dir/lexicon.txt"

View file

@ -0,0 +1,86 @@
#!/bin/bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# This script takes data prepared in a corpus-dependent way
# in data/local/, and converts it into the "canonical" form,
# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug,
# data/train_si284, data/train_si84, etc.
# Don't bother doing train_si84 separately (although we have the file lists
# in data/local/) because it's just the first 7138 utterances in train_si284.
# We'll create train_si84 after doing the feature extraction.
. ./path.sh || exit 1;
echo "Preparing train and test data"
srcdir=data/local/data
lmdir=data/local/nist_lm
tmpdir=data/local/lm_tmp
lexicon=data/local/lang_tmp/lexiconp.txt
mkdir -p $tmpdir
for x in train_si284 test_eval92 test_eval93 test_dev93 test_eval92_5k test_eval93_5k test_dev93_5k dev_dt_05 dev_dt_20; do
mkdir -p data/$x
cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
cp $srcdir/$x.txt data/$x/text || exit 1;
cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
utils/filter_scp.pl data/$x/spk2utt $srcdir/spk2gender > data/$x/spk2gender || exit 1;
done
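# Each data/$x directory now contains wav.scp, text, spk2utt, utt2spk and spk2gender
# (created by the copies above).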
# Next, for each type of language model, create the corresponding FST
# and the corresponding lang_test_* directory.
echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
mkdir -p $test
for f in phones.txt words.txt L.fst L_disambig.fst \
phones/; do
cp -r data/lang/$f $test
done
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt
# grep -v '<s> <s>' because the LM seems to have some strange and useless
# stuff in it with multiple <s>'s in the history. Encountered some other similar
# things in a LM from Geoff. Removing all "illegal" combinations of <s> and </s>,
# which are supposed to occur only at beginning/end of utt. These can cause
# determinization failures of CLG [ends up being epsilon cycles].
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
grep -v '<s> <s>' | \
grep -v '</s> <s>' | \
grep -v '</s> </s>' | \
arpa2fst - | fstprint | \
utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
--osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > $test/G.fst
fstisstochastic $test/G.fst
# The output is like:
# 9.14233e-05 -0.259833
# we do expect the first of these 2 numbers to be close to zero (the second is
# nonzero because the backoff weights make the states sum to >1).
# Because of the <s> fiasco for these particular LMs, the first number is not
# as close to zero as it could be.
# Everything below is only for diagnostic.
# Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
# this might cause determinization failure of CLG.
# #0 is treated as an empty word.
mkdir -p $tmpdir/g
awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
< "$lexicon" >$tmpdir/g/select_empty.fst.txt
fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt $tmpdir/g/select_empty.fst.txt | \
fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst
fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' &&
echo "Language model has cycles with empty words" && exit 1
rm -r $tmpdir/g
done
echo "Succeeded in formatting data."
rm -r $tmpdir
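# Optional quick check (sketch): print basic statistics of one of the grammar FSTs, e.g.
# fstinfo data/lang_test_tgpr/G.fst | head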

View file

@ -0,0 +1,52 @@
#!/bin/bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
. ./path.sh
[ ! -d data/lang_bd ] && echo "Expect data/lang_bd to exist" && exit 1;
lm_srcdir_3g=data/local/local_lm/3gram-mincount
lm_srcdir_4g=data/local/local_lm/4gram-mincount
[ ! -d "$lm_srcdir_3g" ] && echo "No such dir $lm_srcdir_3g" && exit 1;
[ ! -d "$lm_srcdir_4g" ] && echo "No such dir $lm_srcdir_4g" && exit 1;
for d in data/lang_test_bd_{tg,tgpr,fg,fgpr}; do
rm -r $d 2>/dev/null
cp -r data/lang_bd $d
done
lang=data/lang_bd
# Be careful: this time we dispense with the grep -v '<s> <s>' so this might
# not work for LMs generated from all toolkits.
gunzip -c $lm_srcdir_3g/lm_pr6.0.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > data/lang_test_bd_tgpr/G.fst || exit 1;
fstisstochastic data/lang_test_bd_tgpr/G.fst
gunzip -c $lm_srcdir_3g/lm_unpruned.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > data/lang_test_bd_tg/G.fst || exit 1;
fstisstochastic data/lang_test_bd_tg/G.fst
gunzip -c $lm_srcdir_4g/lm_unpruned.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > data/lang_test_bd_fg/G.fst || exit 1;
fstisstochastic data/lang_test_bd_fg/G.fst
gunzip -c $lm_srcdir_4g/lm_pr7.0.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon > data/lang_test_bd_fgpr/G.fst || exit 1;
fstisstochastic data/lang_test_bd_fgpr/G.fst
exit 0;

View file

@ -0,0 +1,83 @@
#!/bin/bash
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Call this script from one level above, e.g. from the s3/ directory. It puts
# its output in data/local/.
# The parts of the output of this that will be needed are
# [in data/local/dict/ ]
# lexicon.txt
# extra_questions.txt
# nonsilence_phones.txt
# optional_silence.txt
# silence_phones.txt
# run this from ../
dir=data/local/dict
mkdir -p $dir
# (1) Get the CMU dictionary
svn co https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \
$dir/cmudict || exit 1;
# can add -r 10966 for strict compatibility.
#(2) Dictionary preparation:
# Make phones symbol-table (adding in silence and verbal and non-verbal noises at this point).
# We are adding suffixes _B, _E, _S for beginning, ending, and singleton phones.
# silence phones, one per line.
(echo SIL; echo SPN; echo NSN) > $dir/silence_phones.txt
echo SIL > $dir/optional_silence.txt
# nonsilence phones; on each line is a list of phones that correspond
# really to the same base phone.
cat $dir/cmudict/cmudict.0.7a.symbols | perl -ane 's:\r::; print;' | \
perl -e 'while(<>){
chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_";
$phones_of{$1} .= "$_ "; }
foreach $list (values %phones_of) {print $list . "\n"; } ' \
> $dir/nonsilence_phones.txt || exit 1;
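# Each line of nonsilence_phones.txt groups the stress-marked variants of one base phone,
# e.g. a line like 'AA AA0 AA1 AA2' (illustrative).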
# A few extra questions that will be added to those obtained by automatically clustering
# the "real" phones. These ask about stress; there's also one for silence.
cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1;
cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) {
$p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
>> $dir/extra_questions.txt || exit 1;
grep -v ';;;' $dir/cmudict/cmudict.0.7a | \
perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; print; }' \
> $dir/lexicon1_raw_nosil.txt || exit 1;
# Add to cmudict the silences, noises etc.
(echo '!SIL SIL'; echo '<SPOKEN_NOISE> SPN'; echo '<UNK> SPN'; echo '<NOISE> NSN'; ) | \
cat - $dir/lexicon1_raw_nosil.txt > $dir/lexicon2_raw.txt || exit 1;
# lexicon.txt is without the _B, _E, _S, _I markers.
# This is the input to wsj_format_data.sh
cp $dir/lexicon2_raw.txt $dir/lexicon.txt
echo "Dictionary preparation succeeded"

Просмотреть файл

@ -0,0 +1,202 @@
#!/bin/bash
# This script trains LMs on the WSJ LM-training data.
# It requires that you have already run wsj_extend_dict.sh,
# to get the larger-size dictionary including all of CMUdict
# plus any OOVs and possible acronyms that we could easily
# derive pronunciations for.
# This script takes no command-line arguments
dir=data/local/local_lm
srcdir=data/local/dict_larger
mkdir -p $dir
. ./path.sh || exit 1; # for KALDI_ROOT
export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
( # First make sure the kaldi_lm toolkit is installed.
cd $KALDI_ROOT/tools || exit 1;
if [ -d kaldi_lm ]; then
echo Not installing the kaldi_lm toolkit since it is already there.
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
make || exit 1;
echo Done making the kaldi_lm tools
fi
) || exit 1;
if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then
echo "Expecting files $srcdir/cleaned.gz and $srcdir/lexicon.txt to exist";
echo "You need to run local/wsj_extend_dict.sh before running this script."
exit 1;
fi
# Get a wordlist-- keep everything but silence, which should not appear in
# the LM.
awk '{print $1}' $srcdir/lexicon.txt | grep -v -w '!SIL' > $dir/wordlist.txt
# Get training data with OOV words (w.r.t. our current vocab) replaced with <UNK>.
echo "Getting training data with OOV words replaced with <UNK> (train_nounk.gz)"
gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.txt \
'BEGIN{while((getline<w)>0) v[$1]=1;}
{for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf "<UNK> ";print ""}'|sed 's/ $//g' \
| gzip -c > $dir/train_nounk.gz
# Get unigram counts (without bos/eos, but this doesn't matter here, it's
# only to get the word-map, which treats them specially & doesn't need their
# counts).
# Add a 1-count for each word in word-list by including that in the data,
# so all words appear.
gunzip -c $dir/train_nounk.gz | cat - $dir/wordlist.txt | \
awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \
sort -nr > $dir/unigram.counts
# Get "mapped" words-- a character encoding of the words that makes the common words very short.
cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<UNK>" > $dir/word_map
gunzip -c $dir/train_nounk.gz | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;}
{ for(n=1;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz
# To save disk space, remove the un-mapped training data. We could
# easily generate it again if needed.
rm $dir/train_nounk.gz
train_lm.sh --arpa --lmtype 3gram-mincount $dir
#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826
# 7.8 million N-grams.
prune_lm.sh --arpa 6.0 $dir/3gram-mincount/
# 1.45 million N-grams.
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139
train_lm.sh --arpa --lmtype 4gram-mincount $dir
#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180
# 10.3 million N-grams.
prune_lm.sh --arpa 7.0 $dir/4gram-mincount
# 1.50 million N-grams
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757
exit 0
### Below here, this script is showing various commands that
## were run during LM tuning.
train_lm.sh --arpa --lmtype 3gram-mincount $dir
#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826
# 7.8 million N-grams.
prune_lm.sh --arpa 3.0 $dir/3gram-mincount/
#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 156.408740
# 2.5 million N-grams.
prune_lm.sh --arpa 6.0 $dir/3gram-mincount/
# 1.45 million N-grams.
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139
train_lm.sh --arpa --lmtype 4gram-mincount $dir
#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180
# 10.3 million N-grams.
prune_lm.sh --arpa 3.0 $dir/4gram-mincount
#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 143.206294
# 2.6 million N-grams.
prune_lm.sh --arpa 4.0 $dir/4gram-mincount
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 146.927717
# 2.15 million N-grams.
prune_lm.sh --arpa 5.0 $dir/4gram-mincount
# 1.86 million N-grams
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 150.162023
prune_lm.sh --arpa 7.0 $dir/4gram-mincount
# 1.50 million N-grams
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757
train_lm.sh --arpa --lmtype 3gram $dir
# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 135.692866
# 20.0 million N-grams
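# Summary of the tuning runs above (approximate N-gram count / heldout perplexity):
#   3gram-mincount:  unpruned 7.8M/141.4   prune 3.0: 2.5M/156.4   prune 6.0: 1.45M/165.4
#   4gram-mincount:  unpruned 10.3M/126.7  prune 3.0: 2.6M/143.2   prune 4.0: 2.15M/146.9
#                    prune 5.0: 1.86M/150.2  prune 7.0: 1.50M/155.7
#   3gram (no mincount): unpruned 20.0M/135.7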
! which ngram-count \
&& echo "SRILM tools not installed so not doing the comparison" && exit 1;
#################
# You could finish the script here if you wanted.
# Below is to show how to do baselines with SRILM.
# You'd have to install the SRILM toolkit first.
heldout_sent=10000 # Don't change this if you want result to be comparable with
# kaldi_lm results
sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities.
mkdir -p $sdir
gunzip -c $srcdir/cleaned.gz | head -$heldout_sent > $sdir/cleaned.heldout
gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent > $sdir/cleaned.train
(echo "<s>"; echo "</s>" ) | cat - $dir/wordlist.txt > $sdir/wordlist.final.s
# 3-gram:
ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
-map-unk "<UNK>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz
ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/cleaned.heldout # consider -debug 2
#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
#0 zeroprobs, logprob= -491456 ppl= 141.457 ppl1= 177.437
# Trying 4-gram:
ngram-count -text $sdir/cleaned.train -order 4 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
-map-unk "<UNK>" -kndiscount -interpolate -lm $sdir/srilm.o4g.kn.gz
ngram -order 4 -lm $sdir/srilm.o4g.kn.gz -ppl $sdir/cleaned.heldout
#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
#0 zeroprobs, logprob= -480939 ppl= 127.233 ppl1= 158.822
#3-gram with pruning:
ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
-prune 0.0000001 -map-unk "<UNK>" -kndiscount -interpolate -lm $sdir/srilm.o3g.pr7.kn.gz
ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout
#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
#0 zeroprobs, logprob= -510828 ppl= 171.947 ppl1= 217.616
# Around 2.25M N-grams.
# Note: this is closest to the experiment done with "prune_lm.sh --arpa 3.0 $dir/3gram-mincount/"
# above, which gave 2.5 million N-grams and a perplexity of 156.
# Note: all SRILM experiments above fully discount all singleton 3 and 4-grams.
# You can use -gt3min=0 and -gt4min=0 to stop this (this will be comparable to
# the kaldi_lm experiments above without "-mincount").
## From here is how to train with
# IRSTLM. This is not really working at the moment.
export IRSTLM=$KALDI_ROOT/tools/irstlm/
idir=$dir/irstlm
mkdir $idir
gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | $IRSTLM/scripts/add-start-end.sh | \
gzip -c > $idir/train.gz
$IRSTLM/bin/dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no
cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\
{print $0;}}' > vocab.irstlm.20k
$IRSTLM/bin/build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \
-n 3 -s improved-kneser-ney -b yes
# Testing perplexity with SRILM tools:
ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout
#data/local/local_lm/irstlm/lm_3gram.gz: line 162049: warning: non-zero probability for <unk> in closed-vocabulary LM
#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 0 OOVs
#0 zeroprobs, logprob= -513670 ppl= 175.041 ppl1= 221.599
# Perplexity is very bad (175 here, where we would expect ~141 since we used the -p option),
# but adding -debug 3 to the command line shows that
# the IRSTLM LM does not seem to sum to one properly, so it seems that
# it produces an LM that isn't interpretable in the normal way as an ARPA
# LM.

View file

@ -0,0 +1,153 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# This script trains LMs on the WSJ LM-training data.
# It requires that you have already run wsj_extend_dict.sh,
# to get the larger-size dictionary including all of CMUdict
# plus any OOVs and possible acronyms that we could easily
# derive pronunciations for.
# This script takes no command-line arguments but takes the --cmd option.
# Begin configuration section.
rand_seed=0
cmd=run.pl
nwords=10000 # This is how many words we're putting in the vocab of the RNNLM.
hidden=30
class=200 # Num-classes... should be somewhat larger than sqrt of nwords.
direct=1000 # Probably number of megabytes to allocate for hash-table for "direct" connections.
rnnlm_ver=rnnlm-0.3e # version of RNNLM to use
# End configuration section.
[ -f ./path.sh ] && . ./path.sh
. utils/parse_options.sh
if [ $# != 1 ]; then
echo "Usage: local/wsj_train_rnnlms.sh [options] <dest-dir>"
echo "For options, see top of script file"
exit 1;
fi
dir=$1
srcdir=data/local/dict_larger
mkdir -p $dir
export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH
( # First make sure the rnnlm toolkit is installed.
# Note: this didn't work out of the box for me; I had to
# change the g++ version to just "g++" (no cross-compilation
# was needed for me, as I ran on a machine that had been set up
# as 64-bit by default).
cd $KALDI_ROOT/tools || exit 1;
if [ -d $rnnlm_ver ]; then
echo Not installing the rnnlm toolkit since it is already there.
else
echo Downloading and installing the rnnlm tools
# http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz
if [ ! -f $rnnlm_ver.tgz ]; then
wget http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz || exit 1;
fi
mkdir $rnnlm_ver
cd $rnnlm_ver
tar -xvzf ../$rnnlm_ver.tgz || exit 1;
make CC=g++ || exit 1;
echo Done making the rnnlm tools
fi
) || exit 1;
if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then
echo "Expecting files $srcdir/cleaned.gz and $srcdir/wordlist.final to exist";
echo "You need to run local/wsj_extend_dict.sh before running this script."
exit 1;
fi
cat $srcdir/lexicon.txt | awk '{print $1}' | grep -v -w '!SIL' > $dir/wordlist.all
# Get training data with OOV words (w.r.t. our current vocab) replaced with <UNK>.
echo "Getting training data with OOV words replaced with <UNK> (train_nounk.gz)"
gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.all \
'BEGIN{while((getline<w)>0) v[$1]=1;}
{for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf "<UNK> ";print ""}'|sed 's/ $//g' \
| gzip -c > $dir/all.gz
echo "Splitting data into train and validation sets."
heldout_sent=10000
gunzip -c $dir/all.gz | head -n $heldout_sent > $dir/valid.in # validation data
gunzip -c $dir/all.gz | tail -n +$heldout_sent | \
perl -e ' use List::Util qw(shuffle); @A=<>; print join("", shuffle(@A)); ' \
> $dir/train.in # training data
# The rest will consist of a word-class represented by <RNN_UNK>, that
# maps (with probabilities) to a whole class of words.
# Get unigram counts from our training data, and use this to select word-list
# for RNNLM training; e.g. 10k most frequent words. Rest will go in a class
# that we (manually, at the shell level) assign probabilities for words that
# are in that class. Note: this word-list doesn't need to include </s>; this
# automatically gets added inside the rnnlm program.
# Note: by concatenating with $dir/wordlist.all, we are doing add-one
# smoothing of the counts.
cat $dir/train.in $dir/wordlist.all | grep -v '</s>' | grep -v '<s>' | \
awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \
sort -nr > $dir/unigram.counts
head -$nwords $dir/unigram.counts | awk '{print $2}' > $dir/wordlist.rnn
tail -n +$nwords $dir/unigram.counts > $dir/unk_class.counts
tot=`awk '{x=x+$1} END{print x}' $dir/unk_class.counts`
awk -v tot=$tot '{print $2, ($1*1.0/tot);}' <$dir/unk_class.counts >$dir/unk.probs
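# unk.probs then holds one '<word> <relative-frequency>' pair per line for the words that
# fall into the <RNN_UNK> class (each count divided by the class total, as computed above).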
for type in train valid; do
cat $dir/$type.in | awk -v w=$dir/wordlist.rnn \
'BEGIN{while((getline<w)>0) v[$1]=1;}
{for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf "<RNN_UNK> ";print ""}'|sed 's/ $//g' \
> $dir/$type
done
rm $dir/train.in # no longer needed-- and big.
# Now randomize the order of the training data.
cat $dir/train | awk -v rand_seed=$rand_seed 'BEGIN{srand(rand_seed);} {printf("%f\t%s\n", rand(), $0);}' | \
sort | cut -f 2 > $dir/foo
mv $dir/foo $dir/train
# OK we'll train the RNNLM on this data.
# todo: change 100 to 320.
# using 100 classes as square root of 10k.
echo "Training RNNLM (note: this uses a lot of memory! Run it on a big machine.)"
#time rnnlm -train $dir/train -valid $dir/valid -rnnlm $dir/100.rnnlm \
# -hidden 100 -rand-seed 1 -debug 2 -class 100 -bptt 2 -bptt-block 20 \
# -direct-order 4 -direct 1000 -binary >& $dir/rnnlm1.log &
$cmd $dir/rnnlm.log \
$KALDI_ROOT/tools/$rnnlm_ver/rnnlm -independent -train $dir/train -valid $dir/valid \
-rnnlm $dir/rnnlm -hidden $hidden -rand-seed 1 -debug 2 -class $class -bptt 2 -bptt-block 20 \
-direct-order 4 -direct $direct -binary || exit 1;
# make it like a Kaldi table format, with fake utterance-ids.
cat $dir/valid.in | awk '{ printf("uttid-%d ", NR); print; }' > $dir/valid.with_ids
utils/rnnlm_compute_scores.sh $dir $dir/tmp.valid $dir/valid.with_ids \
$dir/valid.scores
nw=`wc -w < $dir/valid.with_ids` # Note: valid.with_ids includes the utterance-ids, which add
# one extra token per sentence to account for the </s> at the end of each sentence; this is
# the correct number to normalize by.
p=`awk -v nw=$nw '{x=x+$2} END{print exp(x/nw);}' <$dir/valid.scores`
echo Perplexity is $p | tee $dir/perplexity.log
rm $dir/train $dir/all.gz
# This is a better setup, but takes a long time to train:
#echo "Training RNNLM (note: this uses a lot of memory! Run it on a big machine.)"
#time rnnlm -train $dir/train -valid $dir/valid -rnnlm $dir/320.rnnlm \
# -hidden 320 -rand-seed 1 -debug 2 -class 300 -bptt 2 -bptt-block 20 \
# -direct-order 4 -direct 2000 -binary

3
egs/chime_wsj0/s5/path.sh Executable file

@ -0,0 +1,3 @@
export KALDI_ROOT=`pwd`/../../..
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet-cpubin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C

261
egs/chime_wsj0/s5/run.sh Executable file

@ -0,0 +1,261 @@
#!/bin/bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.
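# (The case/esac construct below is a crude "goto": anything placed between the "1)" pattern
#  and the ";; #here:" line is never executed, since the pattern 1 does not match 0, so you
#  can move the ";; #here:" marker down past stages you have already run.)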
case 0 in #goto here
1)
;; #here:
esac
#exit 1;
#need wsj0 for the clean version and LMs
wsj0=/mnt/spdb/wall_street_journal
local/clean_wsj0_data_prep.sh $wsj0
reverb=/mnt/spdb/CHiME/chime2-wsj0/reverberated
local/reverb_wsj0_data_prep.sh $reverb
noisy=/mnt/spdb/CHiME/chime2-wsj0/isolated
local/noisy_wsj0_data_prep.sh $noisy
local/wsj_prepare_dict.sh || exit 1;
utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;
local/chime_format_data.sh || exit 1;
# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc
for x in test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean; do
steps/make_mfcc.sh --nj 10 \
data/$x exp/make_mfcc/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done
# Note: the --boost-silence option should probably be omitted by default
# for normal setups. It doesn't always help. [it's to discourage non-silence
# models from modeling silence.]
mfccdir=mfcc
for x in test_eval92_5k_noisy dev_dt_05_noisy train_si84_noisy; do
steps/make_mfcc.sh --nj 10 \
data/$x exp/make_mfcc/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done
mfccdir=mfcc
for x in dev_dt_05_reverb train_si84_reverb; do
steps/make_mfcc.sh --nj 10 \
data/$x exp/make_mfcc/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done
# Begin training GMM systems on multi-condition data:
# train_si84 = clean + reverb + noisy.
for s in train_si84 ; do
mkdir -p data/$s
cp data/${s}_clean/spk2gender data/$s/
for x in text wav.scp; do
cat data/${s}_clean/$x data/${s}_reverb/$x data/${s}_noisy/$x | sort -k1 > data/$s/$x
done
cat data/$s/wav.scp | awk '{print $1}' | perl -ane 'chop; m:^...:; print "$_ $&\n";' > data/$s/utt2spk
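# (The first three characters of each utterance-id are taken as the speaker-id; e.g. a
#  hypothetical utterance "011c0201_xyz" would be assigned to speaker "011".)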
cat data/$s/utt2spk | utils/utt2spk_to_spk2utt.pl > data/$s/spk2utt
done
mfccdir=mfcc
for x in train_si84; do
steps/make_mfcc.sh --nj 10 \
data/$x exp/make_mfcc/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done
steps/train_mono.sh --boost-silence 1.25 --nj 10 \
data/train_si84 data/lang exp/mono0a || exit 1;
utils/mkgraph.sh --mono data/lang_test_tgpr_5k exp/mono0a exp/mono0a/graph_tgpr_5k
#steps/decode.sh --nj 8 \
# exp/mono0a/graph_tgpr_5k data/test_eval92_5k_clean exp/mono0a/decode_tgpr_eval92_5k_clean
steps/decode.sh --nj 8 \
exp/mono0a/graph_tgpr_5k data/test_eval92_5k_noisy exp/mono0a/decode_tgpr_eval92_5k_noisy
steps/align_si.sh --boost-silence 1.25 --nj 10 \
data/train_si84 data/lang exp/mono0a exp/mono0a_ali || exit 1;
steps/train_deltas.sh --boost-silence 1.25 \
2000 10000 data/train_si84 data/lang exp/mono0a_ali exp/tri1 || exit 1;
while [ ! -f data/lang_test_tgpr/tmp/LG.fst ] || \
[ ! -s data/lang_test_tgpr/tmp/LG.fst ]; do
sleep 20;
done
sleep 30;
# or the mono mkgraph.sh might be writing
# data/lang_test_tgpr/tmp/LG.fst which will cause this to fail.
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri1 exp/tri1/graph_tgpr_5k || exit 1;
#steps/decode.sh --nj 8 \
# exp/tri1/graph_tgpr data/test_eval92_5k_clean exp/tri1/decode_tgpr_eval92_5k_clean || exit 1;
steps/decode.sh --nj 8 \
exp/tri1/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri1/decode_tgpr_eval92_5k_noisy || exit 1;
# test various modes of LM rescoring (4 is the default one).
# This is just confirming they're equivalent.
#for mode in 1 2 3 4; do
#steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \
# data/test_dev93 exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_tg$mode || exit 1;
#done
# demonstrate how to get lattices that are "word-aligned" (arcs coincide with
# words, with boundaries in the right place).
#sil_label=`grep '!SIL' data/lang_test_tgpr/words.txt | awk '{print $2}'`
#steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label $sil_label \
# data/lang_test_tgpr exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_aligned || exit 1;
steps/align_si.sh --nj 10 \
data/train_si84 data/lang exp/tri1 exp/tri1_ali_si84 || exit 1;
# Train tri2a, which is deltas + delta-deltas, on si84 data.
steps/train_deltas.sh \
2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2a || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2a exp/tri2a/graph_tgpr_5k || exit 1;
#steps/decode.sh --nj 8 \
# exp/tri2a/graph_tgpr_5k data/test_eval92_5k_clean exp/tri2a/decode_tgpr_eval92_5k_clean || exit 1;
steps/decode.sh --nj 8 \
exp/tri2a/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2a/decode_tgpr_eval92_5k_noisy|| exit 1;
#utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k
#steps/decode.sh --nj 8 \
# exp/tri2a/graph_bg5k data/test_eval92_5k_clean exp/tri2a/decode_bg_eval92_5k_clean || exit 1;
steps/train_lda_mllt.sh \
--splice-opts "--left-context=3 --right-context=3" \
2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2b exp/tri2b/graph_tgpr_5k || exit 1;
steps/decode.sh --nj 8 \
exp/tri2b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2b/decode_tgpr_eval92_5k_noisy || exit 1;
#steps/decode.sh --nj 8 \
# exp/tri2b/graph_tgpr data/test_eval92_clean exp/tri2b/decode_tgpr_eval92_clean || exit 1;
# Align tri2b system with si84 data.
steps/align_si.sh --nj 10 \
--use-graphs true data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84 || exit 1;
# From 2b system, train 3b which is LDA + MLLT + SAT.
steps/train_sat.sh \
2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri3b || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b exp/tri3b/graph_tgpr_5k || exit 1;
steps/decode_fmllr.sh --nj 8 \
exp/tri3b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri3b/decode_tgpr_eval92_5k_noisy || exit 1;
# From 3b multi-condition system, align noisy si84 data.
steps/align_fmllr.sh --nj 10 \
data/train_si84_noisy data/lang exp/tri3b exp/tri3b_ali_si84_noisy || exit 1;
steps/align_fmllr.sh --nj 10 \
data/dev_dt_05_noisy data/lang exp/tri3b exp/tri3b_ali_dev_dt_05 || exit 1;
#begin training DNN-HMM system
#only on noisy si84
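# Note: the data-fbank/* directories used below are assumed to already contain filterbank
# features (e.g. made with steps/make_fbank.sh using conf/fbank.conf, plus CMVN stats);
# make sure they have been created before this point.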
. ./path.sh
#RBM pretraining
dir=exp/tri4a_dnn_pretrain
$cuda_cmd $dir/_pretrain_dbn.log \
steps/pretrain_dbn.sh --use-gpu-id 0 --nn-depth 7 --rbm-iter 3 data-fbank/train_si84_noisy $dir
#BP: backpropagation fine-tuning of the DBN-initialized network
dir=exp/tri4a_dnn
ali=exp/tri3b_ali_si84_noisy
ali_dev=exp/tri3b_ali_dev_dt_05
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd $dir/_train_nnet.log \
steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \
data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri4a_dnn exp/tri4a_dnn/graph_tgpr_5k || exit 1;
steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
exp/tri4a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;
#Retrain the system using the new alignments;
#this is essential.
#Repeat this process 3 times (producing tri5a, tri6a and tri7a below).
srcdir=exp/tri4a_dnn
steps/align_nnet.sh --nj 10 \
data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1;
steps/align_nnet.sh --nj 10 \
data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1;
#no need to do pretraining again
dir=exp/tri5a_dnn
ali=exp/tri4a_dnn_ali_si84_noisy
ali_dev=exp/tri4a_dnn_ali_dt_05_noisy
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd $dir/_train_nnet.log \
steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \
data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri5a_dnn exp/tri5a_dnn/graph_tgpr_5k || exit 1;
steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
exp/tri5a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;
srcdir=exp/tri5a_dnn
steps/align_nnet.sh --nj 10 \
data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1;
steps/align_nnet.sh --nj 10 \
data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1;
. ./path.sh
dir=exp/tri6a_dnn
ali=exp/tri5a_dnn_ali_si84_noisy
ali_dev=exp/tri5a_dnn_ali_dt_05_noisy
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd $dir/_train_nnet.log \
steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \
data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri6a_dnn exp/tri6a_dnn/graph_tgpr_5k || exit 1;
steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
exp/tri6a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;
srcdir=exp/tri6a_dnn
steps/align_nnet.sh --nj 10 \
data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1;
steps/align_nnet.sh --nj 10 \
data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1;
. ./path.sh
dir=exp/tri7a_dnn
ali=exp/tri6a_dnn_ali_si84_noisy
ali_dev=exp/tri6a_dnn_ali_dt_05_noisy
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd $dir/_train_nnet.log \
steps/train_nnet.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 --use-gpu-id 0 \
data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri7a_dnn exp/tri7a_dnn/graph_tgpr_5k || exit 1;
steps/decode_nnet.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;


@ -0,0 +1,150 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2013 GoVivace Inc (Author: Nagendra Goel)
# Apache 2.0
# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta)
# + fMLLR (probably with SAT models).
# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl
# is not present), then does 2 iterations of fMLLR estimation.
# If you supply the --use-graphs option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match the source directory.
# Begin configuration section.
stage=0
nj=4
cmd=run.pl
use_graphs=false
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
boost_silence=1.5 # factor by which to boost silence during alignment.
fmllr_update_type=full
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_fmllr.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --fmllr-update-type (full|diag|offset|none) # default full."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
graphdir=$dir
oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
sdata=$data/split$nj
mkdir -p $dir/log
echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/boost_phones.csl` $alimdl - |"
mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"
## Work out where we're getting the graphs from.
if $use_graphs; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
else
graphdir=$dir
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
if [ $stage -le 1 ]; then
echo "$0: aligning data in $data using $alimdl and speaker-independent features."
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi
if [ $stage -le 2 ]; then
echo "$0: computing fMLLR transforms"
if [ "$alimdl" != "$mdl" ]; then
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \
gmm-est-basis-fmllr-gpost --fmllr-min-count=22 --num-iters=10 \
--size-scale=0.2 --step-size-iters=3 \
--write-weights=ark:$dir/pre_wgt.JOB \
$mdl $srcdir/fmllr.basis "$sifeats" ark,s,cs:- \
ark:$dir/trans.JOB || exit 1;
# else
# $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
# ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
# weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
# gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
# --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
# ark,s,cs:- ark:$dir/trans.JOB || exit 1;
fi
fi
feats="$sifeats transform-feats ark:$dir/pre_trans.JOB ark:- ark:- |"
if [ $stage -le 3 ]; then
echo "$0: doing final alignment."
$cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
#rm $dir/pre_ali.*.gz
echo "$0: done aligning data."
utils/summarize_warnings.pl $dir/log
exit 0;


@ -0,0 +1,148 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta)
# + fMLLR (probably with SAT models).
# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl
# is not present), then does 2 iterations of fMLLR estimation.
# If you supply the --use-graphs option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match the source directory.
# Begin configuration section.
stage=0
nj=4
cmd=run.pl
use_graphs=false
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
boost_silence=1.0 # factor by which to boost silence during alignment.
fmllr_update_type=full
norm_vars=false
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_fmllr.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --fmllr-update-type (full|diag|offset|none) # default full."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
sdata=$data/split$nj
mkdir -p $dir/log
echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $srcdir/full.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |"
mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"
## Work out where we're getting the graphs from.
if $use_graphs; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
else
graphdir=$dir
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
if [ $stage -le 1 ]; then
echo "$0: aligning data in $data using $alimdl and speaker-independent features."
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi
if [ $stage -le 2 ]; then
echo "$0: computing fMLLR transforms"
if [ "$alimdl" != "$mdl" ]; then
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \
gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
ark,s,cs:- ark:$dir/trans.JOB || exit 1;
else
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
ark,s,cs:- ark:$dir/trans.JOB || exit 1;
fi
fi
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
if [ $stage -le 3 ]; then
echo "$0: doing final alignment."
$cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
rm $dir/pre_ali.*.gz
echo "$0: done aligning data."
utils/summarize_warnings.pl $dir/log
exit 0;


@ -0,0 +1,99 @@
#!/bin/bash
# Copyright 2012 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
# Computes training alignments using MLP model
# If you supply the "--use-graphs true" option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match with the source directory.
# Begin configuration section.
nj=4
cmd=run.pl
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
use_gpu_id=-1 # disable gpu
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: $0 data/train data/lang exp/tri1 exp/tri1_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
#Get the files we will need
nnet=$srcdir/final.nnet;
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
class_frame_counts=$srcdir/ali_train_pdf.counts
[ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1;
feature_transform=$srcdir/final.feature_transform
if [ ! -f $feature_transform ]; then
echo "Missing feature_transform '$feature_transform'"
exit 1
fi
model=$dir/final.mdl
[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1;
###
### Prepare feature pipeline (same as for decoding)
###
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
[ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
delta_order=$(cat $srcdir/delta_order)
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
# Finally add feature_transform and the MLP
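# (Roughly speaking, nnet-forward with --no-softmax=true and --class-frame-counts turns the
#  network outputs into pseudo log-likelihoods by dividing out the class priors estimated
#  from ali_train_pdf.counts, so they can be used for alignment like GMM likelihoods.)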
feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |"
###
###
###
echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
# We could just use gmm-align-mapped in the next line, but it's less efficient as it compiles the
# training graphs one by one.
$cmd JOB=1:$nj $dir/log/align.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \
"$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1;
echo "$0: done aligning data."


@ -0,0 +1,142 @@
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta)
# + fMLLR (probably with SAT models).
# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl
# is not present), then does 2 iterations of fMLLR estimation.
# If you supply the --use-graphs option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match the source directory.
# Begin configuration section.
stage=0
nj=4
cmd=run.pl
use_graphs=false
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
boost_silence=1.0 # factor by which to boost silence during alignment.
norm_vars=false
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_fmllr.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
sdata=$data/split$nj
mkdir -p $dir/log
echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
if [[ ! -f $srcdir/final.mat || ! -f $srcdir/full.mat ]]; then
echo "$0: we require final.mat and full.mat in the source directory $srcdir"
fi
full_lda_mat="get-full-lda-mat --print-args=false $srcdir/final.mat $srcdir/full.mat -|"
cp $srcdir/full.mat $srcdir/final.mat $dir
splicedfeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |"
sifeats="$splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |"
## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |"
mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"
## Work out where we're getting the graphs from.
if $use_graphs; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
else
graphdir=$dir
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
if [ $stage -le 1 ]; then
echo "$0: aligning data in $data using $alimdl and speaker-independent features."
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi
if [ $stage -le 2 ]; then
echo "$0: computing fMLLR transforms"
if [ "$alimdl" != "$mdl" ]; then
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \
gmm-est-fmllr-raw-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$mdl "$full_lda_mat" "$splicedfeats" ark,s,cs:- ark:$dir/raw_trans.JOB || exit 1;
else
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-est-fmllr-raw --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$full_lda_mat" \
"$splicedfeats" ark,s,cs:- ark:$dir/raw_trans.JOB || exit 1;
fi
fi
feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/raw_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
if [ $stage -le 3 ]; then
echo "$0: doing final alignment."
$cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
rm $dir/pre_ali.*.gz
echo "$0: done aligning data."
utils/summarize_warnings.pl $dir/log
exit 0;


@ -0,0 +1,193 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments and (if needed) speaker-vectors, given an
# SGMM system. If the system is built on top of SAT, you should supply
# transforms with the --transform-dir option.
# If you supply the --use-graphs option, it will use the training
# graphs from the source directory.
# Begin configuration section.
stage=0
nj=4
cmd=run.pl
use_graphs=false # use graphs from srcdir
use_gselect=false # use gselect info from srcdir [regardless, we use
# Gaussian-selection info, we might have to compute it though.]
gselect=15 # Number of Gaussian-selection indices for SGMMs.
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
transform_dir= # directory to find fMLLR transforms in.
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_sgmm.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\"
echo " exp/sgmm4a exp/sgmm5a_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --transform-dir <transform-dir> # directory to find fMLLR transforms"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
sdata=$data/split$nj
mkdir -p $dir/log
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir
cp $srcdir/final.occs $dir;
## Set up features.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option during alignment."
fi
##
## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
## Work out where we're getting the graphs from.
if $use_graphs; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
ln.pl $srcdir/fsts.*.gz $dir
else
graphdir=$dir
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
## Work out where we're getting the Gaussian-selection info from
if $use_gselect; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1;
graphdir=$srcdir
gselect_opt="--gselect=ark,s,cs:gunzip -c $srcdir/gselect.JOB.gz|"
ln.pl $srcdir/gselect.*.gz $dir
else
graphdir=$dir
if [ $stage -le 1 ]; then
echo "$0: computing Gaussian-selection info"
# Note: doesn't matter whether we use $alimdl or $mdl, they will
# have the same gselect info.
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
sgmm-gselect --full-gmm-nbest=$gselect $alimdl \
"$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
fi
if [ $alimdl == $mdl ]; then
# Speaker-independent alignment -- just one pass. Not normal.
T=`sgmm-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1;
[ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1;
if [ $stage -le 2 ]; then
echo "$0: aligning data in $data using model $mdl (no speaker-vectors)"
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
sgmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
echo "$0: done aligning data."
exit 0;
fi
# Continue with system with speaker vectors.
if [ $stage -le 2 ]; then
echo "$0: aligning data in $data using model $alimdl"
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi
if [ $stage -le 3 ]; then
echo "$0: computing speaker vectors (1st pass)"
$cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
sgmm-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \
sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1;
fi
if [ $stage -le 4 ]; then
echo "$0: computing speaker vectors (2nd pass)"
$cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
--spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1;
rm $dir/pre_vecs.*
fi
if [ $stage -le 5 ]; then
echo "$0: doing final alignment."
$cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \
--utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
rm $dir/pre_ali.*.gz
echo "$0: done aligning data."
utils/summarize_warnings.pl $dir/log
exit 0;


@ -0,0 +1,193 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments and (if needed) speaker-vectors, given an
# SGMM system. If the system is built on top of SAT, you should supply
# transforms with the --transform-dir option.
# If you supply the --use-graphs option, it will use the training
# graphs from the source directory.
# Begin configuration section.
stage=0
nj=4
cmd=run.pl
use_graphs=false # use graphs from srcdir
use_gselect=false # use gselect info from srcdir [regardless, we use
# Gaussian-selection info, we might have to compute it though.]
gselect=15 # Number of Gaussian-selection indices for SGMMs.
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
transform_dir= # directory to find fMLLR transforms in.
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_sgmm.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\"
echo " exp/sgmm4a exp/sgmm5a_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --transform-dir <transform-dir> # directory to find fMLLR transforms"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
sdata=$data/split$nj
mkdir -p $dir/log
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir
cp $srcdir/final.occs $dir;
## Set up features.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option during alignment."
fi
##
## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
## Work out where we're getting the graphs from.
if $use_graphs; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
ln.pl $srcdir/fsts.*.gz $dir
else
graphdir=$dir
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
## Work out where we're getting the Gaussian-selection info from
if $use_gselect; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1;
graphdir=$srcdir
gselect_opt="--gselect=ark,s,cs:gunzip -c $srcdir/gselect.JOB.gz|"
ln.pl $srcdir/gselect.*.gz $dir
else
graphdir=$dir
if [ $stage -le 1 ]; then
echo "$0: computing Gaussian-selection info"
# Note: doesn't matter whether we use $alimdl or $mdl, they will
# have the same gselect info.
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
sgmm2-gselect --full-gmm-nbest=$gselect $alimdl \
"$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
fi
if [ $alimdl == $mdl ]; then
# Speaker-independent alignment -- just one pass. Not normal.
T=`sgmm2-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1;
[ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1;
if [ $stage -le 2 ]; then
echo "$0: aligning data in $data using model $mdl (no speaker-vectors)"
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
sgmm2-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
echo "$0: done aligning data."
exit 0;
fi
# Continue with system with speaker vectors.
if [ $stage -le 2 ]; then
echo "$0: aligning data in $data using model $alimdl"
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
sgmm2-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi
if [ $stage -le 3 ]; then
echo "$0: computing speaker vectors (1st pass)"
$cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
sgmm2-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \
sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1;
fi
if [ $stage -le 4 ]; then
echo "$0: computing speaker vectors (2nd pass)"
$cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
--spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1;
rm $dir/pre_vecs.*
fi
if [ $stage -le 5 ]; then
echo "$0: doing final alignment."
$cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
sgmm2-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \
--utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
rm $dir/pre_ali.*.gz
echo "$0: done aligning data."
utils/summarize_warnings.pl $dir/log
exit 0;


@ -0,0 +1,89 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments using a model with delta or
# LDA+MLLT features.
# If you supply the "--use-graphs true" option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match with the source directory.
# Begin configuration section.
nj=4
cmd=run.pl
use_graphs=false
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
boost_silence=1.0 # Factor by which to boost silence during alignment.
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_si.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_si.sh data/train data/lang exp/tri1 exp/tri1_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"
mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/final.mdl - |"
if $use_graphs; then
[ $nj != "`cat $srcdir/num_jobs`" ] && echo "$0: mismatch in num-jobs" && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "$0: no such file $srcdir/fsts.1.gz" && exit 1;
$cmd JOB=1:$nj $dir/log/align.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \
"ark:gunzip -c $srcdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
else
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
# We could just use gmm-align in the next line, but it's less efficient as it compiles the
# training graphs one by one.
$cmd JOB=1:$nj $dir/log/align.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" ark:- \
"$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
echo "$0: done aligning data."


@ -0,0 +1,67 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# This script appends the features in two data directories.
# To be run from .. (one directory up from here)
# see ../run.sh for example
# Begin configuration section.
cmd=run.pl
nj=4
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 5 ]; then
echo "usage: append_feats.sh [options] <src-data-dir1> <src-data-dir2> <dest-data-dir> <log-dir> <path-to-storage-dir>";
echo "options: "
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data_src1=$1
data_src2=$2
data=$3
logdir=$4
mfccdir=$5
# make $mfccdir an absolute pathname.
mfccdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $mfccdir ${PWD}`
utils/split_data.sh $data_src1 $nj || exit 1;
utils/split_data.sh $data_src2 $nj || exit 1;
mkdir -p $mfccdir $logdir
mkdir -p $data
cp $data_src1/* $data/ 2>/dev/null # so we get the other files, such as utt2spk.
rm $data/cmvn.scp 2>/dev/null
rm -r $data/split* 2>/dev/null
# use "name" as part of name of the archive.
name=`basename $data`
$cmd JOB=1:$nj $logdir/append.JOB.log \
append-feats --truncate-frames=true \
scp:$data_src1/split$nj/JOB/feats.scp scp:$data_src2/split$nj/JOB/feats.scp \
ark,scp:$mfccdir/appended_$name.JOB.ark,$mfccdir/appended_$name.JOB.scp || exit 1;
# concatenate the .scp files together.
for ((n=1; n<=nj; n++)); do
cat $mfccdir/appended_$name.$n.scp || exit 1;
done > $data/feats.scp || exit 1;
nf=`cat $data/feats.scp | wc -l`
nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
fi
echo "Succeeded creating MFCC features for $name"


@ -0,0 +1,80 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
# Compute cepstral mean and variance statistics per speaker.
# We do this in just one job; it's fast.
# This script takes no options.
#
# Note: there is no option to do CMVN per utterance. The idea is
# that if you did it per utterance it would not make sense to do
# per-speaker fMLLR on top of that (since you'd be doing fMLLR on
# top of different offsets). Therefore what would be the use
# of the speaker information? In this case you should probably
# make the speaker-ids identical to the utterance-ids. The
# speaker information does not have to correspond to actual
# speakers, it's just the level you want to adapt at.
echo "$0 $@" # Print the command line for logging
fake=false
if [ "$1" == "--fake" ]; then
fake=true
shift
fi
if [ $# != 3 ]; then
echo "usage: compute_cmvn_stats.sh [--fake] <data-dir> <log-dir> <path-to-cmvn-dir>";
echo "(note: --fake gives you fake cmvn stats that do no normalization.)"
exit 1;
fi
if [ -f path.sh ]; then . ./path.sh; fi
data=$1
logdir=$2
cmvndir=$3
# make $cmvndir an absolute pathname.
cmvndir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $cmvndir ${PWD}`
# use "name" as part of name of the archive.
name=`basename $data`
mkdir -p $cmvndir || exit 1;
mkdir -p $logdir || exit 1;
required="$data/feats.scp $data/spk2utt"
for f in $required; do
if [ ! -f $f ]; then
echo "make_cmvn.sh: no such file $f"
exit 1;
fi
done
if $fake; then
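# Fake stats are a 2 x (dim+1) matrix [ 0 ... 0 1 ; 1 ... 1 0 ]: zero feature sums with a
# count of 1, and unit sums-of-squares, giving zero means and unit variances, so applying
# them performs no actual normalization.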
dim=`feat-to-dim scp:$data/feats.scp -`
! cat $data/spk2utt | awk -v dim=$dim '{print $1, "["; for (n=0; n < dim; n++) { printf("0 "); } print "1";
for (n=0; n < dim; n++) { printf("1 "); } print "0 ]";}' | \
copy-matrix ark:- ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp && \
echo "Error creating fake CMVN stats" && exit 1;
else
! compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp \
2> $logdir/cmvn_$name.log && echo "Error computing CMVN stats" && exit 1;
fi
cp $cmvndir/cmvn_$name.scp $data/cmvn.scp || exit 1;
nc=`cat $data/cmvn.scp | wc -l`
nu=`cat $data/spk2utt | wc -l`
if [ $nc -ne $nu ]; then
echo "Error: it seems not all of the speakers got cmvn stats ($nc != $nu);"
exit 1;
fi
echo "Succeeded creating CMVN stats for $name"

108
egs/chime_wsj0/s5/steps/decode.sh Executable file

@ -0,0 +1,108 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Begin configuration section.
transform_dir=
iter=
model= # You can specify the model to use (e.g. if you want to use the .alimdl)
stage=0
nj=4
cmd=run.pl
max_active=7000
max_arcs=-1
beam=13.0
latbeam=6.0
acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
scoring_opts=
# note: there are no more min-lmwt and max-lmwt options, instead use
# e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
skip_scoring=false
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the model is."
echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
echo ""
echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
echo "what type of features you used (assuming it's one of these two)"
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --iter <iter> # Iteration of model to test."
echo " --model <model> # which model to use (e.g. to"
echo " # specify the final.alimdl)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --transform-dir <trans-dir> # dir to find fMLLR transforms "
echo " --acwt <float> # acoustic scale used for lattice generation "
echo " --scoring-opts <string> # options to local/score.sh"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
if [ -z $iter ]; then model=$srcdir/final.mdl;
else model=$srcdir/$iter.mdl; fi
fi
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do
[ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode.sh: feature type is $feat_type";
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
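# (e.g. with --num-threads 4 the command below becomes
#  "gmm-latgen-faster-parallel --num-threads=4 ...")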
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "Using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
echo "Mismatch in number of jobs with $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
if [ $stage -le 0 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster$thread_string --max-arcs=$max_arcs --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $scoring_opts $data $graphdir $dir
fi
exit 0;

View file

@ -0,0 +1,206 @@
#!/bin/bash
# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao)
# Johns Hopkins University (Author: Daniel Povey)
# Decoding script that does basis fMLLR. This can be on top of delta+delta-delta,
# or LDA+MLLT features.
# There are 3 models involved potentially in this script,
# and for a standard, speaker-independent system they will all be the same.
# The "alignment model" is for the 1st-pass decoding and to get the
# Gaussian-level alignments for the "adaptation model" the first time we
# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms
# and to generate state-level lattices. The lattices are then rescored
# with the "final model".
#
# The following table explains where we get these 3 models from.
# Note: $srcdir is one level up from the decoding directory.
#
# Model Default source:
#
# "alignment model" $srcdir/final.alimdl --alignment-model <model>
# (or $srcdir/final.mdl if alimdl absent)
# "adaptation model" $srcdir/final.mdl --adapt-model <model>
# "final model" $srcdir/final.mdl --final-model <model>
# Begin configuration section
first_beam=10.0 # Beam used in initial, speaker-indep. pass
first_max_active=2000 # max-active used in initial pass.
alignment_model=
adapt_model=
final_model=
stage=0
acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in
# lattice generation.
# Parameters in alignment of training data
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
align_beam=10
retry_beam=40
max_active=7000
beam=13.0
lattice_beam=6.0
nj=4
silence_weight=0.01
cmd=run.pl
si_dir=
# End configuration section
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode_basis_fmllr.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_basis_fmllr.sh exp/tri2b/graph_tgpr data/train_si84 data/test_dev93 exp/tri2b/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --adapt-model <adapt-mdl> # Model to compute transforms with"
echo " --alignment-model <ali-mdl> # Model to get Gaussian-level alignments for"
echo " # 1st pass of transform computation."
echo " --final-model <finald-mdl> # Model to finally decode with"
echo " --si-dir <speaker-indep-decoding-dir> # use this to skip 1st pass of decoding"
echo " # Caution-- must be with same tree"
echo " --acwt <acoustic-weight> # default 0.08333 ... used to get posteriors"
exit 1;
fi
graphdir=$1
data=$2
dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash.
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
silphonelist=`cat $graphdir/phones/silence.csl` || exit 1;
# Some checks. Note: we don't need $srcdir/tree but we expect
# it should exist, given the current structure of the scripts.
for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree $srcdir/fmllr.basis; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
##
## Do the speaker-independent decoding, if --si-dir option not present. ##
if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
if [ $stage -le 0 ]; then
steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model --max-active $first_max_active $graphdir $data $si_dir || exit 1;
fi
fi
##
## Some checks, and setting of defaults for variables.
[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1;
[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1;
[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl
[ -z "$final_model" ] && final_model=$srcdir/final.mdl
for f in $adapt_model $final_model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
##
## Set up the unadapted features "$sifeats" for testing set
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type";
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
##
## Now get the first-pass fMLLR transforms.
## We give all the default parameters in gmm-est-basis-fmllr
if [ $stage -le 1 ]; then
echo "$0: getting first-pass fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \
gunzip -c $si_dir/lat.JOB.gz \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \
gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \
gmm-est-basis-fmllr-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
--fmllr-min-count=200 --num-iters=10 --size-scale=0.2 \
--step-size-iters=3 --write-weights=ark:$dir/pre_wgt.JOB \
$adapt_model $srcdir/fmllr.basis "$sifeats" ark,s,cs:- \
ark:$dir/pre_trans.JOB || exit 1;
fi
##
pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- |"
## Do the main lattice generation pass. Note: we don't determinize the lattices at
## this stage, as we're going to use them in acoustic rescoring with the larger
## model, and it's more correct to store the full state-level lattice for this purpose.
if [ $stage -le 2 ]; then
echo "$0: doing main lattice generation phase"
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt \
--determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \
|| exit 1;
fi
##
## Do a second pass of estimating the transform-- this time with the lattices
## generated from the alignment model. Compose the transforms to get
## $dir/trans.1, etc.
if [ $stage -le 3 ]; then
echo "$0: estimating fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
"ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
gmm-est-basis-fmllr --fmllr-min-count=200 \
--spk2utt=ark:$sdata/JOB/spk2utt --write-weights=ark:$dir/trans_tmp_wgt.JOB \
$adapt_model $srcdir/fmllr.basis "$pass1feats" ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \
compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \
ark:$dir/trans.JOB || exit 1;
fi
##
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
# Rescore the state-level lattices with the final adapted features, and the final model
# (which by default is $srcdir/final.mdl, but which may be specified on the command line,
# useful in case of discriminatively trained systems).
# At this point we prune and determinize the lattices and write them out, ready for
# language model rescoring.
if [ $stage -le 4 ]; then
echo "$0: doing a final pass of acoustic rescoring."
$cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
fi
[ ! -x local/score.sh ] && \
echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
rm $dir/{trans_tmp,pre_trans}.*
exit 0;

View file

@ -0,0 +1,86 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Begin configuration.
nj=4
cmd=run.pl
maxactive=7000
beam=13.0
latbeam=6.0
acwt=0.083333
# End configuration.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 5 ]; then
echo "Usage: steps/decode_si_biglm.sh [options] <graph-dir> <old-LM-fst> <new-LM-fst> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the model is."
echo "e.g.: steps/decode_si.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
echo ""
echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
echo "what type of features you used (assuming it's one of these two)"
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
graphdir=$1
oldlm_fst=$2
newlm_fst=$3
data=$4
dir=$5
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $srcdir/final.mdl $graphdir/HCLG.fst $oldlm_fst $newlm_fst; do
[ ! -f $f ] && echo "decode_si.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode_si.sh: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
[ -f `dirname $oldlm_fst`/words.txt ] && ! cmp `dirname $oldlm_fst`/words.txt $graphdir/words.txt && \
echo "Warning: old LM words.txt does not match with that in $graphdir .. probably will not work.";
[ -f `dirname $newlm_fst`/words.txt ] && ! cmp `dirname $newlm_fst`/words.txt $graphdir/words.txt && \
echo "Warning: new LM words.txt does not match with that in $graphdir .. probably will not work.";
# fstproject replaces the disambiguation symbol #0, which only appears on the
# input side, with the <eps> that appears in the corresponding arcs on the output side.
oldlm_cmd="fstproject --project_output=true $oldlm_fst | fstarcsort --sort_type=ilabel |"
newlm_cmd="fstproject --project_output=true $newlm_fst | fstarcsort --sort_type=ilabel |"
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-biglm-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$oldlm_cmd" "$newlm_cmd" "$feats" \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
exit 0;

View file

@ -0,0 +1,59 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Combine two decoding directories by composing the lattices (we
# apply a weight to each of the original weights, by default 0.5 each).
# Begin configuration section.
weight1=0.5 # Weight on 1st set of lattices.
cmd=run.pl
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 5 ]; then
echo "Usage: steps/decode_combine.sh [options] <data> <lang-dir|graph-dir> <decode-dir1> <decode-dir2> <decode-dir-out>"
echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --weight1 <weight> # Weight on 1st set of lattices (default 0.5)"
exit 1;
fi
data=$1
lang_or_graphdir=$2
srcdir1=$3
srcdir2=$4
dir=$5
for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj1=`cat $srcdir1/num_jobs` || exit 1;
nj2=`cat $srcdir2/num_jobs` || exit 1;
[ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1;
nj=$nj1
mkdir -p $dir/log
echo $nj > $dir/num_jobs
# The lattice-interp command does the score interpolation (with composition),
# and the lattice-copy-backoff replaces the result with the 1st lattice, in
# cases where the composed result was empty.
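# (The weight on the 2nd set of lattices is implicitly 1 - weight1, i.e. 0.5 by default.)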
$cmd JOB=1:$nj $dir/log/interp.JOB.log \
lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \
"ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \
lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \
"ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir
exit 0;

View file

@ -0,0 +1,217 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or
# LDA+MLLT features.
# There are 3 models involved potentially in this script,
# and for a standard, speaker-independent system they will all be the same.
# The "alignment model" is for the 1st-pass decoding and to get the
# Gaussian-level alignments for the "adaptation model" the first time we
# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms
# and to generate state-level lattices. The lattices are then rescored
# with the "final model".
#
# The following table explains where we get these 3 models from.
# Note: $srcdir is one level up from the decoding directory.
#
# Model Default source:
#
# "alignment model" $srcdir/final.alimdl --alignment-model <model>
# (or $srcdir/final.mdl if alimdl absent)
# "adaptation model" $srcdir/final.mdl --adapt-model <model>
# "final model" $srcdir/final.mdl --final-model <model>
# Begin configuration section
first_beam=10.0 # Beam used in initial, speaker-indep. pass
first_max_active=2000 # max-active used in initial pass.
first_max_arcs=-1
alignment_model=
adapt_model=
final_model=
stage=0
acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in
# lattice generation.
max_active=7000
max_arcs=-1
beam=13.0
lattice_beam=6.0
nj=4
silence_weight=0.01
cmd=run.pl
si_dir=
fmllr_update_type=full
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
skip_scoring=false
scoring_opts=
norm_vars=false
# End configuration section
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Wrong #arguments ($#, expected 3)"
echo "Usage: steps/decode_fmllr.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --adapt-model <adapt-mdl> # Model to compute transforms with"
echo " --alignment-model <ali-mdl> # Model to get Gaussian-level alignments for"
echo " # 1st pass of transform computation."
echo " --final-model <finald-mdl> # Model to finally decode with"
echo " --si-dir <speaker-indep-decoding-dir> # use this to skip 1st pass of decoding"
echo " # Caution-- must be with same tree"
echo " --acwt <acoustic-weight> # default 0.08333 ... used to get posteriors"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
echo " --scoring-opts <opts> # options to local/score.sh"
exit 1;
fi
graphdir=$1
data=$2
dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash.
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
sdata=$data/split$nj;
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
silphonelist=`cat $graphdir/phones/silence.csl` || exit 1;
# Some checks. Note: we don't need $srcdir/tree but we expect
# it should exist, given the current structure of the scripts.
for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
##
## Do the speaker-independent decoding, if --si-dir option not present. ##
if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
if [ $stage -le 0 ]; then
steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
--num-threads $num_threads --skip-scoring $skip_scoring \
--acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
--model $alignment_model --max-arcs $max_arcs --max-active \
$first_max_active $graphdir $data $si_dir || exit 1;
fi
fi
##
## Some checks, and setting of defaults for variables.
[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1;
[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1;
[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl
[ -z "$final_model" ] && final_model=$srcdir/final.mdl
for f in $adapt_model $final_model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
##
## Set up the unadapted features "$sifeats"
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type";
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
##
## Now get the first-pass fMLLR transforms.
if [ $stage -le 1 ]; then
echo "$0: getting first-pass fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \
gunzip -c $si_dir/lat.JOB.gz \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \
gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \
gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$sifeats" ark,s,cs:- \
ark:$dir/pre_trans.JOB || exit 1;
fi
##
pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- |"
## Do the main lattice generation pass. Note: we don't determinize the lattices at
## this stage, as we're going to use them in acoustic rescoring with the larger
## model, and it's more correct to store the full state-level lattice for this purpose.
if [ $stage -le 2 ]; then
echo "$0: doing main lattice generation phase"
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --max-arcs=$max_arcs \
--determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \
|| exit 1;
fi
##
## Do a second pass of estimating the transform-- this time with the lattices
## generated from the alignment model. Compose the transforms to get
## $dir/trans.1, etc.
if [ $stage -le 3 ]; then
echo "$0: estimating fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
"ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass1feats" \
ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \
compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \
ark:$dir/trans.JOB || exit 1;
fi
##
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
# Rescore the state-level lattices with the final adapted features, and the final model
# (which by default is $srcdir/final.mdl, but which may be specified on the command line,
# useful in case of discriminatively trained systems).
# At this point we prune and determinize the lattices and write them out, ready for
# language model rescoring.
if [ $stage -le 4 ]; then
echo "$0: doing a final pass of acoustic rescoring."
$cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
fi
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
fi
rm $dir/{trans_tmp,pre_trans}.*
exit 0;

View file

@ -0,0 +1,250 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or
# LDA+MLLT features.
# This script does an extra pass of lattice generation over and above what the original
# script did-- it's for robustness in the case where your original cepstral mean
# normalization was way off.
# We also added a new option --distribute=true (by default) to
# weight-silence-post. This weights the silence frames in a different way,
# weighting all posteriors on the frame rather than just the silence ones, which
# removes a particular kind of bias that the old approach suffered from.
# There are 3 models involved potentially in this script,
# and for a standard, speaker-independent system they will all be the same.
# The "alignment model" is for the 1st-pass decoding and to get the
# Gaussian-level alignments for the "adaptation model" the first time we
# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms
# and to generate state-level lattices. The lattices are then rescored
# with the "final model".
#
# The following table explains where we get these 3 models from.
# Note: $srcdir is one level up from the decoding directory.
#
# Model Default source:
#
# "alignment model" $srcdir/final.alimdl --alignment-model <model>
# (or $srcdir/final.mdl if alimdl absent)
# "adaptation model" $srcdir/final.mdl --adapt-model <model>
# "final model" $srcdir/final.mdl --final-model <model>
# Begin configuration section
first_beam=10.0 # Beam used in initial, speaker-indep. pass
first_max_active=2000 # max-active used in first two passes.
first_latbeam=4.0 # lattice pruning beam for si decode and first-pass fMLLR decode.
# the different spelling from lattice_beam is unfortunate; these scripts
# have a history.
alignment_model=
adapt_model=
final_model=
cleanup=true
stage=0
acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in
# lattice generation.
max_active=7000
beam=13.0
lattice_beam=6.0
nj=4
silence_weight=0.01
distribute=true # option to weight-silence-post.
cmd=run.pl
si_dir=
fmllr_update_type=full
skip_scoring=false
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
scoring_opts=
# End configuration section
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode_fmllr.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --adapt-model <adapt-mdl> # Model to compute transforms with"
echo " --alignment-model <ali-mdl> # Model to get Gaussian-level alignments for"
echo " # 1st pass of transform computation."
echo " --final-model <finald-mdl> # Model to finally decode with"
echo " --si-dir <speaker-indep-decoding-dir> # use this to skip 1st pass of decoding"
echo " # Caution-- must be with same tree"
echo " --acwt <acoustic-weight> # default 0.08333 ... used to get posteriors"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
echo " --scoring-opts <opts> # options to local/score.sh"
exit 1;
fi
graphdir=$1
data=$2
dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash.
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
sdata=$data/split$nj;
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
silphonelist=`cat $graphdir/phones/silence.csl` || exit 1;
# Some checks. Note: we don't need $srcdir/tree but we expect
# it should exist, given the current structure of the scripts.
for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
##
## Do the speaker-independent decoding, if --si-dir option not present. ##
if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
if [ $stage -le 0 ]; then
steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model \
--max-active $first_max_active --parallel-opts "${parallel_opts}" --num-threads $num_threads \
--skip-scoring true $graphdir $data $si_dir || exit 1;
fi
fi
##
## Some checks, and setting of defaults for variables.
[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1;
[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1;
[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl
[ -z "$final_model" ] && final_model=$srcdir/final.mdl
for f in $adapt_model $final_model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
##
## Set up the unadapted features "$sifeats"
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type";
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
##
## Now get the first-pass fMLLR transforms.
if [ $stage -le 1 ]; then
echo "$0: getting first-pass fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \
gunzip -c $si_dir/lat.JOB.gz \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post --distribute=$distribute $silence_weight $silphonelist $alignment_model ark:- ark:- \| \
gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \
gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$sifeats" ark,s,cs:- \
ark:$dir/trans1.JOB || exit 1;
fi
##
pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans1.JOB ark:- ark:- |"
## Do the first adapted lattice generation pass.
if [ $stage -le 2 ]; then
echo "$0: doing first adapted lattice generation phase"
$cmd $parallel_opts JOB=1:$nj $dir/log/decode1.JOB.log \
gmm-latgen-faster$thread_string --max-active=$first_max_active --beam=$first_beam --lattice-beam=$first_latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat1.JOB.gz" \
|| exit 1;
fi
## Do a second pass of estimating the transform. Compose the transforms to get
## $dir/trans2.*.
if [ $stage -le 3 ]; then
echo "$0: estimating fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $dir/lat1.JOB.gz|" ark:- \| \
weight-silence-post --distribute=$distribute $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass1feats" \
ark,s,cs:- ark:$dir/trans1b.JOB '&&' \
compose-transforms --b-is-affine=true ark:$dir/trans1b.JOB ark:$dir/trans1.JOB \
ark:$dir/trans2.JOB || exit 1;
if $cleanup; then
rm $dir/trans1b.* $dir/trans1.* $dir/lat1.*.gz
fi
fi
##
pass2feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans2.JOB ark:- ark:- |"
# Generate a 3rd set of lattices, with the "adaptation model"; we'll use these
# to adapt a 3rd time, and we'll rescore them. Since we should be close to the final
# fMLLR, we don't bother dumping un-determinized lattices to disk.
## Do the final lattice generation pass (but we'll rescore these lattices
## after another stage of adaptation.)
if [ $stage -le 4 ]; then
echo "$0: doing final lattice generation phase"
$cmd $parallel_opts JOB=1:$nj $dir/log/decode2.JOB.log \
gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$adapt_model $graphdir/HCLG.fst "$pass2feats" "ark:|gzip -c > $dir/lat2.JOB.gz" \
|| exit 1;
fi
## Do a third pass of estimating the transform. Compose the transforms to get
## $dir/trans.*.
if [ $stage -le 5 ]; then
echo "$0: estimating fMLLR transforms a third time."
$cmd JOB=1:$nj $dir/log/fmllr_pass3.JOB.log \
lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $dir/lat2.JOB.gz|" ark:- \| \
weight-silence-post --distribute=$distribute $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass2feats" \
ark,s,cs:- ark:$dir/trans2b.JOB '&&' \
compose-transforms --b-is-affine=true ark:$dir/trans2b.JOB ark:$dir/trans2.JOB \
ark:$dir/trans.JOB || exit 1;
if $cleanup; then
rm $dir/trans2b.* $dir/trans2.*
fi
fi
##
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
if [ $stage -le 6 ]; then
echo "$0: doing a final pass of acoustic rescoring."
$cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat2.JOB.gz|" "$feats" \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
if $cleanup; then
rm $dir/lat2.*.gz
fi
fi
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
fi
exit 0;

View file

@ -0,0 +1,111 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Decoding of fMMI or fMPE models (feature-space discriminative training).
# If transform-dir supplied, expects e.g. fMLLR transforms in that dir.
# Begin configuration section.
stage=1
iter=final
nj=4
cmd=run.pl
maxactive=7000
beam=13.0
latbeam=6.0
acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
ngselect=2; # Just use the 2 top Gaussians for fMMI/fMPE. Should match train.
transform_dir=
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
scoring_opts=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode_fmmi.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the model is."
echo "e.g.: steps/decode_fmmi.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
echo ""
echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
echo "what type of features you used (assuming it's one of these two)"
echo "You can also use fMLLR features-- you have to supply --transform-dir option."
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --iter <iter> # Iteration of model to test."
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --acwt <float> # acoustic scale used for lattice generation "
echo " --transform-dir <transform-dir> # where to find fMLLR transforms."
echo " --scoring-opts <string> # options to local/score.sh"
echo " # speaker-adapted decoding"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
model=$srcdir/$iter.mdl
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do
[ ! -f $f ] && echo "decode_fmmi.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode_fmmi.sh: feature type is $feat_type";
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "Using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
echo "Mismatch in number of jobs with $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
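# The fMPE/fMMI features: fmpe-apply-transform adds the learned feature offsets,
# using the per-frame Gaussian-selection info that stage 1 below computes with
# gmm-gselect and caches in $dir/gselect.*.gz.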
fmpefeats="$feats fmpe-apply-transform $srcdir/$iter.fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.JOB.gz|' ark:- |"
if [ $stage -le 1 ]; then
# Get Gaussian selection info.
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
gmm-gselect --n=$ngselect $srcdir/$iter.fmpe "$feats" \
"ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
if [ $stage -le 2 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster$thread_string --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst "$fmpefeats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
if [ $stage -le 3 ]; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
fi
exit 0;

View file

@ -0,0 +1,90 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Decode, limited to the word-sequences that were present in a set
# of lattices on disk. The other lattices do not have to be built
# with the same tree or the same context size-- however, you do
# have to be using the same vocabulary (words.txt)-- if not you'd
# have to map the vocabulary somehow.
# Note: if the trees are identical, you can use gmm-rescore-lattice.
# Mechanism: create an unweighted acceptor (on words) for each utterance,
# compose that with G, determinize, and then use compile-train-graphs-fsts
# to compile a graph for each utterance, to decode with.
# Begin configuration.
cmd=run.pl
maxactive=7000
beam=20.0
latbeam=7.0
acwt=0.083333
batch_size=75 # Limits memory blowup in compile-train-graphs-fsts
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
# End configuration.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/decode_si_fromlats.sh [options] <data-dir> <lang> <old-decode-dir> <decode-dir>"
echo "e.g.: steps/decode_si_fromlats.sh data/test_dev93 data/lang_test_tg exp/tri2b/decode_tgpr_dev93 exp/tri2a/decode_tgpr_dev93_fromlats"
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
olddir=$3
dir=$4
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
mkdir -p $dir/log
nj=`cat $olddir/num_jobs` || exit 1;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
sdata=$data/split$nj
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj >$dir/num_jobs
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $srcdir/final.mdl $olddir/lat.1.gz \
$srcdir/tree $lang/L_disambig.fst $lang/phones.txt; do
[ ! -f $f ] && echo "decode_si_fromlats.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode_si.sh: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
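# The pipeline below implements the mechanism described at the top of the script:
# lattice-to-fst turns each old lattice into an unweighted word acceptor,
# fsttablecompose composes it with the (projected, arc-sorted) G.fst,
# fstdeterminizestar determinizes the result, compile-train-graphs-fsts compiles
# a per-utterance decoding graph from it, and gmm-latgen-faster decodes against
# those graphs.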
$cmd JOB=1:$nj $dir/log/decode_lats.JOB.log \
lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \
fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \
fstdeterminizestar ark:- ark:- \| \
compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \
--batch-size=$batch_size $scale_opts $srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
gmm-latgen-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt \
--allow-partial=true --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl ark:- "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $lang $dir
exit 0;

View file

@ -0,0 +1,122 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey), BUT (Author: Mirko Hannemann)
# Apache 2.0
# Begin configuration section.
transform_dir=
first_pass=
iter=
model= # You can specify the model to use (e.g. if you want to use the .alimdl)
nj=4
reverse=false
cmd=run.pl
max_active=7000
beam=13.0
latbeam=6.0
acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
extra_beam=0.0 # small additional beam over varying beam
max_beam=100.0 # maximum of varying beam
scoring_opts=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode_fwdbwd.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the model is."
echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
echo ""
echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
echo "what type of features you used (assuming it's one of these two)"
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --first_pass <decode-dir> # decoding dir of first pass"
echo " --nj <nj> # number of parallel jobs"
echo " --iter <iter> # Iteration of model to test."
echo " --model <model> # which model to use (e.g. to"
echo " # specify the final.alimdl)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --transform_dir <trans-dir> # dir to find fMLLR transforms "
echo " # speaker-adapted decoding"
echo " --scoring-opts <string> # options to local/score.sh"
echo " --reverse [true/false] # time reversal of features"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
if [ -z $iter ]; then model=$srcdir/final.mdl;
else model=$srcdir/$iter.mdl; fi
fi
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst $graphdir/words.txt; do
[ ! -f $f ] && echo "decode_fwdbwd.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode_fwdbwd.sh: feature type is $feat_type";
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "Using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
echo "Mismatch in number of jobs with $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
if $reverse; then
feats="$feats reverse-feats ark:- ark:- |"
fi
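# If a first-pass decoding directory was supplied, turn its lattices into arc
# graphs over HCLG (lattice-arcgraph) and decode with gmm-latgen-tracking, which
# varies its beam around the first-pass arcs (controlled by --extra-beam and
# --max-beam); otherwise fall back to an ordinary gmm-latgen-faster decode.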
if [ -f $first_pass/lat.1.gz ]; then
echo "converting first pass lattice to graph arc acceptor"
$cmd JOB=1:$nj $dir/log/arc_graph.JOB.log \
time lattice-arcgraph $model $graphdir/HCLG.fst \
"ark:gunzip -c $first_pass/lat.JOB.gz|" ark,t:$dir/lat.JOB.arcs || exit 1;
# --write-lattices=ark,t:$dir/lat.det
# --acoustic-scale=$acwt --lattice-beam=$latbeam --prune=false \
echo "decode with tracking first pass lattice"
$cmd JOB=1:$nj $dir/log/decode_fwdbwd.JOB.log \
gmm-latgen-tracking --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true \
--extra-beam=$extra_beam --max-beam=$max_beam \
--word-symbol-table=$graphdir/words.txt --verbose=2 \
$model $graphdir/HCLG.fst "$feats" ark:$dir/lat.JOB.arcs "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
else
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true \
--word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" --reverse $reverse $data $graphdir $dir
echo "Decoding done."
exit 0;

View file

@ -0,0 +1,128 @@
#!/bin/bash
# Copyright 2012-2013 Karel Vesely, Daniel Povey
# Apache 2.0
# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
stage=0 # stage=1 skips lattice generation
nj=4
cmd=run.pl
max_active=7000 # maximum of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
beam=13.0 # GMM:13.0
latbeam=8.0 # GMM:6.0
acwt=0.10 # GMM:0.0833, note: only really affects pruning (scoring is on lattices).
scoring_opts="--min-lmwt 4 --max-lmwt 15"
skip_scoring=false
use_gpu_id=-1 # disable gpu
parallel_opts="-pe smp 2" # use 2 CPUs (1 DNN-forward, 1 decoder)
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the DNN + transition model is."
echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
echo ""
echo "This script works on plain or modified features (CMN,delta+delta-delta),"
echo "which are then sent through feature-transform. It works out what type"
echo "of features you used from content of srcdir."
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo ""
echo " --nnet <nnet> # which nnet to use (opt.)"
echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
echo " --class-frame-counts <file> # file with frame counts (used to compute priors) (opt.)"
echo " --model <model> # which transition model to use (opt.)"
echo ""
echo " --acwt <float> # select acoustic scale for decoding"
echo " --scoring-opts <opts> # options forwarded to local/score.sh"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
nnet=$srcdir/final.nnet;
fi
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
model=$srcdir/final.mdl;
fi
# find the feature_transform to use
if [ -z "$feature_transform" ]; then
feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
echo "Missing feature_transform '$feature_transform'"
exit 1
fi
# check that files exist
for f in $sdata/1/feats.scp $nnet $model $graphdir/HCLG.fst; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# PREPARE THE LOG-POSTERIOR COMPUTATION PIPELINE
if [ -z "$class_frame_counts" ]; then
class_frame_counts=$srcdir/ali_train_pdf.counts
else
echo "Overriding class_frame_counts by $class_frame_counts"
fi
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
[ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
delta_order=$(cat $srcdir/delta_order)
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
# Run the decoding in the queue
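# nnet-forward outputs pseudo log-likelihoods (the network outputs with the
# softmax omitted and the log-priors derived from class_frame_counts subtracted);
# latgen-faster-mapped then treats these per-pdf scores as acoustic log-likelihoods.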
if [ $stage -le 0 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet "$feats" ark:- \| \
latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
# Run the scoring
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir || exit 1;
fi
exit 0;

View file

@ -0,0 +1,127 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
# Apache 2.0.
# This script does decoding with a neural-net. If the neural net was built on
# top of fMLLR transforms from a conventional system, you should provide the
# --transform-dir option.
# Begin configuration section.
stage=1
transform_dir= # dir to find fMLLR transforms.
nj=4 # number of decoding jobs. If --transform-dir set, must match that number!
acwt=0.1 # Just a default value, used for adaptation and beam-pruning..
cmd=run.pl
beam=15.0
max_active=7000
# WARNING: this option has been renamed to lattice_beam, to follow the naming
# used in the other scripts.
lattice_beam=8.0 # Beam we use in lattice generation.
iter=final
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
scoring_opts=
skip_scoring=false
feat_type=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: steps/decode_nnet_cpu.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_nnet_cpu.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
echo " exp/tri3b/graph_tgpr data/test_dev93 exp/tri4a_nnet/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --beam <beam> # Decoding beam; default 15.0"
echo " --iter <iter> # Iteration of model to decode; default is final."
echo " --scoring-opts <string> # options to local/score.sh"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
model=$srcdir/$iter.mdl
for f in $graphdir/HCLG.fst $data/feats.scp $model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
sdata=$data/split$nj;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
## Set up features.
if [ -z "$feat_type" ]; then
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
fi
case $feat_type in
raw) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |";;
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
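# Note: the 'raw' case is only selected if --feat-type raw is given explicitly
# (the automatic check above only picks lda or delta); it is intended for systems
# whose fMLLR transforms were estimated on the raw features (raw_trans.* below).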
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
if [ "$feat_type" == "raw" ]; then
[ ! -f $transform_dir/raw_trans.1 ] && echo "$0: no such file $transform_dir/raw_trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- |"
else
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
fi
elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then
echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
##
if [ $stage -le 1 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
nnet-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt "$model" \
$graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
# The output of this script is the files "lat.*.gz"-- we'll rescore this at
# different acoustic scales to get the final output.
if [ $stage -le 2 ]; then
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
echo "score best paths"
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
echo "score confidence and timing with sclite"
fi
fi
echo "Decoding done."
exit 0;


@ -0,0 +1,235 @@
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# This decoding script is like decode_fmllr.sh, but it does the fMLLR on
# the raw cepstra, using the model in the LDA+MLLT space
#
# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or
# LDA+MLLT features.
# There are 3 models involved potentially in this script,
# and for a standard, speaker-independent system they will all be the same.
# The "alignment model" is for the 1st-pass decoding and to get the
# Gaussian-level alignments for the "adaptation model" the first time we
# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms
# and to generate state-level lattices. The lattices are then rescored
# with the "final model".
#
# The following table explains where we get these 3 models from.
# Note: $srcdir is one level up from the decoding directory.
#
# Model Default source:
#
# "alignment model" $srcdir/final.alimdl --alignment-model <model>
# (or $srcdir/final.mdl if alimdl absent)
# "adaptation model" $srcdir/final.mdl --adapt-model <model>
# "final model" $srcdir/final.mdl --final-model <model>
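# For example (hypothetical paths, just illustrating the options above), one
# could decode with a discriminatively trained final model while keeping the
# original model for adaptation, roughly:
#   <this-script> --final-model exp/tri3c_mmi/final.mdl \
#     exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c_mmi/decode_dev93_tgpr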
# Begin configuration section
first_beam=10.0 # Beam used in initial, speaker-indep. pass
first_max_active=2000 # max-active used in initial pass.
first_max_arcs=-1
alignment_model=
adapt_model=
final_model=
stage=0
acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in
# lattice generation.
max_active=7000
use_normal_fmllr=false
max_arcs=-1
beam=13.0
lattice_beam=6.0
nj=4
silence_weight=0.01
cmd=run.pl
si_dir=
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
skip_scoring=false
scoring_opts=
norm_vars=false
# End configuration section
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Wrong #arguments ($#, expected 3)"
echo "Usage: steps/decode_fmllr.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --adapt-model <adapt-mdl> # Model to compute transforms with"
echo " --alignment-model <ali-mdl> # Model to get Gaussian-level alignments for"
echo " # 1st pass of transform computation."
echo " --final-model <final-mdl> # Model to finally decode with"
echo " --si-dir <speaker-indep-decoding-dir> # use this to skip 1st pass of decoding"
echo " # Caution-- must be with same tree"
echo " --acwt <acoustic-weight> # default 0.08333 ... used to get posteriors"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
echo " --scoring-opts <opts> # options to local/score.sh"
exit 1;
fi
graphdir=$1
data=$2
dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash.
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
sdata=$data/split$nj;
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
silphonelist=`cat $graphdir/phones/silence.csl` || exit 1;
# Some checks. Note: we don't need $srcdir/tree but we expect
# it should exist, given the current structure of the scripts.
for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
##
## Do the speaker-independent decoding, if --si-dir option not present. ##
if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
if [ $stage -le 0 ]; then
steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
--num-threads $num_threads --skip-scoring $skip_scoring \
--acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
--model $alignment_model --max-arcs $max_arcs --max-active \
$first_max_active $graphdir $data $si_dir || exit 1;
fi
fi
##
## Some checks, and setting of defaults for variables.
[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1;
[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1;
[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl
[ -z "$final_model" ] && final_model=$srcdir/final.mdl
for f in $adapt_model $final_model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
##
if [[ ! -f $srcdir/final.mat || ! -f $srcdir/full.mat ]]; then
echo "$0: we require final.mat and full.mat in the source directory $srcdir"
exit 1;
fi
splicedfeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |"
sifeats="$splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |"
full_lda_mat="get-full-lda-mat --print-args=false $srcdir/final.mat $srcdir/full.mat -|"
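# ("$full_lda_mat" above is not a file but a command: get-full-lda-mat
# reconstructs the square spliced-dimension LDA+MLLT matrix, i.e. final.mat
# plus the "rejected" rows kept in full.mat, which gmm-est-fmllr-raw-gpost
# uses to relate the model space back to the raw, spliced features.)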
##
## Now get the first-pass fMLLR transforms.
if [ $stage -le 1 ]; then
echo "$0: getting first-pass raw-fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \
gunzip -c $si_dir/lat.JOB.gz \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \
gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \
gmm-est-fmllr-raw-gpost --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$full_lda_mat" \
"$splicedfeats" ark,s,cs:- ark:$dir/pre_trans.JOB || exit 1;
fi
##
pass1splicedfeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- |"
pass1feats="$pass1splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |"
## Do the main lattice generation pass. Note: we don't determinize the lattices at
## this stage, as we're going to use them in acoustic rescoring with the larger
## model, and it's more correct to store the full state-level lattice for this purpose.
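# (The un-determinized lattices written here are only pruned and determinized
# later, in the final acoustic rescoring stage, once the fully adapted
# features and the final model are in place.)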
if [ $stage -le 2 ]; then
echo "$0: doing main lattice generation phase"
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --max-arcs=$max_arcs \
--determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \
|| exit 1;
fi
##
## Do a second pass of estimating the transform-- this time with the lattices
## generated using the adaptation model. Compose the transforms to get
## $dir/raw_trans.1, etc.
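# Roughly, if pre_trans maps x -> A1 x + b1 and the second-pass transform
# trans_tmp maps y -> A2 y + b2, the composed raw_trans is
#   x -> A2 (A1 x + b1) + b2,
# i.e. pre_trans is applied first and trans_tmp on top of it (the
# --b-is-affine=true flag below tells compose-transforms to treat its second
# argument as an affine transform).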
if [ $stage -le 3 ]; then
echo "$0: estimating raw-fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
"ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
gmm-est-fmllr-raw --spk2utt=ark:$sdata/JOB/spk2utt \
$adapt_model "$full_lda_mat" "$pass1splicedfeats" ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \
compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \
ark:$dir/raw_trans.JOB || exit 1;
fi
##
feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/raw_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
if [ $stage -le 4 ] && $use_normal_fmllr; then
echo "$0: estimating normal fMLLR transforms"
$cmd JOB=1:$nj $dir/log/fmllr_pass3.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
gmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt \
$adapt_model "$feats" ark,s,cs:- ark:$dir/trans.JOB || exit 1;
fi
if $use_normal_fmllr; then
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
fi
# Rescore the state-level lattices with the final adapted features, and the final model
# (which by default is $srcdir/final.mdl, but which may be specified on the command line,
# useful in case of discriminatively trained systems).
# At this point we prune and determinize the lattices and write them out, ready for
# language model rescoring.
if [ $stage -le 5 ]; then
echo "$0: doing a final pass of acoustic rescoring."
$cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
fi
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
fi
#rm $dir/{trans_tmp,pre_trans}.*
exit 0;


@ -0,0 +1,257 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM system, with speaker vectors.
# If the SGMM system was
# built on top of fMLLR transforms from a conventional system, you should
# provide the --transform-dir option.
# Begin configuration section.
stage=1
alignment_model=
transform_dir= # dir to find fMLLR transforms.
nj=4 # number of decoding jobs.
acwt=0.1 # Just a default value, used for adaptation and beam-pruning..
cmd=run.pl
beam=15.0
gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note:
# the first_pass_gselect variable is used for the 1st pass of
# decoding and can be tighter.]
first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
# the 1st pass of decoding (lattice generation).
max_active=7000
#WARNING: This option was formerly named lat_beam (it was renamed to
# lattice_beam to follow the naming in the other scripts).
lattice_beam=8.0 # Beam we use in lattice generation.
vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
# speaker-vector computation. Can be quite tight (actually we could
# probably just do best-path).
use_fmllr=false
fmllr_iters=10
fmllr_min_count=1000
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: steps/decode_sgmm.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_sgmm.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --alignment-model <ali-mdl> # Model for the first-pass decoding."
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --beam <beam> # Decoding beam; default 15.0"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
sdata=$data/split$nj;
silphonelist=`cat $graphdir/phones/silence.csl` || exit 1
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"
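# Note: the full Gaussian-selection info (top $gselect indices per frame) is
# computed once and written to disk below; for the first decoding pass it is
# pruned on the fly to the top $first_pass_gselect indices by piping it
# through copy-gselect inside the --gselect rspecifier above.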
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
## Set up features.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
##
## Calculate FMLLR pre-transforms if needed. We are doing this here since this
## step is required by models both with and without speaker vectors
if $use_fmllr; then
if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then
echo "$0: computing pre-transform for fMLLR computation."
sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1;
fi
fi
## Save Gaussian-selection info to disk.
# Note: we can use final.mdl regardless of whether there is an alignment model--
# they use the same UBM.
if [ $stage -le 1 ]; then
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \
"$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
# Generate state-level lattice which we can rescore. This is done with the
# alignment model and no speaker-vectors.
if [ $stage -le 2 ]; then
$cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \
sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
--word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \
$graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1;
fi
## Check if the model has speaker vectors
spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'`
if [ $spkdim -gt 0 ]; then ### For models with speaker vectors:
# Estimate speaker vectors (1st pass). Prune before determinizing
# because determinization can take a while on un-pruned lattices.
# Note: the sgmm-post-to-gpost stage is necessary because we have
# a separate alignment-model and final model, otherwise we'd skip it
# and use sgmm-est-spkvecs.
if [ $stage -le 3 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \
sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \
sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1;
fi
# Estimate speaker vectors (2nd pass). Since we already have spk vectors,
# at this point we need to rescore the lattice to get the correct posteriors.
if [ $stage -le 4 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1;
fi
rm $dir/pre_vecs.*
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ]; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
rm $dir/pre_lat.*.gz
else ### For models without speaker vectors:
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ] && $use_fmllr; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
rm $dir/pre_lat.*.gz
else # Already done with decoding if no adaptation needed.
for n in `seq 1 $nj`; do
mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz
done
fi
fi
# The output of this script is the files "lat.*.gz"-- we'll rescore this at
# different acoustic scales to get the final output.
if [ $stage -le 7 ]; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
echo "score best paths"
local/score.sh --cmd "$cmd" $data $graphdir $dir
echo "score confidence and timing with sclite"
#local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $graphdir $dir
fi
echo "Decoding done."
exit 0;


@ -0,0 +1,211 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM system, with speaker vectors.
# If the SGMM system was
# built on top of fMLLR transforms from a conventional system, you should
# provide the --transform-dir option.
# Begin configuration section.
stage=1
transform_dir= # dir to find fMLLR transforms.
nj=4 # number of decoding jobs.
acwt=0.1 # Just a default value, used for adaptation and beam-pruning..
cmd=run.pl
beam=13.0
gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note:
# the first_pass_gselect variable is used for the 1st pass of
# decoding and can be tighter.]
first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
# the 1st pass of decoding (lattice generation).
max_active=7000
max_arcs=-1
#WARNING: This option was formerly named lat_beam (it was renamed to
# lattice_beam to follow the naming in the other scripts).
lattice_beam=6.0 # Beam we use in lattice generation.
vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
# speaker-vector computation. Can be quite tight (actually we could
# probably just do best-path).
use_fmllr=false
fmllr_iters=10
fmllr_min_count=1000
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
skip_scoring=false
scoring_opts=
# note: there are no more min-lmwt and max-lmwt options, instead use
# e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: steps/decode_sgmm2.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo " e.g.: steps/decode_sgmm2.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --beam <beam> # Decoding beam; default 13.0"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
sdata=$data/split$nj;
silphonelist=`cat $graphdir/phones/silence.csl` || exit 1
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
## Set up features.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
if [ -f $transform_dir/trans.1 ]; then
echo "$0: using transforms from $transform_dir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif [ -f $transform_dir/raw_trans.1 ]; then
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
else
echo "$0: no such file $transform_dir/trans.1 or $transform_dir/raw_trans.1, invalid --transform-dir option?"
exit 1;
fi
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
##
## Save Gaussian-selection info to disk.
# Note: we can use final.mdl regardless of whether there is an alignment model--
# they use the same UBM.
if [ $stage -le 1 ]; then
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
sgmm2-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \
"$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
# Generate state-level lattice which we can rescore. This is done with the alignment
# model and no speaker-vectors.
if [ $stage -le 2 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_pass1.JOB.log \
sgmm2-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--max-arcs=$max_arcs --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
--word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $srcdir/final.alimdl \
$graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1;
fi
# Estimate speaker vectors (1st pass). Prune before determinizing
# because determinization can take a while on un-pruned lattices.
# Note: the sgmm2-post-to-gpost stage is necessary because we have
# a separate alignment-model and final model, otherwise we'd skip it
# and use sgmm2-est-spkvecs.
if [ $stage -le 3 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \| \
sgmm2-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- ark:- \| \
sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1;
fi
# Estimate speaker vectors (2nd pass). Since we already have spk vectors,
# at this point we need to rescore the lattice to get the correct posteriors.
if [ $stage -le 4 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1;
fi
rm $dir/pre_vecs.*
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then
echo "$0: computing pre-transform for fMLLR computation."
sgmm2-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1;
fi
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
rm $dir/pre_lat.*.gz
# The output of this script is the files "lat.*.gz"-- we'll rescore this at different
# acoustic scales to get the final output.
if [ $stage -le 7 ]; then
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
fi
fi
exit 0;


@ -0,0 +1,270 @@
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM2 system, with speaker vectors. If the
# SGMM2 system was built on top of fMLLR transforms from a conventional system,
# you should provide the --transform-dir option.
# This script does not use a decoding graph, but instead you provide
# a previous decoding directory with lattices in it. This script will only
# make use of the word sequences in the lattices; it limits the decoding
# to those sequences. You should also provide a "lang" directory from
# which this script will use the G.fst and L.fst.
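# In outline, the later lattice-generation stage turns the old lattices into
# per-utterance decoding graphs roughly as follows (see the actual command for
# details): lattice-to-fst keeps just the word sequences as FSTs, these are
# composed with the projected G.fst to restore LM scores, determinized, and
# then expanded into decoding graphs by compile-train-graphs-fsts using
# L_disambig.fst and the tree; decoding is then restricted to those graphs.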
# Begin configuration section.
stage=1
alignment_model=
transform_dir= # dir to find fMLLR transforms.
acwt=0.08333 # Just a default value, used for adaptation and beam-pruning..
batch_size=75 # Limits memory blowup in compile-train-graphs-fsts
cmd=run.pl
beam=20.0
gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note:
# the first_pass_gselect variable is used for the 1st pass of
# decoding and can be tighter.]
first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
# the 1st pass of decoding (lattice generation).
max_active=7000
lattice_beam=8.0 # Beam we use in lattice generation.
vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
# speaker-vector computation. Can be quite tight (actually we could
# probably just do best-path).
use_fmllr=false
fmllr_iters=10
fmllr_min_count=1000
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 4 ]; then
echo "Usage: steps/decode_sgmm_fromlats.sh [options] <data-dir> <lang-dir> <old-decode-dir> <decode-dir>"
echo ""
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --alignment-model <ali-mdl> # Model for the first-pass decoding."
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --beam <beam> # Decoding beam; default 20.0"
exit 1;
fi
data=$1
lang=$2
olddir=$3
dir=$4
srcdir=`dirname $dir`
for f in $data/feats.scp $lang/G.fst $lang/L_disambig.fst $lang/phones/disambig.int \
$srcdir/final.mdl $srcdir/tree $olddir/lat.1.gz; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj=`cat $olddir/num_jobs` || exit 1;
sdata=$data/split$nj;
silphonelist=`cat $lang/phones/silence.csl` || exit 1
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
## Set up features
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
if [ -z "$transform_dir" ] && [ -f $olddir/trans.1 ]; then
transform_dir=$olddir
fi
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
## Calculate FMLLR pre-transforms if needed. We are doing this here since this
## step is required by models both with and without speaker vectors
if $use_fmllr; then
if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then
echo "$0: computing pre-transform for fMLLR computation."
sgmm2-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1;
fi
fi
## Save Gaussian-selection info to disk.
# Note: we can use final.mdl regardless of whether there is an alignment model--
# they use the same UBM.
if [ $stage -le 1 ]; then
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
sgmm2-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \
"$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
# Generate state-level lattice which we can rescore. This is done with the
# alignment model and no speaker-vectors.
if [ $stage -le 2 ]; then
$cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \
lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \
fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \
fstdeterminizestar ark:- ark:- \| \
compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \
--batch-size=$batch_size $scale_opts \
$srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
sgmm2-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
--word-symbol-table=$lang/words.txt "$gselect_opt_1stpass" $alignment_model \
"ark:-" "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1;
fi
## Check if the model has speaker vectors
spkdim=`sgmm2-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'`
if [ $spkdim -gt 0 ]; then ### For models with speaker vectors:
# Estimate speaker vectors (1st pass). Prune before determinizing
# because determinization can take a while on un-pruned lattices.
# Note: the sgmm2-post-to-gpost stage is necessary because we have
# a separate alignment-model and final model, otherwise we'd skip it
# and use sgmm2-est-spkvecs.
if [ $stage -le 3 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \
sgmm2-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \
sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1;
fi
# Estimate speaker vectors (2nd pass). Since we already have spk vectors,
# at this point we need to rescore the lattice to get the correct posteriors.
if [ $stage -le 4 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1;
fi
rm $dir/pre_vecs.*
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ]; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
rm $dir/pre_lat.*.gz
else ### For models without speaker vectors:
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ] && $use_fmllr; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
rm $dir/pre_lat.*.gz
else # Already done with decoding if no adaptation needed.
for n in `seq 1 $nj`; do
mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz
done
fi
fi
# The output of this script is the files "lat.*.gz"-- we'll rescore this at
# different acoustic scales to get the final output.
if [ $stage -le 7 ]; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
echo "score best paths"
local/score.sh --cmd "$cmd" $data $lang $dir
echo "score confidence and timing with sclite"
#local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $lang $dir
fi
echo "Decoding done."
exit 0;


@ -0,0 +1,111 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM system, by rescoring lattices
# generated from a previous SGMM system. The directory with the lattices
# is assumed to contain speaker vectors, if used. Basically it rescores
# the lattices one final time, using the same setup as the final decoding
# pass of the source dir. The assumption is that the model may have
# been discriminatively trained.
# If the system was built on top of fMLLR transforms from a conventional system,
# you should provide the --transform-dir option.
# Begin configuration section.
transform_dir= # dir to find fMLLR transforms.
cmd=run.pl
iter=final
skip_scoring=false
scoring_opts=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 4 ]; then
echo "Usage: steps/decode_sgmm_rescore.sh [options] <graph-dir|lang-dir> <data-dir> <old-decode-dir> <decode-dir>"
echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --iter <iter> # iteration of model to use (default: final)"
exit 1;
fi
graphdir=$1
data=$2
olddir=$3
dir=$4
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \
$srcdir/$iter.mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj=`cat $olddir/num_jobs` || exit 1;
sdata=$data/split$nj;
gselect_opt="--gselect=ark,s,cs:gunzip -c $olddir/gselect.JOB.gz|"
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -f $olddir/vecs.1 ]; then
echo "$0: using speaker vectors from $olddir"
spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
else
echo "$0: no speaker vectors found."
spkvecs_opt=
fi
## Set up features.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
if [ -f $olddir/trans.1 ]; then
echo "$0: using (in addition to any previous transforms) transforms from $olddir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |"
fi
##
# Rescore the state-level lattices with the model provided. Just
# one command in this script.
echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl"
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm2-rescore-lattice "$gselect_opt" $spkvecs_opt \
$srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
fi
exit 0;


@ -0,0 +1,172 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM system, by rescoring lattices
# generated from a previous SGMM system. This version does the "predictive"
# SGMM, where we subtract some constant times the log-prob of the left
# few spliced frames, and the same for the right few.
# The directory with the lattices
# is assumed to contain any speaker vectors, if used. This script just
# adds into the acoustic scores (some constant, default -0.25) times
# the acoustic score of the left model, and the same for the right model,
# rescoring the lattices one final time using the same setup as the final
# decoding pass of the source dir. The assumption is that the model may have
# been discriminatively trained.
# If the system was built on top of fMLLR transforms from a conventional system,
# you should provide the --transform-dir option.
# Begin configuration section.
stage=0
transform_dir= # dir to find fMLLR transforms.
cmd=run.pl
iter=final
prob_scale=-0.25
dimensions=0:13:104:117
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 5 ]; then
echo "Usage: steps/decode_sgmm_rescore_project.sh [options] <full-lda-mat> <graph-dir|lang-dir> <data-dir> <old-decode-dir> <decode-dir>"
echo " e.g.: steps/decode_sgmm_rescore_project.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
echo " exp/tri2b/full.mat exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a/decode_dev93_tgpr_predict"
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --prob-scale <scale> # Default -0.25, scale on left and right models."
exit 1;
fi
full_lda_mat=$1
graphdir=$2
data=$3
olddir=$4
dir=$5
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
for f in $full_lda_mat $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz \
$olddir/gselect.1.gz $srcdir/$iter.mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj=`cat $olddir/num_jobs` || exit 1;
sdata=$data/split$nj;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -f $olddir/vecs.1 ]; then
echo "$0: using speaker vectors from $olddir"
spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
else
echo "$0: no speaker vectors found."
spkvecs_opt=
fi
if [ $stage -le 0 ]; then
# Get full LDA+MLLT mat and its inverse. Note: the full LDA+MLLT mat is
# the LDA+MLLT mat, plus the "rejected" rows of the LDA matrix.
$cmd $dir/log/get_full_lda.log \
get-full-lda-mat $srcdir/final.mat $full_lda_mat $dir/full.mat $dir/full_inv.mat || exit 1;
fi
if [ $stage -le 1 ]; then
left_start=`echo $dimensions | cut '-d:' -f 1`;
left_end=`echo $dimensions | cut '-d:' -f 2`;
right_start=`echo $dimensions | cut '-d:' -f 3`;
right_end=`echo $dimensions | cut '-d:' -f 4`;
# Prepare left and right models. For now, the dimensions are hardwired (e.g., 13 MFCCs and splice 9 frames).
# Note: the choice of dividing by the prob of the left 4 and the right 4 frames is a bit arbitrary and
# we could investigate different configurations.
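# As a concrete sketch with the defaults (13-dim MFCCs spliced across 9
# frames, giving 9*13 = 117 spliced dims), dimensions=0:13:104:117 selects
# dims [0,13) -- the left-most spliced frame -- for the "left" model and
# [104,117) -- the right-most frame -- for the "right" model.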
$cmd $dir/log/left.log \
sgmm2-project --start-dim=$left_start --end-dim=$left_end $srcdir/final.mdl $dir/full.mat $dir/left.mdl $dir/left.mat || exit 1;
$cmd $dir/log/right.log \
sgmm2-project --start-dim=$right_start --end-dim=$right_end $srcdir/final.mdl $dir/full.mat $dir/right.mdl $dir/right.mat || exit 1;
fi
# we apply the scaling on the new acoustic probs by adding the inverse
# of that to the old acoustic probs, and then later inverting again.
# this has to do with limitations in sgmm2-rescore-lattice: we can only
# scale the *old* acoustic probs, not the new ones.
inverse_prob_scale=`perl -e "print (1.0 / $prob_scale);"`
cur_lats="ark:gunzip -c $olddir/lat.JOB.gz | lattice-scale --acoustic-scale=$inverse_prob_scale ark:- ark:- |"
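# Worked through with the default prob_scale=-0.25 (so inverse_prob_scale=-4):
# the old acoustic scores are scaled by -4 here, each rescoring pass below adds
# the new model's scores at scale 1.0, and the final lattice-scale by -0.25
# then gives
#   -0.25 * (-4*old + left + right) = old - 0.25*(left + right),
# i.e. old + prob_scale*(left + right), as intended.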
## Set up features. Note: we only support LDA+MLLT features, this
## is inherent in the method, we could not support deltas.
for model_type in left right; do
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" # spliced features.
if [ ! -z "$transform_dir" ]; then # using speaker-specific transforms.
# we want to transform in the sequence: $dir/full.mat, then the result of
# (extend-transform-dim $transform_dir/trans.JOB), then $dir/full_inv.mat to
# get back to the spliced space, then the left.mat or right.mat. But
# note that compose-transforms operates in matrix-multiplication order,
# which is opposite from the "order of applying the transforms" order.
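# As a sketch: if "compose-transforms C B A" writes the product C*B to A, then
# applying A to a spliced frame x gives C*(B*x), i.e. B acts first. Each
# compose-transforms step in the pipeline below therefore puts the transform
# that is applied later as its first argument.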
new_dim=$[`copy-matrix --binary=false $dir/full.mat - | wc -l` - 1]; # 117 in normal case.
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk 'ark:extend-transform-dim --new-dimension=$new_dim ark:$transform_dir/trans.JOB ark:- | compose-transforms ark:- $dir/full.mat ark:- | compose-transforms $dir/full_inv.mat ark:- ark:- | compose-transforms $dir/${model_type}.mat ark:- ark:- |' ark:- ark:- |"
else # else, we transform with the "left" or "right" matrix; these transform from the
# spliced space.
feats="$feats transform-feats $dir/${model_type}.mat ark:- ark:- |"
# If we don't have the --transform-dir option, make sure the model was
# trained in the same way.
if grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
fi
if [ -f $olddir/trans.1 ]; then
echo "$0: warning: not using transforms in $olddir (this is just a "
echo " limitation of the script right now, and could be fixed)."
fi
if [ $stage -le 2 ]; then
echo "Getting gselect info for $model_type model."
$cmd JOB=1:$nj $dir/log/gselect.$model_type.JOB.log \
sgmm2-gselect $dir/$model_type.mdl "$feats" \
"ark,t:|gzip -c >$dir/gselect.$model_type.JOB.gz" || exit 1;
fi
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.$model_type.JOB.gz|"
# Rescore the state-level lattices with the model provided. Just
# one command in this script.
# The --old-acoustic-scale=1.0 option means we just add the scores
# to the old scores.
if [ $stage -le 3 ]; then
echo "$0: rescoring lattices with $model_type model"
$cmd JOB=1:$nj $dir/log/rescore.${model_type}.JOB.log \
sgmm2-rescore-lattice --old-acoustic-scale=1.0 "$gselect_opt" $spkvecs_opt \
$dir/$model_type.mdl "$cur_lats" "$feats" \
"ark:|gzip -c > $dir/lat.${model_type}.JOB.gz" || exit 1;
fi
cur_lats="ark:gunzip -c $dir/lat.${model_type}.JOB.gz |"
done
if [ $stage -le 4 ]; then
echo "$0: getting final lattices."
$cmd JOB=1:$nj $dir/log/scale_lats.JOB.log \
lattice-scale --acoustic-scale=$prob_scale "$cur_lats" "ark:|gzip -c >$dir/lat.JOB.gz" \
|| exit 1;
fi
rm $dir/lat.{left,right}.*.gz 2>/dev/null # note: if these still exist, it will
# confuse the scoring script.
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
exit 0;


@ -0,0 +1,273 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM system, with speaker vectors.
# If the SGMM system was
# built on top of fMLLR transforms from a conventional system, you should
# provide the --transform-dir option.
# This script does not use a decoding graph, but instead you provide
# a previous decoding directory with lattices in it. This script will only
# make use of the word sequences in the lattices; it limits the decoding
# to those sequences. You should also provide a "lang" directory from
# which this script will use the G.fst and L.fst.
# Begin configuration section.
stage=1
alignment_model=
transform_dir= # dir to find fMLLR transforms.
acwt=0.08333 # Just a default value, used for adaptation and beam-pruning..
batch_size=75 # Limits memory blowup in compile-train-graphs-fsts
cmd=run.pl
beam=20.0
gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note:
# the first_pass_gselect variable is used for the 1st pass of
# decoding and can be tighter.
first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
# the 1st pass of decoding (lattice generation).
max_active=7000
# WARNING: this option was renamed to lattice_beam (to follow the naming used
# in the other scripts).
lattice_beam=8.0 # Beam we use in lattice generation.
vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
# speaker-vector computation. Can be quite tight (actually we could
# probably just do best-path).
use_fmllr=false
fmllr_iters=10
fmllr_min_count=1000
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 4 ]; then
echo "Usage: steps/decode_sgmm_fromlats.sh [options] <data-dir> <lang-dir> <old-decode-dir> <decode-dir>"
echo ""
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --alignment-model <ali-mdl> # Model for the first-pass decoding."
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --beam <beam> # Decoding beam; default 13.0"
exit 1;
fi
data=$1
lang=$2
olddir=$3
dir=$4
srcdir=`dirname $dir`
for f in $data/feats.scp $lang/G.fst $lang/L_disambig.fst $lang/phones/disambig.int \
$srcdir/final.mdl $srcdir/tree $olddir/lat.1.gz; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj=`cat $olddir/num_jobs` || exit 1;
sdata=$data/split$nj;
silphonelist=`cat $lang/phones/silence.csl` || exit 1
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
## Set up features
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
if [ -z "$transform_dir" ] && [ -f $olddir/trans.1 ]; then
transform_dir=$olddir
fi
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
## Calculate FMLLR pre-transforms if needed. We are doing this here since this
## step is required by models both with and without speaker vectors.
if $use_fmllr; then
if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then
echo "$0: computing pre-transform for fMLLR computation."
sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1;
fi
fi
## Save Gaussian-selection info to disk.
# Note: we can use final.mdl regardless of whether there is an alignment model--
# they use the same UBM.
if [ $stage -le 1 ]; then
$cmd JOB=1:$nj $dir/log/gselect.JOB.log \
sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \
"$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
fi
## Work out name of alignment model. ##
if [ -z "$alignment_model" ]; then
if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
else alignment_model=$srcdir/final.mdl; fi
fi
[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
# Generate state-level lattice which we can rescore. This is done with the
# alignment model and no speaker-vectors.
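# In the pipeline below, lattice-to-fst turns each old lattice into a word-sequence
# FST; this is composed with the projected, arc-sorted G.fst and determinized, so
# LM scores are applied to exactly those word sequences; compile-train-graphs-fsts
# then expands the result into utterance-specific decoding graphs using the lexicon
# and tree, and sgmm-latgen-faster decodes against each of these graphs.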
if [ $stage -le 2 ]; then
$cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \
lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \
fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \
fstdeterminizestar ark:- ark:- \| \
compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \
--batch-size=$batch_size $scale_opts \
$srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
--word-symbol-table=$lang/words.txt "$gselect_opt_1stpass" $alignment_model \
"ark:-" "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1;
fi
## Check if the model has speaker vectors
spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'`
if [ $spkdim -gt 0 ]; then ### For models with speaker vectors:
# Estimate speaker vectors (1st pass). Prune before determinizing
# because determinization can take a while on un-pruned lattices.
# Note: the sgmm-post-to-gpost stage is necessary because we have
# a separate alignment-model and final model, otherwise we'd skip it
# and use sgmm-est-spkvecs.
if [ $stage -le 3 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \
sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \
sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1;
fi
# Estimate speaker vectors (2nd pass). Since we already have spk vectors,
# at this point we need to rescore the lattice to get the correct posteriors.
if [ $stage -le 4 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1;
fi
rm $dir/pre_vecs.*
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ]; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
rm $dir/pre_lat.*.gz
else ### For models without speaker vectors:
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ] && $use_fmllr; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
rm $dir/pre_lat.*.gz
else # Already done with decoding if no adaptation needed.
for n in `seq 1 $nj`; do
mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz
done
fi
fi
# The output of this script is the files "lat.*.gz"-- we'll rescore this at
# different acoustic scales to get the final output.
if [ $stage -le 7 ]; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
echo "score best paths"
local/score.sh --cmd "$cmd" $data $lang $dir
echo "score confidence and timing with sclite"
#local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $lang $dir
fi
echo "Decoding done."
exit 0;

Просмотреть файл

@ -0,0 +1,107 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM system, by rescoring lattices
# generated from a previous SGMM system. The directory with the lattices
# is assumed to contain speaker vectors, if used. Basically it rescores
# the lattices one final time, using the same setup as the final decoding
# pass of the source dir. The assumption is that the model may have
# been discriminatively trained.
# If the system was built on top of fMLLR transforms from a conventional system,
# you should provide the --transform-dir option.
# Begin configuration section.
transform_dir= # dir to find fMLLR transforms.
cmd=run.pl
iter=final
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# -ne 4 ]; then
echo "Usage: steps/decode_sgmm_rescore.sh [options] <graph-dir|lang-dir> <data-dir> <old-decode-dir> <decode-dir>"
echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr"
echo "main options (for others, see top of script file)"
echo " --transform-dir <decoding-dir> # directory of previous decoding"
echo " # where we can find transforms for SAT systems."
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
echo " --iter <iter> # iteration of model to use (default: final)"
exit 1;
fi
graphdir=$1
data=$2
olddir=$3
dir=$4
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \
$srcdir/$iter.mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj=`cat $olddir/num_jobs` || exit 1;
sdata=$data/split$nj;
gselect_opt="--gselect=ark,s,cs:gunzip -c $olddir/gselect.JOB.gz|"
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -f $olddir/vecs.1 ]; then
echo "$0: using speaker vectors from $olddir"
spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
else
echo "$0: no speaker vectors found."
spkvecs_opt=
fi
## Set up features.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
fi
if [ -f $olddir/trans.1 ]; then
echo "$0: using (in addition to any previous transforms) transforms from $olddir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |"
fi
##
# Rescore the state-level lattices with the model provided. Just
# one command in this script.
echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl"
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt \
$srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
exit 0;

Просмотреть файл

@ -0,0 +1,108 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Begin configuration section.
transform_dir=
iter=
model= # You can specify the model to use (e.g. if you want to use the .alimdl)
stage=0
nj=4
cmd=run.pl
max_active=7000
max_arcs=-1
beam=13.0
latbeam=6.0
acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
num_threads=1 # if >1, will use gmm-latgen-faster-parallel
parallel_opts= # If you supply num-threads, you should supply this too.
scoring_opts=
# note: there are no more min-lmwt and max-lmwt options, instead use
# e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
skip_scoring=false
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the model is."
echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
echo ""
echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
echo "what type of features you used (assuming it's one of these two)"
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --iter <iter> # Iteration of model to test."
echo " --model <model> # which model to use (e.g. to"
echo " # specify the final.alimdl)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --transform-dir <trans-dir> # dir to find fMLLR transforms "
echo " --acwt <float> # acoustic scale used for lattice generation "
echo " --scoring-opts <string> # options to local/score.sh"
echo " --num-threads <n> # number of threads to use, default 1."
echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
if [ -z $iter ]; then model=$srcdir/final.mdl;
else model=$srcdir/$iter.mdl; fi
fi
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do
[ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode.sh: feature type is $feat_type";
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "Using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
echo "Mismatch in number of jobs with $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
if [ $stage -le 0 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
gmm-latgen-faster$thread_string --max-arcs=$max_arcs --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $scoring_opts $data $graphdir $dir
fi
exit 0;

Просмотреть файл

@ -0,0 +1,113 @@
#!/bin/bash
# Copyright 2012 Neha Agrawal, Cisco Systems;
# Johns Hopkins University (Author: Daniel Povey);
#
# Apache 2.0
# Begin configuration section.
transform_dir=
iter=
model= # You can specify the model to use (e.g. if you want to use the .alimdl)
nj=4
cmd=run.pl
max_active=7000
beam=13.0
latbeam=6.0
acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
mean_tau=20
weight_tau=10
flags=mw # could also contain "v" for variance; the default
# tau for that is 50.
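# (For reference, the MAP mean update is the usual tau-smoothed estimate, roughly
#   mu_hat = (mean_tau * mu_prior + sum_t gamma(t) * x(t)) / (mean_tau + sum_t gamma(t)),
# with an analogous smoothed update for the weights; see gmm-adapt-map for the
# exact formulas used.)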
stage=1
# End configuration section.
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/decode.sh [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the model is."
echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
echo ""
echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
echo "what type of features you used (assuming it's one of these two)"
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --iter <iter> # Iteration of model to test."
echo " --model <model> # which model to use (e.g. to"
echo " # specify the final.alimdl)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --transform-dir <trans-dir> # dir to find fMLLR transforms "
echo " # speaker-adapted decoding"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
if [ -z $iter ]; then model=$srcdir/final.mdl;
else model=$srcdir/$iter.mdl; fi
fi
for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do
[ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1;
done
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "decode.sh: feature type is $feat_type";
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "Using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
echo "Mismatch in number of jobs with $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
if [ $stage -le 1 ]; then
echo "Doing first-pass decoding before MAP decoding."
$cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \
gmm-decode-faster --max-active=$max_active --beam=$beam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst "$feats" ark:$dir/tmp.JOB.tra ark:$dir/pass1_decode.JOB.ali || exit 1;
fi
if [ $stage -le 2 ]; then
echo "Computing MAP stats and doing MAP-adapted decoding"
$cmd JOB=1:$nj $dir/log/decode_pass2.JOB.log \
ali-to-post ark:$dir/pass1_decode.JOB.ali ark:- \| \
gmm-adapt-map --mean-tau=$mean_tau --weight-tau=$weight_tau \
--update-flags=$flags --spk2utt=ark:$sdata/JOB/spk2utt \
$model "$feats" ark:- ark:- \| \
gmm-latgen-map --lattice-beam=$latbeam --acoustic-scale=$acwt \
--utt2spk=ark:$sdata/JOB/utt2spk --max-active=$max_active --beam=$beam \
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model ark,s,cs:- $graphdir/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz"
fi
#rm -f $dir/pass1_decode.*.ali
#rm -f $dir/tmp.*.tra
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $graphdir $dir
exit 0;

Просмотреть файл

@ -0,0 +1,66 @@
#!/bin/bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# This script produces CTM files from a decoding directory that has lattices
# present.
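# Each line of a CTM file has the form
#   <file-or-utt-id> <channel> <start-time> <duration> <word>
# with times in seconds; this is the format expected by NIST scoring tools such
# as sclite.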
# begin configuration section.
cmd=run.pl
stage=0
use_segments=true # if we have a segments file, use it to convert
# the segments to be relative to the original files.
#end configuration section.
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: local/get_ctm.sh [options] <data-dir> <lang-dir|graph-dir> <decode-dir>"
echo " Options:"
echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
echo " --stage (0|1|2) # start scoring script from part-way through."
echo " --use-segments (true|false) # use segments and reco2file_and_channel files "
echo " # to produce a ctm relative to the original audio"
echo " # files, with channel information (typically needed"
echo " # for NIST scoring)."
echo "e.g.:"
echo "local/get_ctm.sh data/train data/lang exp/tri4a/decode/"
exit 1;
fi
data=$1
lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
dir=$3
model=$dir/../final.mdl # assume model one level up from decoding dir.
for f in $lang/words.txt $lang/phones/word_boundary.int \
$model $dir/lat.1.gz; do
[ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done
name=`basename $data`; # e.g. eval2000
mkdir -p $dir/scoring/log
if [ $stage -le 0 ]; then
if [ -f $data/segments ]; then
f=$data/reco2file_and_channel
[ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel"
else
filter_cmd=cat
fi
$cmd LMWT=5:20 $dir/scoring/log/get_ctm.LMWT.log \
mkdir -p $dir/score_LMWT/ '&&' \
lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
nbest-to-ctm ark:- - \| \
utils/int2sym.pl -f 5 $lang/words.txt \| \
$filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
fi

Просмотреть файл

@ -0,0 +1,95 @@
#!/bin/bash
# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao)
# Johns Hopkins University (Author: Daniel Povey)
# This script computes the basis for basis-fMLLR decoding (see decode_fmllr_basis.sh).
# This can be on top of delta+delta-delta, or LDA+MLLT features.
stage=0
# Parameters in alignment of training data
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
per_utt=true # If true, then treat each utterance as a separate speaker for purposes of
# basis training... this is recommended if the number of actual speakers in your
# training set is less than (feature-dim) * (feature-dim+1).
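# (For example, with 40-dimensional features this threshold is 40 * 41 = 1640
# speakers, which is the count checked further below.)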
align_beam=10
retry_beam=40
silence_weight=0.01
cmd=run.pl
# End configuration section
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: steps/get_fmllr_basis.sh [options] <data-dir> <lang-dir> <exp-dir>"
echo " e.g.: steps/decode_basis_fmllr.sh data/train_si84 data/lang exp/tri3b/"
echo "Note: we currently assume that this is the same data you trained the model with."
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --cmd <cmd> # Command to run in parallel with"
exit 1;
fi
data=$1
lang=$2
dir=$3
nj=`cat $dir/num_jobs` || exit 1;
sdata=$data/split$nj;
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
splice_opts=`cat $dir/splice_opts 2>/dev/null` # frame-splicing options.
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
for f in $data/feats.scp $dir/final.alimdl $dir/final.mdl $dir/ali.1.gz; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# Set up the unadapted features "$sifeats".
if [ -f $dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type";
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |";;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
# Set up the adapted features "$feats" for training set.
# Use the speaker-adapted (fMLLR) features if transforms are present in $dir.
if [ -f $dir/trans.1 ]; then
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |";
else
feats="$sifeats";
fi
if $per_utt; then
spk2utt_opt= # treat each utterance as separate speaker when computing basis.
echo "Doing per-utterance adaptation for purposes of computing the basis."
else
echo "Doing per-speaker adaptation for purposes of computing the basis."
[ `cat $data/spk2utt | wc -l` -lt $[41*40] ] && \
echo "Warning: number of speakers is small, might be better to use --per-utt=true."
spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi
# Note: we get Gaussian level alignments with the "final.mdl" and the
# speaker adapted features.
$cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \
ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \
gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \
gmm-basis-fmllr-accs-gpost $spk2utt_opt \
$dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1;
# Compute the basis matrices.
$cmd $dir/log/basis_training.log \
gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.* || exit 1;
rm $dir/basis.acc.* 2>/dev/null
exit 0;

Просмотреть файл

@ -0,0 +1,225 @@
#!/bin/bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# From a training or alignment directory, and an original lexicon.txt and lang/
# directory, obtain a new lexicon with pronunciation probabilities.
# Begin configuration section.
stage=0
smooth_count=1.0 # Amount of count to add corresponding to each original lexicon entry;
# this corresponds to add-one smoothing of the pron-probs.
max_one=true # If true, normalize the pron-probs so the maximum value for each word is 1.0,
# rather than summing to one. This is quite standard.
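# (Worked example with hypothetical counts: a word with prons A and B seen 10 and
# 4 times in the alignments gets smoothed counts 11 and 5 with smooth_count=1.0;
# with max_one=false the probs are 11/16 and 5/16, while with max_one=true they
# are divided by the largest count instead, giving 1.0 and 5/11.)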
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 6 ]; then
echo "Usage: steps/get_lexicon_probs.sh <data-dir> <lang-dir> <src-dir|ali-dir> <old-lexicon> <exp-dir> <new-lexicon>"
echo "e.g.: steps/get_lexicon_probs.sh data/train data/lang exp/tri5 data/local/lexicon.txt \\"
echo " exp/tri5_lexprobs data/local_withprob/lexicon.txt"
echo "Note: we assume you ran using word-position-dependent phones but both the old and new lexicon will not have"
echo "these markings. We also assume the new lexicon will have pron-probs but the old one does not; this limitation"
echo "of the script can be removed later."
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --stage <stage> # used to control partial re-running."
echo " --max-one <true|false> # If true, normalize so max prob of each"
echo " # word is one. Default: true"
echo " --smooth <smooth-count> # Amount to smooth each count by (default: 1.0)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
old_lexicon=$4
dir=$5
new_lexicon=$6
oov=`cat $lang/oov.int` || exit 1;
nj=`cat $srcdir/num_jobs` || exit 1;
for f in $data/text $lang/L.fst $lang/phones/word_boundary.int $srcdir/ali.1.gz $old_lexicon; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
mkdir -p $dir/log
utils/split_data.sh $data $nj # Make sure split data-dir exists.
sdata=$data/split$nj
mkdir -p $dir/log
if [ $stage -le 0 ]; then
( ( for n in `seq $nj`; do gunzip -c $srcdir/ali.$n.gz; done ) | \
linear-to-nbest ark:- "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $data/text |" '' '' ark:- | \
lattice-align-words $lang/phones/word_boundary.int $srcdir/final.mdl ark:- ark:- | \
lattice-to-phone-lattice --replace-words=false $srcdir/final.mdl ark:- ark,t:- | \
awk '{ if (NF == 4) { word_phones = sprintf("%s %s", $3, $4); count[word_phones]++; } }
END { for(key in count) { print count[key], key; } }' | \
sed s:0,0,:: | awk '{print $2, $1, $3;}' | sed 's/_/ /g' | \
utils/int2sym.pl -f 3- $lang/phones.txt | \
sed -E 's/_I( |$)/ /g' | sed -E 's/_E( |$)/ /g' | sed -E 's/_B( |$)/ /g' | sed -E 's/_S( |$)/ /g' | \
utils/int2sym.pl -f 1 $lang/words.txt > $dir/lexicon_counts.txt
) 2>&1 | tee $dir/log/get_fsts.log
fi
cat $old_lexicon | awk '{if (!($2 > 0.0 && $2 < 1.0)) { exit(1); }}' && \
echo "Error: old lexicon $old_lexicon appears to have pron-probs; we don't expect this." && \
exit 1;
mkdir -p `dirname $new_lexicon` || exit 1;
if [ $stage -le 1 ]; then
grep -v -w '^<eps>' $dir/lexicon_counts.txt | \
perl -e ' ($old_lexicon, $smooth_count, $max_one) = @ARGV;
($smooth_count >= 0) || die "Invalid smooth_count $smooth_count";
($max_one eq "true" || $max_one eq "false") || die "Invalid max_one variable $max_one";
open(O, "<$old_lexicon")||die "Opening old-lexicon file $old_lexicon";
while(<O>) {
$_ =~ m/(\S+)\s+(.+)/ || die "Bad old-lexicon line $_";
$word = $1;
$orig_pron = $2;
# Remember the mapping from canonical prons to original prons: in the case of
# syllable based systems we want to remember the locations of tabs in
# the original lexicon.
$pron = join(" ", split(" ", $orig_pron));
$orig_pron{$word,$pron} = $orig_pron;
$count{$word,$pron} += $smooth_count;
$tot_count{$word} += $smooth_count;
}
while (<STDIN>) {
$_ =~ m/(\S+)\s+(\S+)\s+(.+)/ || die "Bad new-lexicon line $_";
$word = $1;
$this_count = $2;
$pron = join(" ", split(" ", $3));
$count{$word,$pron} += $this_count;
$tot_count{$word} += $this_count;
}
if ($max_one eq "true") { # replace $tot_count{$word} with max count
# of any pron.
%tot_count = (); # set to empty assoc array.
foreach $key (keys %count) {
($word, $pron) = split($; , $key); # $; is separator for strings that index assoc. arrays.
$this_count = $count{$key};
if (!defined $tot_count{$word} || $this_count > $tot_count{$word}) {
$tot_count{$word} = $this_count;
}
}
}
foreach $key (keys %count) {
($word, $pron) = split($; , $key); # $; is separator for strings that index assoc. arrays.
$this_orig_pron = $orig_pron{$key};
if (!defined $this_orig_pron) { die "Word $word and pron $pron did not appear in original lexicon."; }
if (!defined $tot_count{$word}) { die "Tot-count not defined for word $word."; }
$prob = $count{$key} / $tot_count{$word};
print "$word\t$prob\t$this_orig_pron\n"; # Output happens here.
} ' $old_lexicon $smooth_count $max_one > $new_lexicon || exit 1;
fi
exit 0;
# Note: everything below this point is unreachable (it appears to be leftover from
# the fMLLR alignment script this one was adapted from) and is kept only for reference.
echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |"
mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"
## Work out where we're getting the graphs from.
if $use_graphs; then
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
else
graphdir=$dir
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
if [ $stage -le 1 ]; then
echo "$0: aligning data in $data using $alimdl and speaker-independent features."
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi
if [ $stage -le 2 ]; then
echo "$0: computing fMLLR transforms"
if [ "$alimdl" != "$mdl" ]; then
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \
gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
ark,s,cs:- ark:$dir/trans.JOB || exit 1;
else
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
--spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
ark,s,cs:- ark:$dir/trans.JOB || exit 1;
fi
fi
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
if [ $stage -le 3 ]; then
echo "$0: doing final alignment."
$cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
rm $dir/pre_ali.*.gz
echo "$0: done aligning data."
utils/summarize_warnings.pl $dir/log
exit 0;

Просмотреть файл

@ -0,0 +1,66 @@
#!/bin/bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# This script produces CTM files from a training directory that has alignments
# present.
# begin configuration section.
cmd=run.pl
stage=0
use_segments=true # if we have a segments file, use it to convert
# the segments to be relative to the original files.
#end configuration section.
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: local/get_train_ctm.sh [options] <data-dir> <lang-dir> <ali-dir|exp-dir>"
echo " Options:"
echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
echo " --stage (0|1|2) # start scoring script from part-way through."
echo " --use-segments (true|false) # use segments and reco2file_and_channel files "
echo " # to produce a ctm relative to the original audio"
echo " # files, with channel information (typically needed"
echo " # for NIST scoring)."
echo "e.g.:"
echo "local/get_train_ctm.sh data/train data/lang exp/tri3a_ali"
echo "Produces ctm in: exp/tri3a_ali/ctm"
exit 1;
fi
data=$1
lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
dir=$3
model=$dir/final.mdl # assume model one level up from decoding dir.
for f in $lang/words.txt $lang/phones/word_boundary.int \
$model $dir/ali.1.gz $lang/oov.int; do
[ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/scoring/log
if [ $stage -le 0 ]; then
if [ -f $data/segments ]; then
f=$data/reco2file_and_channel
[ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel"
else
filter_cmd=cat
fi
$cmd $dir/log/get_ctm.log \
linear-to-nbest "ark:gunzip -c $dir/ali.*.gz|" \
"ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $data/text |" \
'' '' ark:- \| \
lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
nbest-to-ctm ark:- - \| \
utils/int2sym.pl -f 5 $lang/words.txt \| \
$filter_cmd '>' $dir/ctm || exit 1;
fi

Просмотреть файл

@ -0,0 +1,122 @@
#!/bin/bash
# Begin configuration section.
mode=4
cmd=run.pl
skip_scoring=false
# End configuration section.
echo "$0 $@" # Print the command line for logging
for x in `seq 2`; do
[ "$1" == "--cmd" ] && cmd=$2 && shift 2;
[ "$1" == "--mode" ] && mode=$2 && shift 2;
done
if [ $# != 5 ]; then
echo "Do language model rescoring of lattices (remove old LM, add new LM)"
echo "Usage: steps/lmrescore.sh [options] <old-lang-dir> <new-lang-dir> <data-dir> <input-decode-dir> <output-decode-dir>"
echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--mode (1|2|3|4)]"
exit 1;
fi
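# Example (directory names are hypothetical): rescore trigram lattices with a
# 4-gram LM using the exact phi-composition method (mode 3):
#   steps/lmrescore.sh --mode 3 data/lang_test_tg data/lang_test_fg data/test \
#     exp/tri3/decode_test_tg exp/tri3/decode_test_tg_fg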
[ -f path.sh ] && . ./path.sh;
oldlang=$1
newlang=$2
data=$3
indir=$4
outdir=$5
oldlm=$oldlang/G.fst
newlm=$newlang/G.fst
! cmp $oldlang/words.txt $newlang/words.txt && echo "Warning: vocabularies may be incompatible."
[ ! -f $oldlm ] && echo Missing file $oldlm && exit 1;
[ ! -f $newlm ] && echo Missing file $newlm && exit 1;
! ls $indir/lat.*.gz >/dev/null && echo "No lattices in input directory $indir" && exit 1;
oldlmcommand="fstproject --project_output=true $oldlm |"
newlmcommand="fstproject --project_output=true $newlm |"
mkdir -p $outdir/log
phi=`grep -w '#0' $newlang/words.txt | awk '{print $2}'`
if [ "$mode" == 4 ]; then
# we have to prepare $outdir/Ldet.fst in this case: determinized
# lexicon (determinized on phones), with disambig syms removed.
# take L_disambig.fst; get rid of transition with "#0 #0" on it; determinize
# with epsilon removal; remove disambiguation symbols.
fstprint $newlang/L_disambig.fst | awk '{if($4 != '$phi'){print;}}' | fstcompile | \
fstdeterminizestar | fstrmsymbols $newlang/phones/disambig.int >$outdir/Ldet.fst || exit 1;
fi
nj=`cat $indir/num_jobs` || exit 1;
cp $indir/num_jobs $outdir
#for lat in $indir/lat.*.gz; do
# number=`basename $lat | cut -d. -f2`;
# newlat=$outdir/`basename $lat`
case "$mode" in
1) # 1 is inexact, it's the original way of doing it.
$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
lattice-lmrescore --lm-scale=-1.0 "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \
lattice-lmrescore --lm-scale=1.0 ark:- "$newlmcommand" "ark,t:|gzip -c>$outdir/lat.JOB.gz" \
|| exit 1;
;;
2) # 2 is equivalent to 1, but using more basic operations, combined.
$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
gunzip -c $indir/lat.JOB.gz \| \
lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \
lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \
lattice-determinize ark:- ark:- \| \
lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \
lattice-compose ark:- "fstproject --project_output=true $newlm |" ark:- \| \
lattice-determinize ark:- ark:- \| \
gzip -c \>$outdir/lat.JOB.gz || exit 1;
;;
3) # 3 is "exact" in that we remove the old LM scores accepting any path
# through G.fst (which is what we want as that happened in lattice
# generation), but we add the new one with "phi matcher", only taking
# backoff arcs if an explicit arc did not exist.
$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
gunzip -c $indir/lat.JOB.gz \| \
lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \
lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \
lattice-determinize ark:- ark:- \| \
lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \
lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \
lattice-determinize ark:- ark:- \| \
gzip -c \>$outdir/lat.JOB.gz || exit 1;
;;
4) # 4 is also exact (like 3), but instead of subtracting the old LM-scores,
# it removes the old graph scores entirely and adds in the lexicon,
# grammar and transition weights.
mdl=`dirname $indir`/final.mdl
[ ! -f $mdl ] && echo No such model $mdl && exit 1;
$cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
gunzip -c $indir/lat.JOB.gz \| \
lattice-scale --lm-scale=0.0 ark:- ark:- \| \
lattice-to-phone-lattice $mdl ark:- ark:- \| \
lattice-compose ark:- $outdir/Ldet.fst ark:- \| \
lattice-determinize ark:- ark:- \| \
lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \
lattice-add-trans-probs --transition-scale=1.0 --self-loop-scale=0.1 \
$mdl ark:- ark:- \| \
gzip -c \>$outdir/lat.JOB.gz || exit 1;
;;
esac
rm $outdir/Ldet.fst 2>/dev/null
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh --cmd "$cmd" $data $newlang $outdir
else
echo "Not scoring because requested so..."
fi
exit 0;

Просмотреть файл

@ -0,0 +1,117 @@
#!/bin/bash
# Copyright 2012 Karel Vesely, Daniel Povey
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
# Begin configuration section.
nj=4
cmd=run.pl
remove_last_layers=4 # remove the last N components from the nnet
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 5 ]; then
echo "usage: $0 [options] <tgt-data-dir> <src-data-dir> <nnet-dir> <log-dir> <abs-path-to-bn-feat-dir>";
echo "options: "
echo " --trim-transforms <N> # number of NNet Components to remove from the end"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
data=$1
srcdata=$2
nndir=$3
logdir=$4
bnfeadir=$5
######## CONFIGURATION
# copy the dataset metadata from srcdata.
mkdir -p $data || exit 1;
cp $srcdata/* $data 2>/dev/null; rm $data/feats.scp $data/cmvn.scp;
# make $bnfeadir an absolute pathname.
bnfeadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $bnfeadir ${PWD}`
# use "name" as part of name of the archive.
name=`basename $data`
mkdir -p $bnfeadir || exit 1;
mkdir -p $data || exit 1;
mkdir -p $logdir || exit 1;
srcscp=$srcdata/feats.scp
scp=$data/feats.scp
required="$srcscp $nndir/final.nnet"
for f in $required; do
if [ ! -f $f ]; then
echo "$0: no such file $f"
exit 1;
fi
done
if [ ! -d $srcdata/split$nj -o $srcdata/split$nj -ot $srcdata/feats.scp ]; then
utils/split_data.sh $srcdata $nj
fi
#cut the MLP
nnet=$bnfeadir/feature_extractor.nnet
copy-nnet --remove-last-layers=$remove_last_layers --binary=false $nndir/final.nnet $nnet 2>$logdir/feature_extractor.log
#get the feature transform
feature_transform=$nndir/final.feature_transform
echo "Creating bn-feats into $data"
###
### Prepare feature pipeline
feats="ark,s,cs:copy-feats scp:$srcdata/split$nj/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $nndir/norm_vars ]; then
norm_vars=$(cat $nndir/norm_vars 2>/dev/null)
feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$srcdata/utt2spk scp:$srcdata/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $nndir/delta_order ]; then
delta_order=$(cat $nndir/delta_order)
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
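# (For illustration: with both cmvn and deltas configured, $feats ends up as a
# pipeline of the form
#   copy-feats scp:... ark:- | apply-cmvn ... ark:- ark:- | add-deltas ... ark:- ark:- |
# which is then fed to nnet-forward below.)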
###
###
#Run the forward pass
$cmd JOB=1:$nj $logdir/make_bnfeats.JOB.log \
nnet-forward --feature-transform=$feature_transform $nnet "$feats" \
ark,scp:$bnfeadir/raw_bnfea_$name.JOB.ark,$bnfeadir/raw_bnfea_$name.JOB.scp \
|| exit 1;
N0=$(cat $srcdata/feats.scp | wc -l)
N1=$(cat $bnfeadir/raw_bnfea_$name.*.scp | wc -l)
if [[ "$N0" != "$N1" ]]; then
echo "Error producing bnfea features for $name:"
echo "Original feats : $N0 Bottleneck feats : $N1"
exit 1;
fi
# concatenate the .scp files together.
for ((n=1; n<=nj; n++)); do
cat $bnfeadir/raw_bnfea_$name.$n.scp >> $data/feats.scp
done
echo "Succeeded creating MLP-BN features for $name ($data)"

Просмотреть файл

@ -0,0 +1,146 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Create denominator lattices for MMI/MPE training.
# Creates its output in $dir/lat.*.gz
# Begin configuration section.
nj=4
cmd=run.pl
sub_split=1
beam=13.0
lattice_beam=7.0
acwt=0.1
max_active=5000
transform_dir=
max_mem=20000000 # This will stop the processes getting too large.
# This is in bytes, but not "real" bytes-- you have to multiply
# by something like 5 or 10 to get real bytes (not sure why so large)
num_threads=1
parallel_opts=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/make_denlats.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
echo " e.g.: steps/make_denlats.sh data/train data/lang exp/tri1 exp/tri1_denlats"
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
echo " plus transforms."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --sub-split <n-split> # e.g. 40; use this for "
echo " # large databases so your jobs will be smaller and"
echo " # will (individually) finish reasonably soon."
echo " --transform-dir <transform-dir> # directory to find fMLLR transforms."
echo " --num-threads <n> # number of threads per decoding job"
echo " --parallel-opts <string> # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
sdata=$data/split$nj
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
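# (A unigram LM is the usual choice for denominator lattices in MMI/MPE training:
# a weak LM keeps the lattices rich in confusable word sequences, so the
# discriminative objective sees plenty of competing hypotheses.)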
new_lang="$dir/"$(basename "$lang")
echo "Making unigram grammar FST in $new_lang"
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \
|| exit 1;
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph.
echo "Compiling decoding graph in $dir/dengraph"
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
else
utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1;
fi
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "align_si.sh: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "$0: using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \
&& echo "$0: mismatch in number of jobs with $transform_dir" && exit 1;
[ -f $srcdir/final.mat ] && ! cmp $transform_dir/final.mat $srcdir/final.mat && \
echo "$0: LDA transforms differ between $srcdir and $transform_dir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
else
if [ -f $srcdir/final.alimdl ]; then
echo "$0: you seem to have a SAT system but you did not supply the --transform-dir option.";
exit 1;
fi
fi
if [ $sub_split -eq 1 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
else
sdata2=$data/split$nj/$n/split$sub_split;
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
fi
mkdir -p $dir/log/$n
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
$cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
echo Merging archives for data subset $n
rm $dir/.error 2>/dev/null;
for k in `seq $sub_split`; do
gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error;
done | gzip -c > $dir/lat.$n.gz || touch $dir/.error;
[ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1;
rm $dir/lat.$n.*.gz
touch $dir/.done.$n
fi
done
fi
echo "$0: done generating denominator lattices."

Просмотреть файл

@ -0,0 +1,177 @@
#!/bin/bash
# Copyright 2012-2013 Karel Vesely, Daniel Povey
# Apache 2.0.
# Create denominator lattices for MMI/MPE/sMBR training.
# Creates its output in $dir/lat.*.ark,$dir/lat.scp
# The lattices are uncompressed, we need random access for DNN training.
# Begin configuration section.
nj=4
cmd=run.pl
sub_split=1
beam=13.0
lattice_beam=7.0
acwt=0.1
max_active=5000
nnet=
max_mem=20000000 # This will stop the processes getting too large.
# This is in bytes, but not "real" bytes-- you have to multiply
# by something like 5 or 10 to get real bytes (not sure why so large)
# End configuration section.
use_gpu_id=-1 # disable gpu
parallel_opts="-pe smp 2"
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/$0 [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
echo " e.g.: steps/$0 data/train data/lang exp/tri1 exp/tri1_denlats"
echo "Works for plain features (or CMN, delta), forwarded through feature-transform."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --sub-split <n-split> # e.g. 40; use this for "
echo " # large databases so your jobs will be smaller and"
echo " # will (individually) finish reasonably soon."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
sdata=$data/split$nj
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")
echo "Making unigram grammar FST in $new_lang"
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \
|| exit 1;
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph.
echo "Compiling decoding graph in $dir/dengraph"
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
else
utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1;
fi
#Get the files we will need
cp $srcdir/{tree,final.mdl} $dir
[ -z "$nnet" ] && nnet=$srcdir/final.nnet;
[ ! -f "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
class_frame_counts=$srcdir/ali_train_pdf.counts
[ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1;
feature_transform=$srcdir/final.feature_transform
if [ ! -f $feature_transform ]; then
echo "Missing feature_transform '$feature_transform'"
exit 1
fi
model=$dir/final.mdl
[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1;
###
### Prepare feature pipeline (same as for decoding)
###
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
[ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
delta_order=$(cat $srcdir/delta_order)
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
# Finally add feature_transform and the MLP
feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |"
###
### We will produce lattices in which the correct path is not necessarily present.
###
#1) We don't use the reference path here...
echo "Generating denominator lattices"
#2) Generate the denominator lattices
if [ $sub_split -eq 1 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats" "ark,scp:$dir/lat.JOB.ark,$dir/lat.JOB.scp" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
else
sdata2=$data/split$nj/$n/split$sub_split;
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
fi
mkdir -p $dir/log/$n
mkdir -p $dir/part
feats_subset=$(echo $feats | sed s:JOB/:$n/split$sub_split/JOB/:g)
$cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats_subset" "ark,scp:$dir/lat.$n.JOB.ark,$dir/lat.$n.JOB.scp" || exit 1;
echo Merging lists for data subset $n
for k in `seq $sub_split`; do
cat $dir/lat.$n.$k.scp
done > $dir/lat.$n.all.scp
echo Merging ark files for data subset $n
lattice-copy scp:$dir/lat.$n.all.scp ark,scp:$dir/lat.$n.ark,$dir/lat.$n.scp || exit 1;
# remove the intermediate per-split data
rm $dir/lat.$n.*.ark $dir/lat.$n.*.scp $dir/lat.$n.all.scp
touch $dir/.done.$n
fi
done
fi
#3) Merge the SCPs to create full list of lattices (will use random access)
echo Merging to single list $dir/lat.scp
for ((n=1; n<=nj; n++)); do
cat $dir/lat.$n.scp
done > $dir/lat.scp
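# The merged lat.scp supports the random access needed by the sequence-training tools;
# as a quick check (a sketch), a single lattice can be printed in text form with
#   lattice-copy "scp:head -n1 $dir/lat.scp |" ark,t:- | head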
echo "$0: done generating denominator lattices."


@@ -0,0 +1,146 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Create denominator lattices for MMI/MPE training.
# Creates its output in $dir/lat.*.gz
# Begin configuration section.
transform_dir= # dir to find fMLLR transforms.
nj=4 # number of decoding jobs. If --transform-dir set, must match that number!
cmd=run.pl
sub_split=1
beam=13.0
lattice_beam=7.0
acwt=0.1
max_active=5000
max_mem=20000000 # This will stop the processes getting too large.
# This is in bytes, but not "real" bytes -- multiply by roughly 5 to 10
# to estimate the actual memory use (it is not clear why the ratio is so large).
# End configuration section.
num_threads=1 # Number of threads used in nnet-logprob computation. If you set
# this to a different value, make sure to also set the appropriate
# queue options. If you set this too high it won't use all the
# threads as most of the time will be taken in the decoder.
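# For example (hypothetical queue options): to run 4 threads per decoding job under
# GridEngine you might pass
#   --num-threads 4 --cmd "queue.pl -pe smp 4"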
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/make_denlats_nnet_cpu.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
echo " e.g.: steps/make_denlats_nnet_cpu.sh data/train data/lang exp/tri1 exp/tri1_denlats"
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
echo " plus transforms."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --sub-split <n-split> # e.g. 40; use this for "
echo " # large databases so your jobs will be smaller and"
echo " # will (individually) finish reasonably soon."
echo " --transform-dir <transform-dir> # directory to find fMLLR transforms."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
sdata=$data/split$nj
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")
echo "Making unigram grammar FST in $new_lang"
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \
|| exit 1;
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph.
echo "Compiling decoding graph in $dir/dengraph"
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
else
utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1;
fi
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "align_si.sh: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "$0: using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \
&& echo "$0: mismatch in number of jobs with $transform_dir" && exit 1;
[ -f $srcdir/final.mat ] && ! cmp $transform_dir/final.mat $srcdir/final.mat && \
echo "$0: LDA transforms differ between $srcdir and $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
else
if [ -f $srcdir/final.alimdl ]; then
echo "$0: you seem to have a SAT system but you did not supply the --transform-dir option.";
exit 1;
fi
fi
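# Optional sanity check (a sketch, assuming split 1 exists): print the dimension of
# the features that will be piped into nnet-logprob-parallel below, e.g.
#   feat-to-dim "`echo $feats | sed s/JOB/1/g`" -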
if [ $sub_split -eq 1 ]; then
$cmd JOB=1:$nj $dir/log/decode_den.JOB.log \
nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats" ark:- \| \
latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
else
sdata2=$data/split$nj/$n/split$sub_split;
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
fi
mkdir -p $dir/log/$n
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
$cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats_subset" ark:- \| \
latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
echo Merging archives for data subset $n
rm $dir/.error 2>/dev/null;
for k in `seq $sub_split`; do
gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error;
done | gzip -c > $dir/lat.$n.gz || touch $dir/.error;
[ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1;
rm $dir/lat.$n.*.gz
touch $dir/.done.$n
fi
done
fi
echo "$0: done generating denominator lattices."


@@ -0,0 +1,159 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Create denominator lattices for MMI/MPE training, with SGMM models. If the
# features have fMLLR transforms you have to supply the --transform-dir option.
# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is
# possibly a slight mismatch because the speaker vectors come from supervised
# adaptation.
# Begin configuration section.
nj=4
cmd=run.pl
sub_split=1
beam=13.0
lattice_beam=7.0
acwt=0.1
max_active=5000
transform_dir=
max_mem=20000000 # This will stop the processes getting too large.
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/make_denlats_sgmm.sh [options] <data-dir> <lang-dir> <src-dir|alidir> <exp-dir>"
echo " e.g.: steps/make_denlats_sgmm.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats"
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
echo " plus transforms."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --sub-split <n-split> # e.g. 40; use this for "
echo " # large databases so your jobs will be smaller and"
echo " # will (individually) finish reasonably soon."
echo " --transform-dir <transform-dir> # directory to find fMLLR transforms."
exit 1;
fi
data=$1
lang=$2
alidir=$3 # could also be $srcdir, but only if no vectors supplied.
dir=$4
sdata=$data/split$nj
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")
echo "Making unigram grammar FST in $new_lang"
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \
|| exit 1;
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph.
echo "Compiling decoding graph in $dir/dengraph"
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $alidir/final.mdl ]; then
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
else
utils/mkgraph.sh $new_lang $alidir $dir/dengraph || exit 1;
fi
if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "align_si.sh: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
cp $alidir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "$0: using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \
&& echo "$0: mismatch in number of jobs with $transform_dir" && exit 1;
[ -f $alidir/final.mat ] && ! cmp $transform_dir/final.mat $alidir/final.mat && \
echo "$0: LDA transforms differ between $alidir and $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
else
echo "Assuming you don't have a SAT system, since no --transform-dir option supplied "
fi
if [ -f $alidir/gselect.1.gz ]; then
gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|"
else
echo "$0: no such file $alidir/gselect.1.gz" && exit 1;
fi
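# For illustration (a sketch of how the option expands): for job 1 this becomes
#   --gselect=ark,s,cs:gunzip -c $alidir/gselect.1.gz|
# i.e. the Gaussian-selection indices from the alignment stage are reused here.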
if [ -f $alidir/vecs.1 ]; then
spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
else
if [ -f $alidir/final.alimdl ]; then
echo "You seem to have an SGMM system with speaker vectors,"
echo "yet we can't find speaker vectors. Perhaps you supplied"
echo "the model director instead of the alignment directory?"
exit 1;
fi
fi
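# For illustration (a sketch of how the option expands): for job 1 the decoder below
# receives
#   --spk-vecs=ark:$alidir/vecs.1 --utt2spk=ark:$sdata/1/utt2spk
# i.e. the speaker vectors estimated during alignment are reused for denominator decoding.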
if [ $sub_split -eq 1 ]; then
$cmd JOB=1:$nj $dir/log/decode_den.JOB.log \
sgmm-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \
--lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $alidir/final.mdl \
$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
else
sdata2=$data/split$nj/$n/split$sub_split;
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
fi
mkdir -p $dir/log/$n
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"`
gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"`
$cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
sgmm-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \
--beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \
--word-symbol-table=$lang/words.txt $alidir/final.mdl \
$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
echo Merging archives for data subset $n
rm $dir/.error 2>/dev/null;
for k in `seq $sub_split`; do
gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error;
done | gzip -c > $dir/lat.$n.gz || touch $dir/.error;
[ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1;
rm $dir/lat.$n.*.gz
touch $dir/.done.$n
fi
done
fi
echo "$0: done generating denominator lattices with SGMMs."


@@ -0,0 +1,170 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Create denominator lattices for MMI/MPE training, with SGMM models. If the
# features have fMLLR transforms you have to supply the --transform-dir option.
# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is
# possibly a slight mismatch because the speaker vectors come from supervised
# adaptation.
# Begin configuration section.
nj=4
cmd=run.pl
sub_split=1
beam=13.0
lattice_beam=7.0
acwt=0.1
max_active=5000
transform_dir=
max_mem=20000000 # This will stop the processes getting too large.
num_threads=1
parallel_opts=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/make_denlats_sgmm2.sh [options] <data-dir> <lang-dir> <src-dir|alidir> <exp-dir>"
echo " e.g.: steps/make_denlats_sgmm2.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats"
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
echo " plus transforms."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --sub-split <n-split> # e.g. 40; use this for "
echo " # large databases so your jobs will be smaller and"
echo " # will (individually) finish reasonably soon."
echo " --transform-dir <transform-dir> # directory to find fMLLR transforms."
echo " --num-threads <n> # number of threads per decoding job"
echo " --parallel-opts <string> # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
exit 1;
fi
data=$1
lang=$2
alidir=$3 # could also be $srcdir, but only if no vectors supplied.
dir=$4
sdata=$data/split$nj
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
if [ $num_threads -gt 1 ]; then
# the -parallel becomes part of the binary name we decode with.
thread_string="-parallel --num-threads=$num_threads"
fi
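# For example (hypothetical settings): with --num-threads 4 the decoder invoked below
# becomes
#   sgmm2-latgen-faster-parallel --num-threads=4 ...
# and you would normally pass matching queue options, e.g. --parallel-opts "-pe smp 4".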
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")
echo "Making unigram grammar FST in $new_lang"
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \
|| exit 1;
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph.
echo "Compiling decoding graph in $dir/dengraph"
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $alidir/final.mdl ]; then
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
else
utils/mkgraph.sh $new_lang $alidir $dir/dengraph || exit 1;
fi
if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "align_si.sh: feature type is $feat_type"
case $feat_type in
delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
cp $alidir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then # add transforms to features...
echo "$0: using fMLLR transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
[ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \
&& echo "$0: mismatch in number of jobs with $transform_dir" && exit 1;
[ -f $alidir/final.mat ] && ! cmp $transform_dir/final.mat $alidir/final.mat && \
echo "$0: LDA transforms differ between $alidir and $transform_dir" && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
else
echo "Assuming you don't have a SAT system, since no --transform-dir option supplied "
fi
if [ -f $alidir/gselect.1.gz ]; then
gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|"
else
echo "$0: no such file $alidir/gselect.1.gz" && exit 1;
fi
if [ -f $alidir/vecs.1 ]; then
spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
[ "`cat $alidir/num_jobs`" -ne "$nj" ] \
&& echo "$0: mismatch in number of jobs with $alidir" && exit 1;
else
if [ -f $alidir/final.alimdl ]; then
echo "$0: You seem to have an SGMM system with speaker vectors,"
echo "yet we can't find speaker vectors. Perhaps you supplied"
echo "the model director instead of the alignment directory?"
exit 1;
fi
fi
if [ $sub_split -eq 1 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
sgmm2-latgen-faster$thread_string $spkvecs_opt "$gselect_opt" --beam=$beam \
--lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $alidir/final.mdl \
$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
else
sdata2=$data/split$nj/$n/split$sub_split;
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
fi
mkdir -p $dir/log/$n
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"`
gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"`
$cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
sgmm2-latgen-faster$thread_string $spkvecs_opt_subset "$gselect_opt_subset" \
--beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \
--word-symbol-table=$lang/words.txt $alidir/final.mdl \
$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
echo Merging archives for data subset $n
rm $dir/.error 2>/dev/null;
for k in `seq $sub_split`; do
gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error;
done | gzip -c > $dir/lat.$n.gz || touch $dir/.error;
[ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1;
rm $dir/lat.$n.*.gz
touch $dir/.done.$n
fi
done
fi
echo "$0: done generating denominator lattices with SGMMs."

Some files were not shown because too many files have changed in this diff.