git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/karel@844 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Karel Vesely 2012-04-17 11:18:27 +00:00
Родитель caeb968dbe
Коммит 6c217ca5ce
68 изменённых файлов: 5161 добавлений и 9 удалений

Просмотреть файл

@ -21,4 +21,13 @@ should be Dan Povey (dpovey@microsoft.com). In addition to specific questions,
please let me know if there are specific aspects of the project that you feel
could be improved, that you find confusing, etc., and which missing features you
most wish it had.
==SVN-MERGING==
Merge with trunk:
svn merge ^/trunk ^/sandbox/karel
When merging, resolve the tree conflicts by:
svn resolve --accept working -R .

20
egs/rm/s4/README.txt Normal file
Просмотреть файл

@ -0,0 +1,20 @@
This recipe is using a publicly available subset of Resource Management data,
consisting of freely distributed feature files distributed by CMU and some
metadata (e.g. the word-pair grammar file) available from LDC's website.
To run the recipe the data should be downloaded first, for which ./getdata.sh
command can be used. Then ./run.sh script can be executed to automatically perform
all steps or the commands in it can be started manually by copy/pasting them.
The script and data layout are based on egs/rm/s3 recipe, with several exceptions:
- because this recipe uses pre-extracted feature vectors, no conversion from .sph
to .wav format and no subsequent feature extraction is needed. The features are just
converted from CMU Sphinx feature files to Kaldi Tables.
- only one test set is available instead of several (e.g. mar87, oct87 and so on)
as in the original recipe
- no speaker-dependent processing
- on the plus side it requires less disk space (about 220MB)

1
egs/rm/s4/conf/mfcc.conf Normal file
Просмотреть файл

@ -0,0 +1 @@
--use-energy=false # only non-default option.

2
egs/rm/s4/conf/plp.conf Normal file
Просмотреть файл

@ -0,0 +1,2 @@
# No non-default options for now.

22
egs/rm/s4/conf/topo.proto Normal file
Просмотреть файл

@ -0,0 +1,22 @@
<Topology>
<TopologyEntry>
<ForPhones>
NONSILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
<TopologyEntry>
<ForPhones>
SILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
<State> 5 </State>
</TopologyEntry>
</Topology>

30
egs/rm/s4/getdata.sh Executable file
Просмотреть файл

@ -0,0 +1,30 @@
#!/bin/bash
# Copyright 2012 Vassil Panayotov
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Pull in RM1_ROOT (the download destination) from the recipe's path.sh.
source ./path.sh || exit 1

# Every path below is built from RM1_ROOT, including the argument of the
# final "rm -rf", so refuse to run with an empty or unset value.
: "${RM1_ROOT:?RM1_ROOT must be set by path.sh}"

# Download and extract CMU's feature files
mkdir -p "$RM1_ROOT" || exit 1
wget -P "$RM1_ROOT" http://www.speech.cs.cmu.edu/databases/rm1/rm1_cepstra.tar.gz || exit 1
tar -C "$RM1_ROOT/" -xf "$RM1_ROOT/rm1_cepstra.tar.gz" || exit 1

# Download the available LDC metadata
# For some reason wget needs to be run twice in order to get all needed data ...
# The exit status of the two mirror runs is deliberately not checked: the first
# run is known to be incomplete, and the "mv" below fails if the documentation
# directory was not produced.
wget -P "$RM1_ROOT" -mk --no-parent -r -c -v -nH http://www.ldc.upenn.edu/Catalog/docs/LDC93S3B/
wget -P "$RM1_ROOT" -mk --no-parent -r -c -v -nH http://www.ldc.upenn.edu/Catalog/docs/LDC93S3B/

# Flatten the mirrored tree: keep only LDC93S3B directly under RM1_ROOT.
mv "$RM1_ROOT/Catalog/docs/LDC93S3B" "$RM1_ROOT/" || exit 1
rm -rf "$RM1_ROOT/Catalog"

40
egs/rm/s4/local/decode.sh Executable file
Просмотреть файл

@ -0,0 +1,40 @@
#!/bin/bash
# This script calls the supplied decoding script on the (single) test set
# and then prints the resulting WER lines.
# The interpretation of decode-dir-1 as input, output and so on depends on
# the decoding script you call.
# It assumes the model directory is one level up from decode-dir-1.
#
# Usage: local/decode.sh [--mono] <decode-script> <decode-dir-1>
# Exits non-zero on bad arguments or when no WER result is found.

# Optional leading flag, forwarded unchanged to scripts/mkgraph.sh.
mono_opt=
if [ "$1" == "--mono" ]; then
  mono_opt=$1
  shift
fi

# Validate the argument count before any positional parameter is used.
if [ $# -ne 2 ]; then
  echo "Usage: local/decode.sh [--mono] <decode-script> <decode-dir-1>"
  exit 1
fi

script=$1
decode_dir_1=$2 # e.g. exp/sgmm3b/decode
dir=$(dirname "$decode_dir_1") # e.g. exp/sgmm3b

if [ ! -x "$script" ] || [ ! -d "$dir" ]; then
  echo "local/decode.sh: Either no such script $script or not executable, or no such dir $dir"
  exit 1
fi

# $mono_opt is intentionally unquoted: when empty it must expand to no
# argument at all rather than to an empty string.
scripts/mkgraph.sh $mono_opt data/lang_test "$dir" "$dir/graph"

"$script" "$dir" data/test data/lang "$decode_dir_1/" &
wait

# The publicly available RM subset has just one test set (instead of mar87
# etc.), so no averaging is needed.  A missing WER file is reported as a
# failure so that callers using "|| exit 1" actually stop.
if ! grep WER "$decode_dir_1"/wer*; then
  echo "Error decoding $decode_dir_1: no WER results found."
  exit 1
fi

69
egs/rm/s4/local/make_trans.pl Executable file
Просмотреть файл

@ -0,0 +1,69 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# usage: make_trans.sh prefix in.flist input.snr out.txt out.scp
# prefix is first letters of the database "key" (rest are numeric)
# in.flist is just a list of filenames, probably of .sph files.
# input.snr is an snr format file from the RM dataset.
# out.txt is the output transcriptions in format "key word1 word\n"
# out.scp is the output scp file, which is as in.scp but has the
# database-key first on each line.
# Reads the SNOR transcription file (input.snr, e.g.
# $rootdir/rm1_audio1/rm1/doc/al_sents.snr) and the file list (in.flist).
# Writes the transcriptions to out.txt and the keyed file list to out.scp.
# Exactly five arguments: prefix in.flist input.snr out.txt out.scp
die "usage: make_trans.sh prefix in.flist input.snr out.txt out.scp\n"
  unless @ARGV == 5;
my ($key_prefix, $flist_path, $snr_path, $trans_path, $scp_path) = @ARGV;

# Pass 1: map each sentence id (the text inside the trailing parentheses)
# to its transcription.  Lines starting with ';' are skipped.
my %trans_of;
open(F, "<$snr_path") || die "Opening SNOR file $snr_path";
while (my $snr_line = <F>) {
  next if $snr_line =~ m/^;/;
  $snr_line =~ m/(.+) \((.+)\)/ || die "bad line $snr_line";
  $trans_of{$2} = $1;
}
close(F);

# Pass 2: for every feature file build the utterance key and write one line
# to the transcription file and one line to the scp file.
open(G, "<$flist_path") || die "Opening file list $flist_path";
open(O, ">$trans_path") || die "Open output transcription file $trans_path";
open(P, ">$scp_path") || die "Open output scp file $scp_path";
while (my $flist_line = <G>) {
  # The last two path components are <speaker>/<utterance>.mfc
  $flist_line =~ m:/(\w+)/(\w+)\.mfc\s+$:i || die "bad scp line $flist_line";
  my ($speaker, $sentence) = ($1, $2);

  # Transcriptions are looked up by the upper-cased utterance name.
  $sentence =~ tr/a-z/A-Z/;
  defined $trans_of{$sentence} || die "no trans for sent $sentence";

  $speaker =~ s/_//g; # remove underscore from spk name to make key nicer.
  my $utt_key = join("_", $key_prefix, $speaker, $sentence);
  $utt_key =~ tr/A-Z/a-z/; # Make it all lower case.

  print O "$utt_key $trans_of{$sentence}\n";
  print P "$utt_key $flist_line";
}
close(O) || die "Closing output.";
close(P) || die "Closing output.";

80
egs/rm/s4/local/rm_data_prep.sh Executable file
Просмотреть файл

@ -0,0 +1,80 @@
#!/bin/bash
#
# Copyright 2012 Vassil Panayotov
# modified from a file that was:
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from one directory above this script.
# Note: when creating your own data preparation scripts, it's a good idea
# to make sure that the speaker id (if present) is a prefix of the utterance
# id, that the output scp file is sorted on utterance id, and that the
# transcription file is exactly the same length as the scp file and is also
# sorted on utterance id (missing transcriptions should be removed from the
# scp file using e.g. scripts/filter_scp.pl)
# Usage: local/rm_data_prep.sh /path/to/RM   (run from the recipe directory)
#
# Creates, under data/local: train.flist/test.flist, the sorted
# {train,test}_trans.txt and {train,test}.scp, the grammar G.txt and
# lexicon.txt.
if [ $# != 1 ]; then
  echo "Usage: local/rm_data_prep.sh /path/to/RM"
  exit 1
fi

export LC_ALL=C

RMROOT=$1
# The script changes directory below, so a relative root would stop
# resolving; anchor it to the invocation directory first.
case "$RMROOT" in
  /*) ;;
  *) RMROOT="$(pwd)/$RMROOT" ;;
esac

mkdir -p data/local || exit 1
cd data/local || exit 1

if [ ! -d "$RMROOT/LDC93S3B" ] || [ ! -d "$RMROOT/rm1" ]; then
  echo "Speech data is missing. You can download the data by running ./getdata.sh"
  exit 1
fi

# Sort a file on its first field, in place.
sort_in_place() {
  sort -k 1 -o "$1" "$1" || exit 1
}

snr="$RMROOT/LDC93S3B/disc_1/doc/al_sents.snr"

# Make a list of files
xargs -I_x_ echo "$RMROOT/rm1/feat/_x_.mfc" \
  < "$RMROOT/rm1/etc/rm1_train.fileids" > train.flist
xargs -I_x_ echo "$RMROOT/rm1/feat/_x_.mfc" \
  < "$RMROOT/rm1/etc/rm1_test.fileids" > test.flist

# make_trans.pl also creates the utterance id's and the kaldi-format scp file.
# training set
../../local/make_trans.pl trn train.flist "$snr" train_trans.txt train.scp || exit 1
sort_in_place train_trans.txt
sort_in_place train.scp

# test set
../../local/make_trans.pl test test.flist "$snr" test_trans.txt test.scp || exit 1
sort_in_place test_trans.txt
sort_in_place test.scp

# We already have the features, so sph2pipe step is skipped and
# given the limited data the speaker-dependent processing is also not used
../../scripts/make_rm_lm.pl "$RMROOT/LDC93S3B/disc_1/doc/wp_gram.txt" > G.txt || exit 1

# Convert the CMU's lexicon to a form which the other scripts expect
# (leave only the first pronunciation variant, convert "'" to "+",
# and convert the phones to lower case)
# NOTE: the \L in the second expression is a GNU sed extension.
grep -v -F '(' "$RMROOT/rm1/etc/rm1.dic" | \
  sed -e "s/'/\+/g" \
      -e "s/^\([[:alnum:]-]\+\(+[[:alpha:]]\+\)\?\)\(.*\)/\1\L\3/g" > lexicon.txt || exit 1

echo RM_data_prep succeeded.

126
egs/rm/s4/local/rm_format_data.sh Executable file
Просмотреть файл

@ -0,0 +1,126 @@
#!/bin/bash
#
# Copyright 2012 Vassil Panayotov
# modified from:
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from one directory above this script.
# Builds data/lang and data/lang_test (symbol tables, lexicon FSTs, grammar
# FST, HMM topology) and data/{train,test} from the files that
# local/rm_data_prep.sh left in data/local.
if [ -f path.sh ]; then . ./path.sh; fi

data_list="train test"

# $data_list is deliberately unquoted so that it splits into one word per set.
for x in lang lang_test $data_list; do
  mkdir -p "data/$x"
done

# Copy stuff into its final location:
for x in $data_list; do
  cp "data/local/${x}.scp" "data/$x/mfc.scp" || exit 1
  cp "data/local/${x}_trans.txt" "data/$x/text" || exit 1
done

# We are not using make_words_symtab.pl for symbol table creation in this
# recipe, because CMU's lexicon have several words that are not in the
# word-pair grammar
awk 'BEGIN{print "<eps>\t0";} {print $1 "\t" NR;} END{print "!SIL\t" NR+1;}' \
  data/local/lexicon.txt > data/lang/words.txt

scripts/make_phones_symtab.pl < data/local/lexicon.txt > data/lang/phones.txt
# words.txt is needed in lang_test already for the L_align/L_disambig steps.
cp data/lang/words.txt data/lang_test/words.txt

silphones="sil"; # This would in general be a space-separated list of all silence phones. E.g. "sil vn"
# Generate colon-separated lists of silence and non-silence phones.
scripts/silphones.pl data/lang/phones.txt "$silphones" data/lang/silphones.csl \
  data/lang/nonsilphones.csl

# add_lex_disambig.pl prints the number of disambiguation symbols it used;
# stop here if it failed, since the arithmetic below depends on that number.
ndisambig=$(scripts/add_lex_disambig.pl data/local/lexicon.txt data/local/lexicon_disambig.txt) || exit 1
ndisambig=$((ndisambig + 1)) # add one disambig symbol for silence in lexicon FST.
scripts/add_disambig.pl data/lang/phones.txt "$ndisambig" > data/lang_test/phones_disambig.txt
cp data/lang_test/phones_disambig.txt data/lang/ # needed for MMI.

silprob=0.5 # same prob as word
scripts/make_lexicon_fst.pl data/local/lexicon.txt "$silprob" sil | \
  fstcompile --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt \
    --keep_isymbols=false --keep_osymbols=false | \
  fstarcsort --sort_type=olabel > data/lang/L.fst

# Create L_align.fst, which is as L.fst but with alignment symbols (#1 and #2 at the
# beginning and end of words, on the input side)... useful if we
# ever need to e.g. create ctm's-- these are used to work out the
# word boundaries.
awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' \
  data/local/lexicon.txt | \
  scripts/make_lexicon_fst.pl - "$silprob" sil | \
  fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
    --keep_isymbols=false --keep_osymbols=false | \
  fstarcsort --sort_type=olabel > data/lang_test/L_align.fst

# L_disambig.fst has the disambiguation symbols (c.f. Mohri's papers)
scripts/make_lexicon_fst.pl data/local/lexicon_disambig.txt "$silprob" sil "#$ndisambig" | \
  fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
    --keep_isymbols=false --keep_osymbols=false | \
  fstarcsort --sort_type=olabel > data/lang_test/L_disambig.fst

cp data/lang_test/L_disambig.fst data/lang/ # Needed for MMI training.

fstcompile --isymbols=data/lang/words.txt --osymbols=data/lang/words.txt --keep_isymbols=false \
  --keep_osymbols=false data/local/G.txt > data/lang_test/G.fst

# The checks below only print a diagnostic; they do not abort the script.

# Checking that G is stochastic [note, it wouldn't be for an Arpa]
fstisstochastic data/lang_test/G.fst || echo Error: G is not stochastic

# Checking that G.fst is determinizable.
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.

# Checking that L_disambig.fst is determinizable.
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.

# Checking that disambiguated lexicon times G is determinizable
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
  fstdeterminize >/dev/null || echo Error

# Checking that LG is stochastic:
fsttablecompose data/lang/L.fst data/lang_test/G.fst | \
  fstisstochastic || echo Error: LG is not stochastic.

# Checking that L_disambig.G is stochastic (reported under its own name so
# it can be told apart from the plain LG check above):
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
  fstisstochastic || echo Error: L_disambig.G is not stochastic.

## Check lexicon.
## just have a look and make sure it seems sane.
echo "First few lines of lexicon FST:"
fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head

# Fill the phone-list placeholders of the topology prototype.  The
# NONSILENCEPHONES substitution must run first, because SILENCEPHONES is a
# substring of it.
silphonelist=$(sed 's/:/ /g' data/lang/silphones.csl)
nonsilphonelist=$(sed 's/:/ /g' data/lang/nonsilphones.csl)
sed "s:NONSILENCEPHONES:$nonsilphonelist:" conf/topo.proto | \
  sed "s:SILENCEPHONES:$silphonelist:" > data/lang/topo

for x in phones.txt words.txt silphones.csl nonsilphones.csl topo; do
  cp "data/lang/$x" "data/lang_test/$x" || exit 1
done

echo RM_format_data succeeded.

13
egs/rm/s4/path.sh Executable file
Просмотреть файл

@ -0,0 +1,13 @@
#!/bin/bash
# Environment for the recipe; meant to be sourced from the recipe directory.

# Kaldi's root directory, three levels above the current directory.
root="$(pwd)/../../.."

# Put the Kaldi and OpenFst binaries in front of the existing search path.
export PATH="${root}/src/bin:\
${root}/tools/openfst/bin:\
${root}/src/fstbin/:\
${root}/src/gmmbin/:\
${root}/src/featbin/:\
${root}/src/fgmmbin:\
${root}/src/sgmmbin:\
${root}/src/lm:\
${root}/src/latbin:\
${root}/src/tiedbin/:\
${PATH}"

# Directory in which the subset of the RM corpus is stored.
export RM1_ROOT="$(pwd)/data/download"

# Use the C locale.
export LC_ALL=C
# NOTE(review): LC_LOCALE_ALL is not one of the standard locale variables;
# confirm whether anything actually reads it.
export LC_LOCALE_ALL=C

57
egs/rm/s4/run.sh Executable file
Просмотреть файл

@ -0,0 +1,57 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Top-level driver for the recipe: data preparation, feature conversion,
# monophone training/decoding and two triphone passes.  Every step aborts
# the run on failure.
source ./path.sh || exit 1

# call the next line with the directory where the RM data is
# (quoted so that a path containing whitespace stays a single argument)
local/rm_data_prep.sh "$RM1_ROOT" || exit 1

local/rm_format_data.sh || exit 1

# the directory, where you want to store MFCC features.
featdir=data/rm_feats

# convert the Sphinx feature files to Kaldi tables
for x in train test; do
  steps/make_mfcc.sh "data/$x" "exp/make_mfcc/$x" "$featdir" || exit 1
done

scripts/subset_data_dir.sh data/train 1000 data/train.1k || exit 1

# train monophone system.
steps/train_mono.sh data/train.1k data/lang exp/mono || exit 1

# monophone decoding
local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode || exit 1

# Get alignments from monophone system.
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali || exit 1

# train tri1 [first triphone pass]
steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1 || exit 1

# decode tri1
local/decode.sh steps/decode_deltas.sh exp/tri1/decode || exit 1

# align tri1
steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
  data/train data/lang exp/tri1 exp/tri1_ali || exit 1

# train tri2a [delta+delta-deltas]
steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a || exit 1

# decode tri2a
local/decode.sh steps/decode_deltas.sh exp/tri2a/decode || exit 1

Просмотреть файл

@ -0,0 +1,58 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Adds some specified number of disambig symbols to a symbol table.
# Adds these as #1, #2, etc.
# If the --include-zero option is specified, includes an extra one
# #0.
# Accept an optional leading --include-zero, then require exactly
# "symtab.txt num_extra".
my $include_zero = 0;
if (@ARGV == 3 && $ARGV[0] eq "--include-zero") {
  $include_zero = 1;
  shift @ARGV;
}
@ARGV == 2
  || die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";

my ($symtab_path, $num_extra) = @ARGV;

# Echo the symbol table unchanged, remembering the id on its last line.
my $last_id;
open(F, "<$symtab_path") || die "Opening file $symtab_path";
while (<F>) {
  my @fields = split(" ", $_);
  @fields == 2 || die "Bad line $_";
  $last_id = $fields[1];
  print;
}
defined($last_id) || die "Empty symbol file?";

# With --include-zero, "#0" takes the next free id.
if ($include_zero) {
  $last_id++;
  print "#0 $last_id\n";
}

# "#1" .. "#num_extra" get the ids that follow.
for (my $k = 1; $k <= $num_extra; $k++) {
  my $id = $k + $last_id;
  print "#$k $id\n";
}

Просмотреть файл

@ -0,0 +1,101 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Adds disambiguation symbols to a lexicon.
# Outputs still in the normal lexicon format.
# Disambig syms are numbered #1, #2, #3, etc. (#0
# reserved for symbol in grammar).
# Outputs the number of disambig syms to the standard output.
# Arguments: the input lexicon and the output lexicon.  No options are
# accepted.
if(@ARGV != 2) {
  die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt "
}

$lexfn = shift @ARGV;
$lexoutfn = shift @ARGV;

open(L, "<$lexfn") || die "Error opening lexicon $lexfn";

# (1) Read in the lexicon, normalizing whitespace to single spaces.
@L = ( );
while(<L>) {
  @A = split(" ", $_);
  push @L, join(" ", @A);
}
close(L);

# (2) Work out the count of each phone-sequence in the
# lexicon.
foreach $l (@L) {
  @A = split(" ", $l);
  shift @A; # Remove word.
  $count{join(" ",@A)}++;
}

# (3) For each left sub-sequence of each phone-sequence, note down
# that exists (for identifying prefixes of longer strings).
foreach $l (@L) {
  @A = split(" ", $l);
  shift @A; # Remove word.
  while(@A > 0) {
    pop @A; # Remove last phone
    $issubseq{join(" ",@A)} = 1;
  }
}

# (4) For each entry in the lexicon:
# if the phone sequence is unique and is not a
# prefix of another word, no disambig symbol.
# Else output #1, or #2, #3, ... if the same phone-seq
# has already been assigned a disambig symbol.
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";

$max_disambig = 0;
foreach $l (@L) {
  @A = split(" ", $l);
  $word = shift @A;
  $phnseq = join(" ",@A);
  if(!defined $issubseq{$phnseq}
     && $count{$phnseq}==1) {
    ; # Do nothing.
  } else {
    if($phnseq eq "") { # need disambig symbols for the empty string
      # that are not used anywhere else.
      $max_disambig++;
      $reserved{$max_disambig} = 1;
      $phnseq = "#$max_disambig";
    } else {
      # Last symbol number handed out for this phone sequence (0 if none).
      $curnumber = $disambig_of{$phnseq};
      if(!defined $curnumber) { $curnumber = 0; }
      $curnumber++; # now 1 or 2, ...
      while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
      if($curnumber > $max_disambig) {
        $max_disambig = $curnumber;
      }
      $disambig_of{$phnseq} = $curnumber;
      $phnseq = $phnseq . " #" . $curnumber;
    }
  }
  print O "$word\t$phnseq\n";
}
# Make sure buffered output really reached the file before reporting success.
close(O) || die "Closing lexicon file $lexoutfn.\n";

# The caller captures this number from standard output.
print $max_disambig . "\n";

40
egs/rm/s4/scripts/filter_scp.pl Executable file
Просмотреть файл

@ -0,0 +1,40 @@
#!/usr/bin/perl -w
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script takes a list of utterance-ids and filters an scp
# file (or any file whose first field is an utterance id), printing
# out only those lines whose first field is in id_list.
# One or two arguments: the id list, then optionally the file to filter
# (standard input is read when it is omitted).
die "Usage: filter_scp.pl id_list [in.scp] > out.scp "
  unless @ARGV == 1 || @ARGV == 2;

my $id_list_path = shift @ARGV;

# Collect the first field of every line of the id list.
my %wanted;
open(F, "<$id_list_path") || die "Could not open id-list file $id_list_path";
while (my $id_line = <F>) {
  my ($utt_id) = split(" ", $id_line);
  defined $utt_id || die "Invalid id-list file line $id_line";
  $wanted{$utt_id} = 1;
}

# Pass through only the lines whose first field was collected above.
while (my $scp_line = <>) {
  my ($utt_id) = split(" ", $scp_line);
  defined $utt_id || die "Invalid scp file line $scp_line";
  print $scp_line if $wanted{$utt_id};
}

90
egs/rm/s4/scripts/int2sym.pl Executable file
Просмотреть файл

@ -0,0 +1,90 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Maps integers back to symbols using a symbol table.
#
# Options (must precede the symbol table):
#   --ignore-noninteger   pass tokens that are not all digits through unchanged
#   --ignore-first-field  copy the first field of each line through unchanged
#   --field N             map only the N-th (1-based) field of each line
$ignore_noninteger = 0;
$ignore_first_field = 0;
$field = -1;
# Two passes, so that the options may be given in more than one order.
for($x = 0; $x < 2; $x++) {
  if($ARGV[0] eq "--ignore-noninteger") { $ignore_noninteger = 1; shift @ARGV; }
  if($ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
  if($ARGV[0] eq "--field") {
    shift @ARGV; $field = $ARGV[0]+0; shift @ARGV;
    if ($field < 1) { die "Bad argument to --field option: $field"; }
  }
}

if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
$zfield = $field-1; # Change to zero-based indexing.

$symtab = shift @ARGV;
if(!defined $symtab) {
  die "Usage: int2sym.pl symtab [input] > output\n";
}

# Load the table; each line is "<symbol> <integer>".
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while(<F>) {
  @A = split(" ", $_);
  @A == 2 || die "bad line in symbol table file: $_";
  $int2sym{$A[1]} = $A[0];
}

# Returns the symbol for the token $tok found at zero-based position $pos.
# A token that is not all digits is returned unchanged under
# --ignore-noninteger and is fatal otherwise.  The token is returned rather
# than printed here followed by "next": "next" inside a sub leaves the
# caller's enclosing loop iteration, which in --field mode dropped the rest
# of the line and its newline.
sub int2sym {
  my $tok = shift @_;
  my $pos = shift @_;
  if($tok !~ m:^\d+$:) { # not all digits..
    if($ignore_noninteger) {
      return $tok;
    } else {
      if($pos == 0) {
        die "int2sym.pl: found noninteger token $tok (try --ignore-first-field)\n";
      } else {
        die "int2sym.pl: found noninteger token $tok (try --ignore-noninteger if valid input)\n";
      }
    }
  }
  my $sym = $int2sym{$tok};
  if(!defined ($sym)) {
    die "int2sym.pl: integer $tok not in symbol table $symtab.";
  }
  return $sym;
}

while(<>) {
  @A = split(" ", $_);
  if($ignore_first_field) {
    $key = shift @A;
    print $key . " ";
  }
  if ($field != -1) {
    # Map only the requested field; lines that are too short pass through.
    if ($zfield <= $#A && $zfield >= 0) {
      $A[$zfield] = int2sym($A[$zfield], $zfield);
    }
    print join(" ", @A);
  } else {
    # Map every field; each symbol is followed by a single space.
    for ($pos = 0; $pos <= $#A; $pos++) {
      print int2sym($A[$pos], $pos) . " ";
    }
  }
  print "\n";
}

Просмотреть файл

@ -0,0 +1,122 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# makes lexicon FST (no pron-probs involved).
# Prints the lexicon as a text-format FST on standard output.
#
# Arguments: lexicon.txt [silprob silphone [sil_disambig_sym]]
#   lexicon.txt       lines of "<word> <phone1> <phone2> ..."
#   silprob           probability of optional silence (0.0 means none)
#   silphone          the silence phone
#   sil_disambig_sym  symbol emitted after optional silence, if given
if(@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] > lexiconfst.txt"
}

$lexfn = shift @ARGV;
if(@ARGV == 0) {
  $silprob = 0.0;
} elsif (@ARGV == 2){
  ($silprob,$silphone) = @ARGV;
} else {
  ($silprob,$silphone,$sildisambig) = @ARGV;
}
if($silprob != 0.0) {
  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
  $silcost = -log($silprob);
  $nosilcost = -log(1.0 - $silprob);
}

# Two-argument open on purpose: a lexicon name of "-" then reads standard
# input, which callers rely on.
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";

sub is_sil {
  # Return true (1) if provided with a phone-sequence
  # that means silence.
  # @_ is the parameters of the function
  # This function returns true if @_ equals ( $silphone )
  # or something of the form ( "#0", $silphone, "#1" )
  # where the "#0" and "#1" are disambiguation symbols.
  return ( @_ == 1 && $_[0] eq $silphone ||
           (@_ == 3 && $_[1] eq $silphone &&
            $_[0] =~ m/^\#\d+$/ &&
            $_[2] =~ m/^\#\d+$/));
}

if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
  $loopstate = 0;
  $nextstate = 1; # next unallocated state.
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;

    $s = $loopstate;
    $word_or_eps = $w; # the word is emitted on the first arc only.
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
      } else {
        $ns = $loopstate; # last phone returns to the loop state.
      }
      print "$s\t$ns\t$p\t$word_or_eps\n";
      $word_or_eps = "<eps>";
      $s = $ns;
    }
  }
  print "$loopstate\t0\n"; # final-cost.
} else { # have silence probs.
  $startstate = 0;
  $loopstate = 1;
  $silstate = 2; # state from where we go to loopstate after emitting silence.
  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
  if (!defined $sildisambig) {
    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
    $nextstate = 3;
  } else {
    $disambigstate = 3;
    $nextstate = 4;
    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
  }
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;

    $s = $loopstate;
    $word_or_eps = $w; # the word is emitted on the first arc only.
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
        print "$s\t$ns\t$p\t$word_or_eps\n";
        $word_or_eps = "<eps>";
        $s = $ns;
      } else {
        # NOTE(review): @A is already empty at this point, so is_sil(@A) is
        # always false and the first branch is always taken.  Left unchanged
        # because altering it would change the generated FST -- confirm the
        # intended behaviour for silence-only words.
        if(!is_sil(@A)){
          # This is non-deterministic but relatively compact,
          # and avoids epsilons.
          print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
          print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
        } else {
          # no point putting opt-sil after silence word.
          print "$s\t$loopstate\t$p\t$word_or_eps\n";
        }
        $word_or_eps = "<eps>";
      }
    }
  }
  print "$loopstate\t0\n"; # final-cost.
}

Просмотреть файл

@ -0,0 +1,37 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# make_phones_symtab.pl < lexicon.txt > phones.txt
# Reads lexicon lines of the form "<word> <phone1> <phone2> ..." and prints
# a phone symbol table: <eps> as 0, the phones in sorted order from 1, and
# "sil" last.

# Collect every phone.  Field 0 is the word, so the phones start at field 1;
# starting at field 2 would miss any phone that only ever occurs first in a
# pronunciation.
while(<>) {
  @A = split(" ", $_);
  for ($i=1; $i<@A; $i++) {
    $P{$A[$i]} = 1; # seen it.
  }
}

print "<eps>\t0\n";
$n = 1;
foreach $p (sort keys %P) {
  # "<eps>" is already printed above and "sil" is printed last below, so
  # neither may get a second entry here.
  if($p ne "<eps>" && $p ne "sil") {
    print "$p\t$n\n";
    $n++;
  }
}

print "sil\t$n\n";

119
egs/rm/s4/scripts/make_rm_lm.pl Executable file
Просмотреть файл

@ -0,0 +1,119 @@
#!/usr/bin/perl
# Copyright 2010-2011 Yanmin Qian Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This file takes as input the file wp_gram.txt that comes with the RM
# distribution, and creates the language model as an acceptor in FST form.
# make_rm_lm.pl wp_gram.txt > G.txt
# Exactly one argument (the grammar file) is required.  Note that the usage
# message goes to standard output and the script exits with status 0.
if (@ARGV != 1) {
print "usage: make_rm_lm.pl wp_gram.txt > G.txt\n";
exit(0);
}
unless (open(IN_FILE, "@ARGV[0]")) {
die ("can't open @ARGV[0]");
}
# $flag      becomes 1 once the first line starting with '>' has been seen.
# $count_wrd number of '>' (word header) lines seen so far.
# $cnt_ends  number of successor lines containing SENTENCE-END (only counted).
# $init      the word whose successor list is currently being read.
$flag = 0;
$count_wrd = 0;
$cnt_ends = 0;
$init = "";
# Pass 1: read the grammar.  After this loop:
#   @LineArray    the header words in file order,
#   $hash{w}[k]   the k-th successor listed under word w,
#   $hashcnt{w}   the number of successors listed under word w,
#   $hashwrd{w}   0 for every header word (state number not yet assigned).
while ($line = <IN_FILE>)
{
# Drop the last character of the line and remove all spaces.
chop($line);
$line =~ s/ //g;
if(($line =~ /^>/))
{
# A '>' line starts the successor list of a new word.
if($flag == 0)
{
$flag = 1;
}
$line =~ s/>//g;
# Close off the previous word's list before switching to the new word.
$hashcnt{$init} = $i;
$init = $line;
$i = 0;
$count_wrd++;
@LineArray[$count_wrd - 1] = $init;
$hashwrd{$init} = 0;
}
elsif($flag != 0)
{
# Any other line after the first header is a successor of the current word.
$hash{$init}[$i] = $line;
$i++;
if($line =~ /SENTENCE-END/)
{
$cnt_ends++;
}
}
else
{}
# Lines before the first '>' header are ignored.
}
# Close off the successor list of the last word in the file.
$hashcnt{$init} = $i;
$num = 0;
$weight = 0;
$init_wrd = "SENTENCE-END";
# SENTENCE-END gets the number of header words as its state number
# (array in scalar context).
$hashwrd{$init_wrd} = @LineArray;
# Arcs out of state 0: one per successor of SENTENCE-END, to states
# 1, 2, ..., each with cost -log(1/number of successors).
for($i = 0; $i < $hashcnt{$init_wrd}; $i++)
{
$weight = -log(1/$hashcnt{$init_wrd});
$hashwrd{$hash{$init_wrd}[$i]} = $i + 1;
print "0 $hashwrd{$hash{$init_wrd}[$i]} $hash{$init_wrd}[$i] $hash{$init_wrd}[$i] $weight\n";
}
# $num is the highest state number handed out so far.
$num = $i;
# Arcs out of every other word's state.
for($i = 0; $i < @LineArray; $i++)
{
if(@LineArray[$i] eq 'SENTENCE-END')
{}
else
{
# A value of 0 means "no state yet": take the next free number.
if($hashwrd{@LineArray[$i]} == 0)
{
$num++;
$hashwrd{@LineArray[$i]} = $num;
}
for($j = 0; $j < $hashcnt{@LineArray[$i]}; $j++)
{
# Same cost for every successor of this word.
$weight = -log(1/$hashcnt{@LineArray[$i]});
if($hashwrd{$hash{@LineArray[$i]}[$j]} == 0)
{
$num++;
$hashwrd{$hash{@LineArray[$i]}[$j]} = $num;
}
# The arc into SENTENCE-END carries <eps> labels; every other arc carries
# the successor word as both input and output label.
if($hash{@LineArray[$i]}[$j] eq 'SENTENCE-END')
{
print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} <eps> <eps> $weight\n"
}
else
{
print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $hash{@LineArray[$i]}[$j] $hash{@LineArray[$i]}[$j] $weight\n";
}
}
}
}
# The SENTENCE-END state is the final state, with cost 0.
print "$hashwrd{$init_wrd} 0\n";
close(IN_FILE);

102
egs/rm/s4/scripts/make_roots.pl Executable file
Просмотреть файл

@ -0,0 +1,102 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Written by Dan Povey 9/21/2010. Apache 2.0 License.
# This version of make_roots.pl is specialized for RM.
# This script creates the file roots.txt which is an input to train-tree.cc. It
# specifies how the trees are built. The input file phone-sets.txt is a partial
# version of roots.txt in which phones are represented by their spelled form, not
# their symbol id's. E.g. at input, phone-sets.txt might contain;
# shared not-split sil
# Any phones not specified in phone-sets.txt but present in phones.txt will
# be given a default treatment. If the --separate option is given, we create
# a separate tree root for each of them, otherwise they are all lumped in one set.
# The arguments shared|not-shared and split|not-split are needed if any
# phones are not specified in phone-sets.txt. What they mean is as follows:
# if shared=="shared" then we share the tree-root between different HMM-positions
# (0,1,2). If split=="split" then we actually do decision tree splitting on
# that root, otherwise we forbid decision-tree splitting. (The main reason we might
# specify "not-split" is for silence, when we want to ensure that each of
# the HMM-positions will remain with a single PDF id.)
# Writes the roots file for tree building to stdout.
#
# Arguments (after the optional --separate flag):
#   phones.txt          symbol table, one "<symbol> <integer-id>" pair per line
#   silence-phone-list  colon-separated integer ids of the silence phones
#   shared|not-shared   sharing mode printed for the non-silence phones
#   split|not-split     splitting mode printed for the non-silence phones
#
# Each silence phone gets its own "not-shared not-split" line.  The remaining
# phones are written either one per line (--separate) or all on one line.
$separate = 0;
if (@ARGV > 0 && $ARGV[0] eq "--separate") {
    $separate = 1;
    shift @ARGV;
}

if (@ARGV != 4) {
    die "Usage: make_roots.pl [--separate] phones.txt silence-phone-list[integer,colon-separated] shared|not-shared split|not-split > roots.txt\n";
}

($phonesfile, $silphones, $shared, $split) = @ARGV;
if ($shared ne "shared" && $shared ne "not-shared") {
    die "Third argument must be \"shared\" or \"not-shared\"\n";
}
if ($split ne "split" && $split ne "not-split") {
    die "Fourth argument must be \"split\" or \"not-split\"\n";
}

# Load the symbol table, leaving out id 0.
open(F, "<$phonesfile") || die "Opening file $phonesfile";
while (<F>) {
    @A = split(" ", $_);
    if (@A != 2) {
        die "Bad line in phones symbol file: ".$_;
    }
    if ($A[1] != 0) {
        $symbol2id{$A[0]} = $A[1];
        $id2symbol{$A[1]} = $A[0];
    }
}
close(F);

# String comparison: a numeric "==" would also reject any list that
# numifies to 0.
if ($silphones eq "") {
    die "Empty silence phone list in make_roots.pl";
}

foreach $silphoneid (split(":", $silphones)) {
    defined $id2symbol{$silphoneid} || die "No such silence phone id $silphoneid";
    # Give each silence phone its own separate pdfs in each state, but
    # no sharing (in this recipe; WSJ is different.. in this recipe there
    # is only one silence phone anyway.)
    $issil{$silphoneid} = 1;
    print "not-shared not-split $silphoneid\n";
}

# Non-silence phone ids in ascending numeric order, so that the output does
# not depend on Perl's hash ordering.
my @nonsil_ids = sort { $a <=> $b } grep { !defined $issil{$_} } keys %id2symbol;

if ($separate) {
    foreach my $id (@nonsil_ids) {
        print "$shared $split $id\n";
    }
} else {
    print "$shared $split ";
    foreach my $id (@nonsil_ids) {
        print "$id ";
    }
    print "\n";
}

112
egs/rm/s4/scripts/mkgraph.sh Executable file
Просмотреть файл

@ -0,0 +1,112 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Builds the decoding graph <graphdir>/HCLG.fst from a test lang directory
# and a model directory (which must hold "tree" and "final.mdl").
# Options (any order, before the positional arguments):
#   --mono   use context size 1 / central position 0 instead of 3 / 1
#   --clean  delete <test-lang-dir>/tmp first so everything is rebuilt
# Intermediate FSTs are cached and only rebuilt when missing or older than
# their inputs.
N=3
P=1
clean=false

# "$1" is quoted so the tests stay valid once the arguments run out.
for _ in 1 2 3; do
  if [ "$1" == "--mono" ]; then
    N=1
    P=0
    shift
  fi
  if [ "$1" == "--clean" ]; then
    clean=true
    shift
  fi
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

# -f: a missing tmp directory is not an error.  ${lang:?} aborts instead of
# deleting /tmp if the lang directory argument is empty.
if $clean; then rm -rf -- "${lang:?}/tmp"; fi

mkdir -p "$dir" || exit 1;

tscale=1.0
loopscale=0.1

# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
# (note: the [[ ]] brackets make the || type operators work (inside [ ], we
# would have to use -o instead), -f means file exists, and -ot means older than).
mkdir -p "$lang/tmp" || exit 1;
if [[ ! -f "$lang/tmp/LG.fst" || "$lang/tmp/LG.fst" -ot "$lang/G.fst" || \
      "$lang/tmp/LG.fst" -ot "$lang/L_disambig.fst" ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" | fstdeterminizestar --use-log=true | \
    fstminimizeencoded > "$lang/tmp/LG.fst" || exit 1;
  fstisstochastic "$lang/tmp/LG.fst" || echo "warning: LG not stochastic."
fi

if [ ! -f "$lang/phones_disambig.txt" ]; then
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1;
fi

# Integer ids (second column) of the symbols whose line contains '#'.
grep '#' "$lang/phones_disambig.txt" | awk '{print $2}' > "$lang/tmp/disambig_phones.list"

clg=$lang/tmp/CLG_${N}_${P}.fst

if [[ ! -f "$clg" || "$clg" -ot "$lang/tmp/LG.fst" ]]; then
  fstcomposecontext --context-size=$N --central-position=$P \
    --read-disambig-syms="$lang/tmp/disambig_phones.list" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.list" \
    "$lang/tmp/ilabels_${N}_${P}" < "$lang/tmp/LG.fst" > "$clg" || exit 1;
  fstisstochastic "$clg" || echo "warning: CLG not stochastic."
fi

if [[ ! -f "$dir/Ha.fst" || "$dir/Ha.fst" -ot "$model" ]]; then
  make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
    --transition-scale=$tscale "$lang/tmp/ilabels_${N}_${P}" "$tree" "$model" \
    > "$dir/Ha.fst" || exit 1;
fi

if [[ ! -f "$dir/HCLGa.fst" || "$dir/HCLGa.fst" -ot "$dir/Ha.fst" || \
      "$dir/HCLGa.fst" -ot "$clg" ]]; then
  fsttablecompose "$dir/Ha.fst" "$clg" | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.list" | fstrmepslocal | \
    fstminimizeencoded > "$dir/HCLGa.fst" || exit 1;
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

if [[ ! -f "$dir/HCLG.fst" || "$dir/HCLG.fst" -ot "$dir/HCLGa.fst" ]]; then
  add-self-loops --self-loop-scale=$loopscale --reorder=true \
    "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst" || exit 1;

  if [[ "$tscale" == "1.0" && "$loopscale" == "1.0" ]]; then
    # No point doing this test if transition-scale not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
  fi
fi
# to make const fst:
# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst

57
egs/rm/s4/scripts/silphones.pl Executable file
Просмотреть файл

@ -0,0 +1,57 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# creates integer lists of silence and non-silence phones in files,
# e.g. silphones.csl="1:2:3 \n"
# and nonsilphones.csl="4:5:6:7:...:24\n";
# Splits the integer ids of a phone symbol table into silence and
# non-silence phones and writes each group as one colon-separated line.
#   phones.txt          symbol table, "<symbol> <integer-id>" per line
#   "sil1 sil2 ..."     space-separated silence phone symbols
#   silphones.csl       output: ids of the silence phones
#   nonsilphones.csl    output: ids of all other phones (id 0 is skipped)
@ARGV == 4
    or die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl";

my ($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;

open(S, "<$symtab") || die "Opening symbol table $symtab";

my %issil = map { $_ => 1 } split(" ", $sillist);
my %seensil;
my (@sil, @nonsil);

while (my $row = <S>) {
    my @A = split(" ", $row);
    @A == 2 || die "Bad line $row in phone-symbol-table file $symtab";
    my ($sym, $int) = @A;
    next if $int == 0;
    if ($issil{$sym}) {
        push @sil, $int;
        $seensil{$sym} = 1;
    } else {
        push @nonsil, $int;
    }
}

# Every requested silence symbol must have appeared in the table.
my @missing = grep { !$seensil{$_} } keys %issil;
die "No such silence phone $missing[0]" if @missing;

open(F, ">$silphones") || die "opening silphones file $silphones";
open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
print F join(":", @sil) . "\n";
print G join(":", @nonsil) . "\n";
close(F);
close(G);

print STDERR "Warning: silphones.pl no silence phones.\n" unless @sil;
print STDERR "Warning: silphones.pl no non-silence phones.\n" unless @nonsil;

Просмотреть файл

@ -0,0 +1,99 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
# feats.scp
# wav.scp
# spk2utt
# utt2spk
# text
# It creates a subset of that data, consisting of some specified
# number of utterances. (The selected utterances are distributed
# evenly throughout the file, by the program scripts/subset_scp.pl).
# If you give the --per-spk option, it will attempt to select
# the supplied number of utterances for each speaker (typically
# you would supply a much smaller number in this case).
# Usage: subset_data_dir.sh [--per-spk] <srcdir> <num-utt> <destdir>
# Writes into <destdir> a subset of the data directory <srcdir>.  Without
# --per-spk, <num-utt> utterances are picked from feats.scp by
# scripts/subset_scp.pl; with --per-spk, up to <num-utt> utterances are kept
# for every speaker listed in spk2utt.  The other files that exist in
# <srcdir> are filtered to match.
perspk=false
if [ "$1" == "--per-spk" ]; then
  perspk=true
  shift
fi

if [ $# != 3 ]; then
  echo "Usage: subset_data_dir.sh [--per-spk] <srcdir> <num-utt> <destdir>"
  exit 1;
fi

srcdir=$1
numutt=$2
destdir=$3

if [ ! -f "$srcdir/feats.scp" ]; then
  echo "subset_data_dir.sh: no such file $srcdir/feats.scp"
  exit 1;
fi

## scripting note: $perspk evaluates to true or false
## so this becomes the command true or false.
if $perspk; then
  mkdir -p "$destdir"
  # For each speaker line "<spk> <utt1> <utt2> ...", print the speaker and
  # every skip-th utterance, where skip is the largest stride that still
  # yields n utterances.  n is passed with -v rather than spliced into the
  # program text.
  awk -v n="$numutt" '{ printf("%s ",$1); skip=1; while(n*(skip+1) <= NF-1) { skip++; }
     for(x=2; x<=NF && x <= n*skip; x += skip) { printf("%s ", $x); }
     printf("\n"); }' <"$srcdir/spk2utt" >"$destdir/spk2utt"
  scripts/spk2utt_to_utt2spk.pl < "$destdir/spk2utt" > "$destdir/utt2spk"
  scripts/filter_scp.pl "$destdir/utt2spk" <"$srcdir/feats.scp" >"$destdir/feats.scp"
  [ -f "$srcdir/wav.scp" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/wav.scp" >"$destdir/wav.scp"
  [ -f "$srcdir/text" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"
  [ -f "$srcdir/spk2gender" ] && scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"
  srcutts=$(wc -l < "$srcdir/utt2spk")
  destutts=$(wc -l < "$destdir/utt2spk")
  echo "Retained $numutt utterances per speaker from data-dir $srcdir and put it in $destdir, reducing #utt from $srcutts to $destutts"
  exit 0;
else
  if [ "$numutt" -gt "$(wc -l < "$srcdir/feats.scp")" ]; then
    echo "subset_data_dir.sh: cannot subset to more utterances than you originally had."
    exit 1;
  fi
  mkdir -p "$destdir" || exit 1;

  # create feats.scp
  scripts/subset_scp.pl "$numutt" "$srcdir/feats.scp" > "$destdir/feats.scp" || exit 1;

  # The file being filtered is mfc.scp, so its existence is what has to be
  # tested (the test used to look at wav.scp, so mfc.scp was never subset
  # when only mfc.scp was present).
  if [ -f "$srcdir/mfc.scp" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/mfc.scp" > "$destdir/mfc.scp" || exit 1;
  else
    # Do not leave a stale list behind from an earlier run.
    rm "$destdir/mfc.scp" 2>/dev/null
  fi
  if [ -f "$srcdir/utt2spk" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/utt2spk" > "$destdir/utt2spk" || exit 1;
    scripts/utt2spk_to_spk2utt.pl "$destdir/utt2spk" > "$destdir/spk2utt" || exit 1;
  fi
  [ -f "$srcdir/text" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"
  [ -f "$srcdir/spk2gender" ] && scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"
  echo "Created a $numutt-utterance subset of $srcdir and put it in $destdir."
  exit 0;
fi

59
egs/rm/s4/scripts/subset_scp.pl Executable file
Просмотреть файл

@ -0,0 +1,59 @@
#!/usr/bin/perl -w
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This program selects a subset of N elements in the scp.
# It selects them evenly from throughout the scp, in order to
# avoid selecting too many from the same speaker.
# It prints them on the standard output.
# Usage: subset_scp.pl N in.scp
# Prints N lines of in.scp to stdout, chosen by repeatedly halving the line
# range and the number of lines still wanted.
die "Usage: subset_scp.pl N in.scp " if @ARGV < 2;

my $N = shift @ARGV;
die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""
    if $N == 0;

my $inscp = shift @ARGV;
open(I, "<$inscp") || die "Opening input scp file $inscp";
my @F = <I>;    # all lines of the scp, in file order

my $numlines = @F;
die "You requested from subset_scp.pl more elements than available: $N > $numlines"
    if $N > $numlines;

# select_n($start, $end, $num_needed): prints $num_needed of the lines
# $F[$start] .. $F[$end - 1].  A range of one line is printed when at least
# one line is wanted; a longer range is cut in the middle, the left half
# taking int($num_needed / 2) and the right half taking the remainder.
sub select_n {
    my ($first, $past_last, $wanted) = @_;
    my $span = $past_last - $first;
    die "select_n: code error" if $wanted > $span;

    if ($span == 1) {
        print $F[$first] if $wanted > 0;
        return;
    }

    my $middle = $first + int($span / 2);
    my $left_share = int($wanted / 2);
    select_n($first, $middle, $left_share);
    select_n($middle, $past_last, $wanted - $left_share);
}

select_n(0, $numlines, $N);

82
egs/rm/s4/scripts/sym2int.pl Executable file
Просмотреть файл

@ -0,0 +1,82 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions
# Replaces every whitespace-separated symbol of the input by its integer from
# the symbol table.  Options:
#   --ignore-oov          print symbols missing from the table unmodified
#   --ignore-first-field  copy the first field of each line through unchanged
#   --map-oov <sym>       replace symbols missing from the table by <sym>
# Without --ignore-oov or --map-oov an unknown symbol is a fatal error.
my $ignore_oov = 0;
my $ignore_first_field = 0;
my $map_oov;

# Three rounds, so that all three options can be given in any order.
foreach my $round (1 .. 3) {
    if ($ARGV[0] eq "--ignore-oov") {
        $ignore_oov = 1;
        shift @ARGV;
    }
    if ($ARGV[0] eq "--ignore-first-field") {
        $ignore_first_field = 1;
        shift @ARGV;
    }
    if ($ARGV[0] eq "--map-oov") {
        shift @ARGV;
        $map_oov = shift @ARGV;
    }
}

my $symtab = shift @ARGV;
defined $symtab
    or die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n";

# Load the table: "<symbol> <integer>" per line.
my %sym2int;
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while (my $entry = <F>) {
    my @pair = split(" ", $entry);
    @pair == 2 || die "bad line in symbol table file: $entry";
    $sym2int{$pair[0]} = $pair[1] + 0;
}

my $num_warning = 0;
my $max_warning = 20;    # replacement warnings stop after this many
my $error = 0;

while (my $line = <>) {
    my @words = split(" ", $line);
    die "Empty line in transcriptions input." unless @words;

    # The key is printed before the words are looked up.
    if ($ignore_first_field) {
        my $key = shift @words;
        print $key . " ";
    }

    my @ints;
    foreach my $word (@words) {
        my $id = $sym2int{$word};
        unless (defined $id) {
            if (defined $map_oov) {
                defined $sym2int{$map_oov}
                    or die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
                if ($num_warning++ < $max_warning) {
                    print STDERR "sym2int.pl: replacing $word with $map_oov\n";
                    print STDERR "sym2int.pl: not warning for OOVs any more times\n"
                        if $num_warning == $max_warning;
                }
                $id = $sym2int{$map_oov};
            } elsif ($ignore_oov) {
                $id = $word;    # just print them out unmodified..
            } else {
                die "sym2int.pl: undefined symbol $word\n";
            }
        }
        push @ints, $id;
    }
    print join(" ", @ints), "\n";
}

exit($error ? 1 : 0);

78
egs/rm/s4/steps/align_deltas.sh Executable file
Просмотреть файл

@ -0,0 +1,78 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# This script does training-data alignment given a model built using
# CMN + delta + delta-delta features. Its output, all in its own
# experimental directory, is cmvn.ark, ali, tree, final.mdl and final.occs
# (the last three are just copied from the source directory).
# Option to use precompiled graphs from last phase, if these
# are available (i.e. if they were built with the same data).
# Usage: steps/align_deltas.sh [--graphs <graphs-rspecifier>] <data-dir> <lang-dir> <src-dir> <exp-dir>
# Aligns the data in <data-dir> with the model in <src-dir> and writes the
# alignments to <exp-dir>/ali.  With --graphs the given precompiled training
# graphs are used; otherwise the transcripts are converted to integers and
# aligned through the lexicon FST.
graphs=
case "$1" in
  --graphs)
    shift
    graphs=$1
    shift
    ;;
esac

if [ $# -ne 4 ]; then
  echo "Usage: steps/align_deltas.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/align_deltas.sh data/train data/lang exp/tri1 exp/tri1_ali"
  exit 1
fi

[ -f path.sh ] && . path.sh

data=$1
lang=$2
srcdir=$3
dir=$4

scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

mkdir -p $dir
# Create copy of the tree and model and occs...
cp $srcdir/tree $srcdir/final.mdl $srcdir/final.occs $dir || exit 1

echo "Computing cepstral mean and variance statistics"
compute-cmvn-stats scp:$data/feats.scp ark:$dir/cmvn.ark 2>$dir/cmvn.log \
  || exit 1

# Feature pipeline: mean normalization with the stats just computed, then deltas.
feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# Align all training data using the supplied model.
echo "Aligning all training data"
if [ -n "$graphs" ]; then
  # --graphs was supplied: align against the precompiled graphs.
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/final.mdl \
    "$graphs" "$feats" ark:$dir/ali 2> $dir/align.log \
    || exit 1
else
  # compute integer form of transcripts.
  scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
    || exit 1
  gmm-align $scale_opts --beam=8 --retry-beam=40 $dir/tree $dir/final.mdl $lang/L.fst \
    "$feats" ark:$dir/train.tra ark:$dir/ali 2> $dir/align.log \
    || exit 1
  rm $dir/train.tra
fi

echo "Done."

Просмотреть файл

@ -0,0 +1,77 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and delta-delta plus
# cepstral mean subtraction features. Used, for example, to decode
# mono/ and tri1/
# This script generates lattices and rescores them with different
# acoustic weights, in order to explore a range of different
# weights.
# Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>
# Generates lattices with <model-dir>/final.mdl and <model-dir>/graph/HCLG.fst,
# then rescores them with inverse acoustic weights 4..10 and writes one WER
# file per weight into <decode-dir>.
if [ $# -ne 4 ]; then
  echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
  exit 1
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

mkdir -p $dir

[ -f path.sh ] && . path.sh

if ! [ -f $srcdir/final.mdl ]; then
  echo No model file $srcdir/final.mdl
  exit 1
fi

# The graph must exist and must not be older than the model.
if [[ ! -f $graphdir/HCLG.fst ]] || [[ $graphdir/HCLG.fst -ot $srcdir/final.mdl ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 \
  --word-symbol-table=$lang/words.txt \
  $srcdir/final.mdl $graphdir/HCLG.fst "$feats" \
  "ark:|gzip -c > $dir/lat.gz" ark,t:$dir/test.tra ark,t:$dir/test.ali \
  2> $dir/decode.log || exit 1

# In this setup there are no non-scored words, so
# scoring is simple.

# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in {4..10}; do
  acwt=$(perl -e "print (1.0/$inv_acwt);")
  lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
    "ark:gunzip -c $dir/lat.gz|" ark,t:$dir/${inv_acwt}.tra \
    2>$dir/rescore_${inv_acwt}.log

  # Stdout and stderr of the scorer both go to the wer_<N> file.
  scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text \
    | compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
    > $dir/wer_${inv_acwt} 2>&1
done

48
egs/rm/s4/steps/make_mfcc.sh Executable file
Просмотреть файл

@ -0,0 +1,48 @@
#!/bin/bash
# Copyright 2012 Vassil Panayotov
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from .. (one directory up from here)
# Usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>
# Converts the Sphinx-format feature files listed in <data-dir>/mfc.scp into
# one archive <mfccdir>/raw_mfcc_<name>.ark and writes the matching
# <data-dir>/feats.scp, where <name> is the basename of <data-dir>.
if [ $# != 3 ]; then
  echo "usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>";
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
logdir=$2
mfccdir=$3

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$mfccdir" || exit 1;
mkdir -p "$logdir" || exit 1;

scp=$data/mfc.scp
if [ ! -f "$scp" ]; then
  # Report the file that was actually looked for.
  echo "make_mfcc.sh: no such file $scp";
  exit 1;
fi

log=$logdir/make_mfcc.log

# Only claim success when copy-feats really succeeded.
if ! copy-feats --sphinx-in=true \
    "scp:$scp" "ark,scp:$mfccdir/raw_mfcc_$name.ark,$data/feats.scp" 2>"$log"; then
  echo "make_mfcc.sh: error creating MFCC features for $name, see $log";
  exit 1;
fi

echo "Succeeded creating MFCC features for $name"

126
egs/rm/s4/steps/train_deltas.sh Executable file
Просмотреть файл

@ -0,0 +1,126 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Triphone model training, using delta-delta features and cepstral
# mean normalization. It starts from an existing directory (e.g.
# exp/mono), supplied as an argument, which is assumed to be built using
# the same type of features.
# Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>
# Builds a decision tree from the alignments in <ali-dir>, initialises a
# model on it and runs $numiters-1 training passes, re-aligning on the passes
# listed in $realign_iters.  The result is linked as <exp-dir>/final.mdl and
# <exp-dir>/final.occs.
if [ $# != 4 ]; then
  echo "Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
  echo " e.g.: steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [ ! -f "$alidir/final.mdl" ] || [ ! -f "$alidir/ali" ]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1;
fi

# Left unquoted where used: it has to split into three separate options.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="5 10 15 20";
silphonelist=$(cat "$lang/silphones.csl") || exit 1;
numiters=25    # Number of iterations of training
maxiterinc=15  # Last iter to increase #Gauss on.
numleaves=1800 # target num-leaves in tree building.
numgauss=$((numleaves + numleaves/2));  # starting num-Gauss.
     # Initially mix up to avg. 1.5 Gauss/state (a bit more
     # than this, due to state clustering), then slowly mix
     # up to final amount.
totgauss=9000 # Target #Gaussians
incgauss=$(((totgauss - numgauss) / maxiterinc)) # per-iter increment for #Gauss

mkdir -p "$dir" || exit 1;

feats="ark:apply-cmvn --norm-vars=false ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

echo "Accumulating tree stats"
acc-tree-stats --ci-phones=$silphonelist "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1;

echo "Computing questions for tree clustering"
# phones.list: the last column of phones.txt, without the entry 0.
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" 2> "$dir/questions.log" || exit 1;
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" 2>"$dir/compile_questions.log" || exit 1;

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2>"$dir/roots.log" || exit 1;

echo "Building tree"
build-tree --verbose=1 --max-leaves=$numleaves \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" 2> "$dir/train_tree.log" || exit 1;

gmm-init-model --write-occs="$dir/1.occs" \
  "$dir/tree" "$dir/treeacc" "$lang/topo" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1;

gmm-mixup --mix-up=$numgauss "$dir/1.mdl" "$dir/1.occs" "$dir/1.mdl" \
  2>"$dir/mixup.log" || exit 1;

#rm $dir/treeacc

# Convert alignments generated from monophone model, to use as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" "ark:$alidir/ali" "ark:$dir/cur.ali" \
  2>"$dir/convert.log" || exit 1;

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" "ark:$dir/cur.ali" ark:- \
  2>/dev/null | cmp - "$alidir/ali" || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" \
  "ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2>"$dir/compile_graphs.log" || exit 1;

x=1
while [ $x -lt $numiters ]; do
  echo Pass $x
  if echo $realign_iters | grep -w $x >/dev/null; then
    echo "Aligning data"
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x+1)).occs" --mix-up=$numgauss "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x+1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # The previous pass's model, stats and occupancies are no longer needed.
  rm "$dir/$x.mdl" "$dir/$x.acc"
  rm "$dir/$x.occs"
  if [[ $x -le $maxiterinc ]]; then
    numgauss=$((numgauss + incgauss));
  fi
  x=$((x+1));
done

# Remove both old links first, so that re-running into an existing directory
# does not make "ln -s" fail, and never touch files outside $dir if the cd fails.
( cd "$dir" && rm -f final.mdl final.occs && ln -s $x.mdl final.mdl && ln -s $x.occs final.occs ) || exit 1;

echo Done

105
egs/rm/s4/steps/train_mono.sh Executable file
Просмотреть файл

@ -0,0 +1,105 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Flat start and monophone training, with delta-delta features.
# This script applies cepstral mean normalization (the statistics are computed
# without a speaker map, i.e. per utterance), unlike the corresponding script in s1/
# Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>
# Flat-start monophone training: initialises 0.mdl, does one pass on
# equally-spaced alignments, then runs passes 1..$numiters-1, re-aligning on
# the passes listed in $realign_iters.  The result is linked as
# <exp-dir>/final.mdl and <exp-dir>/final.occs.
if [ $# != 3 ]; then
  echo "Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>"
  echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
  exit 1;
fi

data=$1
lang=$2
dir=$3

if [ -f path.sh ]; then . path.sh; fi

# Configuration:
# (scale_opts is left unquoted where used: it has to split into three options.)
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
numiters=30    # Number of iterations of training
maxiterinc=20  # Last iter to increase #Gauss on.
numgauss=250   # Initial num-Gauss (must be more than #states=3*phones).
totgauss=1000  # Target #Gaussians.
incgauss=$(((totgauss - numgauss) / maxiterinc)) # per-iter increment for #Gauss
realign_iters="1 2 3 4 5 6 7 8 9 10 12 15 20 25";

mkdir -p "$dir" || exit 1;

echo "Computing cepstral mean and variance statistics"
compute-cmvn-stats "scp:$data/feats.scp" "ark:$dir/cmvn.ark" 2>"$dir/cmvn.log" || exit 1;

feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
  || exit 1;

echo "Initializing monophone system."
# The model is initialised from the first 10 utterances; 39 is the feature
# dimension handed to gmm-init-mono.
gmm-init-mono "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" "$lang/topo" 39 \
  "$dir/0.mdl" "$dir/tree" 2> "$dir/init.log" || exit 1;

echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/0.mdl" "$lang/L.fst" \
  "ark:$dir/train.tra" "ark:|gzip -c >$dir/graphs.fsts.gz" \
  2>"$dir/compile_graphs.log" || exit 1

echo Pass 0
align-equal-compiled "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
  ark,t,f:- 2>"$dir/align.0.log" | \
  gmm-acc-stats-ali --binary=true "$dir/0.mdl" "$feats" ark:- \
    "$dir/0.acc" 2> "$dir/acc.0.log" || exit 1;

# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
# we fail to est "rare" phones and later on, they never align properly.
gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss \
  "$dir/0.mdl" "$dir/0.acc" "$dir/1.mdl" 2> "$dir/update.0.log" || exit 1;
rm "$dir/0.acc"

beam=4 # will change to 8 below after 1st pass
x=1
while [ $x -lt $numiters ]; do
  echo "Pass $x"
  if echo $realign_iters | grep -w $x >/dev/null; then
    echo "Aligning data"
    gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$((beam*4)) "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" "t,ark:$dir/cur.ali" \
      2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x+1)).occs" --mix-up=$numgauss "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x+1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # $x.occs does not exist on the first pass, hence the silenced rm.
  rm "$dir/$x.mdl" "$dir/$x.acc" "$dir/$x.occs" 2>/dev/null
  if [ $x -le $maxiterinc ]; then
    numgauss=$((numgauss + incgauss));
  fi
  beam=8
  x=$((x+1))
done

# Remove both old links first, so that re-running into an existing directory
# does not make "ln -s" fail, and never touch files outside $dir if the cd fails.
( cd "$dir" && rm -f final.mdl final.occs && ln -s $x.mdl final.mdl && ln -s $x.occs final.occs ) || exit 1;

# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4
# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4

24
egs/timit/s4/RESULTS Normal file
Просмотреть файл

@ -0,0 +1,24 @@
exp/mono/decode_dev_bg/wer_3
compute-wer --text --mode=present ark:exp/mono/decode_dev_bg/test_trans.filt ark,p:-
%WER 33.73 [ 5079 / 15057, 392 ins, 1716 del, 2971 sub ]
%SER 100.00 [ 400 / 400 ]
Scored 400 sentences, 0 not present in hyp.
exp/mono/decode_test_bg/wer
compute-wer --text --mode=present ark:exp/mono/decode_test_bg/test.trans ark,p:exp/mono/decode_test_bg/text
%WER 35.68 [ 2574 / 7215, 204 ins, 848 del, 1522 sub ]
%SER 100.00 [ 192 / 192 ]
Scored 192 sentences, 0 not present in hyp.
exp/tri1/decode_dev_bg/wer_6
compute-wer --text --mode=present ark:exp/tri1/decode_dev_bg/test.trans ark,p:-
%WER 28.68 [ 4319 / 15057, 474 ins, 1333 del, 2512 sub ]
%SER 100.00 [ 400 / 400 ]
Scored 400 sentences, 0 not present in hyp.
exp/tri1/decode_test_bg/wer
compute-wer --text --mode=present ark:exp/tri1/decode_test_bg/test.trans ark,p:exp/tri1/decode_test_bg/text
%WER 31.02 [ 2238 / 7215, 226 ins, 704 del, 1308 sub ]
%SER 100.00 [ 192 / 192 ]
Scored 192 sentences, 0 not present in hyp.

Просмотреть файл

@ -0,0 +1,50 @@
faks0
fdac1
fjem0
mgwt0
mjar0
mmdb1
mmdm2
mpdf0
fcmh0
fkms0
mbdg0
mbwm0
mcsh0
fadg0
fdms0
fedw0
mgjf0
mglb0
mrtk0
mtaa0
mtdt0
mthc0
mwjg0
fnmr0
frew0
fsem0
mbns0
mmjr0
mdls0
mdlf0
mdvc0
mers0
fmah0
fdrw0
mrcs0
mrjm4
fcal1
mmwh0
fjsj0
majc0
mjsw0
mreb0
fgjd0
fjmg0
mroa0
mteb0
mjfc0
mrjr0
fmml0
mrws1

Просмотреть файл

@ -0,0 +1 @@
--use-energy=false # only non-default option.

Просмотреть файл

@ -0,0 +1,61 @@
aa aa aa
ae ae ae
ah ah ah
ao ao aa
aw aw aw
ax ax ah
ax-h ax ah
axr er er
ay ay ay
b b b
bcl vcl sil
ch ch ch
d d d
dcl vcl sil
dh dh dh
dx dx dx
eh eh eh
el el l
em m m
en en n
eng ng ng
epi epi sil
er er er
ey ey ey
f f f
g g g
gcl vcl sil
h# sil sil
hh hh hh
hv hh hh
ih ih ih
ix ix ih
iy iy iy
jh jh jh
k k k
kcl cl sil
l l l
m m m
n n n
ng ng ng
nx n n
ow ow ow
oy oy oy
p p p
pau sil sil
pcl cl sil
q
r r r
s s s
sh sh sh
t t t
tcl cl sil
th th th
uh uh uh
uw uw uw
ux uw uw
v v v
w w w
y y y
z z z
zh zh sh

Просмотреть файл

@ -0,0 +1,24 @@
mdab0
mwbt0
felc0
mtas1
mwew0
fpas0
mjmp0
mlnt0
fpkt0
mlll0
mtls0
fjlm0
mbpm0
mklt0
fnlp0
mcmj0
mjdh0
fmgd0
mgrt0
mnjm0
fdhc0
mjln0
mpam0
fmld0

Просмотреть файл

@ -0,0 +1,20 @@
<Topology>
<TopologyEntry>
<ForPhones>
NONSILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
<TopologyEntry>
<ForPhones>
SILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
</Topology>

Просмотреть файл

@ -0,0 +1,110 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
set -o errexit
# Report a fatal error: write the arguments to stderr (backslash escapes such
# as \n and \t are interpreted by 'echo -e') and leave with exit status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Resolve the directory given by a command-line argument to an absolute path.
# Arguments: $1 - an argument of the form --option=DIR (everything after the
#                 first '=' is taken as DIR; a bare DIR is accepted as well)
# Outputs:   the absolute path of DIR on stdout
# Exits via error_exit (status 1) if DIR is not a directory or cannot be
# entered.
function read_dirname () {
  local dir_name abs_dir
  dir_name=${1#*=}
  [ -d "$dir_name" ] || error_exit "Argument '$dir_name' not a directory"
  # Declaration and assignment are separate so that a failing 'cd' is not
  # masked by the exit status of 'local'; all expansions are quoted so that
  # directory names containing whitespace survive.
  abs_dir=$(cd -- "$dir_name" 2>/dev/null && pwd) \
    || error_exit "Cannot change to directory '$dir_name'"
  printf '%s\n' "$abs_dir"
}
# Name of this script, used in the usage and error messages.
PROG=$(basename "$0")

# NOTE: $usage is deliberately expanded UNQUOTED below. Word splitting
# collapses the literal line breaks, so the layout of the message is
# controlled only by the \n and \t escapes that 'echo -e' interprets.
usage="Usage: $PROG <arguments>\n
Prepare train, dev, test file lists for TIMIT.\n\n
Required arguments:\n
--config-dir=DIR\tDirecory containing the necessary config files\n
--corpus-dir=DIR\tDirectory for the TIMIT corpus\n
--work-dir=DIR\t\tWorking directory\n
";

if [ $# -lt 3 ]; then
  error_exit $usage
fi

while [ $# -gt 0 ]; do
  case "$1" in
    --help)
      echo -e $usage; exit 0 ;;
    --config-dir=*)
      CONFDIR=$(read_dirname "$1"); shift ;;
    --corpus-dir=*)
      CORPUS=$(read_dirname "$1"); shift ;;
    --work-dir=*)
      WDIR=$(read_dirname "$1"); shift ;;
    *)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
  esac
done

# All three options are required. Without this check a missing one would only
# surface later as an "unbound variable" error (the script runs with -u).
if [ -z "${CONFDIR:-}" ] || [ -z "${CORPUS:-}" ] || [ -z "${WDIR:-}" ]; then
  error_exit $usage
fi

# (1) check if the config files are in place. Every file that is read from
# the config directory further down is checked here, so that a missing one is
# reported before any work is done.
cd "$CONFDIR"
[ -f test_spk.list ] || error_exit "$PROG: Eval-set speaker list not found."
[ -f dev_spk.list ] || error_exit "$PROG: Dev-set speaker list not found."
[ -f phones.60-48-39.map ] \
  || error_exit "$PROG: Phone map 'phones.60-48-39.map' not found."

cd "$WDIR"
if [ -f path.sh ]; then
  . path.sh  # Sets the PATH to contain necessary executables
fi

# (2) get the various file lists (for audio, transcription, etc.)
mkdir -p data/local
timit_prep_flists.sh --corpus-dir="$CORPUS" \
  --dev-spk="$CONFDIR/dev_spk.list" \
  --test-spk="$CONFDIR/test_spk.list" --work-dir=data

# (3) Normalize the transcripts: the training transcripts are mapped with
# '-to 48', the dev and test transcripts with '-to 39'.
timit_norm_trans.pl -i data/local/train.trans \
  -m "$CONFDIR/phones.60-48-39.map" -to 48 > data/local/train.trans2
for x in dev test; do
  timit_norm_trans.pl -i "data/local/${x}.trans" \
    -m "$CONFDIR/phones.60-48-39.map" -to 39 > "data/local/${x}.trans2"
done

# Create the lexicon, which is just an identity mapping: every symbol seen in
# the training transcripts is written twice on its own line.
cut -d' ' -f2- data/local/train.trans2 | tr ' ' '\n' | sort -u > data/local/p
paste data/local/p data/local/p > data/local/lexicon.txt

# add disambig symbols to the lexicon: TODO: delete
ndisambig=$(add_lex_disambig.pl data/local/lexicon.txt \
  data/local/lexicon_disambig.txt)
ndisambig=$((ndisambig + 1))  # add one disambig symbol for silence
echo "$ndisambig" > data/local/lex_ndisambig

# Get the list of phones and map them to integers (adding the null symbol <eps>
# to the list).
cut -f2 data/local/lexicon.txt \
  | awk 'BEGIN{ print "<eps> 0"; } { printf("%s %d\n", $1, NR); }' \
  > data/local/phones.txt

# Get the list of words (with the extra symbol #0 appended at the end):
cut -f1 data/local/lexicon.txt \
  | awk 'BEGIN{print "<eps> 0";} {printf("%s %d\n", $1, NR);}
END{printf("#0 %d\n", NR+1);}' > data/local/words.txt

# (4) Create the phone bigram LM. Everything printed by this step (including
# error messages) goes to data/prepare_lm.log.
(
  # ${IRSTLM:-} keeps the check usable under -u when the variable is unset.
  if [ -z "${IRSTLM:-}" ]; then
    error_exit "LM building wo'nt work without setting the IRSTLM env variable"
  fi
  cut -d' ' -f2- data/local/train.trans2 \
    | sed -e 's:^:<s> :' -e 's:$: </s>:' \
    > data/local/lm_train.txt
  build-lm.sh -i data/local/lm_train.txt -n 2 -o data/local/lm_phone_bg.ilm.gz
  compile-lm data/local/lm_phone_bg.ilm.gz --text yes /dev/stdout \
    | grep -v unk | gzip -c > data/local/lm_phone_bg.arpa.gz
) > data/prepare_lm.log 2>&1

echo "Finished data preparation."

Просмотреть файл

@ -0,0 +1,136 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o pipefail
# Fatal-error helper: the message (with backslash escapes expanded by
# 'echo -e') is sent to stderr, then the shell exits with status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Resolve the directory given by a command-line argument to an absolute path.
# Arguments: $1 - an argument of the form --option=DIR (everything after the
#                 first '=' is taken as DIR; a bare DIR is accepted as well)
# Outputs:   the absolute path of DIR on stdout
# Exits via error_exit (status 1) if DIR is not a directory or cannot be
# entered.
function read_dirname () {
  local dir_name abs_dir
  dir_name=${1#*=}
  [ -d "$dir_name" ] || error_exit "Argument '$dir_name' not a directory"
  # Assign separately from 'local' so a failing 'cd' is detected, and quote
  # every expansion so that paths containing whitespace are handled.
  abs_dir=$(cd -- "$dir_name" 2>/dev/null && pwd) \
    || error_exit "Cannot change to directory '$dir_name'"
  printf '%s\n' "$abs_dir"
}
# Name of this script, used in the usage message.
PROG=$(basename "$0")

# NOTE: $usage is deliberately expanded UNQUOTED below; word splitting removes
# the literal line breaks so that only the \n and \t escapes shape the output.
usage="Usage: $PROG <arguments>\n
Prepare train, dev, test file lists.\n\n
Required arguments:\n
--hmm-proto=FILE\tPrototype of the HMM topology\n
--work-dir=DIR\t\tWorking directory\n
";

if [ $# -lt 2 ]; then
  error_exit $usage
fi

while [ $# -gt 0 ]; do
  case "$1" in
    --help)
      echo -e $usage; exit 0 ;;
    --hmm-proto=*)
      PROTO=${1#*=}
      [ -f "$PROTO" ] || error_exit "Cannot find HMM prototype file '$PROTO'"
      # The script changes into the working directory before the prototype
      # is read, so a relative path has to be made absolute here, while it
      # still resolves against the caller's current directory.
      PROTO=$(cd -- "$(dirname -- "$PROTO")" && pwd)/$(basename -- "$PROTO")
      shift ;;
    --work-dir=*)
      WDIR=$(read_dirname "$1"); shift ;;
    *)
      echo "Unknown argument: $1, exiting"; error_exit $usage ;;
  esac
done

# Both options are required; without this check a missing one would only show
# up later as an "unbound variable" error (the script runs with -u).
if [ -z "${PROTO:-}" ] || [ -z "${WDIR:-}" ]; then
  error_exit $usage
fi
cd $WDIR
. path.sh

echo "Preparing train data"

# (0) One directory per data set, filled with the per-set files that were
# written to data/local.
for x in train dev test; do
  mkdir -p data/$x
  cp data/local/${x}_wav.scp data/$x/wav.scp
  cp data/local/${x}.trans2 data/$x/text
  cp data/local/${x}.spk2utt data/$x/spk2utt
  cp data/local/${x}.utt2spk data/$x/utt2spk
done

mkdir -p data/lang
for f in phones.txt words.txt; do
  cp data/local/$f -t data/lang/
done

# (1) Colon-separated lists of the silence and non-silence phones.
silphones="cl epi sil vcl";
silphones.pl data/lang/phones.txt "$silphones" \
  data/lang/silphones.csl data/lang/nonsilphones.csl

# (2) L.fst without disambiguation symbols, for use in training.
make_lexicon_fst.pl data/local/lexicon.txt 0.5 sil \
  | fstcompile --isymbols=data/lang/phones.txt \
      --osymbols=data/lang/words.txt --keep_isymbols=false \
      --keep_osymbols=false \
  | fstarcsort --sort_type=olabel > data/lang/L.fst

# (3) Phone sets for monophone training and the roots file; the phone sets
# used for clustering are the same minus every line containing "sil".
timit_make_questions.pl -i data/lang/phones.txt \
  -m data/lang/phonesets_mono.txt -r data/lang/roots.txt
grep -v sil data/lang/phonesets_mono.txt \
  > data/lang/phonesets_cluster.txt

# (4) Finally, for training, fill the phone lists into the HMM topology
# prototype. NONSILENCEPHONES must be substituted first, because
# SILENCEPHONES is a substring of it.
silphonelist=$(cat data/lang/silphones.csl | sed 's/:/ /g')
nonsilphonelist=$(cat data/lang/nonsilphones.csl | sed 's/:/ /g')
sed -e "s:NONSILENCEPHONES:$nonsilphonelist:" \
  -e "s:SILENCEPHONES:$silphonelist:" $PROTO > data/lang/topo

echo "Preparing test data"

# (0) Copy over some files common to train and test:
mkdir -p data/lang_test
for f in phones.txt words.txt L.fst silphones.csl nonsilphones.csl; do
  cp data/lang/$f -t data/lang_test/
done

# (1) Phone list including the disambiguation symbols.
# --include-zero includes the #0 symbol that is passed from G.fst
ndisambig=$(cat data/local/lex_ndisambig)
add_disambig.pl --include-zero data/lang_test/phones.txt $ndisambig \
  > data/lang_test/phones_disambig.txt
cp data/lang_test/phones_disambig.txt -t data/lang/ # for MMI.

# (2) Lexicon FST with disambiguation symbols. The extra fstaddselfloops
# step creates a loop that "passes through" the disambiguation symbol from
# G.fst.
phone_disambig_symbol=$(grep '#0' data/lang_test/phones_disambig.txt \
  | awk '{print $2}')
word_disambig_symbol=$(grep '#0' data/lang_test/words.txt | awk '{print $2}')

make_lexicon_fst.pl data/local/lexicon_disambig.txt 0.5 sil '#'$ndisambig \
  | fstcompile --isymbols=data/lang_test/phones_disambig.txt \
      --osymbols=data/lang_test/words.txt --keep_isymbols=false \
      --keep_osymbols=false \
  | fstaddselfloops "echo $phone_disambig_symbol |" \
      "echo $word_disambig_symbol |" \
  | fstarcsort --sort_type=olabel > data/lang_test/L_disambig.fst

# Needed for discriminative training
cp data/lang_test/L_disambig.fst -t data/lang/

# (3) Convert the language model to FST, and create decoding configuration.
timit_format_lms.sh data

echo "Succeeded in formatting data."

Просмотреть файл

@ -0,0 +1,71 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
set -o errexit
#set -o pipefail
# Print the arguments on stderr ('echo -e' expands backslash escapes) and
# stop with exit status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Print the absolute path of a directory.
# Arguments: $1 - path of an existing directory
# Outputs:   the absolute path on stdout
# Exits via error_exit (status 1) if $1 is not a directory or cannot be
# entered.
function read_dirname () {
  local abs_dir
  [ -d "$1" ] || error_exit "Argument '$1' not a directory"
  # Assign separately from 'local' so a failing 'cd' is detected, and quote
  # every expansion so that paths containing whitespace are handled.
  abs_dir=$(cd -- "$1" 2>/dev/null && pwd) \
    || error_exit "Cannot change to directory '$1'"
  printf '%s\n' "$abs_dir"
}
# Build the lang_test directory for one language model and compile that
# model into G.fst.
# Arguments: $1 - LM suffix; the LM is read from
#                 <work_dir>/local/lm_<suffix>.arpa.gz
#            $2 - work directory containing 'lang_test' and 'local'
# Outputs:   <work_dir>/lang_test_<suffix>/ holding copies of the files
#            listed below from <work_dir>/lang_test, plus G.fst
function format_lms () {
  local lm_suffix=$1
  local work_dir=$2
  local test=$work_dir/lang_test_${lm_suffix}
  local f  # keep the loop variable out of the global scope

  mkdir -p "$test"
  for f in phones.txt words.txt phones_disambig.txt L.fst L_disambig.fst \
    silphones.csl nonsilphones.csl; do
    cp "$work_dir/lang_test/$f" "$test"
  done

  # Removing all "illegal" combinations of <s> and </s>, which are supposed to
  # occur only at begin/end of utt. These can cause determinization failures
  # of CLG [ends up being epsilon cycles].
  # 'grep -E' replaces the obsolescent 'egrep' spelling.
  gunzip -c "$work_dir/local/lm_${lm_suffix}.arpa.gz" \
    | grep -E -v '<s> <s>|</s> <s>|</s> </s>' \
    | arpa2fst - | fstprint \
    | eps2disambig.pl | s2eps.pl \
    | fstcompile --isymbols="$test/words.txt" --osymbols="$test/words.txt" \
        --keep_isymbols=false --keep_osymbols=false \
    | fstrmepsilon > "$test/G.fst"

  # errexit is switched off around this call, so a non-zero exit status from
  # fstisstochastic does not abort the script.
  set +e
  fstisstochastic "$test/G.fst"
  set -e
}
# Script name for the usage message.
PROG=$(basename $0)

# $usage is expanded unquoted on purpose: word splitting drops the literal
# line break, leaving only the \n escapes for 'echo -e' to interpret.
usage="Usage: $PROG data_dir\n
Convert ARPA-format language models to FSTs.\n";

# Exactly one argument (the data directory) is expected.
[ $# -eq 1 ] || error_exit $usage

WDIR=$(read_dirname $1)

# Create the FST and the matching lang_test directory for the phone bigram
# LM; all output of that step, stderr included, is collected in the log file.
echo "Preparing language models for test"
format_lms phone_bg $WDIR > $WDIR/format_lms.log 2>&1

Просмотреть файл

@ -0,0 +1,58 @@
#!/usr/bin/perl -w
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# 'phonesets_mono' contains sets of phones that are shared when building the
# monophone system and when asking questions based on an automatic clustering
# of phones, for the triphone system.
# 'roots' contain the information about which phones share a common root in
# the phonetic decision tree and which have distinct pdfs. It also states
# whether the tree-building should split the roots or not.
my $usage = "Usage: timit_make_questions.pl -i phones -m phoneset_mono -r roots\
Creates sharerd phonesets for monophone and context-dependent training.\
Required arguments:\
-i\tInput list of phones (can contain stress/position markers)\
-m\tOutput shared phoneset for use in monophone training\
-r\tOutput sharing and splitting info for context-dependent training\n";

use strict;
use Getopt::Long;

my ($in_phones, $mono, $roots, %phoneset);
GetOptions ("i=s" => \$in_phones, # Input list of phones
            "m=s" => \$mono,      # Shared phone-set for monophone system
            "r=s" => \$roots );   # roots file for context-dependent systems
die "$usage" unless(defined($in_phones) && defined($mono) && defined($roots));

# Lexical filehandles and 3-argument open: the mode can no longer be altered
# by unusual characters in a file name.
open(my $phones_fh, '<', $in_phones)
  or die "Cannot read from file '$in_phones': $!";
open(my $mono_fh, '>', $mono) or die "Cannot write to file '$mono': $!";
open(my $roots_fh, '>', $roots) or die "Cannot write to file '$roots': $!";

while (my $line = <$phones_fh>) {
  # Skip <eps> and the silence-like phones, which are written out as one
  # fixed group below. The match is anchored to the whole symbol (optionally
  # followed by a one-character marker), so a phone that merely contains
  # e.g. "cl" as a substring is not dropped.
  next if $line =~ m/^(?:<eps>|sil|vcl|cl|epi)(?:_\S)?\s/;
  chomp $line;
  # Each line is "<phone>[_<marker>] <integer id>". The base phone is matched
  # non-greedily; with a greedy \S+ the optional marker group could never
  # capture anything and phones would not be grouped by their base symbol.
  $line =~ m/^(\S+?)(_.)?\s+\S+$/ or die "Bad line: $line\n";
  my $full_phone = defined($2)? $1.$2 : $1;
  push @{$phoneset{$1}}, $full_phone;
}
close($phones_fh);

# The silence-like phones form one set; in the roots file they are marked as
# neither shared nor split.
print $mono_fh "cl epi sil vcl\n";
print $roots_fh "not-shared not-split cl epi sil vcl\n";

# One line per base phone, listing all its variants.
foreach my $p (sort keys %phoneset) {
  print $mono_fh join(" ", @{$phoneset{$p}}), "\n";
  print $roots_fh "shared split ", join(" ", @{$phoneset{$p}}), "\n";
}

# Closing an output handle can fail (e.g. on a full disk); report it.
close($mono_fh) or die "Cannot close file '$mono': $!";
close($roots_fh) or die "Cannot close file '$roots': $!";

Просмотреть файл

@ -0,0 +1,89 @@
#!/usr/bin/perl -w
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script normalizes the TIMIT phonetic transcripts that have been
# extracted in a format where each line contains an utterance ID followed by
# the transcript, e.g.:
# fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h#
my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n
Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a
smaller set defined by the -m option. This script assumes that the mapping is
done in the \"standard\" fashion, i.e. to 48 or 39 phones. The input is
assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can
be changed using the -from option. The input format is assumed to be utterance
ID followed by transcript on the same line.\n";

use strict;
use Getopt::Long;
die "$usage" unless(@ARGV >= 1);
my ($in_trans, $phone_map, $num_phones_out);
my $num_phones_in = 60;
GetOptions ("i=s" => \$in_trans,           # Input transcription
            "m=s" => \$phone_map,          # File containing phone mappings
            "from=i" => \$num_phones_in,   # Input #phones: must be 60 or 48
            "to=i" => \$num_phones_out );  # Output #phones: must be 48 or 39

die $usage unless(defined($in_trans) && defined($phone_map) &&
                  defined($num_phones_out));
if ($num_phones_in != 60 && $num_phones_in != 48) {
  die "Can only used 60 or 48 for -from (used $num_phones_in)."
}
if ($num_phones_out != 48 && $num_phones_out != 39) {
  die "Can only used 48 or 39 for -to (used $num_phones_out)."
}
unless ($num_phones_out < $num_phones_in) {
  die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)."
}

# Read the mapping file. Every line except the one for the glottal stop must
# have three columns; which column is the source and which the target depends
# on -from and -to.
open(my $map_fh, '<', $phone_map)
  or die "Cannot open mappings file '$phone_map': $!";
my (%phonemap, %seen_phones);
while (my $line = <$map_fh>) {
  chomp $line;
  # Ignore glottal stops: only the entry whose first symbol is exactly "q".
  next if ($line =~ /^q(?:\s|$)/);
  $line =~ m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $line";
  my $mapped_from = ($num_phones_in == 60)? $1 : $2;
  my $mapped_to = ($num_phones_out == 48)? $2 : $3;
  $seen_phones{$mapped_to} = 1;
  $phonemap{$mapped_from} = $mapped_to;
}
close($map_fh);

# Sanity check: the number of distinct target phones must equal -to.
my $num_seen_phones = scalar(keys %seen_phones);
if ($num_seen_phones != $num_phones_out) {
  die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones";
}

# Map the transcripts; one output line per input line: the utterance ID
# followed by the mapped phones, separated by single spaces.
open(my $trans_fh, '<', $in_trans)
  or die "Cannot open transcription file '$in_trans': $!";
while (my $line = <$trans_fh>) {
  chomp $line;
  $line =~ m:^(\S+)\s+(.+): or die "Bad line: $line";
  my ($utt_id, $trans) = ($1, $2);
  my @mapped;
  # split(' ', ...) discards leading/trailing whitespace and collapses runs.
  for my $phone (split(' ', $trans)) {
    # Remove glottal stops. The comparison is done per token, so the letter
    # "q" inside another symbol is left untouched.
    next if $phone eq 'q';
    # A symbol missing from the map would otherwise be printed as an empty
    # field and silently corrupt the transcript.
    defined($phonemap{$phone})
      or die "Phone '$phone' in utterance '$utt_id' not found in mappings file '$phone_map'";
    push @mapped, $phonemap{$phone};
  }
  print join(" ", $utt_id, @mapped), "\n";
}
close($trans_fh);

Просмотреть файл

@ -0,0 +1,121 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o pipefail
# Resolve the directory given by a command-line argument to an absolute path.
# Arguments: $1 - an argument of the form --option=DIR (everything after the
#                 first '=' is taken as DIR; a bare DIR is accepted as well)
# Outputs:   the absolute path of DIR on stdout
# Exits with status 1 (message on stderr) if DIR is not a directory or cannot
# be entered.
function read_dirname () {
  local dir_name abs_dir
  dir_name=${1#*=}
  [ -d "$dir_name" ] \
    || { echo "Argument '$dir_name' not a directory" >&2; exit 1; }
  # Assign separately from 'local' so a failing 'cd' is detected, and quote
  # every expansion so that paths containing whitespace are handled.
  abs_dir=$(cd -- "$dir_name" 2>/dev/null && pwd) \
    || { echo "Cannot change to directory '$dir_name'" >&2; exit 1; }
  printf '%s\n' "$abs_dir"
}
# Name of this script, used in the usage message.
PROG=$(basename "$0")

# NOTE: $usage is deliberately expanded UNQUOTED below; word splitting removes
# the literal line breaks so that only the \n and \t escapes shape the output.
usage="Usage: $PROG <arguments>\n
Prepare train, dev, test file lists for TIMIT.\n\n
Required arguments:\n
--corpus-dir=DIR\tDirectory for the TIMIT corpus\n
--dev-spk=FILE\tDevelopment set speaker list\n
--test-spk=FILE\tCore test set speaker list\n
--work-dir=DIR\t\tPlace to write the files (in a subdirectory with the 2-letter language code)\n
";

# All four options listed above are required.
if [ $# -lt 4 ]; then
  echo -e $usage; exit 1;
fi

while [ $# -gt 0 ]; do
  case "$1" in
    --help)
      echo -e $usage; exit 0 ;;
    --corpus-dir=*)
      CORPUS=$(read_dirname "$1"); shift ;;
    --dev-spk=*)
      DEVSPK=${1#*=}; shift ;;
    --test-spk=*)
      TESTSPK=${1#*=}; shift ;;
    --work-dir=*)
      WDIR=$(read_dirname "$1"); shift ;;
    *)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
  esac
done

# Make sure every option was really given (four arguments could also be the
# same option repeated) instead of failing later with "unbound variable".
if [ -z "${CORPUS:-}" ] || [ -z "${DEVSPK:-}" ] \
  || [ -z "${TESTSPK:-}" ] || [ -z "${WDIR:-}" ]; then
  echo -e $usage; exit 1;
fi
for spk_list in "$DEVSPK" "$TESTSPK"; do
  [ -f "$spk_list" ] \
    || { echo "Cannot find speaker list '$spk_list'" >&2; exit 1; }
done

# NOTE(review): the check accepts an upper-case TRAIN directory, but the
# commands that follow only look under lower-case 'train' and 'test'.
if [ ! -d "$CORPUS/train" ] && [ ! -d "$CORPUS/TRAIN" ]; then
  echo "Expecting directory $CORPUS/train or $CORPUS/TRAIN to exist."
  exit 1;
fi

# Scratch directory; removed again when the script exits.
tmpdir=$(mktemp -d);
trap 'rm -rf "$tmpdir"' EXIT

# Get the list of speakers. The list of speakers in the 24-speaker core test
# set and the 50-speaker development set must be supplied to the script. All
# speakers in the 'train' directory are used for training.
tr '[:upper:]' '[:lower:]' < "$DEVSPK" > "$tmpdir/dev_spk"    # Just in case!
tr '[:upper:]' '[:lower:]' < "$TESTSPK" > "$tmpdir/test_spk"  # Just in case!
# The speaker ID is the last path component of every train/dr*/* entry.
ls -d "$CORPUS"/train/dr*/* | sed -e "s:^.*/::" > "$tmpdir/train_spk"

ODIR=$WDIR/local   # Directory to write file lists & transcripts
mkdir -p "$ODIR"
# For each data set, write to $ODIR: the list of audio files, an scp file
# mapping utterance IDs to audio files, the phone transcripts, a wav.scp that
# pipes each file through sph2pipe, and the utt2spk / spk2utt maps.
for x in train dev test; do
  # First, find the list of audio files (use only si & sx utterances).
  # Note: train & test sets are under different directories, but doing find on
  # both and grepping for the speakers will work correctly.
  find "$CORPUS"/{train,test} -not \( -name 'sa*' \) -name '*.wav' \
    | grep -f "$tmpdir/${x}_spk" > "$ODIR/${x}_sph.flist"
  # Utterance ID = <speaker directory>_<file name without extension>.
  sed -e 's:.*/\(.*\)/\(.*\).wav$:\1_\2:' "$ODIR/${x}_sph.flist" \
    > "$tmpdir/${x}_sph.uttids"
  paste "$tmpdir/${x}_sph.uttids" "$ODIR/${x}_sph.flist" \
    | sort -k1,1 > "$ODIR/${x}_sph.scp"

  # Now, get the transcripts: each line of the output contains an utterance
  # ID followed by the transcript.
  find "$CORPUS"/{train,test} -not \( -name 'sa*' \) -name '*.phn' \
    | grep -f "$tmpdir/${x}_spk" > "$tmpdir/${x}_phn.flist"
  sed -e 's:.*/\(.*\)/\(.*\).phn$:\1_\2:' "$tmpdir/${x}_phn.flist" \
    > "$tmpdir/${x}_phn.uttids"
  while IFS= read -r line; do
    # This script defines no error_exit helper, so the error is reported
    # inline (message on stderr, exit status 1).
    [ -f "$line" ] \
      || { echo "Cannot find transcription file '$line'" >&2; exit 1; }
    # The third space-separated field of every line is joined into a single
    # space-separated line.
    cut -f3 -d' ' "$line" | tr '\n' ' ' | sed -e 's: *$:\n:'
  done < "$tmpdir/${x}_phn.flist" > "$tmpdir/${x}_phn.trans"
  paste "$tmpdir/${x}_phn.uttids" "$tmpdir/${x}_phn.trans" \
    | sort -k1,1 > "$ODIR/${x}.trans"

  # # Intersect the set of utterances with transcripts with the set of those
  # # with valid audio.
  # cut -f1 $tmpdir/${x}.trans \
  # | join $tmpdir/${x}_basenames_wav2 - > $tmpdir/${x}_basenames
  # # Get the common set of WAV files and transcripts.
  # join $tmpdir/${x}_basenames $tmpdir/${x}_wav.scp \
  # > $ODIR/${x}_wav.scp
  # join $tmpdir/${x}_basenames $tmpdir/${x}.trans \
  # > $ODIR/${x}.trans

  # wav.scp: "<utt-id> sph2pipe -f wav <file> |" for every audio file.
  awk '{printf("%s sph2pipe -f wav %s |\n", $1, $2);}' \
    < "$ODIR/${x}_sph.scp" > "$ODIR/${x}_wav.scp"

  # utt2spk: the speaker is the part of the utterance ID before the first '_'.
  sed -e 's:_.*$::' "$tmpdir/${x}_sph.uttids" \
    | paste -d' ' "$tmpdir/${x}_sph.uttids" - | sort -k1,1 \
    > "$ODIR/${x}.utt2spk"
  utt2spk_to_spk2utt.pl "$ODIR/${x}.utt2spk" \
    > "$ODIR/${x}.spk2utt";
done

34
egs/timit/s4/path.sh Normal file
Просмотреть файл

@ -0,0 +1,34 @@
# This contains the locations of the tools and data required for running
# the TIMIT experiments. It is meant to be sourced (". path.sh") from the
# directory that holds the 'local', 'utils' and 'steps' subdirectories.
# NOTE: the 'exit 1' statements below terminate the sourcing script as well.

# KALDIROOT is resolved relative to the current directory.
KALDIROOT=$(cd ../../..; pwd)
KALDISRC=$KALDIROOT/src
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/tiedbin:$KALDISRC/lm

FSTBIN=$KALDIROOT/tools/openfst/bin
LMBIN=$KALDIROOT/tools/irstlm/bin

[ -d "$PWD/local" ] || { echo "Expecting 'local' subdirectory"; exit 1; }
[ -d "$PWD/utils" ] || { echo "Expecting 'utils' subdirectory"; exit 1; }
[ -d "$PWD/steps" ] || { echo "Expecting 'steps' subdirectory"; exit 1; }

LOCALUTILS=$PWD/local
KALDIUTILS=$PWD/utils
KALDISTEPS=$PWD/steps
SCRIPTS=$LOCALUTILS:$KALDIUTILS:$KALDISTEPS

# sph2pipe is looked up in the Kaldi tools directory. A missing executable
# only produces a warning here.
SPH2PIPE=$KALDIROOT/tools/sph2pipe_v2.5
[ -x "$SPH2PIPE/sph2pipe" ] || { echo "Cannot find sph2pipe executable"; }
TOOLS=$SPH2PIPE

export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS:$TOOLS
export LC_ALL=C
export IRSTLM=$KALDIROOT/tools/irstlm

# Site-specific configs:
# Written as an 'if' so that this file finishes with status 0 when the host
# does not match. With the "[ test ] && { ... }" form the last command of the
# file fails on every other host, '. path.sh' then returns non-zero, and a
# caller running under 'set -o errexit' aborts. Errors from 'hostname -y' are
# discarded for systems that do not support that option.
if [ "$(hostname -y 2>/dev/null)" == ecdf ]; then
  . /etc/profile.d/modules.sh
  module add intel/mkl
fi

77
egs/timit/s4/run.sh Executable file
Просмотреть файл

@ -0,0 +1,77 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Guard: executing this file as a whole stops right here, so none of the
# commands below run. NOTE(review): presumably the steps are meant to be
# copied and run one at a time - confirm.
exit 1;
# This script shows the steps needed to build a phone recognizer for TIMIT.
# This recipe follows the setup first described in:
# K. F. Lee and H. W. Hon, "Speaker-independent phone recognition using hidden Markov models," 1988
# where the training set is mapped to 48 phones and the results are presented
# on a 39-phone subset of that.
# Set WORKDIR to someplace with enough disk space. That is where MFCCs will
# get created, as well as the LM in ARPA & FST formats.
WORKDIR=/path/with/disk/space
# The recipe's scripts and configs are copied into WORKDIR and everything
# below runs from there.
cp -r conf local utils steps path.sh $WORKDIR
cd $WORKDIR
# Data preparation (file lists, transcripts, lexicon, phone bigram LM),
# followed by formatting of the language directories; /path/to/TIMIT is a
# placeholder for the corpus location.
local/timit_data_prep.sh --config-dir=$PWD/conf --corpus-dir=/path/to/TIMIT --work-dir=$WORKDIR
local/timit_format_data.sh --hmm-proto=conf/topo.proto --work-dir=$PWD
# Now make MFCC features.
mfccdir=$WORKDIR/data/MFCC
for x in train dev test; do
steps/make_mfcc.sh --num-jobs 6 data/$x exp/make_mfcc/$x $mfccdir
done
# Job submission commands handed to the steps/ scripts via --qcmd; the queue
# name and resource requests are specific to the author's grid.
decode_cmd="qsub -q all.q@@blade -l ram_free=500M,mem_free=500M"
train_cmd="qsub -q all.q@@blade -l ram_free=200M,mem_free=200M"
# --- Monophone system: train, build the decoding graph, decode and score
# the dev set.
steps/train_mono.sh --num-jobs 10 --qcmd "$train_cmd" \
data/train data/lang exp/mono
utils/mkgraph.sh --mono data/lang_test_phone_bg exp/mono exp/mono/graph_bg
steps/decode_deltas.sh --accwt 1.0 --beam 20.0 --latgen --num-jobs 6 \
--qcmd "$decode_cmd" exp/mono/graph_bg data/dev exp/mono/decode_dev_bg
utils/score_lats.sh exp/mono/decode_dev_bg exp/mono/graph_bg/words.txt \
data/dev conf/phones.60-48-39.map
# Pick the acoustic weight from the dev-set results: each wer_N file is
# reduced to the pair "N <WER>", the pairs are sorted numerically by WER, and
# the weight is 1/N of the entry with the lowest WER.
opt_accwt=`grep WER exp/mono/decode_dev_bg/wer_* \
| sed -e 's?.*wer_??' -e 's?:%WER??' -e 's?\[.*??' | sort -k2,2 -g \
| head -1 | awk '{print 1/$1}'`
# Decode and score the test set with that weight.
steps/decode_deltas.sh --accwt $opt_accwt --beam 20.0 --num-jobs 4 \
--qcmd "$decode_cmd" exp/mono/graph_bg data/test exp/mono/decode_test_bg
utils/score_text.sh exp/mono/decode_test_bg exp/mono/graph_bg/words.txt \
data/test conf/phones.60-48-39.map
# --- tri1 system: align the training data with the monophone model, train
# on those alignments, then repeat the dev/test decoding and scoring above.
steps/align_deltas.sh --num-jobs 10 --qcmd "$train_cmd" \
data/train data/lang exp/mono exp/mono_ali
steps/train_deltas.sh --num-jobs 10 --qcmd "$train_cmd" \
2000 10000 data/train data/lang exp/mono_ali exp/tri1
utils/mkgraph.sh data/lang_test_phone_bg exp/tri1 exp/tri1/graph_bg
steps/decode_deltas.sh --accwt 1.0 --beam 20.0 --latgen --num-jobs 6 \
--qcmd "$decode_cmd" exp/tri1/graph_bg data/dev exp/tri1/decode_dev_bg
utils/score_lats.sh exp/tri1/decode_dev_bg exp/tri1/graph_bg/words.txt \
data/dev conf/phones.60-48-39.map
# Same acoustic-weight selection as for the monophone system.
opt_accwt=`grep WER exp/tri1/decode_dev_bg/wer_* \
| sed -e 's?.*wer_??' -e 's?:%WER??' -e 's?\[.*??' | sort -k2,2 -g \
| head -1 | awk '{print 1/$1}'`
steps/decode_deltas.sh --accwt $opt_accwt --beam 20.0 --num-jobs 4 \
--qcmd "$decode_cmd" exp/tri1/graph_bg data/test exp/tri1/decode_test_bg
utils/score_text.sh exp/tri1/decode_test_bg exp/tri1/graph_bg/words.txt \
data/test conf/phones.60-48-39.map

Просмотреть файл

@ -0,0 +1,138 @@
#!/bin/bash
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# This script does training-data alignment given a model built using
# CMN + delta + delta-delta features. It splits the data into
# four chunks and does everything in parallel on the same machine.
# Its output, all in its own experimental directory, is (assuming
# you don't change the #jobs with the --num-jobs option),
# {0,1,2,3}.cmvn {0,1,2,3}.ali.gz, tree, final.mdl
# and final.occs (the last three are just copied from the source directory).
# Option to use precompiled graphs from last phase, if these
# are available (i.e. if they were built with the same data).
# These must be split into four pieces.
# Write the arguments to stderr (with 'echo -e' escape processing) and exit
# with status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Validate an integer command-line value and print it in canonical form.
# Arguments: $1 - the value, either bare ("12") or as "--switch=12"
# Outputs:   the integer, without leading zeros, on stdout
# Exits via error_exit (status 1) if the value is not a non-zero integer
# (an optional '-' followed by digits); zero is rejected, as before.
function readint () {
  local retval=${1##*=}  # In case --switch=ARG format was used
  # Strip ALL leading 0's. The previous ${retval#0*} removed only one, so a
  # value such as "007" was rejected as "not an integer".
  while [[ "$retval" == 0* ]]; do
    retval=${retval#0}
  done
  [[ "$retval" =~ ^-?[1-9][0-9]*$ ]] \
    || error_exit "Argument \"$retval\" not an integer."
  printf '%s\n' "$retval"
}
njobs=4          # Default number of jobs
qcmd=""          # Options for the submit_jobs.sh script
oldgraphs=false  # Set to true by --use-graphs

PROG=$(basename "$0")
# NOTE: $usage is deliberately expanded UNQUOTED below; word splitting removes
# the literal line breaks so that only the \n and \t escapes shape the output.
usage="Usage: $PROG [options] <data-dir> <lang-dir> <src-dir> <exp-dir>\n
e.g.: $PROG data/train data/lang exp/tri1 exp/tri1_ali\n\n
Options:\n
--help\t\tPrint this message and exit\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
--use-graphs\tReuse older graphs\n
";

while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips a leading space from the argument
    --help)
      echo -e $usage; exit 0 ;;
    --num-jobs)
      shift
      # readint runs in a command substitution, so its own 'exit' only ends
      # that subshell; without the explicit '|| exit 1' an invalid value left
      # njobs empty and the script carried on.
      njobs=$(readint "$1") || exit 1
      [ "$njobs" -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive."
      shift ;;
    --qcmd)
      shift; qcmd=" --qcmd=${1}"; shift ;;
    --use-graphs)
      oldgraphs=true; shift ;;
    -*)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *)
      break ;;  # end of options: interpreted as the data-dir
  esac
done

# Exactly the four positional arguments must remain.
if [ $# != 4 ]; then
  error_exit $usage
fi
# Pick up the tool locations if a path.sh is present in the current directory.
if [ -f path.sh ]; then
  . path.sh
fi

# Positional arguments.
data=$1
lang=$2
srcdir=$3
dir=$4

# How sym2int.pl should treat out-of-vocabulary words: map them to the symbol
# stored in $lang/oov.txt if that file exists, otherwise ignore them.
oov_opt='--ignore-oov'
if [ -f $lang/oov.txt ]; then
  oov_opt="--map-oov '$(cat $lang/oov.txt)'"
fi

mkdir -p $dir

# The tree, model and occupancies are copied from the source directory.
cp $srcdir/{tree,final.mdl,final.occs} $dir || exit 1;

scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

# (Re-)split the data if the split directory is missing or older than
# feats.scp.
if [ ! -d $data/split$njobs ] || [ $data/split$njobs -ot $data/feats.scp ]; then
  split_data.sh $data $njobs
fi

# CMVN statistics are computed per split; submit_jobs.sh is handed the
# command line with TASK_ID placeholders.
echo "Computing cepstral mean and variance statistics"
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/cmvnTASK_ID.log \
  compute-cmvn-stats --spk2utt=ark:$data/split$njobs/TASK_ID/spk2utt \
  scp:$data/split$njobs/TASK_ID/feats.scp ark:$dir/TASK_ID.cmvn \
  || error_exit "Computing CMN/CVN stats failed.";

# Align all training data using the supplied model.
echo "Aligning data from $data"

# Feature pipeline for the alignment: mean normalisation with the statistics
# computed above (--norm-vars=false), followed by add-deltas.
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
if $oldgraphs; then
  # Reuse the graphs found in the source directory. Each expected file is
  # checked in an explicit loop: brace expansion is performed before
  # parameter expansion, so the former "{1..$njobs}" was never expanded and
  # the check failed whenever --use-graphs was given.
  # NOTE(review): assumes the tasks are numbered 1..$njobs, as the original
  # check intended - confirm against submit_jobs.sh.
  for ((n = 1; n <= njobs; n++)); do
    [ -f "$srcdir/$n.fsts.gz" ] \
      || error_exit "Missing FSTs with --use-graphs option specified."
  done
  submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/alignTASK_ID.log \
    gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/final.mdl \
    "ark:gunzip -c $srcdir/TASK_ID.fsts.gz|" "$feats" "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
    || error_exit "Error doing alignment.";
else
  # compute integer form of transcripts.
  tra="ark:sym2int.pl $oov_opt --ignore-first-field $lang/words.txt $data/split$njobs/TASK_ID/text|";
  # We could just use gmm-align in the next line, but it's less efficient as
  # it compiles the training graphs one by one.
  # The '\|' hands a literal '|' to submit_jobs.sh as part of the command
  # line, instead of creating a pipeline in this script.
  submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/alignTASK_ID.log \
    compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
    gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/final.mdl \
    ark:- "$feats" "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
    || error_exit "Error doing alignment.";
fi

echo "Done aligning data."

Просмотреть файл

@ -0,0 +1,125 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and delta-delta plus
# cepstral mean subtraction features. Used, for example, to decode
# mono/ and tri1/
# This script just generates lattices for a single broken-up
# piece of the data.
# Write the arguments (joined by spaces, backslash escapes expanded) to
# stderr and terminate the current shell with status 1. When called from
# inside a command substitution only that subshell is terminated.
error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Validate a real number given in "$1" and print it unchanged on stdout.
# Accepts a bare value ("0.1") or the --switch=ARG form ("--beam=13.0");
# everything up to the last '=' is discarded first.
# Accepted syntax: optional sign, then digits with an optional fractional
# part ("30", "30.", "30.0") or a bare fraction (".5"), optionally followed
# by an exponent ("1e-3").
# On anything else error_exit is called; inside a command substitution that
# only ends the subshell, so callers should also check the exit status.
function readfloat () {
  local retval=${1/#*=/};  # In case --switch=ARG format was used
  local float_re='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  [[ "$retval" =~ $float_re ]] \
    || error_exit "Argument \"$retval\" not a real number."
  echo "$retval"
}
# Parse an integer from "$1" and print it in canonical form on stdout.
# Accepts a bare value ("4") or the --switch=ARG form ("--num-jobs=4");
# everything up to the last '=' is discarded first.
# An optional leading '-' and any number of leading zeros are allowed, so
# "0", "007" and "-4" all parse; the zeros are removed so that later
# arithmetic does not treat the value as octal.
# On invalid input error_exit is called; inside a command substitution that
# only ends the subshell, so callers should also check the exit status.
function readint () {
  local arg=${1/#*=/};  # In case --switch=ARG format was used
  local sign="" digits=$arg
  if [[ "$digits" == -* ]]; then
    sign="-"
    digits=${digits#-}
  fi
  [[ "$digits" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$arg\" not an integer."
  # Strip leading zeros one at a time, always keeping the last digit.
  while [[ "$digits" == 0?* ]]; do
    digits=${digits#0}
  done
  if [ "$digits" = "0" ]; then sign=""; fi  # never print "-0"
  echo "$sign$digits"
}
# Defaults for the command-line options (see the usage text below).
accwt=1.0
beam=30.0
latgen=0
njobs=4
qcmd=""  # Options for the submit_jobs.sh script

PROG=`basename $0`;
# The usage text is printed with "echo -e", which expands \n and \t.
usage="Usage: $PROG [options] <graph-dir> <data-dir> <decode-dir>\n
e.g.: $PROG exp/mono/graph_bg data/dev exp/mono/decode_dev_bg\n\n
Options:\n
--help\t\tPrint this message and exit\n
--accwt FLOAT\tScaling for acoustic likelihoods (default=$accwt).\n
--beam FLOAT\tDecoder beam (default=$beam)\n
--latgen\tGenerate lattices (off by default)\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
";

# Consume leading options; stop at the first non-option argument.
# readfloat/readint run inside a command substitution, so their error_exit
# only terminates the subshell: the "|| exit 1" makes an invalid value abort
# the script instead of continuing with an empty variable.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    --accwt)
      shift; accwt=`readfloat $1` || exit 1; shift ;;
    --beam)
      shift; beam=`readfloat $1` || exit 1; shift ;;
    --latgen) shift; latgen=1 ;;
    --num-jobs)
      shift; njobs=`readint $1` || exit 1;
      [ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
      shift ;;
    --qcmd)
      shift; qcmd="--qcmd=${1}"; shift ;;
    -*) error_exit "Unknown argument: $1, exiting\n$usage" ;;
    *) break ;;  # end of options: first positional argument reached
  esac
done

# Exactly three positional arguments are required.
if [ $# != 3 ]; then
  error_exit $usage;
fi
# Source path.sh from the current directory if it exists.
[ -f path.sh ] && . path.sh
# Positional arguments: <graph-dir> <data-dir> <decode-dir>.
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
mkdir -p $dir
# Abort early if any required input file is missing.
requirements="$data/feats.scp $srcdir/final.mdl $graphdir/HCLG.fst"
for f in $requirements; do
if [ ! -f $f ]; then
echo "decode_deltas.sh: no such file $f";
exit 1;
fi
done
# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
# Pipeline: compute-cmvn-stats | apply-cmvn --norm-vars=false | add-deltas.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# With more than one job, (re)create $data/split$njobs when it is missing or
# older than feats.scp, and rebuild the same pipeline on the per-job
# subdirectory. TASK_ID is kept literal here; presumably submit_jobs.sh
# replaces it with the job index -- confirm against that script.
if [ $njobs -gt 1 ]; then
if [ ! -d $data/split$njobs -o $data/split$njobs -ot $data/feats.scp ]; then
split_data.sh $data $njobs
fi
mydata=$data/split$njobs/TASK_ID
feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | add-deltas ark:- ark:- |"
fi
# Run the decoder through submit_jobs.sh. With --latgen, gmm-latgen-faster
# writes gzipped lattices to $dir/lat.TASK_ID.gz; otherwise gmm-decode-faster
# writes $dir/test.TASK_ID.tra.
if [ $latgen -eq 1 ]; then
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/decode.TASK_ID.log \
gmm-latgen-faster --max-active=7000 --beam=$beam --lattice-beam=6.0 \
--acoustic-scale=$accwt --word-symbol-table=$graphdir/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" \
"ark:|gzip -c > $dir/lat.TASK_ID.gz" || error_exit "Decoding failed.";
else
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/decode.TASK_ID.log \
gmm-decode-faster --beam=$beam --acoustic-scale=$accwt \
--word-symbol-table=$graphdir/words.txt $srcdir/final.mdl \
$graphdir/HCLG.fst "$feats" ark,t:$dir/test.TASK_ID.tra \
|| error_exit "Decoding failed.";
fi

111
egs/timit/s4/steps/make_mfcc.sh Executable file
Просмотреть файл

@ -0,0 +1,111 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from .. (one directory up from here)
# Emit the arguments on stderr (space-joined, with backslash escapes
# expanded) and exit with status 1. Inside a command substitution this ends
# only the subshell.
error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Parse an integer from "$1" and print it in canonical form on stdout.
# Both "4" and "--num-jobs=4" are accepted (text up to the last '=' is
# dropped). A leading '-' and leading zeros are allowed; the zeros are
# stripped, so "0" and "007" are valid and print as "0" and "7".
# Invalid input goes to error_exit; when this function runs inside a command
# substitution that only ends the subshell, so callers should test the
# exit status as well.
function readint () {
  local arg=${1/#*=/};  # In case --switch=ARG format was used
  local sign="" digits=$arg
  if [[ "$digits" == -* ]]; then
    sign="-"
    digits=${digits#-}
  fi
  [[ "$digits" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$arg\" not an integer."
  # Strip leading zeros one at a time, always keeping the last digit.
  while [[ "$digits" == 0?* ]]; do
    digits=${digits#0}
  done
  if [ "$digits" = "0" ]; then sign=""; fi  # never print "-0"
  echo "$sign$digits"
}
njobs=4   # Default number of jobs
stage=-4  # NOTE(review): not referenced anywhere else in this script.
qcmd=""   # Options for the submit_jobs.sh script

PROG=`basename $0`;
# The usage text is printed with "echo -e", which expands \n and \t.
usage="Usage: $PROG [options] <data-dir> <log-dir> <abs-path-to-mfccdir>\n\n
Options:\n
--help\t\tPrint this message and exit\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
";

# Consume leading options; stop at the first non-option argument.
# readint runs inside a command substitution, so its error_exit only ends the
# subshell: "|| exit 1" makes an invalid value abort the script rather than
# leaving njobs empty.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    --num-jobs)
      shift; njobs=`readint $1` || exit 1;
      [ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
      shift ;;
    --qcmd)
      shift; qcmd="--qcmd=${1}"; shift ;;
    -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *) break ;;  # end of options: interpreted as the data-dir
  esac
done

# Exactly three positional arguments are required.
if [ $# != 3 ]; then
  error_exit $usage;
fi
# Source path.sh from the current directory if it exists.
[ -f path.sh ] && . path.sh
# Positional arguments: <data-dir> <log-dir> <abs-path-to-mfccdir>.
data=$1
logdir=$2
mfccdir=$3
# use "name" as part of name of the archive.
name=`basename $data`
mkdir -p $mfccdir || exit 1;
mkdir -p $logdir || exit 1;
scp=$data/wav.scp
config=conf/mfcc.conf
# Both the wav list and the MFCC config must exist before any work starts.
required="$scp $config"
for f in $required; do
if [ ! -f $f ]; then
echo "make_mfcc.sh: no such file $f"
exit 1;
fi
done
# note: in general, the double-parenthesis construct in bash "((" is "C-style
# syntax" where we can get rid of the $ for variable names, and omit spaces.
# The "for" loop in this style is a special construct.
# Build the list of per-job scp files ($logdir/wav1.scp ... wav$njobs.scp)
# and hand it to split_scp.pl together with the full wav.scp.
split_scps=""
for ((n=1; n<=njobs; n++)); do
split_scps="$split_scps $logdir/wav$n.scp"
done
split_scp.pl $scp $split_scps || exit 1;
rm -f $logdir/.error.$name 2>/dev/null
# One compute-mfcc-feats job per split; each writes an .ark/.scp pair named
# after the job. TASK_ID is passed literally (presumably substituted by
# submit_jobs.sh -- confirm). On failure the tail of the job logs is
# appended to the error message.
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$logdir/make_mfcc.TASK_ID.log \
compute-mfcc-feats --verbose=2 --config=$config scp:$logdir/wavTASK_ID.scp \
ark,scp:$mfccdir/mfcc_$name.TASK_ID.ark,$mfccdir/mfcc_$name.TASK_ID.scp \
|| error_exit "Error producing mfcc features for $name:"`tail $logdir/make_mfcc.*.log`
# concatenate the .scp files together.
# Any existing feats.scp is removed first because the loop below appends.
rm $data/feats.scp 2>/dev/null
for ((n=1; n<=njobs; n++)); do
cat $mfccdir/mfcc_$name.$n.scp >> $data/feats.scp
done
# rm $logdir/wav*.scp
echo "Succeeded creating MFCC features for $name"

Просмотреть файл

@ -0,0 +1,256 @@
#!/bin/bash
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Triphone model training, using (e.g. MFCC) + delta + acceleration features and
# cepstral mean normalization. It starts from an existing directory (e.g.
# exp/mono), supplied as an argument, which is assumed to be built using the same
# type of features.
#
# This script starts from previously generated state-level alignments
# (in $alidir), e.g. generated by a previous monophone or triphone
# system. To build a context-dependent triphone system, we build
# decision trees that map a 3-phone phonetic context window to a
# pdf index. It's not really clear which is the right reference, but
# one is "Tree-based state tying for high accuracy acoustic modelling"
# by Steve Young et al.
# In a typical approach, there are decision trees for
# each monophone HMM-state (i.e. 3 per phone), and each one gets to
# ask questions about the left and right phone. These questions
# correspond to sets of phones, corresponding to phonetic classes
# (e.g. vowel, consonant, liquid, solar, ... ). In Kaldi, we prefer
# fully automatic algorithms, and anyway we're not sure where to get
# these types of lists, so we just generate the classes automatically.
# This is based on a top-down binary tree clustering of the phones
# (see "cluster-phones"), where we take single-Gaussian statistics for
# just the central state of each phone (assuming this to be more
# representative of the phones), and we get a tree structure on the
# phones; each class corresponds to a node of the tree (it contains all
# the phones that are children of that node). Note: you could
# replace questions.txt with something derived from manually written
# questions.
# Also, the roots of the tree correspond to classes of phones (typically
# corresponding to "real phones", because the actual phones may contain
# word-begin/end and stress information), and the tree gets to ask
# questions also about the central phone, and about the state in the HMM.
# After building the tree, we do a number of iterations of Gaussian
# Mixture Model training; on selected iterations we redo the Viterbi
# alignments (initially, these are taken from the previous system).
# The Gaussian mixture splitting, whereby we go from a single Gaussian
# per state to multiple Gaussians, is done on all iterations (although
# we stop doing this a few iterations before the end). We don't have
# a fixed number of Gaussians per state, but we have an overall target
# #Gaussians that's specified on each iteration, and we allocate
# the Gaussians among states according to a power-law where the #Gaussians
# is proportional to the count to the power 0.2. The target
# increases linearly during training [note: logarithmically seems more
# natural but didn't work as well.]
# Report an error: the arguments go to stderr (space-joined, backslash
# escapes expanded) and the current shell exits with status 1. In a command
# substitution only the subshell exits.
error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Parse an integer from "$1" and print it in canonical form on stdout.
# Both "2" and "--stage=2" are accepted (text up to the last '=' is dropped).
# A leading '-' and leading zeros are allowed; the zeros are stripped, so
# "--stage 0" is valid and "007" prints as "7".
# Invalid input goes to error_exit; when this function runs inside a command
# substitution that only ends the subshell, so callers should test the
# exit status as well.
function readint () {
  local arg=${1/#*=/};  # In case --switch=ARG format was used
  local sign="" digits=$arg
  if [[ "$digits" == -* ]]; then
    sign="-"
    digits=${digits#-}
  fi
  [[ "$digits" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$arg\" not an integer."
  # Strip leading zeros one at a time, always keeping the last digit.
  while [[ "$digits" == 0?* ]]; do
    digits=${digits#0}
  done
  if [ "$digits" = "0" ]; then sign=""; fi  # never print "-0"
  echo "$sign$digits"
}
njobs=4   # Default number of jobs
stage=-4  # Default starting stage (start with tree building)
qcmd=""   # Options for the submit_jobs.sh script

PROG=`basename $0`;
# The usage text is printed with "echo -e", which expands \n and \t.
usage="Usage: $PROG [options] <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>\n
e.g.: $PROG 2000 10000 data/train_si84 data/lang exp/mono_ali exp/tri1\n\n
Options:\n
--help\t\tPrint this message and exit\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
--stage INT\tStarting stage (e.g. -4 for tree building; 2 for iter 2; default=$stage)\n
";

# Consume leading options; stop at the first non-option argument.
# readint runs inside a command substitution, so its error_exit only ends the
# subshell: "|| exit 1" makes an invalid value abort the script rather than
# leaving njobs/stage empty.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    --num-jobs)
      shift; njobs=`readint $1` || exit 1;
      [ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
      shift ;;
    --qcmd)
      shift; qcmd=" --qcmd=${1}"; shift ;;
    --stage)
      shift; stage=`readint $1` || exit 1; shift ;;
    -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *) break ;;  # end of options: interpreted as num-leaves
  esac
done

# Exactly six positional arguments are required.
if [ $# != 6 ]; then
  error_exit $usage;
fi
# Source path.sh from the current directory if it exists.
[ -f path.sh ] && . path.sh
# Positional arguments:
#   <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>
numleaves=$1
totgauss=$2
data=$3
lang=$4
alidir=$5
dir=$6
if [ ! -f $alidir/final.mdl ]; then
echo "Error: alignment dir $alidir does not contain final.mdl"
exit 1;
fi
# Training configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="10 20 30";
silphonelist=`cat $lang/silphones.csl`
numiters=35 # Number of iterations of training
maxiterinc=25 # Last iter to increase #Gauss on.
# Start with one Gaussian per leaf and grow linearly towards $totgauss.
numgauss=$numleaves
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
# Option string for sym2int.pl: map OOVs to the symbol in $lang/oov.txt when
# that file exists, otherwise ignore them. The embedded single quotes are
# only meaningful where this string is re-parsed by a shell (it is used
# inside "ark:... |" pipe specifications below).
if [ -f $lang/oov.txt ]; then
oov_opt="--map-oov '"`cat $lang/oov.txt`"'"
else
oov_opt='--ignore-oov'
fi
mkdir -p $dir/log
# (Re)split the data when the split directory is missing or older than
# feats.scp.
if [ ! -d $data/split$njobs -o $data/split$njobs -ot $data/feats.scp ]; then
split_data.sh $data $njobs
fi
# for n in `get_splits.pl $njobs`; do
# Per-job feature pipeline; it reads the CMVN stats stored in $alidir.
# TASK_ID is kept literal (presumably substituted by submit_jobs.sh --
# confirm).
featspart="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$njobs/TASK_ID/utt2spk ark:$alidir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
# Stage -3: accumulate tree statistics per job, then sum them into
# $dir/treeacc.
if [ $stage -le -3 ]; then
# The next stage assumes we won't need the context of silence, which
# assumes something about $lang/roots.txt, but it seems pretty safe.
echo "Accumulating tree stats"
# for n in `get_splits.pl $njobs`; do
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/acc_tree.TASK_ID.log \
acc-tree-stats --ci-phones=$silphonelist $alidir/final.mdl "$featspart" \
"ark:gunzip -c $alidir/TASK_ID.ali.gz|" $dir/TASK_ID.treeacc \
|| error_exit "Error accumulating tree stats";
sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log \
|| error_exit "Error summing tree stats.";
rm $dir/*.treeacc
fi
# Stage -2: generate questions, build the tree, and initialize 1.mdl.
if [ $stage -le -2 ]; then
# preparing questions, roots file...
echo "Computing questions for tree clustering"
# NOTE(review): the subshell's exit status is that of its last command
# only, so a failure in cluster-phones or compile-questions would not
# trigger the error_exit below -- confirm whether that is intended.
( sym2int.pl $lang/phones.txt $lang/phonesets_cluster.txt > $dir/phonesets.txt
cluster-phones $dir/treeacc $dir/phonesets.txt $dir/questions.txt \
2> $dir/log/questions.log
[ -f $lang/extra_questions.txt ] && \
sym2int.pl $lang/phones.txt $lang/extra_questions.txt \
>> $dir/questions.txt
compile-questions $lang/topo $dir/questions.txt $dir/questions.qst \
2>$dir/log/compile_questions.log
sym2int.pl --ignore-oov $lang/phones.txt $lang/roots.txt > $dir/roots.txt
) || error_exit "Error in generating questions for tree clustering."
echo "Building tree"
submit_jobs.sh "$qcmd" --log=$dir/log/train_tree.log \
build-tree --verbose=1 --max-leaves=$numleaves $dir/treeacc $dir/roots.txt \
$dir/questions.qst $lang/topo $dir/tree \
|| error_exit "Error in building tree.";
gmm-init-model --write-occs=$dir/1.occs \
$dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log \
|| error_exit "Error in initializing the model.";
gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
2>$dir/log/mixup.log || error_exit "Error mixing up to $numgauss Gaussains";
rm $dir/treeacc
fi
# Stage -1: convert the alignments from $alidir to the new tree/model.
if [ $stage -le -1 ]; then
# Convert alignments in $alidir, to use as initial alignments.
# This assumes that $alidir was split in $njobs pieces, just like the
# current dir. Just do this locally-- it's very fast.
echo "Converting old alignments"
# for n in `get_splits.pl $njobs`; do
# Note: unlike the other submit_jobs.sh calls, "$qcmd" is not passed here.
submit_jobs.sh --njobs=$njobs --log=$dir/log/convertTASK_ID.log \
convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
"ark:gunzip -c $alidir/TASK_ID.ali.gz|" \
"ark:|gzip -c >$dir/TASK_ID.ali.gz" \
|| error_exit "Error converting old alignments.";
fi
# Stage 0: compile one gzipped archive of training graphs per job.
if [ $stage -le 0 ]; then
# Make training graphs (this is split in $njobs parts).
echo "Compiling training graphs"
# for n in `get_splits.pl $njobs`; do
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/compile_graphsTASK_ID.log \
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
"ark:sym2int.pl $oov_opt --ignore-first-field $lang/words.txt < $data/split$njobs/TASK_ID/text |" \
"ark:|gzip -c >$dir/TASK_ID.fsts.gz" \
|| error_exit "Error compiling training graphs";
fi
# Main training loop: passes 1 .. numiters-1. Pass x reads $dir/$x.mdl and
# writes $dir/$((x+1)).mdl and .occs. Passes below $stage are skipped, but
# the Gaussian target is still advanced so that a resumed run uses the same
# schedule.
x=1
while [ $x -lt $numiters ]; do
  echo Pass $x
  if [ $stage -le $x ]; then
    # Re-align only on the iterations listed in $realign_iters.
    if echo $realign_iters | grep -w $x >/dev/null; then
      echo "Aligning data"
      submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/align.$x.TASK_ID.log \
        gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/$x.mdl \
        "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
        "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
        || error_exit "Error aligning data on iteration $x";
    fi  # Realign iters
    # Accumulate statistics per job ...
    submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/acc.$x.TASK_ID.log \
      gmm-acc-stats-ali $dir/$x.mdl "$featspart" \
      "ark,s,cs:gunzip -c $dir/TASK_ID.ali.gz|" $dir/$x.TASK_ID.acc \
      || error_exit "Error accumulating stats on iteration $x";
    # ... then sum them and re-estimate the model in a single job.
    submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log \
      gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl \
      "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl \
      || error_exit "Error in pass $x extimation.";
    # Remove this pass's inputs now that the next model exists. The paths
    # must all be under $dir: the earlier form named "r/$x.mdl" and a file
    # literally called "rm" in the working directory.
    rm -f $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs
  fi  # Completed a training stage.
  if [[ $x -le $maxiterinc ]]; then
    numgauss=$[$numgauss+$incgauss];
  fi
  x=$[$x+1];
done

# After the loop x equals numiters, i.e. the index of the last model written.
( cd $dir; rm -f final.{mdl,occs}; ln -s $x.mdl final.mdl; \
  ln -s $x.occs final.occs; )

# Print out summary of the warning messages.
for x in $dir/log/*.log; do
  n=`grep WARNING $x | wc -l`;
  if [ $n -ne 0 ]; then echo $n warnings in $x; fi;
done

echo Done

202
egs/timit/s4/steps/train_mono.sh Executable file
Просмотреть файл

@ -0,0 +1,202 @@
#!/bin/bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Flat start and monophone training, with delta-delta features.
# This script applies cepstral mean normalization (per speaker).
# Print the arguments to stderr (space-joined, backslash escapes expanded)
# and leave the current shell with status 1. Inside a command substitution
# only the subshell is left.
error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Parse an integer from "$1" and print it in canonical form on stdout.
# Both "2" and "--stage=2" are accepted (text up to the last '=' is dropped).
# A leading '-' and leading zeros are allowed; the zeros are stripped, so
# "--stage 0" is valid and "007" prints as "7".
# Invalid input goes to error_exit; when this function runs inside a command
# substitution that only ends the subshell, so callers should test the
# exit status as well.
function readint () {
  local arg=${1/#*=/};  # In case --switch=ARG format was used
  local sign="" digits=$arg
  if [[ "$digits" == -* ]]; then
    sign="-"
    digits=${digits#-}
  fi
  [[ "$digits" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$arg\" not an integer."
  # Strip leading zeros one at a time, always keeping the last digit.
  while [[ "$digits" == 0?* ]]; do
    digits=${digits#0}
  done
  if [ "$digits" = "0" ]; then sign=""; fi  # never print "-0"
  echo "$sign$digits"
}
njobs=4   # Default number of jobs
stage=-4  # Default starting stage (start with calculating CMN/CVN stats)
qcmd=""   # Options for the submit_jobs.sh script

PROG=`basename $0`;
# The usage text is printed with "echo -e", which expands \n and \t.
usage="Usage: $PROG [options] <data-dir> <lang-dir> <exp-dir>\n
e.g.: $PROG data/train.1k data/lang exp/mono\n\n
Options:\n
--help\t\tPrint this message and exit\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
--stage INT\tStarting stage (e.g. -4 for CMN/CVN stats; 2 for iter 2; default=$stage)\n
";

# Consume leading options; stop at the first non-option argument.
# readint runs inside a command substitution, so its error_exit only ends the
# subshell: "|| exit 1" makes an invalid value abort the script rather than
# leaving njobs/stage empty.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    --num-jobs)
      shift; njobs=`readint $1` || exit 1;
      [ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
      shift ;;
    --qcmd)
      shift; qcmd="--qcmd=${1}"; shift ;;
    --stage)
      shift; stage=`readint $1` || exit 1; shift ;;
    -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *) break ;;  # end of options: interpreted as the data-dir
  esac
done

# Exactly three positional arguments are required.
if [ $# != 3 ]; then
  error_exit $usage;
fi
# Positional arguments: <data-dir> <lang-dir> <exp-dir>.
data=$1
lang=$2
dir=$3
# Source path.sh from the current directory if it exists.
[ -f path.sh ] && . path.sh
# Configuration:
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
numiters=40 # Number of iterations of training
maxiterinc=30 # Last iter to increase #Gauss on.
numgauss=300 # Initial num-Gauss (must be more than #states=3*phones).
totgauss=1000 # Target #Gaussians.
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
realign_iters="1 2 3 4 5 6 7 8 9 10 12 14 16 18 20 23 26 29 32 35 38";
# Option string for sym2int.pl: map OOVs to the symbol in $lang/oov.txt when
# that file exists, otherwise ignore them. The embedded single quotes are
# only meaningful where this string is re-parsed by a shell (it is used
# inside an "ark:... |" pipe specification below).
if [ -f $lang/oov.txt ]; then
oov_opt="--map-oov '"`cat $lang/oov.txt`"'"
else
oov_opt='--ignore-oov'
fi
mkdir -p $dir/log
# (Re)split the data when the split directory is missing or older than
# feats.scp.
if [ ! -d $data/split$njobs -o $data/split$njobs -ot $data/feats.scp ]; then
split_data.sh $data $njobs
fi
# Stage -3: per-job CMVN statistics, written to $dir/TASK_ID.cmvn.
if [ $stage -le -3 ]; then
echo "Computing cepstral mean and variance statistics"
# for n in `get_splits.pl $njobs`; do # do this locally; it's fast.
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/cmvnTASK_ID.log \
compute-cmvn-stats --spk2utt=ark:$data/split$njobs/TASK_ID/spk2utt \
scp:$data/split$njobs/TASK_ID/feats.scp ark:$dir/TASK_ID.cmvn \
|| error_exit "Computing CMN/CVN stats failed.";
fi
# Feature pipeline over the whole data set (all *.cmvn files concatenated);
# used only for model initialization below.
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $dir/*.cmvn|\" scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# for n in `get_splits.pl $njobs`; do
# for n in `seq 1 $njobs`; do
# Per-job feature pipeline. TASK_ID is kept literal (presumably substituted
# by submit_jobs.sh -- confirm).
featspart="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$njobs/TASK_ID/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
# Stage -2: flat-start initialization, producing $dir/0.mdl and $dir/tree.
if [ $stage -le -2 ]; then
echo "Initializing monophone system."
if [ -f $lang/phonesets_mono.txt ]; then
echo "Using shared phones from $lang/phonesets_mono.txt"
# In recipes with stress and position markers, this pools together
# the stats for the different versions of the same phone (also for
# the various silence phones).
sym2int.pl $lang/phones.txt $lang/phonesets_mono.txt > $dir/phonesets.int
shared_phones_opt="--shared-phones=$dir/phonesets.int"
fi
# $shared_phones_opt is deliberately unquoted: when the file above is absent
# it is unset and expands to nothing. The training features are limited by
# subset-feats --n=10.
gmm-init-mono $shared_phones_opt \
"--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo 39 \
$dir/0.mdl $dir/tree 2> $dir/log/init.log \
|| error_exit "Monophone model initialization failed.";
fi
# Stage -1: compile one gzipped archive of training graphs per job.
if [ $stage -le -1 ]; then
echo "Compiling training graphs"
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/compile_graphsTASK_ID.log \
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
"ark:sym2int.pl $oov_opt --ignore-first-field $lang/words.txt < $data/split$njobs/TASK_ID/text|" \
"ark:|gzip -c >$dir/TASK_ID.fsts.gz" \
|| error_exit "Error compiling training graphs.";
fi
# Stage 0: equal alignment piped straight into stats accumulation, then the
# first model update (0.mdl -> 1.mdl).
if [ $stage -le 0 ]; then
echo "Aligning data equally (pass 0)"
# for n in `get_splits.pl $njobs`; do
# The escaped pipe (\|) is handed to submit_jobs.sh as an argument rather
# than being interpreted by this shell.
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/align.0.TASK_ID.log \
align-equal-compiled "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
ark,t,f:- \| \
gmm-acc-stats-ali --binary=true $dir/0.mdl "$featspart" \
ark:- $dir/0.TASK_ID.acc \
|| error_exit "Error in pass 0 accumulation";
# In the following steps, the --min-gaussian-occupancy=3 option is important,
# otherwise we cannot est "rare" phones and later on, they never align properly.
gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss \
$dir/0.mdl "gmm-sum-accs - $dir/0.*.acc|" $dir/1.mdl \
2> $dir/log/update.0.log || error_exit "Error in pass 0 estimation.";
rm $dir/0.*.acc
fi # Finished 0'th training iteration.
# Main training loop: passes 1 .. numiters-1. Pass x reads $dir/$x.mdl and
# writes $dir/$((x+1)).mdl and .occs, then deletes its own inputs. Passes
# below $stage are skipped, but the Gaussian target and the beam are still
# advanced so that a resumed run follows the same schedule.
beam=6  # will change to 10 below after 1st pass
for ((x = 1; x < numiters; x++)); do
  echo "Pass $x"
  if [ $stage -le $x ]; then
    # Re-align only on the iterations listed in $realign_iters.
    if echo $realign_iters | grep -w $x >/dev/null; then
      echo "Aligning data"
      submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/align.$x.TASK_ID.log \
        gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$((beam * 4)) \
        $dir/$x.mdl "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
        "ark,t:|gzip -c >$dir/TASK_ID.ali.gz" \
        || error_exit "Error in pass $x alignment.";
    fi

    # Per-job statistics accumulation.
    submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/acc.$x.TASK_ID.log \
      gmm-acc-stats-ali $dir/$x.mdl "$featspart" \
      "ark:gunzip -c $dir/TASK_ID.ali.gz|" $dir/$x.TASK_ID.acc \
      || error_exit "Error in pass $x accumulation.";

    # Single-job model update from the summed accumulators.
    submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log \
      gmm-est --write-occs=$dir/$((x + 1)).occs --mix-up=$numgauss $dir/$x.mdl \
      "gmm-sum-accs - $dir/$x.*.acc|" $dir/$((x + 1)).mdl \
      || error_exit "Error in pass $x extimation.";

    rm -f $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs
  fi

  if [ $x -le $maxiterinc ]; then
    numgauss=$((numgauss + incgauss))
  fi
  beam=10
done

# Here x equals numiters, the index of the last model written.
(
  cd $dir
  rm -f final.{mdl,occs}
  ln -s $x.mdl final.mdl
  ln -s $x.occs final.occs
)

# Print out summary of the warning messages.
for logfile in $dir/log/*.log; do
  nwarn=$(grep WARNING $logfile | wc -l)
  if [ $nwarn -ne 0 ]; then
    echo $nwarn warnings in $logfile
  fi
done

echo Done
# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl "ark:gunzip -c $dir/0.ali.gz|" | head -4

Просмотреть файл

@ -0,0 +1,58 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Adds some specified number of disambig symbols to a symbol table.
# Adds these as #1, #2, etc.
# If the --include-zero option is specified, includes an extra one
# #0.
# Copy the symbol table given as the first argument to stdout and append
# num_extra disambiguation symbols "#1", "#2", ... numbered consecutively
# after the id found on the table's last line. With --include-zero an extra
# "#0" entry is written first and takes the next id.

$include_zero = ($ARGV[0] eq "--include-zero") ? 1 : 0;
if ($include_zero) {
  shift @ARGV;
}

@ARGV == 2
  || die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";

($input, $nsyms) = @ARGV;

open(F, "<$input") || die "Opening file $input";
while (<F>) {
  @fields = split(" ", $_);
  die "Bad line $_" unless @fields == 2;   # expect "<symbol> <id>"
  $lastsym = $fields[1];                   # remember the most recent id
  print;
}

defined($lastsym) || die "Empty symbol file?";

if ($include_zero) {
  $lastsym++;
  print "#0 $lastsym\n";
}

# Ids continue from $lastsym (already advanced past #0 when it was added).
for ($n = 1; $n <= $nsyms; $n++) {
  $id = $n + $lastsym;
  print "#$n $id\n";
}

Просмотреть файл

@ -0,0 +1,101 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Adds disambiguation symbols to a lexicon.
# Outputs still in the normal lexicon format.
# Disambig syms are numbered #1, #2, #3, etc. (#0
# reserved for symbol in grammar).
# Outputs the number of disambig syms to the standard output.
# Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt
# Reads a lexicon (one "word phone1 phone2 ..." entry per line), writes it to
# the second file with disambiguation symbols appended where needed, and
# prints the largest disambiguation index used to stdout.

if(@ARGV != 2) {
  die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt "
}

$lexfn = shift @ARGV;
$lexoutfn = shift @ARGV;

open(L, "<$lexfn") || die "Error opening lexicon $lexfn";

# (1) Read in the lexicon, normalizing whitespace to single spaces.
@L = ( );
while(<L>) {
  @A = split(" ", $_);
  push @L, join(" ", @A);
}
close(L);

# (2) Work out the count of each phone-sequence in the
# lexicon.
foreach $l (@L) {
  @A = split(" ", $l);
  shift @A; # Remove word.
  $count{join(" ",@A)}++;
}

# (3) For each left sub-sequence of each phone-sequence, note down
# that it exists (for identifying prefixes of longer strings).
foreach $l (@L) {
  @A = split(" ", $l);
  shift @A; # Remove word.
  while(@A > 0) {
    pop @A;  # Remove last phone
    $issubseq{join(" ",@A)} = 1;
  }
}

# (4) For each entry in the lexicon:
#  if the phone sequence is unique and is not a
#  prefix of another word, no disambig symbol.
#  Else output #1, or #2, #3, ... if the same phone-seq
#  has already been assigned a disambig symbol.
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";

$max_disambig = 0;
foreach $l (@L) {
  @A = split(" ", $l);
  $word = shift @A;
  $phnseq = join(" ",@A);
  if(!defined $issubseq{$phnseq}
     && $count{$phnseq}==1) {
    ;  # Do nothing.
  } else {
    if($phnseq eq "") { # need disambig symbols for the empty string
      # that are not used anywhere else.
      $max_disambig++;
      $reserved{$max_disambig} = 1;
      $phnseq = "#$max_disambig";
    } else {
      $curnumber = $disambig_of{$phnseq};
      # Test the scalar itself. "defined{$curnumber}" tested an anonymous
      # hash, which is always defined, so this default was never applied and
      # the code relied on undef++ yielding 1.
      if(!defined($curnumber)) { $curnumber = 0; }
      $curnumber++; # now 1 or 2, ...
      while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
      if($curnumber > $max_disambig) {
        $max_disambig = $curnumber;
      }
      $disambig_of{$phnseq} = $curnumber;
      $phnseq = $phnseq . " #" . $curnumber;
    }
  }
  print O "$word\t$phnseq\n";
}
# Closing explicitly surfaces buffered write errors (e.g. a full disk).
close(O) || die "Closing lexicon file $lexoutfn";

print $max_disambig . "\n";

145
egs/timit/s4/utils/decode.sh Executable file
Просмотреть файл

@ -0,0 +1,145 @@
#!/bin/bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Send the arguments to stderr (space-joined, backslash escapes expanded)
# and exit the current shell with status 1. Within a command substitution
# only the subshell exits.
error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# Parse an integer from "$1" and print it in canonical form on stdout.
# Both "4" and "--num-jobs=4" are accepted (text up to the last '=' is
# dropped). A leading '-' and leading zeros are allowed; the zeros are
# stripped, so "0" and "007" are valid and print as "0" and "7".
# Invalid input goes to error_exit; when this function runs inside a command
# substitution that only ends the subshell, so callers should test the
# exit status as well.
function readint () {
  local arg=${1/#*=/};  # In case --switch=ARG format was used
  local sign="" digits=$arg
  if [[ "$digits" == -* ]]; then
    sign="-"
    digits=${digits#-}
  fi
  [[ "$digits" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$arg\" not an integer."
  # Strip leading zeros one at a time, always keeping the last digit.
  while [[ "$digits" == 0?* ]]; do
    digits=${digits#0}
  done
  if [ "$digits" = "0" ]; then sign=""; fi  # never print "-0"
  echo "$sign$digits"
}
# Resolve the directory named by "$1" to an absolute path and print it.
# Accepts a bare path or the --switch=ARG form (text up to the last '=' is
# dropped). Calls error_exit when the argument is not a directory or cannot
# be entered; inside a command substitution that only ends the subshell, so
# callers should test the exit status as well.
function read_dirname () {
  local dir_name=${1/#*=/};  # In case --switch=ARG format was used
  [ -d "$dir_name" ] || error_exit "Argument '$dir_name' not a directory";
  # Declare and assign separately so that a failing cd is not masked by the
  # exit status of "local"; quote the path so names with spaces work.
  local abs_path
  abs_path=$(cd -- "$dir_name" 2>/dev/null && pwd) \
    || error_exit "Cannot change to directory '$dir_name'";
  echo "$abs_path"
}
orig_args="$*"  # kept for the error message if the decoding script fails

njobs=""  # Total number of jobs unset by default. Will set to #speakers (if
          # using a grid) or 4 (if not), unless specified by user.
lang=""   # Option for sclite scoring (off by default)
opts=""
qcmd=""   # Options for the submit_jobs.sh script

PROG=`basename $0`;
# The usage text is printed with "echo -e", which expands \n and \t.
usage="Usage: $PROG [options] <decode_script> <graph-dir> <data-dir> <decode-dir> [extra-args...]\n\n
Options:\n
--help\t\tPrint this message and exit\n
-l DIR\t\tDirectory to find L_align.fst (needed for sclite scoring)\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--opts STRING\tOptions for the decoder script\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
";

# Consume leading options; stop at the first non-option argument.
# read_dirname/readint run inside a command substitution, so their error_exit
# only ends the subshell: "|| exit 1" makes an invalid value abort the script
# instead of continuing with an empty variable.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    -l)
      shift; lang=`read_dirname $1` || exit 1;
      [ ! -f "$lang/phones_disambig.txt" -o ! -f "$lang/L_align.fst" ] && \
        error_exit "Invalid argument to -l option; expected $lang/phones_disambig.txt and $lang/L_align.fst to exist."
      shift ;;
    --num-jobs)
      shift; njobs=`readint $1` || exit 1;
      [ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
      shift ;;
    --opts)
      shift; opts="$1"; shift ;;
    --qcmd)
      shift; qcmd="--qcmd=${1}"; shift ;;
    --stage)
      # NOTE(review): accepted but absent from the usage text, and $stage is
      # not read anywhere else in this script.
      shift; stage=`readint $1` || exit 1; shift ;;
    -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *) break ;;  # end of options: interpreted as the script to execute
  esac
done

# At least four positional arguments are required; anything beyond the
# fourth is forwarded to the decoding script.
if [ $# -lt 4 ]; then
  error_exit $usage;
fi
script=$1
graphdir=$2
data=$3
dir=$4
# Make "dir" an absolute pathname.
dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
mkdir -p $dir || exit 1
shift;shift;shift;shift;
# Remaining args will be supplied to decoding script.
extra_args=$*
[ -f path.sh ] && . path.sh
for file in $script $scp $data/utt2spk; do
if [ ! -f "$file" ]; then
echo "decode.sh: no such file $file"
exit 1
fi
done
if [ ! -f $graphdir/HCLG.fst -a ! -f $graphdir/G.fst ]; then
# Note: most scripts expect HCLG.fst in graphdir, but the
# "*_fromlats.sh" script(s) require(s) a "lang" dir in that
# position
echo No such file: $graphdir/HCLG.fst or $graphdir/G.fst
exit 1;
fi
# Choose the number of jobs when the user gave no --num-jobs: 4 for a local
# run, otherwise the number of lines printed by utt2spk_to_spk2utt.pl.
if [ -z "$njobs" ]; then
  if [ -n "$qcmd" ]; then
    njobs=$(utt2spk_to_spk2utt.pl $data/utt2spk | wc -l)
  else
    njobs=4
  fi
fi
echo "Decoding with num-jobs = $njobs"

# (Re)split the data when running more than one job, when the split directory
# is missing, or when it is older than feats.scp.
if [[ $njobs -gt 1 || ! -d $data/split$njobs || \
      $data/split$njobs -ot $data/feats.scp ]]; then
  split_data.sh $data $njobs
fi

# Run the decoding script once per job; submit_jobs.sh substitutes TASK_ID in
# both the command and the log file name.
if ! submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/partTASK_ID.log \
       $script $opts -j $njobs TASK_ID $graphdir $data $dir $extra_args; then
  error_exit "Error in decoding script: command was decode.sh $orig_args"
fi
# Score whatever the decoding jobs produced in $dir:
#   - lattices (lat.*.gz): sclite scoring if -l <lang> was given, otherwise
#     scoring against $graphdir/words.txt;
#   - plain hypotheses (*.txt): text scoring;
#   - nothing: report an error.
if ls $dir/lat.*.gz >&/dev/null; then
  if [ -n "$lang" ]; then
    # sclite scoring: $lang directory supplied only for this reason.
    [ ! -f $data/stm ] && \
      error_exit "Expected $data/stm to exist (-l only used for sclite scoring)"
    score_lats_ctm.sh $dir $lang $data || \
      error_exit "Error in scoring of lattices using sclite."
  else
    score_lats.sh $dir $graphdir/words.txt $data || \
      error_exit "Error in scoring of latices.";
  fi
elif ls $dir/*.txt >&/dev/null; then
  score_text.sh $dir $data || error_exit "Error in scoring of hypotheses.";
else
  # Was misspelled "eror_exit", which only produced "command not found"
  # instead of the intended error message and exit.
  error_exit "No output found in $dir, not scoring.";
fi

Просмотреть файл

@ -0,0 +1,23 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script replaces epsilon with #0 on the input side only, of the G.fst
# acceptor.
# For every input line of the form "<int> <int> <eps> ...", rewrite the
# <eps> that follows the two leading integers as #0; all other text is copied
# through untouched.
while (defined(my $line = <>)) {
  $line =~ s/^(\d+\s+\d+\s+)<eps>(\s+)/${1}#0${2}/;
  print $line;
}

Просмотреть файл

@ -0,0 +1,41 @@
#!/usr/bin/perl -w
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script takes a list of utterance-ids or any file whose first field
# of each line is an utterance-id, and filters an scp
# file (or any file whose first field is an utterance id), printing
# out only those lines whose first field is in id_list.
# Exactly one or two arguments: the id list and, optionally, the file to filter
# (standard input is used when the second one is absent).
unless (@ARGV == 1 || @ARGV == 2) {
  die "Usage: filter_scp.pl id_list [in.scp] > out.scp ";
}

my $idlist = shift @ARGV;
my %seen;

# Remember the first field of every line of the id list.
open(F, "<$idlist") || die "Could not open id-list file $idlist";
while (<F>) {
  my ($id) = split;
  defined($id) || die "Invalid id-list file line $_";
  $seen{$id} = 1;
}

# Copy through only those lines whose first field was in the id list.
while (<>) {
  my ($id) = split;
  defined($id) || die "Invalid scp file line $_";
  print $_ if $seen{$id};
}

90
egs/timit/s4/utils/int2sym.pl Executable file
Просмотреть файл

@ -0,0 +1,90 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Command-line handling for int2sym.pl.
#   --ignore-noninteger   pass non-integer tokens through unchanged
#   --ignore-first-field  copy the first field of each line through unchanged
#   --field N             convert only the N-th (one-based) field of each line
# followed by the symbol-table file and, optionally, the input file(s).
$ignore_noninteger = 0;
$ignore_first_field = 0;
$field = -1;
# Two passes so that two options can be given in either order.
for($x = 0; $x < 2; $x++) {
  if($ARGV[0] eq "--ignore-noninteger") { $ignore_noninteger = 1; shift @ARGV; }
  if($ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
  if($ARGV[0] eq "--field") {
    shift @ARGV; $field = $ARGV[0]+0; shift @ARGV;
    if ($field < 1) { die "Bad argument to --field option: $field"; }
  }
}
if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
$zfield = $field-1; # Change to zero-based indexing.

$symtab = shift @ARGV;
if(!defined $symtab) {
  # The message used to name sym2int.pl and did not mention the options.
  die "Usage: int2sym.pl [--ignore-noninteger] [--ignore-first-field] [--field N] symtab [input] > output\n";
}

# Load the symbol table: each line is "<symbol> <integer>"; build the
# integer -> symbol map.
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while(<F>) {
  @A = split(" ", $_);
  @A == 2 || die "bad line in symbol table file: $_";
  $int2sym{$A[1]} = $A[0];
}
close(F);
# int2sym($token, $pos): map one integer token to its symbol.
#   $token  the field to convert
#   $pos    zero-based position of the field on the line (only used to pick
#           the more helpful of the two error messages)
# Returns the symbol from %int2sym.  A non-integer token is returned unchanged
# when --ignore-noninteger is in effect; otherwise the script dies.  An integer
# that is missing from the symbol table is always fatal.
sub int2sym {
  my $a = shift @_;
  my $pos = shift @_;
  if($a !~ m:^\d+$:) { # not all digits..
    if($ignore_noninteger) {
      # Hand the token back instead of printing it and calling "next" from
      # inside the sub: "next" escaped into the caller's loop, which in
      # --field mode silently dropped the rest of the input line.
      return $a;
    } else {
      if($pos == 0) {
        die "int2sym.pl: found noninteger token $a (try --ignore-first-field)\n";
      } else {
        die "int2sym.pl: found noninteger token $a (try --ignore-noninteger if valid input)\n";
      }
    }
  }
  my $s = $int2sym{$a};
  if(!defined ($s)) {
    die "int2sym.pl: integer $a not in symbol table $symtab.";
  }
  return $s;
}
$error = 0;

# Convert the input line by line.  Output fields are separated by single
# spaces and every input line yields exactly one output line.
while (defined(my $line = <>)) {
  my @fields = split(" ", $line);

  # With --ignore-first-field the leading key is echoed as-is.
  if ($ignore_first_field) {
    print shift(@fields) . " ";
  }

  if ($field == -1) {
    # No --field: convert every remaining field.
    for (my $pos = 0; $pos <= $#fields; $pos++) {
      print int2sym($fields[$pos], $pos) . " ";
    }
  } else {
    # --field N: convert just that field (when the line has it) and print the
    # line re-joined with spaces.
    if ($zfield >= 0 && $zfield <= $#fields) {
      $fields[$zfield] = int2sym($fields[$zfield], $zfield);
    }
    print join(" ", @fields);
  }
  print "\n";
}

Просмотреть файл

@ -0,0 +1,122 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# makes lexicon FST (no pron-probs involved).
# Accepted argument lists:
#   lexicon.txt
#   lexicon.txt silprob silphone
#   lexicon.txt silprob silphone sil_disambig_sym
# The FST is written to standard output.  The usage text used to show
# "lexiconfst.txt" as a positional argument; passing one would have been
# taken as the disambiguation symbol.
if(@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt"
}
$lexfn = shift @ARGV;
if(@ARGV == 0) {
  $silprob = 0.0;
} elsif (@ARGV == 2){
  ($silprob,$silphone) = @ARGV;
} else {
  ($silprob,$silphone,$sildisambig) = @ARGV;
}
# Costs are negative natural logs of the probabilities of taking / not taking
# the optional silence.
if($silprob != 0.0) {
  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
  $silcost = -log($silprob);
  $nosilcost = -log(1.0 - $silprob);
}
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
sub is_sil {
  # Return true (1) if provided with a phone-sequence
  # that means silence.
  # @_ is the parameters of the function
  # This function returns true if @_ equals ( $silphone )
  # or something of the form ( "#0", $silphone, "#1" )
  # where the "#0" and "#1" are disambiguation symbols.
  # (The three-element case used to test $_[0] twice and never looked at the
  # last element.)
  return ( @_ == 1 && $_[0] eq $silphone ||
           (@_ == 3 && $_[1] eq $silphone &&
            $_[0] =~ m/^\#\d+$/ &&
            $_[2] =~ m/^\#\d+$/));
}
# Emit the lexicon FST in text form: one arc per line as
#   <from-state> <to-state> <input phone> <output word or <eps>> [<cost>]
# and a final line "<state> 0" for the final state.  Each lexicon line is
# "<word> <phone1> <phone2> ..."; the word is emitted on the first arc of its
# pronunciation and <eps> on the remaining ones.
if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
  $loopstate = 0;
  # Was misspelled "$nexststate", leaving $nextstate undefined so that the
  # first intermediate state came out as 0 and collided with the loop state.
  $nextstate = 1; # next unallocated state.
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;
    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
      } else {
        $ns = $loopstate; # last phone: return to the loop state.
      }
      print "$s\t$ns\t$p\t$word_or_eps\n";
      $word_or_eps = "<eps>";
      $s = $ns;
    }
  }
  print "$loopstate\t0\n"; # final-cost.
} else { # have silence probs.
  $startstate = 0;
  $loopstate = 1;
  $silstate = 2; # state from where we go to loopstate after emitting silence.
  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
  if (!defined $sildisambig) {
    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
    $nextstate = 3;
  } else {
    $disambigstate = 3;
    $nextstate = 4;
    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
  }
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;
    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
        print "$s\t$ns\t$p\t$word_or_eps\n";
        $word_or_eps = "<eps>";
        $s = $ns;
      } else {
        # NOTE(review): @A is always empty at this point, so is_sil(@A) is
        # never true and the "silence word" branch below is unreachable;
        # the test was presumably meant to look at the word's full
        # pronunciation.  Left unchanged because fixing it alters the
        # generated FST -- confirm the intent before changing.
        if(!is_sil(@A)){
          # This is non-deterministic but relatively compact,
          # and avoids epsilons.
          print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
          print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
        } else {
          # no point putting opt-sil after silence word.
          print "$s\t$loopstate\t$p\t$word_or_eps\n";
        }
        $word_or_eps = "<eps>";
      }
    }
  }
  print "$loopstate\t0\n"; # final-cost.
}

134
egs/timit/s4/utils/mkgraph.sh Executable file
Просмотреть файл

@ -0,0 +1,134 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script creates a fully expanded decoding graph (HCLG) that represents
# all the language-model, pronunciation dictionary (lexicon), context-dependency,
# and HMM structure in our model. The output is a Finite State Transducer
# that has word-ids on the output, and pdf-ids on the input (these are indexes
# that resolve to Gaussian Mixture Models).
# See
# http://kaldi.sourceforge.net/graph_recipe_test.html
# (this is compiled from this repository using Doxygen,
# the source for this part is in src/doc/graph_recipe_test.dox)
# Defaults: context width N=3 with central position P=1; --mono switches to
# N=1, P=0.  --clean removes $lang/tmp before building.
N=3
P=1
clean=false
# Consume leading --mono / --clean flags in any order.  "$1" is quoted so
# that running out of arguments no longer makes the test itself fail with
# "unary operator expected".
while [ $# -gt 0 ]; do
  case "$1" in
    --mono)
      N=1
      P=0
      shift
      ;;
    --clean)
      clean=true
      shift
      ;;
    *)
      break
      ;;
  esac
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1;
fi
# Pick up the recipe's environment settings when a path.sh is present.
[ -f path.sh ] && . path.sh

# Positional arguments: language directory, model directory, output directory.
lang=$1
dir=$3
model=$2/final.mdl
tree=$2/tree

# --clean: discard intermediate files left in $lang/tmp by a previous run.
if $clean; then
  rm -r $lang/tmp
fi
mkdir -p $dir

# Scales handed to make-h-transducer (transition scale) and add-self-loops
# (self-loop scale) further down.
tscale=1.0
loopscale=0.1

# All of these inputs must already exist.
required="$lang/L.fst $lang/G.fst $lang/phones_disambig.txt $lang/words.txt $lang/silphones.csl $model $tree"
for f in $required; do
  if [ ! -f $f ]; then
    echo "mkgraph.sh: expected $f to exist"
    exit 1
  fi
done

# Intermediate FSTs live in $lang/tmp.  The stages that follow rebuild a file
# only if it does not exist or is older than its sources
# (note: the [[ ]] brackets make the || type operators work (inside [ ], we
# would have to use -o instead), -f means file exists, and -ot means older than).
mkdir -p $lang/tmp
# Stage LG: compose L_disambig with G, determinize and minimize.  Rebuilt when
# LG.fst is missing or older than G.fst or L_disambig.fst.
if [[ ! -f $lang/tmp/LG.fst ]] \
   || [[ $lang/tmp/LG.fst -ot $lang/G.fst ]] \
   || [[ $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then
  fsttablecompose $lang/L_disambig.fst $lang/G.fst \
    | fstdeterminizestar --use-log=true \
    | fstminimizeencoded > $lang/tmp/LG.fst || exit 1
  if ! fstisstochastic $lang/tmp/LG.fst; then
    echo "warning: LG not stochastic."
  fi
fi

[ -f $lang/phones_disambig.txt ] || {
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1
}

# Second column of every phones_disambig.txt line that contains a '#'.
awk '/#/ {print $2}' $lang/phones_disambig.txt > $lang/tmp/disambig_phones.list
# Stage CLG: add phonetic context to LG.  Rebuilt when the CLG file for this
# context size / central position is missing or older than LG.fst.
clg=$lang/tmp/CLG_${N}_${P}.fst
if [[ ! -f $clg || $clg -ot $lang/tmp/LG.fst ]]; then
  # A failure here used to go unnoticed and left behind a freshly written
  # (hence "up to date") but broken $clg that later runs would reuse.
  # Remove the partial output and stop, as the other stages do.
  fstcomposecontext --context-size=$N --central-position=$P \
    --read-disambig-syms=$lang/tmp/disambig_phones.list \
    --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.list \
    $lang/tmp/ilabels_${N}_${P} < $lang/tmp/LG.fst >$clg \
    || { rm -f "$clg"; exit 1; }
  fstisstochastic $clg || echo "warning: CLG not stochastic."
fi
# Stage Ha: run make-h-transducer on the ilabels, tree and model.  Rebuilt when
# Ha.fst is missing or older than the model or the ilabels file; also writes
# the disambiguation-symbol list $dir/disambig_tid.list used by the next stage.
if [[ ! -f $dir/Ha.fst || $dir/Ha.fst -ot $model \
|| $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then
make-h-transducer --disambig-syms-out=$dir/disambig_tid.list \
--transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \
> $dir/Ha.fst || exit 1;
fi
# Stage HCLGa: compose Ha with CLG, determinize, remove the symbols listed in
# disambig_tid.list, remove epsilons locally and minimize.  Rebuilt when
# HCLGa.fst is missing or older than Ha.fst or the CLG file.
if [[ ! -f $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \
$dir/HCLGa.fst -ot $clg ]]; then
fsttablecompose $dir/Ha.fst $clg | fstdeterminizestar --use-log=true \
| fstrmsymbols $dir/disambig_tid.list | fstrmepslocal | \
fstminimizeencoded > $dir/HCLGa.fst || exit 1;
fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic"
fi
# Stage HCLG: run add-self-loops on HCLGa to produce the final graph.  Rebuilt
# when HCLG.fst is missing or older than HCLGa.fst.
if [[ ! -f $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then
add-self-loops --self-loop-scale=$loopscale --reorder=true \
$model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1;
# The stochasticity check only runs when both scales are exactly 1.0.
if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then
# No point doing this test if transition-scale not 1, as it is bound to fail.
fstisstochastic $dir/HCLG.fst || echo "Final HCLG is not stochastic."
fi
fi
# Keep a copy of the word symbol table and the list of silence phones next to
# HCLG, so that decoding does not need to refer to the $lang directory.
cp $lang/words.txt $lang/silphones.csl $dir/

# To make a const FST instead:
#   fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst
echo "Finished making decoding graphs in $dir"

27
egs/timit/s4/utils/s2eps.pl Executable file
Просмотреть файл

@ -0,0 +1,27 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script replaces <s> and </s> with <eps> (on both input and output sides),
# for the G.fst acceptor.
# On lines with at least four fields, replace <s> / </s> in the third and
# fourth fields by <eps>.  Every line is re-emitted with its fields joined by
# tabs.
my %is_boundary = ("<s>" => 1, "</s>" => 1);
while (defined(my $line = <>)) {
  my @cols = split(" ", $line);
  if (@cols >= 4) {
    foreach my $i (2, 3) {
      $cols[$i] = "<eps>" if $is_boundary{$cols[$i]};
    }
  }
  print join("\t", @cols) . "\n";
}

Просмотреть файл

@ -0,0 +1,56 @@
#!/bin/bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Pick up the recipe's environment settings when a path.sh is present.
[ -f ./path.sh ] && . ./path.sh

[ $# -eq 4 ] || {
  echo "Usage: score_lats.sh <decode-dir> <word-symbol-table> <data-dir> <phone-map>"
  exit 1
}

dir=$1
symtab=$2
data=$3
phonemap=$4

if [ ! -f $symtab ]; then
  echo No such word symbol table file $symtab
  exit 1
fi
if [ ! -f $data/text ]; then
  echo Could not find transcriptions in $data/text
  exit 1
fi

# The reference transcriptions are copied next to the decoding output.
trans=$data/text
cp $trans $dir/test.trans

# For each inverse acoustic weight 1..7: take the best path through the
# lattices at acoustic scale 1/weight, turn the integer transcripts into
# symbols, apply the phone map (48 -> 39) and score against the reference.
# Results go to $dir/wer_<weight>, the lattice tool's stderr to
# $dir/rescore_<weight>.log.
for inv_acwt in 1 2 3 4 5 6 7; do
  acwt=$(perl -e "print (1.0/$inv_acwt);")
  lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$symtab \
    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/${inv_acwt}.tra \
    2>$dir/rescore_${inv_acwt}.log
  cat $dir/${inv_acwt}.tra \
    | int2sym.pl --ignore-first-field $symtab \
    | timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 \
    | compute-wer --text --mode=present ark:$dir/test.trans ark,p:- \
    >& $dir/wer_$inv_acwt
done

Просмотреть файл

@ -0,0 +1,50 @@
#!/bin/bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Pick up the recipe's environment settings when a path.sh is present.
if [ -f ./path.sh ]; then . ./path.sh; fi

if [ $# -ne 4 ]; then
  echo "Usage: score_text.sh <decode-dir> <word-symbol-table> <data-dir> <phone-map>"
  exit 1;
fi
dir=$1
symtab=$2
data=$3
phonemap=$4

# Check the symbol table up front, as score_lats.sh does; without it
# int2sym.pl fails in the middle of the pipeline and an empty hypothesis file
# gets scored.
if [ ! -f "$symtab" ]; then
  echo "No such word symbol table file $symtab"
  exit 1
fi
if [ ! -f "$data/text" ]; then
  echo "Could not find transcriptions in $data/text"
  exit 1
fi

# Reference transcriptions, sorted on the utterance id.
trans=$data/text
sort -k1,1 "$trans" > "$dir/test.trans"

# We assume the transcripts are already in integer form: gather all *.tra
# files, sort on the utterance id, map integers to symbols and apply the
# phone map (48 -> 39).
cat "$dir"/*.tra | sort -k1,1 \
  | int2sym.pl --ignore-first-field "$symtab" \
  | timit_norm_trans.pl -i - -m "$phonemap" -from 48 -to 39 \
  > "$dir/text"

# Score, keep the full report in $dir/wer and show the WER line.
compute-wer --text --mode=present "ark:$dir/test.trans" "ark,p:$dir/text" \
  >& "$dir/wer"
grep WER "$dir/wer"

57
egs/timit/s4/utils/silphones.pl Executable file
Просмотреть файл

@ -0,0 +1,57 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# creates integer lists of silence and non-silence phones in files,
# e.g. silphones.csl="1:2:3 \n"
# and nonsilphones.csl="4:5:6:7:...:24\n";
@ARGV == 4
  or die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl";

my ($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;
open(S, "<$symtab") || die "Opening symbol table $symtab";

# The silence phones named on the command line.
my %issil = map { $_ => 1 } split(" ", $sillist);

# Walk the phone symbol table ("<symbol> <integer>" per line) and sort the
# integer ids into silence / non-silence, skipping id 0.
my (%seensil, @sil, @nonsil);
while (<S>) {
  my @fields = split(" ", $_);
  @fields == 2 || die "Bad line $_ in phone-symbol-table file $symtab";
  my ($sym, $int) = @fields;
  next if $int == 0;
  if ($issil{$sym}) {
    push @sil, $int;
    $seensil{$sym} = 1;
  } else {
    push @nonsil, $int;
  }
}

# Every requested silence phone must have appeared in the symbol table.
foreach my $k (keys %issil) {
  $seensil{$k} or die "No such silence phone $k";
}

# Write both lists as single colon-separated lines.
open(F, ">$silphones") || die "opening silphones file $silphones";
open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
print F join(":", @sil) . "\n";
print G join(":", @nonsil) . "\n";
close(F);
close(G);

print STDERR "Warning: silphones.pl no silence phones.\n" if @sil == 0;
print STDERR "Warning: silphones.pl no non-silence phones.\n" if @nonsil == 0;

Просмотреть файл

@ -0,0 +1,79 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Split a data directory into <num-to-split> parts under
# <data-dir>/split<num-to-split>/<n>/ (n = 1..num-to-split).  utt2spk and
# feats.scp are always split; wav.scp and text only if present.  Each part
# also gets a spk2utt and, when <data-dir>/spk2gender exists, a filtered
# spk2gender.
set -o errexit

if [ $# -ne 2 ]; then
  echo "Usage: split_data.sh data-dir num-to-split"
  exit 1
fi
data=$1
numsplit=$2

# Reject anything that is not a plain positive integer; a non-numeric value
# used to slip past the numeric test below.
case "$numsplit" in
  ''|*[!0-9]*)
    echo "Invalid num-split argument $numsplit"
    exit 1
    ;;
esac
if [ "$numsplit" -le 0 ]; then
  echo "Invalid num-split argument $numsplit";
  exit 1;
fi

# Output file lists, one entry per split; arrays keep paths with spaces intact.
feats=()
wavs=()
utt2spks=()
texts=()

nu=$(cat "$data/utt2spk" | wc -l)
nf=$(cat "$data/feats.scp" | wc -l)
# text is optional (it is only split below when present), so do not try to
# read it when it is absent.
nt=0
if [ -f "$data/text" ]; then
  nt=$(cat "$data/text" | wc -l)
fi

if [ "$nu" -ne "$nf" ]; then
  echo "split_data.sh: warning, #lines is (utt2spk,feats.scp) is ($nu,$nf);"
  echo "this script may produce incorrectly split data."
  echo "use utils/fix_data_dir.sh to fix this."
fi
if [ "$nt" -ne 0 -a "$nu" -ne "$nt" ]; then
  echo "split_data.sh: warning, #lines is (utt2spk,text) is ($nu,$nt);"
  echo "this script may produce incorrectly split data."
  echo "use utils/fix_data_dir.sh to fix this."
fi

# Splits are numbered 1..numsplit (usual number sequence -Arnab).
for n in $(seq 1 "$numsplit"); do
  mkdir -p "$data/split$numsplit/$n"
  feats+=("$data/split$numsplit/$n/feats.scp")
  wavs+=("$data/split$numsplit/$n/wav.scp")
  texts+=("$data/split$numsplit/$n/text")
  utt2spks+=("$data/split$numsplit/$n/utt2spk")
done

# split_scp.pl is given --utt2spk for every file so that all of them are
# split with reference to the same utt2spk map.
split_scp.pl --utt2spk="$data/utt2spk" "$data/utt2spk" "${utt2spks[@]}"
split_scp.pl --utt2spk="$data/utt2spk" "$data/feats.scp" "${feats[@]}"
if [ -f "$data/wav.scp" ]; then
  split_scp.pl --utt2spk="$data/utt2spk" "$data/wav.scp" "${wavs[@]}"
fi
if [ -f "$data/text" ]; then
  split_scp.pl --utt2spk="$data/utt2spk" "$data/text" "${texts[@]}"
fi

for n in $(seq 1 "$numsplit"); do
  utt2spk_to_spk2utt.pl "$data/split$numsplit/$n/utt2spk" \
    > "$data/split$numsplit/$n/spk2utt"
  # for completeness, also split the spk2gender file
  if [ -f "$data/spk2gender" ]; then
    filter_scp.pl "$data/split$numsplit/$n/spk2utt" "$data/spk2gender" \
      > "$data/split$numsplit/$n/spk2gender"
  fi
done
exit 0

211
egs/timit/s4/utils/split_scp.pl Executable file
Просмотреть файл

@ -0,0 +1,211 @@
#!/usr/bin/perl -w
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This program splits up any kind of .scp or archive-type file.
# If there is no utt2spk option it will work on any text file and
# will split it up with an approximately equal number of lines in
# each output file.
# With the --utt2spk option it will work on anything that has the
# utterance-id as the first entry on each line; the utt2spk file is
# of the form "utterance speaker" (on each line).
# It splits it into equal size chunks as far as it can. If you use
# the utt2spk option it will make sure these chunks coincide with
# speaker boundaries. In this case, if there are more chunks
# than speakers (and in some other circumstances), some of the
# resulting chunks will be empty and it
# will print a warning.
# You will normally call this like:
# split_scp.pl scp scp.1 scp.2 scp.3 ...
# or
# split_scp.pl --utt2spk=utt2spk scp scp.1 scp.2 scp.3 ...
# Note that you can use this script to split the utt2spk file itself,
# e.g. split_scp.pl --utt2spk=utt2spk utt2spk utt2spk.1 utt2spk.2 ...
# You can also call the scripts like:
# split_scp.pl -j 3 0 scp scp.0
# [note: with this option, it assumes zero-based indexing of the split parts,
# i.e. the second number must be 0 <= n < num-jobs.]
# Option defaults: no -j (write every output file), no --utt2spk.
($num_jobs, $job_id, $utt2spk_file) = (0, 0, "");

# Two passes so that -j and --utt2spk=... may appear in either order.
foreach my $pass (1, 2) {
  if ($ARGV[0] eq "-j") {
    shift @ARGV;
    $num_jobs = shift @ARGV;
    $job_id = shift @ARGV;
    die "Invalid num-jobs and job-id: $num_jobs and $job_id"
      if ($num_jobs <= 0 || $job_id < 0 || $job_id >= $num_jobs);
  }
  if ($ARGV[0] =~ m/--utt2spk=(.+)/) {
    $utt2spk_file = $1;
    shift @ARGV;
  }
}

# Without -j: input plus at least one output.  With -j: input plus at most one
# output.
my $bad_usage = ($num_jobs == 0) ? (@ARGV < 2) : (@ARGV < 1 || @ARGV > 2);
if ($bad_usage) {
  die "Usage: split_scp.pl [--utt2spk=<utt2spk_file>] in.scp out1.scp out2.scp ... \n" .
      " or: split_scp.pl -j num-jobs job-id [--utt2spk=<utt2spk_file>] in.scp [out.scp]\n" .
      " ... where 0 <= job-id < num-jobs.";
}

$inscp = shift @ARGV;
if ($num_jobs == 0) { # without -j option
  @OUTPUTS = @ARGV;
} else {
  # With -j only the requested part is kept: it goes to the named file, or to
  # "-" when none was given; every other part is sent to /dev/null.
  for (my $j = 0; $j < $num_jobs; $j++) {
    push @OUTPUTS, ($j != $job_id) ? "/dev/null"
                                   : (@ARGV > 0 ? $ARGV[0] : "-");
  }
}
# Two modes:
#   * with --utt2spk: lines are grouped by speaker and whole speakers are
#     assigned to output files, then neighbouring files trade boundary
#     speakers while that reduces the difference in their utterance counts;
#   * without: the input is cut into consecutive chunks of (at most)
#     ceil(num-lines / num-outputs) lines.
if ($utt2spk_file ne "") { # We have the --utt2spk option...
  open(U, "<$utt2spk_file") || die "Failed to open utt2spk file $utt2spk_file";
  while(<U>) {
    @A = split;
    @A == 2 || die "Bad line $_ in utt2spk file $utt2spk_file";
    ($u,$s) = @A;
    $utt2spk{$u} = $s;
  }
  # Read the input, collecting each speaker's lines (in input order) and
  # remembering the order in which speakers first appear.
  open(I, "<$inscp") || die "Opening input scp file $inscp";
  @spkrs = ();
  while(<I>) {
    @A = split;
    if(@A == 0) { die "Empty or space-only line in scp file $inscp"; }
    $u = $A[0];
    $s = $utt2spk{$u};
    if(!defined $s) { die "No such utterance $u in utt2spk file $utt2spk_file"; }
    if(!defined $spk_count{$s}) {
      push @spkrs, $s;
      $spk_count{$s} = 0;
      $spk_data{$s} = "";
    }
    $spk_count{$s}++;
    $spk_data{$s} = $spk_data{$s} . $_;
  }
  # Now split as equally as possible ..
  # First allocate spks to files by allocating an approximately
  # equal number of speakers.
  $numspks = @spkrs; # number of speakers.
  $numscps = @OUTPUTS; # number of output files.
  for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
    $scparray[$scpidx] = []; # [] is array reference.
    # Start every count at zero: with more output files than speakers some
    # entries were never assigned, and the arithmetic below then raised
    # "uninitialized value" warnings under -w.
    $scpcount[$scpidx] = 0;
  }
  for ($spkidx = 0; $spkidx < $numspks; $spkidx++) {
    $scpidx = int(($spkidx*$numscps) / $numspks);
    $spk = $spkrs[$spkidx];
    push @{$scparray[$scpidx]}, $spk;
    $scpcount[$scpidx] += $spk_count{$spk};
  }
  # Now will try to reassign beginning + ending speakers
  # to different scp's and see if it gets more balanced.
  # Suppose objf we're minimizing is sum_i (num utts in scp[i] - average)^2.
  # We can show that if considering changing just 2 scp's, we minimize
  # this by minimizing the squared difference in sizes. This is
  # equivalent to minimizing the absolute difference in sizes. This
  # shows this method is bound to converge.
  $changed = 1;
  while($changed) {
    $changed = 0;
    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
      # First try to reassign ending spk of this scp.
      if($scpidx < $numscps-1) {
        $sz = @{$scparray[$scpidx]};
        if($sz > 0) {
          $spk = $scparray[$scpidx]->[$sz-1];
          $count = $spk_count{$spk};
          $nutt1 = $scpcount[$scpidx];
          $nutt2 = $scpcount[$scpidx+1];
          if( abs( ($nutt2+$count) - ($nutt1-$count))
              < abs($nutt2 - $nutt1)) { # Would decrease
            # size-diff by reassigning spk...
            $scpcount[$scpidx+1] += $count;
            $scpcount[$scpidx] -= $count;
            pop @{$scparray[$scpidx]};
            unshift @{$scparray[$scpidx+1]}, $spk;
            $changed = 1;
          }
        }
      }
      # Then try to hand the first spk of this scp to the previous one.
      if($scpidx > 0 && @{$scparray[$scpidx]} > 0) {
        $spk = $scparray[$scpidx]->[0];
        $count = $spk_count{$spk};
        $nutt1 = $scpcount[$scpidx-1];
        $nutt2 = $scpcount[$scpidx];
        if( abs( ($nutt2-$count) - ($nutt1+$count))
            < abs($nutt2 - $nutt1)) { # Would decrease
          # size-diff by reassigning spk...
          $scpcount[$scpidx-1] += $count;
          $scpcount[$scpidx] -= $count;
          shift @{$scparray[$scpidx]};
          push @{$scparray[$scpidx-1]}, $spk;
          $changed = 1;
        }
      }
    }
  }
  # Now print out the files...
  for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
    $scpfn = $OUTPUTS[$scpidx];
    open(F, ">$scpfn") || die "Could not open scp file $scpfn for writing.";
    $count = 0;
    if(@{$scparray[$scpidx]} == 0) {
      print STDERR "Warning: split_scp.pl producing empty .scp file $scpfn (too many splits and too few speakers?)\n";
    } else {
      foreach $spk ( @{$scparray[$scpidx]} ) {
        print F $spk_data{$spk};
        $count += $spk_count{$spk};
      }
      # Sanity check on the bookkeeping above.
      if($count != $scpcount[$scpidx]) { die "Count mismatch [code error]"; }
    }
    close(F);
  }
} else {
  # This block is the "normal" case where there is no --utt2spk
  # option and we just break into equal size chunks.
  open(I, "<$inscp") || die "Opening input scp file $inscp";
  $numscps = @OUTPUTS; # size of array.
  @F = ();
  while(<I>) {
    push @F, $_;
  }
  $numlines = @F;
  if($numlines == 0) {
    # Newline added so the warning does not run into whatever is printed next.
    print STDERR "split_scp.pl: warning: empty input scp file $inscp\n";
  }
  $linesperscp = int( ($numlines+($numscps-1)) / $numscps); # the +$(numscps-1) forces rounding up.
  # [just doing int() rounds down].
  for($scpidx = 0; $scpidx < @OUTPUTS; $scpidx++) {
    $scpfile = $OUTPUTS[$scpidx];
    open(O, ">$scpfile") || die "Opening output scp file $scpfile";
    for($n = $linesperscp * $scpidx; $n < $numlines && $n < $linesperscp*($scpidx+1); $n++) {
      print O $F[$n];
    }
    close(O) || die "Closing scp file $scpfile";
  }
}

125
egs/timit/s4/utils/submit_jobs.sh Executable file
Просмотреть файл

@ -0,0 +1,125 @@
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
set -o errexit
# Print the arguments on stderr (backslash escapes are interpreted) and
# terminate the shell with status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
# readposint --name=VALUE
# Prints VALUE (with leading zeros removed) on stdout if it is a positive
# integer; otherwise calls error_exit.
function readposint () {
  local retval=""
  # Keep everything after the first '='; empty when there is no '='.
  case "${1-}" in
    *=*) retval=${1#*=} ;;
  esac
  # Remove the whole run of leading zeros.  The previous "${retval#0*}" only
  # removed a single '0', so e.g. "007" was rejected.
  retval=${retval#"${retval%%[!0]*}"}
  [[ "$retval" =~ ^[1-9][0-9]*$ ]] \
    || error_exit "Argument \"$retval\" not a positive integer."
  echo $retval
}
PROG=$(basename "$0")
usage="Usage: $PROG [options] --log=logfile command\n
Runs the supplied command and redirect the stdout & stderr to logfile.\n
With the --qcmd option, the command is submitted to a grid engine.\n
Any 'TASK_ID' in logfile or command is replaced with job number or \$SGE_TASK_ID (for SGE).\n\n
Required arguments:\n
--log=FILE\tOutput of command redirected to this file.\n\n
Options:\n
--njobs=INT\tNumber of jobs to run (default=1). Assumes split data exists.\n
--qcmd=STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
";
if [ $# -lt 2 ]; then
  error_exit $usage;
fi

NJOBS=1 # Default number of jobs
QCMD="" # No grid usage by default
LOGF="" # Required; checked once the options have been read

# Options are read while more than one argument remains, so the last argument
# is always left over as (part of) the command.
while [ $# -gt 1 ]; do
  case "${1# *}" in # ${1# *} drops one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    # The values are taken with parameter expansion rather than `expr`:
    # expr exits non-zero when the extracted text is empty or "0", which under
    # errexit ended the script without any message.
    --qcmd=*)
      QCMD=${1#*=}; shift ;;
    --njobs=*)
      NJOBS=$(readposint "$1"); shift ;;
    --log=*)
      LOGF=${1#*=}; shift ;;
    -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    '') shift ;; # Handle any empty arguments
    *) break ;; # interpreted as the command to execute
  esac
done

# --log is mandatory; say so instead of tripping over an unset variable.
[ -n "$LOGF" ] || error_exit "$PROG: missing required argument --log=FILE."

logfile_base=$(basename "$LOGF" .log)
logfile_dir=$(dirname "$LOGF")
mkdir -p "$logfile_dir"

# Now, parse the command to execute: rebuild it as a single string, wrapping
# words that contain a double quote in single quotes and words that contain a
# space in double quotes, so that it can be eval'ed / written to a script.
exec_cmd="";
while [ $# -gt 0 ]; do
  case "$1" in
    *\"*) exec_cmd=$exec_cmd"'''$1''' " ;;
    *\ *) exec_cmd=$exec_cmd"\"$1\" " ;;
    *) exec_cmd=$exec_cmd"$1 " ;;
  esac
  shift
done
# Run jobs 1..NJOBS in the background on this machine and wait for all of
# them.  TASK_ID in the command and in the log file name is replaced by the job
# number; each job appends its output (stdout and stderr) to its own log file.
# A failing job leaves a ".error" marker in the log directory; if the marker
# exists once every job has finished, it is removed and error_exit is called.
# Otherwise the shell exits with status 0.
function run_locally {
  local failure_marker=$logfile_dir/.error
  local this_logfile this_command
  rm -f $failure_marker
  for ((n = 1; n <= NJOBS; n++)); do
    this_logfile=$logfile_dir"/"${logfile_base//TASK_ID/$n}".log"
    this_command=${exec_cmd//TASK_ID/$n}
    (
      echo -e "# Command:\n# $this_command"
      echo "# Running on: "$(hostname)
      echo "# Started at: "$(date)
      eval $this_command || touch $failure_marker
      echo "# Finished at: "$(date)
    ) >> $this_logfile 2>&1 &
  done
  wait
  if [ -f $failure_marker ]; then
    rm -f $failure_marker
    error_exit "One (or more) locally run jobs failed."
  fi
  exit 0
}
# Submits the command to a grid engine as an array job of NJOBS tasks.
#
# Globals read: NJOBS, QCMD, exec_cmd, logfile_base, logfile_dir, PWD.
# Every 'TASK_ID' in the command and in the log file name is replaced by the
# literal text $SGE_TASK_ID, which is expanded when the job script runs.
# A job script is written to the queue directory and then submitted with
#   $QCMD -sync y -o <qlog> -t 1-NJOBS <script>
# Does not return: exits with the status of the submission command.
function run_on_grid {
  local this_logfile=${logfile_base//TASK_ID/\$SGE_TASK_ID}
  this_logfile="$logfile_dir/$this_logfile.log"
  # If log files are in a separate 'log' directory, create the job submission
  # scripts in a sibling 'q' directory (a trailing "log" in the path is
  # replaced by "q").
  local qdir=${logfile_dir/%log/q}
  mkdir -p "$qdir"
  local qlog="$qdir/queue.log"
  local this_command=${exec_cmd//TASK_ID/\$SGE_TASK_ID}
  # Script name: log base name with TASK_ID removed.  Removing TASK_ID from
  # e.g. "job.TASK_ID" leaves "job." and hence "job..sh"; collapse that to
  # "job.sh".  The collapse is applied to the file name only, so that ".."
  # components of a relative directory path are left intact.
  local script_name=${logfile_base//TASK_ID/}.sh
  script_name=${script_name//../.}
  local run_this="$qdir/$script_name"
  {
    printf '#!/bin/bash\n#$ -S /bin/bash\n#$ -V -cwd -j y\n'
    printf 'set -e\n'
    printf '{ cd %s\n . path.sh\n echo Running on: `hostname`\n' "$PWD"
    # The command is passed as a printf argument, never as part of the format
    # string, so '%' and backslashes in it reach the job script unchanged.
    printf ' echo Started at: `date`\n %s\n ret=$?\n' "$this_command"
    printf ' echo Finished at: `date`\n} >& %s\nexit $ret\n' "$this_logfile"
    printf '# Submitted with:\n'
    printf '# %s -sync y -o %s -t 1-%s %s >> %s 2>&1\n' \
      "$QCMD" "$qlog" "$NJOBS" "$run_this" "$qlog"
  } > "$run_this"
  # $QCMD is intentionally unquoted: it holds the submit command together
  # with its switches and must be split into words.
  $QCMD -sync y -o "$qlog" -t "1-${NJOBS}" "$run_this" >> "$qlog" 2>&1
  exit $?
}
# Entry point: without a grid submission command (--qcmd) the jobs run on
# this machine, otherwise they are submitted to the grid engine.
case "$QCMD" in
  "") run_locally ;;
  *) run_on_grid ;;
esac

82
egs/timit/s4/utils/sym2int.pl Executable file
Просмотреть файл

@ -0,0 +1,82 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Replaces each whitespace-separated symbol on every input line by the
# integer listed for it in a symbol table ("symbol integer" per line).
#
# Usage: sym2int.pl [--ignore-oov] [--ignore-first-field] [--map-oov SYM] \
#            symtab [input transcriptions] > output transcriptions
#
#   --ignore-oov          print out-of-vocabulary symbols unmodified
#   --ignore-first-field  copy the first field of each line through as-is
#   --map-oov SYM         replace out-of-vocabulary symbols by SYM's integer
# Without --ignore-oov or --map-oov an unknown symbol is a fatal error.
$ignore_oov = 0;
$ignore_first_field = 0;

# At most three passes over the leading arguments.  Within one pass the
# three tests run in this fixed order, each looking at whatever argument is
# first at that moment.
foreach my $pass (1 .. 3) {
  if ($ARGV[0] eq "--ignore-oov") {
    shift @ARGV;
    $ignore_oov = 1;
  }
  if ($ARGV[0] eq "--ignore-first-field") {
    shift @ARGV;
    $ignore_first_field = 1;
  }
  if ($ARGV[0] eq "--map-oov") {
    shift @ARGV;
    $map_oov = shift @ARGV;
  }
}

$symtab = shift @ARGV;
defined $symtab
  or die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n";

# Load the symbol table; every line must have exactly two fields.
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while (my $entry = <F>) {
  my @fields = split(" ", $entry);
  @fields == 2 || die "bad line in symbol table file: $entry";
  my ($sym, $id) = @fields;
  $sym2int{$sym} = $id + 0;   # "+ 0" forces a numeric value
}

$num_warning = 0;
$max_warning = 20;   # at most this many OOV replacement warnings are printed
$error = 0;

# Returns the output token for one symbol: its integer if known, otherwise
# the --map-oov integer (with a rate-limited warning on STDERR), the symbol
# itself under --ignore-oov, or dies.
sub sym_to_id {
  my ($sym) = @_;
  my $id = $sym2int{$sym};
  return $id if defined $id;

  if (defined $map_oov) {
    defined $sym2int{$map_oov}
      or die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
    if ($num_warning++ < $max_warning) {
      print STDERR "sym2int.pl: replacing $sym with $map_oov\n";
      print STDERR "sym2int.pl: not warning for OOVs any more times\n"
        if $num_warning == $max_warning;
    }
    return $sym2int{$map_oov};
  }
  return $sym if $ignore_oov;   # just print them out unmodified..
  die "sym2int.pl: undefined symbol $sym\n";
}

while (my $line = <>) {
  my @symbols = split(" ", $line);
  die "Empty line in transcriptions input." unless @symbols;
  if ($ignore_first_field) {
    # The first field is echoed before the remaining fields are mapped.
    my $utt_key = shift @symbols;
    print "$utt_key ";
  }
  my @ids = ();
  foreach my $sym (@symbols) {
    push @ids, sym_to_id($sym);
  }
  print join(" ", @ids), "\n";
}

exit($error ? 1 : 0);

Просмотреть файл

@ -0,0 +1,39 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Inverts an utt2spk mapping ("utterance speaker" per line) into spk2utt
# form ("speaker utt1 utt2 ..." per line).
# Input is read from the file given as the only argument, or from stdin;
# output goes to stdout.  Speakers appear in order of first occurrence and
# each speaker's utterances keep their input order.

if ( @ARGV > 1 ) {
  die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
}

while (my $line = <>) {
  my @fields = split(" ", $line);
  @fields == 2 || die "Invalid line in utt2spk file: $line";
  my ($utt, $spk) = @fields;
  # Remember each speaker the first time it is seen.
  push @spk_order, $spk unless exists $utts_of{$spk};
  push @{ $utts_of{$spk} }, $utt;
}

foreach my $spk (@spk_order) {
  print "$spk " . join(" ", @{ $utts_of{$spk} }) . "\n";
}

Просмотреть файл

@ -61,3 +61,4 @@ nnet_cpu: base util matrix
rnn: base util matrix lat
FSTROOT = /mnt/matylda5/iveselyk/DEVEL/kaldi/sandbox/karel/tools/openfst

40
src/configure поставляемый
Просмотреть файл

@ -31,16 +31,22 @@ ATLASROOT=`rel2abs ../tools/ATLAS/`
FSTROOT=`rel2abs ../tools/openfst`
function usage {
echo 'Usage: ./configure [--atlas-root=ATLASROOT] [--fst-root=FSTROOT]
echo 'Usage: ./configure [--threaded-atlas={yes|no}] [--atlas-root=ATLASROOT] [--fst-root=FSTROOT]
[--mkl-root=MKLROOT] [--mkl-libdir=MKLLIBDIR] [--mathlib=ATLAS|MKL|CLAPACK]';
}
threaded_atlas=false # By default, use the un-threaded version of ATLAS.
while [ $# -gt 0 ];
do
case "$1" in
--help) usage; exit 0 ;;
--atlas-root=*)
ATLASROOT=`read_dirname $1`; shift ;;
--threaded-atlas=yes)
threaded_atlas=true; shift ;;
--threaded-atlas=no)
threaded_atlas=false; shift ;;
--fst-root=*)
FSTROOT=`read_dirname $1`; shift ;;
--mkl-root=*)
@ -137,10 +143,12 @@ function linux_check_static {
}
function linux_configure_static {
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the first one below.
if $threaded_atlas; then pt=pt; else pt=""; fi
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below.
for dir in /usr{,/local}/lib{64,}{,/atlas,/atlas-sse2,/atlas-sse3} \
`pwd`/../tools/ATLAS/build/install/lib/ $ATLASROOT/lib; do
linux_check_static && ATLASLIBDIR=$dir && break
linux_check_static && ATLASLIBDIR=$dir
done
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below.
echo "Could not find libatlas.a in any of the obvious places... will try dynamic libraries."
@ -168,13 +176,14 @@ function linux_configure_static {
return ;
fi
for x in libcblas.a libatlas.a libf77blas.a; do
for x in lib${pt}cblas.a libatlas.a lib${pt}f77blas.a; do
if [ ! -f $ATLASLIBDIR/$x ]; then
echo "Configuring static ATLAS libraries failed: Could not find library $x in directory $ATLASLIBDIR"
return 1;
fi
ATLASLIBS="$ATLASLIBS $ATLASLIBDIR/$x"
done
if $threaded_atlas; then ATLASLIBS="$ATLASLIBS"; fi
echo ATLASINC = $ATLASROOT/include >> kaldi.mk
echo ATLASLIBS = $ATLASLIBS >> kaldi.mk
@ -189,21 +198,24 @@ function linux_check_dynamic {
# will exit with success if $dir seems to contain ATLAS libraries with
# right architecture (compatible with default "nm")
if [ -f $dir/libatlas.so ]; then # candidate...
if nm $dir/libatlas.so 2>&1 | grep "File format not recognized" >/dev/null; then
if nm --dynamic $dir/libatlas.so 2>&1 | grep "File format not recognized" >/dev/null; then
echo "Directory $dir may contain dynamic ATLAS libraries but seems to be wrong architecture";
return 1;
fi
echo "Atlas found in $dir";
return 0;
else
return 1;
echo "No libatlas.so in $dir";
return 1;
fi
}
function linux_configure_dynamic {
if $threaded_atlas; then pt=pt; else pt=""; fi
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below.
for dir in /usr{,/local}/lib{,64}{,/atlas,/atlas-sse2,/atlas-sse3} \
`pwd`/../tools/ATLAS/build/install/lib/ $ATLASROOT/lib; do
linux_check_dyamic && ATLASLIBDIR=$dir
linux_check_dynamic && ATLASLIBDIR=$dir
done
if [ -z $ATLASLIBDIR ]; then
echo "Could not find libatlas.so in any of the obvious places."
@ -221,7 +233,7 @@ function linux_configure_dynamic {
# for all the names we have encountered.
for libname in lapack lapack_atlas clapack; do
if [ -f $ATLASLIBDIR/lib${libname}.so -a "$ATLASLIBS" == "" ]; then
if nm $ATLASLIBDIR/lib${libname}.so | grep ATL_cgetrf >/dev/null; then
if nm --dynamic $ATLASLIBDIR/lib${libname}.so | grep ATL_cgetrf >/dev/null; then
ATLASLIBS="-L$ATLASLIBDIR -l${libname}"
echo "Using library $ATLASLIBS as ATLAS's CLAPACK library."
fi
@ -232,13 +244,14 @@ function linux_configure_dynamic {
return 1;
fi
for x in cblas atlas f77blas; do
for x in ${pt}cblas atlas ${pt}f77blas; do
if [ ! -f $ATLASLIBDIR/lib$x.so ]; then
echo "Configuring dynamic ATLAS libraries failed: Could not find library $x in directory $ATLASLIBDIR"
return 1;
fi
ATLASLIBS="$ATLASLIBS -l$x"
done
if $threaded_atlas; then ATLASLIBS="$ATLASLIBS"; fi
echo ATLASINC = $ATLASROOT/include >> kaldi.mk
echo ATLASLIBS = $ATLASLIBS >> kaldi.mk
@ -274,6 +287,11 @@ fi
cp makefiles/common.mk kaldi.mk
# Removing any previously defined FSTROOT in Makefile
cp Makefile Makefile.bak
grep -v ^'FSTROOT =' Makefile.bak > Makefile
[ cmp Makefile Makefile.bak >&/dev/null ] || rm Makefile.bak
# Most of the OS-specific steps below will append to kaldi.mk
echo "Doing OS specific configurations ..."
@ -289,6 +307,7 @@ if [ "`uname`" == "Darwin" ]; then
failure "Static OpenFST library not found: See ../tools/INSTALL"
fi
echo FSTROOT = $FSTROOT >> kaldi.mk
echo FSTROOT = $FSTROOT >> Makefile
# posix_memalign and gcc -rdynamic options not present on OS X 10.5.*
osx_ver=`sw_vers | grep ProductVersion | awk '{print $2}' | sed -e 's?\.[^.]*$??'`
echo "Configuring for OS X version $osx_ver ..."
@ -316,6 +335,8 @@ if [ "`uname -o`" == "Cygwin" ]; then
if [ ! -f /usr/lib/lapack/cygblas-0.dll ]; then
failure "please first install package liblapack0"
fi
echo FSTROOT = $FSTROOT >> kaldi.mk
echo FSTROOT = $FSTROOT >> Makefile
cat makefiles/cygwin.mk >> kaldi.mk
echo "Configuration succeeded for platform cygwin"
exit 0
@ -326,6 +347,7 @@ if [ "`uname`" == "Linux" ]; then
failure "Static OpenFST library not found: See ../tools/INSTALL"
fi
echo FSTROOT = $FSTROOT >> kaldi.mk
echo FSTROOT = $FSTROOT >> Makefile
echo "On Linux: Checking for linear algebra header files ..."
if [ $MATHLIB == "ATLAS" ]; then