зеркало из https://github.com/mozilla/kaldi.git
sync with trunk
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/karel@844 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
caeb968dbe
Коммит
6c217ca5ce
|
@ -21,4 +21,13 @@ should be Dan Povey (dpovey@microsoft.com). In addition to specific questions,
|
|||
please let me know if there are specific aspects of the project that you feel
|
||||
could be improved, that you find confusing, etc., and which missing features you
|
||||
most wish it had.
|
||||
|
||||
|
||||
|
||||
==SVN-MERGING==
|
||||
Merge with trunk:
|
||||
svn merge ^/trunk ^/sandbox/karel
|
||||
|
||||
When merging, resolve the tree conflicts by:
|
||||
svn resolve --accept working -R .
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
This recipe is using a publicly available subset of Resource Management data,
|
||||
consisting of freely distributed feature files distributed by CMU and some
|
||||
metadata (e.g. the word-pair grammar file) available from LDC's website.
|
||||
|
||||
To run the recipe the data should be downloaded first, for which ./getdata.sh
|
||||
command can be used. Then ./run.sh script can be executed to automatically perform
|
||||
all steps or the commands in it can be started manually by copy/pasting them.
|
||||
|
||||
The script and data layout are based on egs/rm/s3 recipe, with several exceptions:
|
||||
|
||||
- because this recipe uses pre-extracted feature vectors, no conversion from .sph
|
||||
to .wav format and subsequent feature extraction is needed. The features are just
|
||||
converted from CMU Sphinx feature files to Kaldi Tables.
|
||||
|
||||
- only one test set is available instead of several (e.g. mar87, oct87 and so on)
|
||||
as in the original recipe
|
||||
|
||||
- no speaker-dependent processing
|
||||
|
||||
- on the plus side it requires less disk space (about 220MB)
|
|
@ -0,0 +1 @@
|
|||
--use-energy=false # only non-default option.
|
|
@ -0,0 +1,2 @@
|
|||
# No non-default options for now.
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
<Topology>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
NONSILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
|
||||
<State> 3 </State>
|
||||
</TopologyEntry>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
SILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
|
||||
<State> 5 </State>
|
||||
</TopologyEntry>
|
||||
</Topology>
|
|
@ -0,0 +1,30 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
source path.sh

# path.sh is expected to export RM1_ROOT. Abort if it is missing, so that the
# commands below never operate on paths rooted at "/".
: "${RM1_ROOT:?RM1_ROOT is not set (expected to be exported by path.sh)}"

# Download and extract CMU's feature files
mkdir -p "$RM1_ROOT" || exit 1
wget -P "$RM1_ROOT" http://www.speech.cs.cmu.edu/databases/rm1/rm1_cepstra.tar.gz || exit 1
tar -C "$RM1_ROOT/" -xf "$RM1_ROOT/rm1_cepstra.tar.gz" || exit 1

# Download the available LDC metadata
# For some reason wget needs to be run twice in order to get all needed data ...
# (the exit status of the two mirror runs is deliberately not checked; the
# "mv" below fails if the expected directory was not fetched)
wget -P "$RM1_ROOT" -mk --no-parent -r -c -v -nH http://www.ldc.upenn.edu/Catalog/docs/LDC93S3B/
wget -P "$RM1_ROOT" -mk --no-parent -r -c -v -nH http://www.ldc.upenn.edu/Catalog/docs/LDC93S3B/
mv "$RM1_ROOT/Catalog/docs/LDC93S3B" "$RM1_ROOT/" || exit 1
# ":?" guards the recursive delete against an empty RM1_ROOT.
rm -rf "${RM1_ROOT:?}/Catalog"
|
|
@ -0,0 +1,40 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This script basically calls the supplied decoding script
|
||||
# once for each test set (in parallel on the same machine),
|
||||
# and then averages the resulting WERs.
|
||||
# The interpretation of the decode-dir-1, etc., as inputs,
|
||||
# outputs and so on, depends on the decoding script you call.
|
||||
|
||||
# It assumes the model directory is one level up from decode-dir-1.
|
||||
|
||||
# Optional leading "--mono" flag; it is forwarded unchanged to scripts/mkgraph.sh.
mono_opt=

if [ "$1" == "--mono" ]; then
  mono_opt=$1
  shift
fi

# Check the argument count before deriving anything from the arguments.
if [ $# -ne 2 ]; then
  echo "Usage: local/decode.sh [--mono] <decode-script> <decode-dir-1>"
  exit 1
fi

script=$1
decode_dir_1=$2                 # e.g. exp/sgmm3b/decode
dir=$(dirname "$decode_dir_1")  # e.g. exp/sgmm3b

if [ ! -x "$script" ] || [ ! -d "$dir" ]; then
  echo "local/decode.sh: Either no such script $script or not executable, or no such dir $dir"
  exit 1
fi

# $mono_opt is intentionally unquoted: when empty it must expand to no argument.
scripts/mkgraph.sh $mono_opt data/lang_test "$dir" "$dir/graph"

# Run the decoding script in the background and wait for it to finish.
"$script" "$dir" data/test data/lang "$decode_dir_1/" &
wait

# The publicly available RM subset has just one test set (instead of mar87 etc.),
# so no averaging is needed
grep WER "$decode_dir_1"/wer* || echo "Error decoding $decode_dir_1: no WER results found."
|
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# usage: make_trans.pl prefix in.flist input.snr out.txt out.scp
|
||||
|
||||
# prefix is first letters of the database "key" (rest are numeric)
|
||||
|
||||
# in.flist is just a list of filenames, probably of .sph files.
|
||||
# input.snr is an snr format file from the RM dataset.
|
||||
# out.txt is the output transcriptions in format "key word1 word\n"
|
||||
# out.scp is the output scp file, which is as in.scp but has the
|
||||
# database-key first on each line.
|
||||
|
||||
# Reads the SNOR transcripts from input.snr (e.g. $rootdir/rm1_audio1/rm1/doc/al_sents.snr)
# and the list of feature files from in.flist.
# Writes the transcriptions to out.txt and the keyed file list to out.scp.
|
||||
|
||||
# Exactly five positional arguments are required:
# prefix, in.flist, input.snr, out.txt and out.scp.
if (@ARGV != 5) {
  die "usage: make_trans.pl prefix in.flist input.snr out.txt out.scp\n";
}
|
||||
($prefix, $in_flist, $input_snr, $out_txt, $out_scp) = @ARGV;
|
||||
|
||||
open(F, "<$input_snr") || die "Opening SNOR file $input_snr";
|
||||
|
||||
while(<F>) {
|
||||
if(m/^;/) { next; }
|
||||
m/(.+) \((.+)\)/ || die "bad line $_";
|
||||
$T{$2} = $1;
|
||||
}
|
||||
|
||||
close(F);
|
||||
open(G, "<$in_flist") || die "Opening file list $in_flist";
|
||||
|
||||
open(O, ">$out_txt") || die "Open output transcription file $out_txt";
|
||||
|
||||
open(P, ">$out_scp") || die "Open output scp file $out_scp";
|
||||
|
||||
while(<G>) {
|
||||
$_ =~ m:/(\w+)/(\w+)\.mfc\s+$:i || die "bad scp line $_";
|
||||
$spkname = $1;
|
||||
$uttname = $2;
|
||||
$uttname =~ tr/a-z/A-Z/;
|
||||
defined $T{$uttname} || die "no trans for sent $uttname";
|
||||
$spkname =~ s/_//g; # remove underscore from spk name to make key nicer.
|
||||
$key = $prefix . "_" . $spkname . "_" . $uttname;
|
||||
$key =~ tr/A-Z/a-z/; # Make it all lower case.
|
||||
# to make the numerical and string-sorted orders the same.
|
||||
print O "$key $T{$uttname}\n";
|
||||
print P "$key $_";
|
||||
$n++;
|
||||
}
|
||||
close(O) || die "Closing output.";
|
||||
close(P) || die "Closing output.";
|
||||
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
# modified from a file that was:
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
# Note: when creating your own data preparation scripts, it's a good idea
|
||||
# to make sure that the speaker id (if present) is a prefix of the utterance
|
||||
# id, that the output scp file is sorted on utterance id, and that the
|
||||
# transcription file is exactly the same length as the scp file and is also
|
||||
# sorted on utterance id (missing transcriptions should be removed from the
|
||||
# scp file using e.g. scripts/filter_scp.pl)
|
||||
|
||||
if [ $# != 1 ]; then
|
||||
echo "Usage: ../../local/RM_data_prep.sh /path/to/RM"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
export LC_ALL=C
|
||||
|
||||
RMROOT=$1
|
||||
|
||||
mkdir -p data/local
|
||||
cd data/local
|
||||
|
||||
if [ ! -d $RMROOT/LDC93S3B -o ! -d $RMROOT/rm1 ]; then
|
||||
echo "Speech data is missing. You can download the data by running ./getdata.sh"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
# Make a list of files
|
||||
cat $RMROOT/rm1/etc/rm1_train.fileids | \
|
||||
xargs -I_x_ echo $RMROOT/rm1/feat/_x_.mfc > train.flist
|
||||
cat $RMROOT/rm1/etc/rm1_test.fileids | \
|
||||
xargs -I_x_ echo $RMROOT/rm1/feat/_x_.mfc > test.flist
|
||||
|
||||
# make_trans.pl also creates the utterance id's and the kaldi-format scp file.
|
||||
|
||||
# training set
|
||||
../../local/make_trans.pl trn train.flist $RMROOT/LDC93S3B/disc_1/doc/al_sents.snr train_trans.txt train.scp
|
||||
mv train_trans.txt tmp; sort -k 1 tmp > train_trans.txt
|
||||
mv train.scp tmp; sort -k 1 tmp > train.scp
|
||||
rm tmp
|
||||
|
||||
# test set
|
||||
../../local/make_trans.pl test test.flist $RMROOT/LDC93S3B/disc_1/doc/al_sents.snr test_trans.txt test.scp
|
||||
mv test_trans.txt tmp; sort -k 1 tmp > test_trans.txt
|
||||
mv test.scp tmp; sort -k 1 tmp > test.scp
|
||||
rm tmp
|
||||
|
||||
# We already have the features, so sph2pipe step is skipped and
|
||||
# given the limited data the speaker-dependent processing is also not used
|
||||
|
||||
../../scripts/make_rm_lm.pl $RMROOT/LDC93S3B/disc_1/doc/wp_gram.txt > G.txt || exit 1;
|
||||
|
||||
# Convert the CMU's lexicon to a form which the other scripts expect
|
||||
# (leave only the first pronunciation variant, convert "'" to "+",
|
||||
# and convert the phones to lower case)
|
||||
cat $RMROOT/rm1/etc/rm1.dic | \
|
||||
egrep -v '\(' | \
|
||||
sed -e "s/'/\+/g" | \
|
||||
sed -e "s/^\([[:alnum:]-]\+\(+[[:alpha:]]\+\)\?\)\(.*\)/\1\L\3/g" > lexicon.txt
|
||||
|
||||
|
||||
echo RM_data_prep succeeded.
|
|
@ -0,0 +1,126 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
# modified from:
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
data_list="train test"
|
||||
|
||||
for x in lang lang_test $data_list; do
|
||||
mkdir -p data/$x
|
||||
done
|
||||
|
||||
# Copy stuff into its final location:
|
||||
|
||||
for x in $data_list; do
|
||||
cp data/local/${x}.scp data/$x/mfc.scp || exit 1;
|
||||
cp data/local/${x}_trans.txt data/$x/text || exit 1;
|
||||
done
|
||||
|
||||
# We are not using make_words_symtab.pl for symbol table creation in this
|
||||
# recipe, because CMU's lexicon have several words that are not in the
|
||||
# word-pair grammar
|
||||
cat data/local/lexicon.txt | \
|
||||
awk 'BEGIN{print "<eps>\t0";} {print $1 "\t" NR;} END{print "!SIL\t" NR+1;}' \
|
||||
> data/lang/words.txt
|
||||
scripts/make_phones_symtab.pl < data/local/lexicon.txt > data/lang/phones.txt
|
||||
cp data/lang/words.txt data/lang_test/words.txt
|
||||
|
||||
silphones="sil"; # This would in general be a space-separated list of all silence phones. E.g. "sil vn"
|
||||
# Generate colon-separated lists of silence and non-silence phones.
|
||||
scripts/silphones.pl data/lang/phones.txt "$silphones" data/lang/silphones.csl \
|
||||
data/lang/nonsilphones.csl
|
||||
|
||||
ndisambig=`scripts/add_lex_disambig.pl data/local/lexicon.txt data/local/lexicon_disambig.txt`
|
||||
ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
|
||||
scripts/add_disambig.pl data/lang/phones.txt $ndisambig > data/lang_test/phones_disambig.txt
|
||||
cp data/lang_test/phones_disambig.txt data/lang/ # needed for MMI.
|
||||
|
||||
silprob=0.5 # same prob as word
|
||||
scripts/make_lexicon_fst.pl data/local/lexicon.txt $silprob sil | \
|
||||
fstcompile --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt \
|
||||
--keep_isymbols=false --keep_osymbols=false | \
|
||||
fstarcsort --sort_type=olabel > data/lang/L.fst
|
||||
|
||||
# Create L_align.fst, which is as L.fst but with alignment symbols (#1 and #2 at the
|
||||
# beginning and end of words, on the input side)... useful if we
|
||||
# ever need to e.g. create ctm's-- these are used to work out the
|
||||
# word boundaries.
|
||||
|
||||
|
||||
cat data/local/lexicon.txt | \
|
||||
awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
|
||||
scripts/make_lexicon_fst.pl - 0.5 sil | \
|
||||
fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
|
||||
--keep_isymbols=false --keep_osymbols=false | \
|
||||
fstarcsort --sort_type=olabel > data/lang_test/L_align.fst
|
||||
|
||||
# L_disambig.fst has the disambiguation symbols (c.f. Mohri's papers)
|
||||
|
||||
scripts/make_lexicon_fst.pl data/local/lexicon_disambig.txt $silprob sil '#'$ndisambig | \
|
||||
fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
|
||||
--keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel \
|
||||
> data/lang_test/L_disambig.fst
|
||||
|
||||
cp data/lang_test/L_disambig.fst data/lang/ # Needed for MMI training.
|
||||
|
||||
fstcompile --isymbols=data/lang/words.txt --osymbols=data/lang/words.txt --keep_isymbols=false \
|
||||
--keep_osymbols=false data/local/G.txt > data/lang_test/G.fst
|
||||
|
||||
# Checking that G is stochastic [note, it wouldn't be for an Arpa]
|
||||
fstisstochastic data/lang_test/G.fst || echo Error: G is not stochastic
|
||||
|
||||
# Checking that G.fst is determinizable.
|
||||
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.
|
||||
|
||||
# Checking that L_disambig.fst is determinizable.
|
||||
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.
|
||||
|
||||
# Checking that disambiguated lexicon times G is determinizable
|
||||
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
|
||||
fstdeterminize >/dev/null || echo Error
|
||||
|
||||
# Checking that LG is stochastic:
|
||||
fsttablecompose data/lang/L.fst data/lang_test/G.fst | \
|
||||
fstisstochastic || echo Error: LG is not stochastic.
|
||||
|
||||
# Checking that L_disambig.G is stochastic:
|
||||
# Compose the disambiguated lexicon with G and report if the result is not
# stochastic; the message names L_disambig.G so it can be told apart from the
# plain LG check.
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
  fstisstochastic || echo Error: L_disambig.G is not stochastic.
|
||||
|
||||
|
||||
## Check lexicon.
|
||||
## just have a look and make sure it seems sane.
|
||||
echo "First few lines of lexicon FST:"
|
||||
fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head
|
||||
|
||||
|
||||
silphonelist=`cat data/lang/silphones.csl | sed 's/:/ /g'`
|
||||
nonsilphonelist=`cat data/lang/nonsilphones.csl | sed 's/:/ /g'`
|
||||
cat conf/topo.proto | sed "s:NONSILENCEPHONES:$nonsilphonelist:" | \
|
||||
sed "s:SILENCEPHONES:$silphonelist:" > data/lang/topo
|
||||
|
||||
for x in phones.txt words.txt silphones.csl nonsilphones.csl topo; do
|
||||
cp data/lang/$x data/lang_test/$x || exit 1;
|
||||
done
|
||||
|
||||
echo RM_format_data succeeded.
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/bash

# Environment setup for the recipe; paths are derived from the directory this
# file is sourced from.

# path to Kaldi's root directory
root="$(pwd)/../../.."

# Prepend the Kaldi and OpenFst binary directories to the search path.
PATH="${root}/src/bin:${root}/tools/openfst/bin:${root}/src/fstbin/:${root}/src/gmmbin/:${root}/src/featbin/:${root}/src/fgmmbin:${root}/src/sgmmbin:${root}/src/lm:${root}/src/latbin:${root}/src/tiedbin/:${PATH}"
export PATH

# path to the directory in which the subset of RM corpus is stored
RM1_ROOT="$(pwd)/data/download"
export RM1_ROOT

export LC_ALL=C
# NOTE(review): LC_LOCALE_ALL does not look like a standard locale variable;
# it is still exported in case something reads it - confirm before removing.
export LC_LOCALE_ALL=C
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
source ./path.sh
|
||||
|
||||
# call the next line with the directory where the RM data is
|
||||
local/rm_data_prep.sh $RM1_ROOT || exit 1;
|
||||
|
||||
local/rm_format_data.sh || exit 1;
|
||||
|
||||
# the directory, where you want to store MFCC features.
|
||||
featdir=data/rm_feats
|
||||
|
||||
# convert the Sphinx feature files to Kaldi tables
|
||||
for x in train test; do
|
||||
steps/make_mfcc.sh data/$x exp/make_mfcc/$x $featdir || exit 1;
|
||||
done
|
||||
|
||||
scripts/subset_data_dir.sh data/train 1000 data/train.1k || exit 1;
|
||||
|
||||
# train monophone system.
|
||||
steps/train_mono.sh data/train.1k data/lang exp/mono || exit 1;
|
||||
|
||||
# monophone decoding
|
||||
local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode || exit 1;
|
||||
|
||||
# Get alignments from monophone system.
|
||||
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali || exit 1;
|
||||
|
||||
# train tri1 [first triphone pass]
|
||||
steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1 || exit 1;
|
||||
|
||||
# decode tri1
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri1/decode || exit 1;
|
||||
|
||||
# align tri1
|
||||
steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
|
||||
data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
|
||||
|
||||
# train tri2a [delta+delta-deltas]
|
||||
steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a || exit 1;
|
||||
|
||||
# decode tri2a
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri2a/decode || exit 1;
|
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Adds some specified number of disambig symbols to a symbol table.
|
||||
# Adds these as #1, #2, etc.
|
||||
# If the --include-zero option is specified, includes an extra one
|
||||
# #0.
|
||||
if(!(@ARGV == 2 || (@ARGV ==3 && $ARGV[0] eq "--include-zero"))) {
|
||||
die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
|
||||
}
|
||||
|
||||
if(@ARGV == 3) {
|
||||
$include_zero = 1;
|
||||
$ARGV[0] eq "--include-zero" || die "Bad option/first argument $ARGV[0]";
|
||||
shift @ARGV;
|
||||
} else {
|
||||
$include_zero = 0;
|
||||
}
|
||||
|
||||
$input = $ARGV[0];
|
||||
$nsyms = $ARGV[1];
|
||||
|
||||
open(F, "<$input") || die "Opening file $input";
|
||||
|
||||
while(<F>) {
|
||||
@A = split(" ", $_);
|
||||
@A == 2 || die "Bad line $_";
|
||||
$lastsym = $A[1];
|
||||
print;
|
||||
}
|
||||
|
||||
if(!defined($lastsym)){
|
||||
die "Empty symbol file?";
|
||||
}
|
||||
|
||||
if($include_zero) {
|
||||
$lastsym++;
|
||||
print "#0 $lastsym\n";
|
||||
}
|
||||
|
||||
for($n = 1; $n <= $nsyms; $n++) {
|
||||
$y = $n + $lastsym;
|
||||
print "#$n $y\n";
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Adds disambiguation symbols to a lexicon.
|
||||
# Outputs still in the normal lexicon format.
|
||||
# Disambig syms are numbered #1, #2, #3, etc. (#0
|
||||
# reserved for symbol in grammar).
|
||||
# Outputs the number of disambig syms to the standard output.
|
||||
|
||||
# Exactly two arguments are accepted: the input lexicon and the output
# (disambiguated) lexicon.  No options are parsed by this script.
if (@ARGV != 2) {
  die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n";
}
|
||||
|
||||
|
||||
$lexfn = shift @ARGV;
|
||||
$lexoutfn = shift @ARGV;
|
||||
|
||||
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
|
||||
|
||||
# (1) Read in the lexicon.
|
||||
@L = ( );
|
||||
while(<L>) {
|
||||
@A = split(" ", $_);
|
||||
push @L, join(" ", @A);
|
||||
}
|
||||
|
||||
# (2) Work out the count of each phone-sequence in the
|
||||
# lexicon.
|
||||
|
||||
foreach $l (@L) {
|
||||
@A = split(" ", $l);
|
||||
shift @A; # Remove word.
|
||||
$count{join(" ",@A)}++;
|
||||
}
|
||||
|
||||
# (3) For each left sub-sequence of each phone-sequence, note down
|
||||
# that it exists (for identifying prefixes of longer strings).
|
||||
|
||||
foreach $l (@L) {
|
||||
@A = split(" ", $l);
|
||||
shift @A; # Remove word.
|
||||
while(@A > 0) {
|
||||
pop @A; # Remove last phone
|
||||
$issubseq{join(" ",@A)} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
# (4) For each entry in the lexicon:
|
||||
# if the phone sequence is unique and is not a
|
||||
# prefix of another word, no disambig symbol.
|
||||
# Else output #1, or #2, #3, ... if the same phone-seq
|
||||
# has already been assigned a disambig symbol.
|
||||
|
||||
|
||||
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";
|
||||
|
||||
$max_disambig = 0;
|
||||
foreach $l (@L) {
|
||||
@A = split(" ", $l);
|
||||
$word = shift @A;
|
||||
$phnseq = join(" ",@A);
|
||||
if(!defined $issubseq{$phnseq}
|
||||
&& $count{$phnseq}==1) {
|
||||
; # Do nothing.
|
||||
} else {
|
||||
if($phnseq eq "") { # need disambig symbols for the empty string
|
||||
# that are not used anywhere else.
|
||||
$max_disambig++;
|
||||
$reserved{$max_disambig} = 1;
|
||||
$phnseq = "#$max_disambig";
|
||||
} else {
|
||||
$curnumber = $disambig_of{$phnseq};
|
||||
# No disambig symbol recorded yet for this phone sequence: start from zero.
if (!defined $curnumber) { $curnumber = 0; }
|
||||
$curnumber++; # now 1 or 2, ...
|
||||
while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
|
||||
if($curnumber > $max_disambig) {
|
||||
$max_disambig = $curnumber;
|
||||
}
|
||||
$disambig_of{$phnseq} = $curnumber;
|
||||
$phnseq = $phnseq . " #" . $curnumber;
|
||||
}
|
||||
}
|
||||
print O "$word\t$phnseq\n";
|
||||
}
|
||||
|
||||
print $max_disambig . "\n";
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/perl -w
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script takes a list of utterance-ids and filters an scp
|
||||
# file (or any file whose first field is an utterance id), printing
|
||||
# out only those lines whose first field is in id_list.
|
||||
|
||||
if(@ARGV < 1 || @ARGV > 2) {
|
||||
die "Usage: filter_scp.pl id_list [in.scp] > out.scp ";
|
||||
}
|
||||
|
||||
$idlist = shift @ARGV;
|
||||
open(F, "<$idlist") || die "Could not open id-list file $idlist";
|
||||
while(<F>) {
|
||||
@A = split;
|
||||
@A>=1 || die "Invalid id-list file line $_";
|
||||
$seen{$A[0]} = 1;
|
||||
}
|
||||
|
||||
while(<>) {
|
||||
@A = split;
|
||||
@A > 0 || die "Invalid scp file line $_";
|
||||
if($seen{$A[0]}) {
|
||||
print $_;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
$ignore_noninteger = 0;
|
||||
$ignore_first_field = 0;
|
||||
$field = -1;
|
||||
for($x = 0; $x < 2; $x++) {
|
||||
if($ARGV[0] eq "--ignore-noninteger") { $ignore_noninteger = 1; shift @ARGV; }
|
||||
if($ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
|
||||
if($ARGV[0] eq "--field") {
|
||||
shift @ARGV; $field = $ARGV[0]+0; shift @ARGV;
|
||||
if ($field < 1) { die "Bad argument to --field option: $field"; }
|
||||
}
|
||||
}
|
||||
|
||||
if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
|
||||
$zfield = $field-1; # Change to zero-based indexing.
|
||||
|
||||
$symtab = shift @ARGV;
|
||||
# The symbol table file name is mandatory.
if (!defined $symtab) {
  die "Usage: int2sym.pl symtab [input] > output\n";
}
|
||||
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
|
||||
while(<F>) {
|
||||
@A = split(" ", $_);
|
||||
@A == 2 || die "bad line in symbol table file: $_";
|
||||
$int2sym{$A[1]} = $A[0];
|
||||
}
|
||||
|
||||
# Maps one token to its symbol using the global %int2sym table.
#   $_[0]  the token to convert
#   $_[1]  zero-based position of the token in the line; used only to choose
#          the hint given in the error message
# Returns the symbol.  A token that is not all digits is returned unchanged
# when $ignore_noninteger is set; otherwise the script dies.  Also dies when
# the integer is not present in the symbol table.
sub int2sym {
  my $tok = shift @_;
  my $pos = shift @_;
  if ($tok !~ m:^\d+$:) { # not all digits..
    if ($ignore_noninteger) {
      # Hand the token back rather than printing it and leaving the sub via
      # "next": the callers print or store the returned value themselves, so
      # the rest of the input line is still processed and written.
      return $tok;
    } else {
      if ($pos == 0) {
        die "int2sym.pl: found noninteger token $tok (try --ignore-first-field)\n";
      } else {
        die "int2sym.pl: found noninteger token $tok (try --ignore-noninteger if valid input)\n";
      }
    }
  }
  my $sym = $int2sym{$tok};
  if (!defined($sym)) {
    die "int2sym.pl: integer $tok not in symbol table $symtab.";
  }
  return $sym;
}
|
||||
|
||||
$error = 0;
|
||||
while(<>) {
|
||||
@A = split(" ", $_);
|
||||
if($ignore_first_field) {
|
||||
$key = shift @A;
|
||||
print $key . " ";
|
||||
}
|
||||
if ($field != -1) {
|
||||
if ($zfield <= $#A && $zfield >= 0) {
|
||||
$a = $A[$zfield];
|
||||
$A[$zfield] = int2sym($a, $zfield);
|
||||
}
|
||||
print join(" ", @A);
|
||||
} else {
|
||||
for ($pos = 0; $pos <= $#A; $pos++) {
|
||||
$a = $A[$pos];
|
||||
$s = int2sym($a, $pos);
|
||||
print $s . " ";
|
||||
}
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# makes lexicon FST (no pron-probs involved).
|
||||
|
||||
# Accepted argument counts: 1 (lexicon only), 3 (plus silprob and silphone)
# or 4 (plus the silence disambiguation symbol).  The FST text is printed to
# standard output.
if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] > lexiconfst.txt\n";
}
|
||||
|
||||
$lexfn = shift @ARGV;
|
||||
if(@ARGV == 0) {
|
||||
$silprob = 0.0;
|
||||
} elsif (@ARGV == 2){
|
||||
($silprob,$silphone) = @ARGV;
|
||||
} else {
|
||||
($silprob,$silphone,$sildisambig) = @ARGV;
|
||||
}
|
||||
if($silprob != 0.0) {
|
||||
$silprob < 1.0 || die "Sil prob cannot be >= 1.0";
|
||||
$silcost = -log($silprob);
|
||||
$nosilcost = -log(1.0 - $silprob);
|
||||
}
|
||||
|
||||
|
||||
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
|
||||
|
||||
|
||||
|
||||
# Returns true if the given phone sequence means silence, i.e. if @_ is
# either ( $silphone ) or a three-element list ( "#N", $silphone, "#M" )
# whose first and last elements are disambiguation symbols ("#" followed by
# digits).  Reads the global $silphone.
sub is_sil {
  return ( (@_ == 1 && $_[0] eq $silphone) ||
           (@_ == 3 && $_[1] eq $silphone &&
            $_[0] =~ m/^\#\d+$/ &&
            $_[2] =~ m/^\#\d+$/) );  # check the LAST element, not the first twice
}
|
||||
|
||||
if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
|
||||
$loopstate = 0;
|
||||
$nextstate = 1; # next unallocated state (state 0 is the loop/final state).
|
||||
while(<L>) {
|
||||
@A = split(" ", $_);
|
||||
$w = shift @A;
|
||||
|
||||
$s = $loopstate;
|
||||
$word_or_eps = $w;
|
||||
while (@A > 0) {
|
||||
$p = shift @A;
|
||||
if(@A > 0) {
|
||||
$ns = $nextstate++;
|
||||
} else {
|
||||
$ns = $loopstate;
|
||||
}
|
||||
print "$s\t$ns\t$p\t$word_or_eps\n";
|
||||
$word_or_eps = "<eps>";
|
||||
$s = $ns;
|
||||
}
|
||||
}
|
||||
print "$loopstate\t0\n"; # final-cost.
|
||||
} else { # have silence probs.
|
||||
$startstate = 0;
|
||||
$loopstate = 1;
|
||||
$silstate = 2; # state from where we go to loopstate after emitting silence.
|
||||
print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
|
||||
if (!defined $sildisambig) {
|
||||
print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
|
||||
print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
|
||||
$nextstate = 3;
|
||||
} else {
|
||||
$disambigstate = 3;
|
||||
$nextstate = 4;
|
||||
print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
|
||||
print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
|
||||
print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
|
||||
}
|
||||
while(<L>) {
|
||||
@A = split(" ", $_);
|
||||
$w = shift @A;
|
||||
|
||||
$s = $loopstate;
|
||||
$word_or_eps = $w;
|
||||
while (@A > 0) {
|
||||
$p = shift @A;
|
||||
if(@A > 0) {
|
||||
$ns = $nextstate++;
|
||||
print "$s\t$ns\t$p\t$word_or_eps\n";
|
||||
$word_or_eps = "<eps>";
|
||||
$s = $ns;
|
||||
} else {
|
||||
if(!is_sil(@A)){
|
||||
# This is non-deterministic but relatively compact,
|
||||
# and avoids epsilons.
|
||||
print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
|
||||
print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
|
||||
} else {
|
||||
# no point putting opt-sil after silence word.
|
||||
print "$s\t$loopstate\t$p\t$word_or_eps\n";
|
||||
}
|
||||
$word_or_eps = "<eps>";
|
||||
}
|
||||
}
|
||||
}
|
||||
print "$loopstate\t0\n"; # final-cost.
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# make_phones_symtab.pl < lexicon.txt > phones.txt
|
||||
|
||||
|
||||
# Build a phone symbol table from the lines read via <> and print it as
# "symbol<TAB>id" lines: <eps> is 0, the collected symbols follow in sorted
# order starting at 1, and "sil" is appended with the next free id.
my %seen_phone;
while (my $entry = <>) {
    my @fields = split(" ", $entry);
    # Only fields from index 2 onwards are recorded.
    # NOTE(review): if the input is "word phone1 phone2 ...", this skips each
    # word's first phone -- confirm the expected input format.
    $seen_phone{$_} = 1 foreach @fields[2 .. $#fields];
}

print "<eps>\t0\n";
my $next_id = 1;
foreach my $phone (grep { $_ ne "<eps>" } sort keys %seen_phone) {
    print "$phone\t$next_id\n";
    $next_id++;
}

# "sil" is always written last, whether or not it was seen in the input.
print "sil\t$next_id\n";
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Copyright 2010-2011 Yanmin Qian Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This file takes as input the file wp_gram.txt that comes with the RM
|
||||
# distribution, and creates the language model as an acceptor in FST form.
|
||||
|
||||
# make_rm_lm.pl wp_gram.txt > G.txt
|
||||
|
||||
# make_rm_lm.pl wp_gram.txt > G.txt
#
# Reads a word-pair grammar in which a line starting with ">" names a word
# and the lines after it (up to the next ">" line) list the words that may
# follow it.  Prints an FST in text form: state 0 is the start state, every
# other word gets its own state, and the state of SENTENCE-END is final.
if (@ARGV != 1) {
    print STDERR "usage: make_rm_lm.pl wp_gram.txt > G.txt\n";
    exit(1);
}
open(IN_FILE, "<", $ARGV[0]) || die "can't open $ARGV[0]: $!\n";

# Pass 1: collect the ">" words in file order together with their followers.
@LineArray = ();   # the ">" words, in the order they appear.
%followers = ();   # word -> reference to the list of words allowed after it.
$init = undef;     # the ">" word whose follower list is being read.
while ($line = <IN_FILE>) {
    chomp($line);  # chomp, not chop: the last line may lack a newline.
    $line =~ s/ //g;
    if ($line =~ /^>/) {
        $line =~ s/>//g;
        $init = $line;
        push @LineArray, $init;
        $followers{$init} = [];
    } elsif (defined $init) {
        push @{$followers{$init}}, $line;
    }
    # Anything before the first ">" line is ignored.
}
close(IN_FILE);

# Pass 2: arcs out of the start state, one per follower of SENTENCE-END.
# %hashwrd maps a word to its state id; 0 or undefined means "not assigned".
$init_wrd = "SENTENCE-END";
%hashwrd = ();
$hashwrd{$init_wrd} = scalar(@LineArray);
@starts = @{ $followers{$init_wrd} || [] };
$num = 0;          # highest state id handed out so far.
foreach $wrd (@starts) {
    $weight = -log(1/scalar(@starts));  # uniform over the followers.
    $num++;
    $hashwrd{$wrd} = $num;
    print "0 $hashwrd{$wrd} $wrd $wrd $weight\n";
}

# Pass 3: arcs out of every other word, numbering states on first use.
foreach $wrd (@LineArray) {
    next if $wrd eq 'SENTENCE-END';
    if ($hashwrd{$wrd} == 0) {
        $num++;
        $hashwrd{$wrd} = $num;
    }
    @nexts = @{$followers{$wrd}};
    foreach $nxt (@nexts) {
        $weight = -log(1/scalar(@nexts));
        if ($hashwrd{$nxt} == 0) {
            $num++;
            $hashwrd{$nxt} = $num;
        }
        if ($nxt eq 'SENTENCE-END') {
            # Moving to the final state emits no word.
            print "$hashwrd{$wrd} $hashwrd{$nxt} <eps> <eps> $weight\n";
        } else {
            print "$hashwrd{$wrd} $hashwrd{$nxt} $nxt $nxt $weight\n";
        }
    }
}

print "$hashwrd{$init_wrd} 0\n";  # final state.
|
||||
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Written by Dan Povey 9/21/2010. Apache 2.0 License.
|
||||
|
||||
# This version of make_roots.pl is specialized for RM.
|
||||
|
||||
# This script creates the file roots.txt which is an input to train-tree.cc. It
|
||||
# specifies how the trees are built. The input file phone-sets.txt is a partial
|
||||
# version of roots.txt in which phones are represented by their spelled form, not
|
||||
# their symbol id's. E.g. at input, phone-sets.txt might contain;
|
||||
# shared not-split sil
|
||||
# Any phones not specified in phone-sets.txt but present in phones.txt will
|
||||
# be given a default treatment. If the --separate option is given, we create
|
||||
# a separate tree root for each of them, otherwise they are all lumped in one set.
|
||||
# The arguments shared|not-shared and split|not-split are needed if any
|
||||
# phones are not specified in phone-sets.txt. What they mean is as follows:
|
||||
# if shared=="shared" then we share the tree-root between different HMM-positions
|
||||
# (0,1,2). If split=="split" then we actually do decision tree splitting on
|
||||
# that root, otherwise we forbid decision-tree splitting. (The main reason we might
|
||||
# set this to false is for silence when
|
||||
# we want to ensure that the HMM-positions will remain with a single PDF id.
|
||||
|
||||
|
||||
# Usage: make_roots.pl [--separate] phones.txt silence-phone-list shared|not-shared split|not-split > roots.txt
# Prints one "not-shared not-split <id>" line per silence phone, then the
# remaining phone ids either one per line (--separate) or all on one line.
$separate = 0;
if (@ARGV > 0 && $ARGV[0] eq "--separate") {
    $separate = 1;
    shift @ARGV;
}

if (@ARGV != 4) {
    die "Usage: make_roots.pl [--separate] phones.txt silence-phone-list[integer,colon-separated] shared|not-shared split|not-split > roots.txt\n";
}

($phonesfile, $silphones, $shared, $split) = @ARGV;
if ($shared ne "shared" && $shared ne "not-shared") {
    die "Third argument must be \"shared\" or \"not-shared\"\n";
}
if ($split ne "split" && $split ne "not-split") {
    die "Fourth argument must be \"split\" or \"not-split\"\n";
}

# Read the phone symbol table; the entry with id 0 is left out.
open(F, "<$phonesfile") || die "Opening file $phonesfile";
while (<F>) {
    @A = split(" ", $_);
    if (@A != 2) {
        die "Bad line in phones symbol file: ".$_;
    }
    if ($A[1] != 0) {
        $symbol2id{$A[0]} = $A[1];
        $id2symbol{$A[1]} = $A[0];
    }
}
close(F);

# String comparison: a numeric == would treat any non-numeric list as 0 and
# wrongly report it as empty.
if ($silphones eq "") {
    die "Empty silence phone list in make_roots.pl";
}
foreach $silphoneid (split(":", $silphones)) {
    defined $id2symbol{$silphoneid} || die "No such silence phone id $silphoneid";
    # Give each silence phone its own separate pdfs in each state, but
    # no sharing (in this recipe; WSJ is different.. in this recipe there
    # is only one silence phone anyway.)
    $issil{$silphoneid} = 1;
    print "not-shared not-split $silphoneid\n";
}

# Sort the remaining ids numerically so the output does not depend on hash order.
@nonsil_ids = sort { $a <=> $b } grep { !defined $issil{$_} } keys %id2symbol;

if ($separate) {
    foreach $id (@nonsil_ids) {
        print "$shared $split $id\n";
    }
} else {
    print "$shared $split ";
    foreach $id (@nonsil_ids) {
        print "$id ";
    }
    print "\n";
}
|
|
@ -0,0 +1,112 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Defaults: context size N and central position P as passed to
# fstcomposecontext; --mono switches them to 1 and 0.
N=3
P=1
clean=false

# Consume leading --mono / --clean flags in any order.  "$1" is quoted so
# that running out of arguments gives an empty string rather than a
# malformed test expression.
while [ $# -gt 0 ]; do
  case "$1" in
    --mono)  N=1; P=0; shift ;;
    --clean) clean=true; shift ;;
    *)       break ;;
  esac
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1;
fi
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

# --clean: throw away cached intermediate FSTs.  -f so that a tmp directory
# that does not exist yet is not reported as an error.
if $clean; then rm -rf "${lang:?}/tmp"; fi

mkdir -p "$dir" || exit 1;

tscale=1.0
loopscale=0.1

# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
# (note: the [[ ]] brackets make the || type operators work (inside [ ], we
# would have to use -o instead), -f means file exists, and -ot means older than).

mkdir -p "$lang/tmp" || exit 1;
if [[ ! -f $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \
      $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" | fstdeterminizestar --use-log=true | \
    fstminimizeencoded > "$lang/tmp/LG.fst" || exit 1;
  fstisstochastic "$lang/tmp/LG.fst" || echo "warning: LG not stochastic."
fi

if [ ! -f "$lang/phones_disambig.txt" ]; then
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1;
fi

# Integer ids of the symbols whose names contain '#'.
grep '#' "$lang/phones_disambig.txt" | awk '{print $2}' > "$lang/tmp/disambig_phones.list"

clg=$lang/tmp/CLG_${N}_${P}.fst

if [[ ! -f $clg || $clg -ot $lang/tmp/LG.fst ]]; then
  # Stop on failure, as the other build steps do, rather than carrying on
  # with an incomplete CLG.
  fstcomposecontext --context-size="$N" --central-position="$P" \
    --read-disambig-syms="$lang/tmp/disambig_phones.list" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.list" \
    "$lang/tmp/ilabels_${N}_${P}" < "$lang/tmp/LG.fst" > "$clg" || exit 1;
  fstisstochastic "$clg" || echo "warning: CLG not stochastic."
fi

if [[ ! -f $dir/Ha.fst || $dir/Ha.fst -ot $model ]]; then
  make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
    --transition-scale="$tscale" "$lang/tmp/ilabels_${N}_${P}" "$tree" "$model" \
    > "$dir/Ha.fst" || exit 1;
fi

if [[ ! -f $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \
      $dir/HCLGa.fst -ot $clg ]]; then
  fsttablecompose "$dir/Ha.fst" "$clg" | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.list" | fstrmepslocal | \
    fstminimizeencoded > "$dir/HCLGa.fst" || exit 1;
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

if [[ ! -f $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then
  add-self-loops --self-loop-scale="$loopscale" --reorder=true \
    "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst" || exit 1;

  if [[ "$tscale" == 1.0 && "$loopscale" == 1.0 ]]; then
    # No point doing this test if transition-scale not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
  fi
fi

# to make const fst:
# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# creates integer lists of silence and non-silence phones in files,
|
||||
# e.g. silphones.csl="1:2:3 \n"
|
||||
# and nonsilphones.csl="4:5:6:7:...:24\n";
|
||||
|
||||
# Usage: silphones.pl phones.txt "sil1 sil2 sil3" silphones.csl nonsilphones.csl
# Splits the integer ids in the phone symbol table into those whose symbol is
# in the given silence list and all the others, and writes each group as one
# colon-separated line to its own output file.
die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl"
    unless @ARGV == 4;

($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;
open(S, "<$symtab") || die "Opening symbol table $symtab";

# Names of the silence phones given on the command line.
%issil = map { $_ => 1 } split(" ", $sillist);

@sil = ();
@nonsil = ();
while (<S>) {
    @A = split(" ", $_);
    die "Bad line $_ in phone-symbol-table file $symtab" unless @A == 2;
    ($sym, $int) = @A;
    next if $int == 0;  # the entry with id 0 goes into neither list.
    if ($issil{$sym}) {
        push @sil, $int;
        $seensil{$sym} = 1;
    } else {
        push @nonsil, $int;
    }
}

# Every requested silence phone must have been present in the symbol table.
foreach $k (keys %issil) {
    die "No such silence phone $k" unless $seensil{$k};
}

open(F, ">$silphones") || die "opening silphones file $silphones";
open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
print F join(":", @sil) . "\n";
print G join(":", @nonsil) . "\n";
close(F);
close(G);

print STDERR "Warning: silphones.pl no silence phones.\n" if @sil == 0;
print STDERR "Warning: silphones.pl no non-silence phones.\n" if @nonsil == 0;
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script operates on a directory, such as in data/train/,
|
||||
# that contains some subset of the following files:
|
||||
# feats.scp
|
||||
# wav.scp
|
||||
# spk2utt
|
||||
# utt2spk
|
||||
# text
|
||||
# It creates a subset of that data, consisting of some specified
|
||||
# number of utterances. (The selected utterances are distributed
|
||||
# evenly throughout the file, by the program ./subset_scp.pl).
|
||||
|
||||
# If you give the --per-spk option, it will attempt to select
|
||||
# the supplied number of utterances for each speaker (typically
|
||||
# you would supply a much smaller number in this case).
|
||||
|
||||
perspk=false
if [ "$1" == "--per-spk" ]; then
  perspk=true;
  shift;
fi

if [ $# != 3 ]; then
  echo "Usage: subset_data_dir.sh [--per-spk] <srcdir> <num-utt> <destdir>"
  exit 1;
fi

srcdir=$1
numutt=$2
destdir=$3

if [ ! -f "$srcdir/feats.scp" ]; then
  echo "subset_data_dir.sh: no such file $srcdir/feats.scp"
  exit 1;
fi

## scripting note: $perspk evaluates to true or false
## so this becomes the command true or false.
if $perspk; then
  mkdir -p "$destdir" || exit 1;
  # For each speaker line "spk utt1 utt2 ...", keep at most n utterances,
  # taking every skip-th one.  n is passed with -v rather than spliced into
  # the awk program text.
  awk -v n="$numutt" '{ printf("%s ",$1); skip=1; while(n*(skip+1) <= NF-1) { skip++; }
     for(x=2; x<=NF && x <= n*skip; x += skip) { printf("%s ", $x); }
     printf("\n"); }' <"$srcdir/spk2utt" >"$destdir/spk2utt"
  scripts/spk2utt_to_utt2spk.pl < "$destdir/spk2utt" > "$destdir/utt2spk"
  scripts/filter_scp.pl "$destdir/utt2spk" <"$srcdir/feats.scp" >"$destdir/feats.scp"
  [ -f "$srcdir/wav.scp" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/wav.scp" >"$destdir/wav.scp"
  [ -f "$srcdir/mfc.scp" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/mfc.scp" >"$destdir/mfc.scp"
  [ -f "$srcdir/text" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"
  [ -f "$srcdir/spk2gender" ] && scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"
  srcutts=$(wc -l <"$srcdir/utt2spk")
  destutts=$(wc -l <"$destdir/utt2spk")
  echo "Retained $numutt utterances per speaker from data-dir $srcdir and put it in $destdir, reducing #utt from $srcutts to $destutts"
  exit 0;
else
  if [ "$numutt" -gt $(wc -l <"$srcdir/feats.scp") ]; then
    echo "subset_data_dir.sh: cannot subset to more utterances than you originally had."
    exit 1;
  fi

  mkdir -p "$destdir" || exit 1;

  # create feats.scp
  scripts/subset_scp.pl "$numutt" "$srcdir/feats.scp" > "$destdir/feats.scp" || exit 1;

  # Test for the file that is actually filtered (mfc.scp, not wav.scp);
  # otherwise drop any stale copy left in the destination.
  if [ -f "$srcdir/mfc.scp" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/mfc.scp" > "$destdir/mfc.scp" || exit 1;
  else
    rm -f "$destdir/mfc.scp"
  fi

  [ -f "$srcdir/wav.scp" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/wav.scp" >"$destdir/wav.scp"

  if [ -f "$srcdir/utt2spk" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/utt2spk" > "$destdir/utt2spk" || exit 1;
    scripts/utt2spk_to_spk2utt.pl "$destdir/utt2spk" > "$destdir/spk2utt" || exit 1;
  fi

  [ -f "$srcdir/text" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"

  [ -f "$srcdir/spk2gender" ] && scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"

  echo "Created a $numutt-utterance subset of $srcdir and put it in $destdir."

  exit 0;
fi
|
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/perl -w
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This program selects a subset of N elements in the scp.
|
||||
# It selects them evenly from throughout the scp, in order to
|
||||
# avoid selecting too many from the same speaker.
|
||||
# It prints them on the standard output.
|
||||
|
||||
# Usage: subset_scp.pl N in.scp
# Checks the arguments and reads every line of in.scp into @F.
if (@ARGV < 2) {
    die "Usage: subset_scp.pl N in.scp ";
}

$N = shift @ARGV;
# Accept only a positive decimal integer.  A plain "== 0" test would let
# negative numbers and strings such as "3x" through.
if ($N !~ m/^\d+$/ || $N == 0) {
    die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\"";
}
$inscp = shift @ARGV;
open(I, "<$inscp") || die "Opening input scp file $inscp";

@F = <I>;  # one element per input line, newlines kept.
close(I);
$numlines = @F;
if ($N > $numlines) {
    die "You requested from subset_scp.pl more elements than available: $N > $numlines";
}
|
||||
|
||||
sub select_n {
    # select_n($start, $end, $num_needed): print $num_needed lines of the
    # global @F taken from the index range [$start, $end).  The range is
    # halved repeatedly, giving the lower half int($num_needed/2) of the
    # quota and the upper half the rest; a one-line range is printed when
    # its quota is above zero.
    # Done with an explicit work list; entries are taken from the front and
    # the two halves are put back at the front, lower half first, so lines
    # come out in file order.
    my @pending = ([@_]);
    while (@pending) {
        my ($lo, $hi, $want) = @{ shift @pending };
        my $span = $hi - $lo;
        die "select_n: code error" if $want > $span;
        if ($span == 1) {
            print $F[$lo] if $want > 0;
            next;
        }
        my $mid = $lo + int($span / 2);
        my $want_lower = int($want / 2);
        unshift @pending, [$lo, $mid, $want_lower],
                          [$mid, $hi, $want - $want_lower];
    }
}
|
||||
select_n(0, $numlines, $N);
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Usage: sym2int.pl [--ignore-oov] [--ignore-first-field] [--map-oov <sym>] symtab [input transcriptions] > output transcriptions
# Replaces each whitespace-separated symbol of the input with its integer
# from the symbol table.
$ignore_oov = 0;
$ignore_first_field = 0;
# Three passes over the leading arguments, so the options may come in any order.
foreach my $pass (1 .. 3) {
    # Note: it will just print OOVS unmodified if you specify --ignore-oov.
    # Else will complain and put nothing out.
    if ($ARGV[0] eq "--ignore-oov") {
        shift @ARGV;
        $ignore_oov = 1;
    }
    if ($ARGV[0] eq "--ignore-first-field") {
        shift @ARGV;
        $ignore_first_field = 1;
    }
    if ($ARGV[0] eq "--map-oov") {
        shift @ARGV;
        $map_oov = shift @ARGV;
    }
}

$symtab = shift @ARGV;
defined $symtab
    or die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n";

# Load the table: each line is "symbol integer".
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while (<F>) {
    my @entry = split(" ", $_);
    die "bad line in symbol table file: $_" unless @entry == 2;
    $sym2int{$entry[0]} = $entry[1] + 0;
}

$num_warning = 0;
$max_warning = 20;  # stop reporting replaced OOVs after this many.
$error = 0;
while (<>) {
    my @tokens = split(" ", $_);
    die "Empty line in transcriptions input." if @tokens == 0;
    if ($ignore_first_field) {
        # The first field is copied through unchanged.
        my $key = shift @tokens;
        print $key . " ";
    }
    my @ints = ();
    foreach my $tok (@tokens) {
        my $id = $sym2int{$tok};
        unless (defined $id) {
            # Out-of-vocabulary symbol: map it, pass it through, or give up.
            if (defined $map_oov) {
                defined $sym2int{$map_oov}
                    or die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
                if ($num_warning++ < $max_warning) {
                    print STDERR "sym2int.pl: replacing $tok with $map_oov\n";
                    print STDERR "sym2int.pl: not warning for OOVs any more times\n"
                        if $num_warning == $max_warning;
                }
                $id = $sym2int{$map_oov};
            } elsif ($ignore_oov) {
                $id = $tok;  # just print them out unmodified..
            } else {
                die "sym2int.pl: undefined symbol $tok\n";
            }
        }
        push @ints, $id;
    }
    print join(" ", @ints);
    print "\n";
}

exit($error ? 1 : 0);
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
|
||||
# This script does training-data alignment given a model built using
|
||||
# CMN + delta + delta-delta features. Its output, all in its own
|
||||
# experimental directory, is cmvn.ark, ali, tree, and final.mdl
|
||||
# (the last two are just copied from the source directory).
|
||||
|
||||
# Option to use precompiled graphs from last phase, if these
|
||||
# are available (i.e. if they were built with the same data).
|
||||
|
||||
# Optional "--graphs <rspecifier>": precompiled graphs to align against.
graphs=
if [ "$1" == --graphs ]; then
  shift;
  graphs=$1
  shift
fi

if [ $# != 4 ]; then
  echo "Usage: steps/align_deltas.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/align_deltas.sh data/train data/lang exp/tri1 exp/tri1_ali"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
srcdir=$3
dir=$4

mkdir -p "$dir" || exit 1;
cp "$srcdir"/{tree,final.mdl,final.occs} "$dir" || exit 1;  # Create copy of the tree and model and occs...

# Left unquoted where used, so that it splits into three separate options.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

echo "Computing cepstral mean and variance statistics"
compute-cmvn-stats "scp:$data/feats.scp" \
  "ark:$dir/cmvn.ark" 2>"$dir/cmvn.log" || exit 1;

feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# Align all training data using the supplied model.

echo "Aligning all training data"
if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
  # compute integer form of transcripts.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
    || exit 1;
  gmm-align $scale_opts --beam=8 --retry-beam=40 "$dir/tree" "$dir/final.mdl" "$lang/L.fst" \
    "$feats" "ark:$dir/train.tra" "ark:$dir/ali" 2> "$dir/align.log" || exit 1;
  rm "$dir/train.tra"
else
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 "$dir/final.mdl" \
    "$graphs" "$feats" "ark:$dir/ali" 2> "$dir/align.log" || exit 1;
fi

echo "Done."
|
|
@ -0,0 +1,77 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Decoding script that works with a GMM model and delta-delta plus
|
||||
# cepstral mean subtraction features. Used, for example, to decode
|
||||
# mono/ and tri1/
|
||||
# This script generates lattices and rescores them with different
|
||||
# acoustic weights, in order to explore a range of different
|
||||
# weights.
|
||||
|
||||
if [ $# != 4 ]; then
  echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

mkdir -p "$dir" || exit 1;

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo "No model file $srcdir/final.mdl"
  exit 1;
fi

if [[ ! -f $graphdir/HCLG.fst || $graphdir/HCLG.fst -ot $srcdir/final.mdl ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.

gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" "ark:|gzip -c > $dir/lat.gz" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1;

# In this setup there are no non-scored words, so
# scoring is simple.

# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
  acwt=$(perl -e "print (1.0/$inv_acwt);")
  lattice-best-path --acoustic-scale="$acwt" --word-symbol-table="$lang/words.txt" \
    "ark:gunzip -c $dir/lat.gz|" "ark,t:$dir/${inv_acwt}.tra" \
    2>"$dir/rescore_${inv_acwt}.log"

  # Reference transcripts as integers on stdin; stdout and stderr of
  # compute-wer both go to the wer_N file.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" | \
    compute-wer --mode=present ark:- "ark,p:$dir/${inv_acwt}.tra" \
    >& "$dir/wer_${inv_acwt}"
done
|
|
@ -0,0 +1,48 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from .. (one directory up from here)
|
||||
|
||||
# Converts the Sphinx-format feature files listed in <data-dir>/mfc.scp into
# a single Kaldi archive under <abs-path-to-mfccdir>, writing the matching
# index to <data-dir>/feats.scp and the tool's stderr to <log-dir>.
if [ $# != 3 ]; then
  echo "usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>";
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
logdir=$2
mfccdir=$3

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$mfccdir" || exit 1;
mkdir -p "$logdir" || exit 1;

scp=$data/mfc.scp
if [ ! -f "$scp" ]; then
  # Report the path that was actually tested.
  echo "make_mfcc.sh: no such file $scp";
  exit 1;
fi

log=$logdir/make_mfcc.log

# Stop here on failure so that success is never reported for a broken run.
copy-feats --sphinx-in=true \
  "scp:$scp" "ark,scp:$mfccdir/raw_mfcc_$name.ark,$data/feats.scp" 2>"$log" \
  || { echo "make_mfcc.sh: copy-feats failed, see $log"; exit 1; }

echo "Succeeded creating MFCC features for $name"
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
# Triphone model training, using delta-delta features and cepstral
|
||||
# mean normalization. It starts from an existing directory (e.g.
|
||||
# exp/mono), supplied as an argument, which is assumed to be built using
|
||||
# the same type of features.
|
||||
|
||||
# Argument handling and configuration for triphone training.
if [ $# -ne 4 ]; then
  echo "Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
  echo " e.g.: steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

# Both the source model and its alignments are read further down.
if [ ! -f "$alidir/final.mdl" ] || [ ! -f "$alidir/ali" ]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1;
fi

scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="5 10 15 20";
# Fail early if the silence phone list cannot be read.
silphonelist=$(cat "$lang/silphones.csl") || exit 1
numiters=25    # Number of iterations of training
maxiterinc=15  # Last iter to increase #Gauss on.
numleaves=1800 # target num-leaves in tree building.
# Starting num-Gauss: initially mix up to avg. 1.5 Gauss/state (a bit more
# than this, due to state clustering), then slowly mix up to the final amount.
numgauss=$((numleaves + numleaves / 2))
totgauss=9000  # Target #Gaussians
incgauss=$(( (totgauss - numgauss) / maxiterinc )) # per-iter increment for #Gauss

mkdir -p "$dir" || exit 1

# Feature pipeline: apply the CMVN stats stored in the alignment dir, then add deltas.
feats="ark:apply-cmvn --norm-vars=false ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
|
||||
|
||||
# Tree statistics, question generation, tree building and model initialization.
echo "Accumulating tree stats"
acc-tree-stats --ci-phones="$silphonelist" "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1;

echo "Computing questions for tree clustering"

# Last field of phones.txt is the integer id; id 0 is excluded from the list.
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" \
  2> "$dir/questions.log" || exit 1;
# Symbolic copy of the questions; checked like the surrounding steps.
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" \
  > "$dir/questions_syms.txt" || exit 1;
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" \
  2>"$dir/compile_questions.log" || exit 1;

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2>"$dir/roots.log" || exit 1;

echo "Building tree"
build-tree --verbose=1 --max-leaves=$numleaves \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" 2> "$dir/train_tree.log" || exit 1;

gmm-init-model --write-occs="$dir/1.occs" \
  "$dir/tree" "$dir/treeacc" "$lang/topo" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1;

# Mixes up 1.mdl in place to the starting number of Gaussians.
gmm-mixup --mix-up=$numgauss "$dir/1.mdl" "$dir/1.occs" "$dir/1.mdl" \
  2>"$dir/mixup.log" || exit 1;

#rm $dir/treeacc
|
||||
|
||||
# Convert the alignments from the source model to the new tree/model, to use
# as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" \
  "ark:$alidir/ali" "ark:$dir/cur.ali" 2>"$dir/convert.log" || exit 1;

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" "ark:$dir/cur.ali" ark:- \
  2>/dev/null | cmp - "$alidir/ali" || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" \
  "ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2>"$dir/compile_graphs.log" || exit 1;
|
||||
|
||||
# Main training loop: iteration x reads $x.mdl and writes $((x+1)).mdl/.occs,
# re-aligning the data on the iterations listed in $realign_iters.
x=1
while [ "$x" -lt "$numiters" ]; do
  echo Pass $x
  if echo "$realign_iters" | grep -w "$x" >/dev/null; then
    echo "Aligning data"
    # $scale_opts is left unquoted on purpose: it holds several options.
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" \
    "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x + 1)).occs" --mix-up=$numgauss \
    "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x + 1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # The inputs of this iteration are no longer needed.
  rm "$dir/$x.mdl" "$dir/$x.acc"
  rm "$dir/$x.occs"
  if [ "$x" -le "$maxiterinc" ]; then
    numgauss=$((numgauss + incgauss))
  fi
  x=$((x + 1))
done

# Point final.{mdl,occs} at the last model. Both old links are removed first
# so that a rerun does not fail on an existing final.occs, and nothing is
# touched if the directory cannot be entered.
( cd "$dir" \
    && rm -f final.mdl final.occs \
    && ln -s "$x.mdl" final.mdl \
    && ln -s "$x.occs" final.occs ) || exit 1

echo Done
|
|
@ -0,0 +1,105 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
# Flat start and monophone training, with delta-delta features.
|
||||
# This script applies cepstral mean normalization (apparently per utterance here: no speaker map is passed to the CMVN tools below),
|
||||
# unlike the corresponding script in s1/
|
||||
|
||||
# Argument handling, configuration and CMVN statistics for monophone training.
if [ $# -ne 3 ]; then
  echo "Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>"
  echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
  exit 1;
fi

data=$1
lang=$2
dir=$3

if [ -f path.sh ]; then . path.sh; fi

# Configuration:
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
numiters=30    # Number of iterations of training
maxiterinc=20  # Last iter to increase #Gauss on.
numgauss=250   # Initial num-Gauss (must be more than #states=3*phones).
totgauss=1000  # Target #Gaussians.
incgauss=$(( (totgauss - numgauss) / maxiterinc )) # per-iter increment for #Gauss
realign_iters="1 2 3 4 5 6 7 8 9 10 12 15 20 25";

# Every later step writes below $dir, so give up if it cannot be created.
mkdir -p "$dir" || exit 1
echo "Computing cepstral mean and variance statistics"

compute-cmvn-stats "scp:$data/feats.scp" "ark:$dir/cmvn.ark" 2>"$dir/cmvn.log" || exit 1;

# Feature pipeline: apply the stats computed above, then add deltas.
feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
# Integer form of the transcripts, later compiled into training graphs.
scripts/sym2int.pl --ignore-first-field $lang/words.txt \
  < $data/text > $dir/train.tra || exit 1;

echo "Initializing monophone system."

# The model is initialized from a 10-utterance subset of the features.
gmm-init-mono "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" \
  $lang/topo 39 $dir/0.mdl $dir/tree \
  2> $dir/init.log || exit 1;

echo "Compiling training graphs"
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst ark:$dir/train.tra \
  "ark:|gzip -c >$dir/graphs.fsts.gz" \
  2>$dir/compile_graphs.log || exit 1

echo Pass 0

# Pass 0: equally spaced alignments are piped straight into accumulation.
align-equal-compiled "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" ark,t,f:- \
  2>$dir/align.0.log \
  | gmm-acc-stats-ali --binary=true $dir/0.mdl "$feats" ark:- $dir/0.acc \
  2> $dir/acc.0.log || exit 1;

# --min-gaussian-occupancy=3 matters here: without it "rare" phones fail to be
# estimated and, later on, they never align properly.
gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss \
  $dir/0.mdl $dir/0.acc $dir/1.mdl \
  2> $dir/update.0.log || exit 1;

rm $dir/0.acc
|
||||
|
||||
# Main training loop: iteration x reads $x.mdl and writes $((x+1)).mdl/.occs,
# re-aligning the data on the iterations listed in $realign_iters.
beam=4 # will change to 8 below after 1st pass
x=1
while [ "$x" -lt "$numiters" ]; do
  echo "Pass $x"
  if echo "$realign_iters" | grep -w "$x" >/dev/null; then
    echo "Aligning data"
    # $scale_opts is left unquoted on purpose: it holds several options.
    gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$((beam * 4)) "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" "t,ark:$dir/cur.ali" \
      2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" \
    "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x + 1)).occs" --mix-up=$numgauss \
    "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x + 1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # 1.occs is never written (pass 0 does not use --write-occs), hence the silenced rm.
  rm "$dir/$x.mdl" "$dir/$x.acc" "$dir/$x.occs" 2>/dev/null
  if [ "$x" -le "$maxiterinc" ]; then
    numgauss=$((numgauss + incgauss))
  fi
  beam=8
  x=$((x + 1))
done

# Point final.{mdl,occs} at the last model. Both old links are removed first
# so that a rerun does not fail on an existing final.occs, and nothing is
# touched if the directory cannot be entered.
( cd "$dir" \
    && rm -f final.mdl final.occs \
    && ln -s "$x.mdl" final.mdl \
    && ln -s "$x.occs" final.occs ) || exit 1
|
||||
|
||||
# example of showing the alignments:
|
||||
# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
exp/mono/decode_dev_bg/wer_3
|
||||
compute-wer --text --mode=present ark:exp/mono/decode_dev_bg/test_trans.filt ark,p:-
|
||||
%WER 33.73 [ 5079 / 15057, 392 ins, 1716 del, 2971 sub ]
|
||||
%SER 100.00 [ 400 / 400 ]
|
||||
Scored 400 sentences, 0 not present in hyp.
|
||||
|
||||
exp/mono/decode_test_bg/wer
|
||||
compute-wer --text --mode=present ark:exp/mono/decode_test_bg/test.trans ark,p:exp/mono/decode_test_bg/text
|
||||
%WER 35.68 [ 2574 / 7215, 204 ins, 848 del, 1522 sub ]
|
||||
%SER 100.00 [ 192 / 192 ]
|
||||
Scored 192 sentences, 0 not present in hyp.
|
||||
|
||||
exp/tri1/decode_dev_bg/wer_6
|
||||
compute-wer --text --mode=present ark:exp/tri1/decode_dev_bg/test.trans ark,p:-
|
||||
%WER 28.68 [ 4319 / 15057, 474 ins, 1333 del, 2512 sub ]
|
||||
%SER 100.00 [ 400 / 400 ]
|
||||
Scored 400 sentences, 0 not present in hyp.
|
||||
|
||||
exp/tri1/decode_test_bg/wer
|
||||
compute-wer --text --mode=present ark:exp/tri1/decode_test_bg/test.trans ark,p:exp/tri1/decode_test_bg/text
|
||||
%WER 31.02 [ 2238 / 7215, 226 ins, 704 del, 1308 sub ]
|
||||
%SER 100.00 [ 192 / 192 ]
|
||||
Scored 192 sentences, 0 not present in hyp.
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
faks0
|
||||
fdac1
|
||||
fjem0
|
||||
mgwt0
|
||||
mjar0
|
||||
mmdb1
|
||||
mmdm2
|
||||
mpdf0
|
||||
fcmh0
|
||||
fkms0
|
||||
mbdg0
|
||||
mbwm0
|
||||
mcsh0
|
||||
fadg0
|
||||
fdms0
|
||||
fedw0
|
||||
mgjf0
|
||||
mglb0
|
||||
mrtk0
|
||||
mtaa0
|
||||
mtdt0
|
||||
mthc0
|
||||
mwjg0
|
||||
fnmr0
|
||||
frew0
|
||||
fsem0
|
||||
mbns0
|
||||
mmjr0
|
||||
mdls0
|
||||
mdlf0
|
||||
mdvc0
|
||||
mers0
|
||||
fmah0
|
||||
fdrw0
|
||||
mrcs0
|
||||
mrjm4
|
||||
fcal1
|
||||
mmwh0
|
||||
fjsj0
|
||||
majc0
|
||||
mjsw0
|
||||
mreb0
|
||||
fgjd0
|
||||
fjmg0
|
||||
mroa0
|
||||
mteb0
|
||||
mjfc0
|
||||
mrjr0
|
||||
fmml0
|
||||
mrws1
|
|
@ -0,0 +1 @@
|
|||
--use-energy=false # only non-default option.
|
|
@ -0,0 +1,61 @@
|
|||
aa aa aa
|
||||
ae ae ae
|
||||
ah ah ah
|
||||
ao ao aa
|
||||
aw aw aw
|
||||
ax ax ah
|
||||
ax-h ax ah
|
||||
axr er er
|
||||
ay ay ay
|
||||
b b b
|
||||
bcl vcl sil
|
||||
ch ch ch
|
||||
d d d
|
||||
dcl vcl sil
|
||||
dh dh dh
|
||||
dx dx dx
|
||||
eh eh eh
|
||||
el el l
|
||||
em m m
|
||||
en en n
|
||||
eng ng ng
|
||||
epi epi sil
|
||||
er er er
|
||||
ey ey ey
|
||||
f f f
|
||||
g g g
|
||||
gcl vcl sil
|
||||
h# sil sil
|
||||
hh hh hh
|
||||
hv hh hh
|
||||
ih ih ih
|
||||
ix ix ih
|
||||
iy iy iy
|
||||
jh jh jh
|
||||
k k k
|
||||
kcl cl sil
|
||||
l l l
|
||||
m m m
|
||||
n n n
|
||||
ng ng ng
|
||||
nx n n
|
||||
ow ow ow
|
||||
oy oy oy
|
||||
p p p
|
||||
pau sil sil
|
||||
pcl cl sil
|
||||
q
|
||||
r r r
|
||||
s s s
|
||||
sh sh sh
|
||||
t t t
|
||||
tcl cl sil
|
||||
th th th
|
||||
uh uh uh
|
||||
uw uw uw
|
||||
ux uw uw
|
||||
v v v
|
||||
w w w
|
||||
y y y
|
||||
z z z
|
||||
zh zh sh
|
|
@ -0,0 +1,24 @@
|
|||
mdab0
|
||||
mwbt0
|
||||
felc0
|
||||
mtas1
|
||||
mwew0
|
||||
fpas0
|
||||
mjmp0
|
||||
mlnt0
|
||||
fpkt0
|
||||
mlll0
|
||||
mtls0
|
||||
fjlm0
|
||||
mbpm0
|
||||
mklt0
|
||||
fnlp0
|
||||
mcmj0
|
||||
mjdh0
|
||||
fmgd0
|
||||
mgrt0
|
||||
mnjm0
|
||||
fdhc0
|
||||
mjln0
|
||||
mpam0
|
||||
fmld0
|
|
@ -0,0 +1,20 @@
|
|||
<Topology>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
NONSILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
|
||||
<State> 3 </State>
|
||||
</TopologyEntry>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
SILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
|
||||
<State> 3 </State>
|
||||
</TopologyEntry>
|
||||
</Topology>
|
|
@ -0,0 +1,110 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
|
||||
# Write the arguments (backslash escapes interpreted) to stderr, then
# terminate the shell with status 1.
error_exit() {
  echo -e "$@" 1>&2
  exit 1
}
|
||||
|
||||
# Print the absolute path of the directory named in an "--option=DIR" argument.
# Arguments: $1 - option string of the form <name>=<directory>
# Outputs:   the absolute directory path on stdout
# Exits with status 1 (via error_exit) if the value is not a directory, or if
# the directory cannot be entered.
read_dirname() {
  local dir_name abs_path
  # Everything after the first '=' is the value; no '=' means no value.
  case "$1" in
    *=*) dir_name=${1#*=} ;;
    *)   dir_name= ;;
  esac
  [ -d "$dir_name" ] || error_exit "Argument '$dir_name' not a directory"
  # Assigned separately from `local` so that a failing cd is not masked.
  abs_path=$(cd -- "$dir_name" 2>/dev/null && pwd) || exit 1
  printf '%s\n' "$abs_path"
}
|
||||
|
||||
PROG=$(basename "$0")
# The text carries literal "\n"/"\t" escapes that `echo -e` renders. It is
# expanded unquoted on purpose: word splitting folds the physical line breaks
# into single spaces, so only the explicit escapes shape the output.
usage="Usage: $PROG <arguments>\n
Prepare train, dev, test file lists for TIMIT.\n\n
Required arguments:\n
--config-dir=DIR\tDirectory containing the necessary config files\n
--corpus-dir=DIR\tDirectory for the TIMIT corpus\n
--work-dir=DIR\t\tWorking directory\n
";

if [ $# -lt 3 ]; then
  error_exit $usage;
fi

# Give each setting a defined (possibly empty) value so that a missing option
# is reported below instead of tripping `-u` somewhere later in the script.
CONFDIR=${CONFDIR:-}
CORPUS=${CORPUS:-}
WDIR=${WDIR:-}

while [ $# -gt 0 ]; do
  case "$1" in
    --help)
      echo -e $usage; exit 0 ;;
    --config-dir=*)
      CONFDIR=$(read_dirname "$1"); shift ;;
    --corpus-dir=*)
      CORPUS=$(read_dirname "$1"); shift ;;
    --work-dir=*)
      WDIR=$(read_dirname "$1"); shift ;;
    *)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
  esac
done

if [ -z "$CONFDIR" ] || [ -z "$CORPUS" ] || [ -z "$WDIR" ]; then
  error_exit "$PROG: --config-dir, --corpus-dir and --work-dir are all required.\n" $usage
fi
|
||||
|
||||
# (1) check if the config files are in place. All three files used below are
# verified up front.
[ -f "$CONFDIR/test_spk.list" ] \
  || error_exit "$PROG: Eval-set speaker list not found.";
[ -f "$CONFDIR/dev_spk.list" ] \
  || error_exit "$PROG: Dev-set speaker list not found.";
[ -f "$CONFDIR/phones.60-48-39.map" ] \
  || error_exit "$PROG: Phone map phones.60-48-39.map not found.";

cd "$WDIR"
if [ -f path.sh ]; then . path.sh; fi # Sets the PATH to contain necessary executables

# (2) get the various file lists (for audio, transcription, etc.)
mkdir -p data/local
timit_prep_flists.sh --corpus-dir="$CORPUS" --dev-spk="$CONFDIR/dev_spk.list" \
  --test-spk="$CONFDIR/test_spk.list" --work-dir=data

# (3) Normalize the transcripts: training transcripts are mapped with -to 48,
# dev and test transcripts with -to 39.
timit_norm_trans.pl -i data/local/train.trans -m "$CONFDIR/phones.60-48-39.map" \
  -to 48 > data/local/train.trans2;
for x in dev test; do
  timit_norm_trans.pl -i "data/local/${x}.trans" -m "$CONFDIR/phones.60-48-39.map" \
    -to 39 > "data/local/${x}.trans2";
done
|
||||
|
||||
# Lexicon: every distinct symbol in the training transcripts, written as an
# identity mapping (symbol <TAB> symbol).
cut -d' ' -f2- data/local/train.trans2 \
  | tr ' ' '\n' \
  | sort -u > data/local/p
paste data/local/p data/local/p > data/local/lexicon.txt

# add disambig symbols to the lexicon: TODO: delete
ndisambig=$(add_lex_disambig.pl data/local/lexicon.txt data/local/lexicon_disambig.txt)
ndisambig=$((ndisambig + 1)) # add one disambig symbol for silence
echo $ndisambig > data/local/lex_ndisambig

# Phone symbol table: second lexicon column numbered from 1, with the null
# symbol <eps> as 0.
cut -f2 data/local/lexicon.txt \
  | awk 'BEGIN{ print "<eps> 0"; } { printf("%s %d\n", $1, NR); }' \
  > data/local/phones.txt

# Word symbol table: first lexicon column numbered the same way, with #0
# appended as the last entry.
cut -f1 data/local/lexicon.txt \
  | awk 'BEGIN{print "<eps> 0";} {printf("%s %d\n", $1, NR);}
END{printf("#0 %d\n", NR+1);}' \
  > data/local/words.txt
|
||||
|
||||
# (4) Create the phone bigram LM
# The check sits outside the redirected subshell so that its message reaches
# the terminal rather than only the log, and ${IRSTLM:-} keeps `-u` from
# aborting with a bare "unbound variable" error when the variable is unset.
[ -n "${IRSTLM:-}" ] \
  || error_exit "LM building won't work without setting the IRSTLM env variable"
(
  # Wrap every transcript (utterance id removed) in sentence boundary markers.
  cut -d' ' -f2- data/local/train.trans2 | sed -e 's:^:<s> :' -e 's:$: </s>:' \
    > data/local/lm_train.txt
  build-lm.sh -i data/local/lm_train.txt -n 2 -o data/local/lm_phone_bg.ilm.gz
  compile-lm data/local/lm_phone_bg.ilm.gz --text yes /dev/stdout \
    | grep -v unk | gzip -c > data/local/lm_phone_bg.arpa.gz
) >& data/prepare_lm.log

echo "Finished data preparation."
|
|
@ -0,0 +1,136 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
set -o pipefail
|
||||
|
||||
# Write the arguments (backslash escapes interpreted) to stderr, then
# terminate the shell with status 1.
error_exit() {
  echo -e "$@" 1>&2
  exit 1
}
|
||||
|
||||
# Print the absolute path of the directory named in an "--option=DIR" argument.
# Arguments: $1 - option string of the form <name>=<directory>
# Outputs:   the absolute directory path on stdout
# Exits with status 1 (via error_exit) if the value is not a directory, or if
# the directory cannot be entered.
read_dirname() {
  local dir_name abs_path
  # Everything after the first '=' is the value; no '=' means no value.
  case "$1" in
    *=*) dir_name=${1#*=} ;;
    *)   dir_name= ;;
  esac
  [ -d "$dir_name" ] || error_exit "Argument '$dir_name' not a directory"
  # Assigned separately from `local` so that a failing cd is not masked.
  abs_path=$(cd -- "$dir_name" 2>/dev/null && pwd) || exit 1
  printf '%s\n' "$abs_path"
}
|
||||
|
||||
PROG=$(basename "$0")
# The text carries literal "\n"/"\t" escapes that `echo -e` renders. It is
# expanded unquoted on purpose: word splitting folds the physical line breaks
# into single spaces, so only the explicit escapes shape the output.
usage="Usage: $PROG <arguments>\n
Prepare train, dev, test file lists.\n\n
Required arguments:\n
--hmm-proto=FILE\tPrototype of the HMM topology\n
--work-dir=DIR\t\tWorking directory\n
";

if [ $# -lt 2 ]; then
  error_exit $usage;
fi

# Give each setting a defined (possibly empty) value so that a missing option
# is reported below instead of tripping `-u` somewhere later in the script.
PROTO=${PROTO:-}
WDIR=${WDIR:-}

while [ $# -gt 0 ]; do
  case "$1" in
    --help)
      echo -e $usage; exit 0 ;;
    --hmm-proto=*)
      PROTO=${1#*=}
      # Quoted so that an empty value is rejected instead of passing the test.
      [ -f "$PROTO" ] || error_exit "Cannot find HMM prototype file '$PROTO'"
      # The prototype is read only after the script has changed into the work
      # directory, so a relative path is pinned to an absolute one here.
      PROTO=$(cd "$(dirname "$PROTO")" && pwd)/$(basename "$PROTO")
      shift ;;
    --work-dir=*)
      WDIR=$(read_dirname "$1"); shift ;;
    *)
      echo "Unknown argument: $1, exiting"; error_exit $usage ;;
  esac
done

if [ -z "$PROTO" ] || [ -z "$WDIR" ]; then
  error_exit "$PROG: --hmm-proto and --work-dir are both required.\n" $usage
fi
|
||||
|
||||
cd "$WDIR"
. path.sh

echo "Preparing train data"

# (0) Create a directory to contain files needed in training:
for x in train dev test; do
  mkdir -p "data/$x"
  cp "data/local/${x}_wav.scp" "data/$x/wav.scp"
  cp "data/local/${x}.trans2" "data/$x/text"
  cp "data/local/${x}.spk2utt" "data/$x/spk2utt"
  cp "data/local/${x}.utt2spk" "data/$x/utt2spk"
done

mkdir -p data/lang
# Plain "cp file dir/" instead of the GNU-only "cp file -t dir/".
cp data/local/phones.txt data/lang/
cp data/local/words.txt data/lang/

# (1) Generate colon-separated lists of silence and non-silence phones
silphones="cl epi sil vcl";
silphones.pl data/lang/phones.txt "$silphones" \
  data/lang/silphones.csl data/lang/nonsilphones.csl

# (2) Create the L.fst without disambiguation symbols, for use in training.
make_lexicon_fst.pl data/local/lexicon.txt 0.5 sil \
  | fstcompile --isymbols=data/lang/phones.txt \
      --osymbols=data/lang/words.txt --keep_isymbols=false \
      --keep_osymbols=false \
  | fstarcsort --sort_type=olabel > data/lang/L.fst

# (3) Create phonesets_mono.txt and roots.txt, then derive
# phonesets_cluster.txt by dropping the lines that mention "sil".
timit_make_questions.pl -i data/lang/phones.txt \
  -m data/lang/phonesets_mono.txt -r data/lang/roots.txt
grep -v sil data/lang/phonesets_mono.txt \
  > data/lang/phonesets_cluster.txt

# (4), Finally, for training, create the HMM topology prototype:
silphonelist=$(sed 's/:/ /g' data/lang/silphones.csl)
nonsilphonelist=$(sed 's/:/ /g' data/lang/nonsilphones.csl)
# NONSILENCEPHONES must be substituted first: the SILENCEPHONES pattern would
# otherwise match inside it.
sed -e "s:NONSILENCEPHONES:$nonsilphonelist:" \
    -e "s:SILENCEPHONES:$silphonelist:" "$PROTO" > data/lang/topo
|
||||
|
||||
echo "Preparing test data"

# (0) Copy over some files common to train and test:
mkdir -p data/lang_test
for f in phones.txt words.txt L.fst silphones.csl nonsilphones.csl; do
  # Plain "cp file dir/" instead of the GNU-only "cp file -t dir/".
  cp "data/lang/$f" data/lang_test/
done

# (1) Create a list of phones including the disambiguation symbols.
# --include-zero includes the #0 symbol that is passed from G.fst
ndisambig=$(cat data/local/lex_ndisambig)
add_disambig.pl --include-zero data/lang_test/phones.txt "$ndisambig" \
  > data/lang_test/phones_disambig.txt
cp data/lang_test/phones_disambig.txt data/lang/ # for MMI.

# (2) Create the lexicon FST with disambiguation symbols. There is an extra
# step where we create a loop to "pass through" the disambiguation symbols
# from G.fst.
phone_disambig_symbol=$(grep '#0' data/lang_test/phones_disambig.txt | awk '{print $2}')
word_disambig_symbol=$(grep '#0' data/lang_test/words.txt | awk '{print $2}')

make_lexicon_fst.pl data/local/lexicon_disambig.txt 0.5 sil "#$ndisambig" \
  | fstcompile --isymbols=data/lang_test/phones_disambig.txt \
      --osymbols=data/lang_test/words.txt --keep_isymbols=false \
      --keep_osymbols=false \
  | fstaddselfloops "echo $phone_disambig_symbol |" \
      "echo $word_disambig_symbol |" \
  | fstarcsort --sort_type=olabel > data/lang_test/L_disambig.fst

# Needed for discriminative training
cp data/lang_test/L_disambig.fst data/lang/

# (3) Convert the language model to FST, and create decoding configuration.
timit_format_lms.sh data

echo "Succeeded in formatting data."
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
#set -o pipefail
|
||||
|
||||
# Write the arguments (backslash escapes interpreted) to stderr, then
# terminate the shell with status 1.
error_exit() {
  echo -e "$@" 1>&2
  exit 1
}
|
||||
|
||||
# Print the absolute path of a directory.
# Arguments: $1 - directory path
# Outputs:   the absolute directory path on stdout
# Exits with status 1 (via error_exit) if $1 is not a directory, or if the
# directory cannot be entered.
read_dirname() {
  [ -d "$1" ] || error_exit "Argument '$1' not a directory"
  local abs_path
  # Assigned separately from `local` so that a failing cd is not masked.
  abs_path=$(cd -- "$1" 2>/dev/null && pwd) || exit 1
  printf '%s\n' "$abs_path"
}
|
||||
|
||||
# Build <work_dir>/lang_test_<lm_suffix>: copy the shared symbol tables and
# lexicon FSTs from <work_dir>/lang_test, then compile
# <work_dir>/local/lm_<lm_suffix>.arpa.gz into G.fst.
# Arguments: $1 - LM suffix (e.g. phone_bg)
#            $2 - work directory containing lang_test/ and local/
format_lms() {
  local lm_suffix=$1
  local work_dir=$2
  local test_dir=$work_dir/lang_test_${lm_suffix}
  local f

  mkdir -p "$test_dir"
  for f in phones.txt words.txt phones_disambig.txt L.fst L_disambig.fst \
           silphones.csl nonsilphones.csl; do
    cp "$work_dir/lang_test/$f" "$test_dir"
  done

  # Removing all "illegal" combinations of <s> and </s>, which are supposed to
  # occur only at beginning/end of utt. These can cause determinization
  # failures of CLG [ends up being epsilon cycles].
  gunzip -c "$work_dir/local/lm_${lm_suffix}.arpa.gz" \
    | grep -E -v '<s> <s>|</s> <s>|</s> </s>' \
    | arpa2fst - | fstprint \
    | eps2disambig.pl | s2eps.pl \
    | fstcompile --isymbols="$test_dir/words.txt" --osymbols="$test_dir/words.txt" \
        --keep_isymbols=false --keep_osymbols=false \
    | fstrmepsilon > "$test_dir/G.fst"

  # Informational only: a non-zero status here must not abort under errexit.
  fstisstochastic "$test_dir/G.fst" || true
}
|
||||
|
||||
PROG=$(basename "$0")
# Holds literal "\n" escapes for `echo -e`; expanded unquoted on purpose so
# that the physical line break collapses to a single space.
usage="Usage: $PROG data_dir\n
Convert ARPA-format language models to FSTs.\n";

if [ $# -ne 1 ]; then
  error_exit $usage;
fi
# Quoted so that a data directory containing whitespace stays one argument.
WDIR=$(read_dirname "$1")

# Next, for each type of language model, create the corresponding FST
# and the corresponding lang_test directory.

echo "Preparing language models for test"
format_lms phone_bg "$WDIR" >& "$WDIR/format_lms.log"
|
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# 'phonesets_mono' contains sets of phones that are shared when building the
|
||||
# monophone system and when asking questions based on an automatic clustering
|
||||
# of phones, for the triphone system.
|
||||
# 'roots' contain the information about which phones share a common root in
|
||||
# the phonetic decision tree and which have distinct pdfs. It also states
|
||||
# whether the tree-building should split the roots or not.
|
||||
|
||||
my $usage = "Usage: timit_make_questions.pl -i phones -m phoneset_mono -r roots\
Creates shared phonesets for monophone and context-dependent training.\
Required arguments:\
-i\tInput list of phones (can contain stress/position markers)\
-m\tOutput shared phoneset for use in monophone training\
-r\tOutput sharing and splitting info for context-dependent training\n";

use strict;
use Getopt::Long;
my ($in_phones, $mono, $roots, %phoneset);
GetOptions ("i=s" => \$in_phones,  # Input list of phones
            "m=s" => \$mono,       # Shared phone-set for monophone system
            "r=s" => \$roots );    # roots file for context-dependent systems

die "$usage" unless(defined($in_phones) && defined($mono) && defined($roots));

open(my $phones_fh, '<', $in_phones)
  or die "Cannot read from file '$in_phones': $!";
open(my $mono_fh, '>', $mono) or die "Cannot write to file '$mono': $!";
open(my $roots_fh, '>', $roots) or die "Cannot write to file '$roots': $!";

# Group every phone symbol under its base phone (the symbol minus an optional
# one-character "_X" marker).
while (<$phones_fh>) {
  # Skip the epsilon symbol and the silence/closure phones, which get one
  # fixed line in each output below. The match is anchored on the symbol so
  # that a phone merely containing e.g. "cl" is not dropped.
  next if m/^(?:<eps>|eps|sil|vcl|cl|epi)(?:_\S)?\s/;
  chomp;
  # The base is matched non-greedily; a greedy \S+ would swallow the "_X"
  # marker and leave $2 permanently undefined.
  m/^(\S+?)(_.)?\s+\S+$/ or die "Bad line: $_\n";
  my $full_phone = defined($2)? $1.$2 : $1;
  push @{$phoneset{$1}}, $full_phone;
}
close($phones_fh);

print $mono_fh "cl epi sil vcl\n";
print $roots_fh "not-shared not-split cl epi sil vcl\n";
foreach my $p (sort keys %phoneset) {
  print $mono_fh join(" ", @{$phoneset{$p}}), "\n";
  print $roots_fh "shared split ", join(" ", @{$phoneset{$p}}), "\n";
}

# Buffered write errors only surface at close time, so check them.
close($mono_fh) or die "Cannot write to file '$mono': $!";
close($roots_fh) or die "Cannot write to file '$roots': $!";
|
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script normalizes the TIMIT phonetic transcripts that have been
|
||||
# extracted in a format where each line contains an utterance ID followed by
|
||||
# the transcript, e.g.:
|
||||
# fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h#
|
||||
|
||||
my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n
|
||||
Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a
|
||||
smaller set defined by the -m option. This script assumes that the mapping is
|
||||
done in the \"standard\" fashion, i.e. to 48 or 39 phones. The input is
|
||||
assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can
|
||||
be changed using the -from option. The input format is assumed to be utterance
|
||||
ID followed by transcript on the same line.\n";
|
||||
|
||||
use strict;
use Getopt::Long;

# Command-line handling for the transcript normalizer.  -i, -m and -to are
# mandatory; -from defaults to the full 60-phone TIMIT set.
die "$usage" unless(@ARGV >= 1);
my ($in_trans, $phone_map, $num_phones_out);
my $num_phones_in = 60;
# GetOptions returns false on an unknown or malformed option (e.g. a
# non-integer value for -from); treat that as a usage error instead of
# silently continuing with the defaults.
GetOptions ("i=s" => \$in_trans,          # Input transcription
            "m=s" => \$phone_map,         # File containing phone mappings
            "from=i" => \$num_phones_in,  # Input #phones: must be 60 or 48
            "to=i" => \$num_phones_out )  # Output #phones: must be 48 or 39
  or die $usage;

die $usage unless(defined($in_trans) && defined($phone_map) &&
                  defined($num_phones_out));
if ($num_phones_in != 60 && $num_phones_in != 48) {
  die "Can only use 60 or 48 for -from (used $num_phones_in)."
}
if ($num_phones_out != 48 && $num_phones_out != 39) {
  die "Can only use 48 or 39 for -to (used $num_phones_out)."
}
# The mapping only ever shrinks the phone set (60->48, 60->39 or 48->39).
unless ($num_phones_out < $num_phones_in) {
  die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)."
}
|
||||
|
||||
|
||||
# Load the phone map.  Every line other than the glottal-stop entry must have
# three whitespace-separated columns; which column is the source and which is
# the target depends on -from / -to (column 1 when -from is 60, column 2 when
# -to is 48, column 3 otherwise).
open(M, "<$phone_map") or die "Cannot open mappings file '$phone_map': $!";
my (%phonemap, %seen_phones);
my $num_seen_phones = 0;
while (<M>) {
  chomp;
  # Ignore glottal stops.  Match 'q' only as a complete first field so that
  # no other entry is dropped just because it begins with the letter q.
  next if ($_ =~ /^q(?:\s|$)/);
  m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $_";
  my $mapped_from = ($num_phones_in == 60)? $1 : $2;
  my $mapped_to = ($num_phones_out == 48)? $2 : $3;
  # Count the distinct target phones to sanity-check the map below.
  if (!defined($seen_phones{$mapped_to})) {
    $seen_phones{$mapped_to} = 1;
    $num_seen_phones += 1;
  }
  $phonemap{$mapped_from} = $mapped_to;
}
close(M);
if ($num_seen_phones != $num_phones_out) {
  die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones";
}
|
||||
|
||||
# Map every transcript line ("utt-id phone phone ...") through %phonemap and
# write "utt-id mapped-phone ..." to stdout, one utterance per line.
open(T, "<$in_trans") or die "Cannot open transcription file '$in_trans': $!";
while (<T>) {
  chomp;
  $_ =~ m:^(\S+)\s+(.+): or die "Bad line: $_";
  my $utt_id = $1;
  my $trans = $2;

  my @mapped;
  # split(' ', ...) ignores leading/trailing whitespace, so no separate
  # space normalization is needed.
  for my $phone (split(' ', $trans)) {
    # Remove glottal stops.  Compare whole tokens so that the letter q is
    # never stripped out of the middle of another symbol.
    next if ($phone eq 'q');
    # An unmapped phone would otherwise be written as an empty field.
    defined($phonemap{$phone})
      or die "No mapping for phone '$phone' in utterance '$utt_id'";
    push @mapped, $phonemap{$phone};
  }
  print join(" ", $utt_id, @mapped), "\n";
}
close(T);
|
|
@ -0,0 +1,121 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
set -o pipefail
|
||||
|
||||
# Extract the directory from a "--switch=DIR" argument and print its absolute
# path.
# Arguments: $1 - option in --switch=DIR form (DIR may contain spaces)
# Outputs:   absolute path of DIR on stdout; diagnostics on stderr
# Exits:     with status 1 if DIR is not a directory or cannot be entered
function read_dirname () {
  local dir_name abs_path
  case "$1" in
    *=*) dir_name=${1#*=} ;;  # everything after the first '='
    *)   dir_name="" ;;       # no '=': nothing to extract
  esac
  [ -d "$dir_name" ] || { echo "Argument '$dir_name' not a directory" >&2; \
    exit 1; }
  # Assign separately from 'local' so a failing cd is not masked by the
  # exit status of 'local' itself.
  abs_path=$(cd "$dir_name" 2>/dev/null && pwd) || exit 1
  printf '%s\n' "$abs_path"
}
|
||||
|
||||
PROG=`basename $0`;
|
||||
usage="Usage: $PROG <arguments>\n
|
||||
Prepare train, dev, test file lists for TIMIT.\n\n
|
||||
Required arguments:\n
|
||||
--corpus-dir=DIR\tDirectory for the TIMIT corpus\n
|
||||
--dev-spk=FILE\tDevelopment set speaker list\n
|
||||
--test-spk=FILE\tCore test set speaker list\n
|
||||
--work-dir=DIR\t\tPlace to write the files (in a subdirectory with the 2-letter language code)\n
|
||||
";
|
||||
|
||||
# All four --option=value arguments listed in the usage text are required.
# ($usage is deliberately left unquoted below: word splitting collapses the
# literal line breaks of the string so that only its \n escapes make newlines.)
if [ $# -lt 4 ]; then
  echo -e $usage; exit 1;
fi

# Start from empty values so that a missing option is reported explicitly
# below rather than surfacing later as an unbound variable.
CORPUS=""; DEVSPK=""; TESTSPK=""; WDIR="";
while [ $# -gt 0 ];
do
  case "$1" in
    --help) echo -e $usage; exit 0 ;;
    --corpus-dir=*)
      CORPUS=$(read_dirname "$1"); shift ;;
    --dev-spk=*)
      DEVSPK=${1#*=}; shift ;;
    --test-spk=*)
      TESTSPK=${1#*=}; shift ;;
    --work-dir=*)
      WDIR=$(read_dirname "$1"); shift ;;
    *)  echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
  esac
done

if [ -z "$CORPUS" ] || [ -z "$DEVSPK" ] || [ -z "$TESTSPK" ] \
  || [ -z "$WDIR" ]; then
  echo "Missing one or more required arguments." >&2
  echo -e $usage >&2; exit 1;
fi

if [ ! -d "$CORPUS/train" ] && [ ! -d "$CORPUS/TRAIN" ]; then
  echo "Expecting directory $CORPUS/train or $CORPUS/TRAIN to exist."
  exit 1;
fi
|
||||
|
||||
tmpdir=$(mktemp -d);
|
||||
trap 'rm -rf "$tmpdir"' EXIT
|
||||
|
||||
# Get the list of speakers. The list of speakers in the 24-speaker core test
|
||||
# set and the 50-speaker development set must be supplied to the script. All
|
||||
# speakers in the 'train' directory are used for training.
|
||||
tr '[:upper:]' '[:lower:]' < $DEVSPK > $tmpdir/dev_spk # Just in case!
|
||||
tr '[:upper:]' '[:lower:]' < $TESTSPK > $tmpdir/test_spk # Just in case!
|
||||
|
||||
ls -d "$CORPUS"/train/dr*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
|
||||
|
||||
|
||||
ODIR=$WDIR/local # Directory to write file lists & transcripts
|
||||
mkdir -p $ODIR
|
||||
|
||||
# For each data set, write the file lists, transcripts and speaker maps into
# $ODIR: ${x}_sph.flist, ${x}_sph.scp, ${x}.trans, ${x}_wav.scp, ${x}.utt2spk
# and ${x}.spk2utt.
for x in train dev test; do
  # First, find the list of audio files (use only si & sx utterances).
  # Note: train & test sets are under different directories, but doing find on
  # both and grepping for the speakers will work correctly.
  find "$CORPUS"/{train,test} -not \( -name 'sa*' \) -name '*.wav' \
    | grep -f "$tmpdir/${x}_spk" > "$ODIR/${x}_sph.flist"
  # Utterance ID = <speaker-dir>_<file-basename>.
  sed -e 's:.*/\(.*\)/\(.*\).wav$:\1_\2:' "$ODIR/${x}_sph.flist" \
    > "$tmpdir/${x}_sph.uttids"
  paste "$tmpdir/${x}_sph.uttids" "$ODIR/${x}_sph.flist" \
    | sort -k1,1 > "$ODIR/${x}_sph.scp"

  # Now, get the transcripts: each line of the output contains an utterance
  # ID followed by the transcript.
  find "$CORPUS"/{train,test} -not \( -name 'sa*' \) -name '*.phn' \
    | grep -f "$tmpdir/${x}_spk" > "$tmpdir/${x}_phn.flist"
  sed -e 's:.*/\(.*\)/\(.*\).phn$:\1_\2:' "$tmpdir/${x}_phn.flist" \
    > "$tmpdir/${x}_phn.uttids"
  while IFS= read -r line; do
    # Report the problem directly: this script defines no error_exit helper.
    [ -f "$line" ] \
      || { echo "Cannot find transcription file '$line'" >&2; exit 1; }
    # Third column of a .phn file is the phone label; join the labels of one
    # file into a single space-separated line.
    cut -f3 -d' ' "$line" | tr '\n' ' ' | sed -e 's: *$:\n:'
  done < "$tmpdir/${x}_phn.flist" > "$tmpdir/${x}_phn.trans"
  paste "$tmpdir/${x}_phn.uttids" "$tmpdir/${x}_phn.trans" \
    | sort -k1,1 > "$ODIR/${x}.trans"

#   # Intersect the set of utterances with transcripts with the set of those
#   # with valid audio.
#   cut -f1 $tmpdir/${x}.trans \
#     | join $tmpdir/${x}_basenames_wav2 - > $tmpdir/${x}_basenames
#   # Get the common set of WAV files and transcripts.
#   join $tmpdir/${x}_basenames $tmpdir/${x}_wav.scp \
#     > $ODIR/${x}_wav.scp
#   join $tmpdir/${x}_basenames $tmpdir/${x}.trans \
#     > $ODIR/${x}.trans

  # wav.scp entries are commands that convert each file with sph2pipe.
  awk '{printf("%s sph2pipe -f wav %s |\n", $1, $2);}' < "$ODIR/${x}_sph.scp" \
    > "$ODIR/${x}_wav.scp"

  # Speaker ID is the part of the utterance ID before the first underscore.
  sed -e 's:_.*$::' "$tmpdir/${x}_sph.uttids" \
    | paste -d' ' "$tmpdir/${x}_sph.uttids" - | sort -k1,1 \
    > "$ODIR/${x}.utt2spk"
  utt2spk_to_spk2utt.pl "$ODIR/${x}.utt2spk" \
    > "$ODIR/${x}.spk2utt";
done
|
|
@ -0,0 +1,34 @@
|
|||
# This contains the locations of the tools and data required for running
|
||||
# the TIMIT experiments.
|
||||
|
||||
KALDIROOT=`cd ../../..; pwd`
|
||||
KALDISRC=$KALDIROOT/src
|
||||
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
|
||||
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
|
||||
KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/tiedbin:$KALDISRC/lm
|
||||
|
||||
FSTBIN=$KALDIROOT/tools/openfst/bin
|
||||
LMBIN=$KALDIROOT/tools/irstlm/bin
|
||||
|
||||
[ -d $PWD/local ] || { echo "Expecting 'local' subdirectory"; exit 1; }
|
||||
[ -d $PWD/utils ] || { echo "Expecting 'utils' subdirectory"; exit 1; }
|
||||
[ -d $PWD/steps ] || { echo "Expecting 'steps' subdirectory"; exit 1; }
|
||||
|
||||
LOCALUTILS=$PWD/local
|
||||
KALDIUTILS=$PWD/utils
|
||||
KALDISTEPS=$PWD/steps
|
||||
SCRIPTS=$LOCALUTILS:$KALDIUTILS:$KALDISTEPS
|
||||
|
||||
# If you already have sph2pipe on your path, comment the following out.
|
||||
# Else use install.sh to install it first in the specified location.
|
||||
SPH2PIPE=$KALDIROOT/tools/sph2pipe_v2.5
|
||||
[ -x $SPH2PIPE/sph2pipe ] || { echo "Cannot find sph2pipe executable"; }
|
||||
TOOLS=$SPH2PIPE
|
||||
|
||||
export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS:$TOOLS
|
||||
export LC_ALL=C
|
||||
export IRSTLM=$KALDIROOT/tools/irstlm
|
||||
|
||||
# Site-specific configs:
|
||||
# Load the Intel MKL environment module when the NIS domain is "ecdf".
# The substitution is quoted so an empty result cannot break the test, its
# stderr is dropped for hosts where 'hostname -y' is unsupported or unset,
# and an 'if' is used so that sourcing this file on any other host finishes
# with status 0.
if [ "$(hostname -y 2>/dev/null)" = ecdf ]; then
  . /etc/profile.d/modules.sh; module add intel/mkl;
fi
|
|
@ -0,0 +1,77 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
exit 1;
|
||||
# This script shows the steps needed to build a phone recognizer for TIMIT.
|
||||
|
||||
# This recipe follows the setup first described in:
|
||||
# K. F. Lee and H. W. Hon, "Speaker-independent phone recognition using hidden Markov models," 1988
|
||||
# where the training set is mapped to 48 phones and the results are presented
|
||||
# on a 39-phone subset of that.
|
||||
|
||||
# Set WORKDIR to someplace with enough disk space. That is where MFCCs will
|
||||
# get created, as well as the LM in ARPA & FST formats.
|
||||
WORKDIR=/path/with/disk/space
|
||||
cp -r conf local utils steps path.sh $WORKDIR
|
||||
cd $WORKDIR
|
||||
local/timit_data_prep.sh --config-dir=$PWD/conf --corpus-dir=/path/to/TIMIT --work-dir=$WORKDIR
|
||||
|
||||
local/timit_format_data.sh --hmm-proto=conf/topo.proto --work-dir=$PWD
|
||||
|
||||
# Now make MFCC features.
|
||||
mfccdir=$WORKDIR/data/MFCC
|
||||
for x in train dev test; do
|
||||
steps/make_mfcc.sh --num-jobs 6 data/$x exp/make_mfcc/$x $mfccdir
|
||||
done
|
||||
|
||||
decode_cmd="qsub -q all.q@@blade -l ram_free=500M,mem_free=500M"
|
||||
train_cmd="qsub -q all.q@@blade -l ram_free=200M,mem_free=200M"
|
||||
|
||||
steps/train_mono.sh --num-jobs 10 --qcmd "$train_cmd" \
|
||||
data/train data/lang exp/mono
|
||||
utils/mkgraph.sh --mono data/lang_test_phone_bg exp/mono exp/mono/graph_bg
|
||||
steps/decode_deltas.sh --accwt 1.0 --beam 20.0 --latgen --num-jobs 6 \
|
||||
--qcmd "$decode_cmd" exp/mono/graph_bg data/dev exp/mono/decode_dev_bg
|
||||
utils/score_lats.sh exp/mono/decode_dev_bg exp/mono/graph_bg/words.txt \
|
||||
data/dev conf/phones.60-48-39.map
|
||||
opt_accwt=`grep WER exp/mono/decode_dev_bg/wer_* \
|
||||
| sed -e 's?.*wer_??' -e 's?:%WER??' -e 's?\[.*??' | sort -k2,2 -g \
|
||||
| head -1 | awk '{print 1/$1}'`
|
||||
steps/decode_deltas.sh --accwt $opt_accwt --beam 20.0 --num-jobs 4 \
|
||||
--qcmd "$decode_cmd" exp/mono/graph_bg data/test exp/mono/decode_test_bg
|
||||
utils/score_text.sh exp/mono/decode_test_bg exp/mono/graph_bg/words.txt \
|
||||
data/test conf/phones.60-48-39.map
|
||||
|
||||
steps/align_deltas.sh --num-jobs 10 --qcmd "$train_cmd" \
|
||||
data/train data/lang exp/mono exp/mono_ali
|
||||
|
||||
steps/train_deltas.sh --num-jobs 10 --qcmd "$train_cmd" \
|
||||
2000 10000 data/train data/lang exp/mono_ali exp/tri1
|
||||
|
||||
utils/mkgraph.sh data/lang_test_phone_bg exp/tri1 exp/tri1/graph_bg
|
||||
steps/decode_deltas.sh --accwt 1.0 --beam 20.0 --latgen --num-jobs 6 \
|
||||
--qcmd "$decode_cmd" exp/tri1/graph_bg data/dev exp/tri1/decode_dev_bg
|
||||
utils/score_lats.sh exp/tri1/decode_dev_bg exp/tri1/graph_bg/words.txt \
|
||||
data/dev conf/phones.60-48-39.map
|
||||
opt_accwt=`grep WER exp/tri1/decode_dev_bg/wer_* \
|
||||
| sed -e 's?.*wer_??' -e 's?:%WER??' -e 's?\[.*??' | sort -k2,2 -g \
|
||||
| head -1 | awk '{print 1/$1}'`
|
||||
steps/decode_deltas.sh --accwt $opt_accwt --beam 20.0 --num-jobs 4 \
|
||||
--qcmd "$decode_cmd" exp/tri1/graph_bg data/test exp/tri1/decode_test_bg
|
||||
utils/score_text.sh exp/tri1/decode_test_bg exp/tri1/graph_bg/words.txt \
|
||||
data/test conf/phones.60-48-39.map
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
|
||||
# This script does training-data alignment given a model built using
|
||||
# CMN + delta + delta-delta features. It splits the data into
|
||||
# four chunks and does everything in parallel on the same machine.
|
||||
# Its output, all in its own experimental directory, is (assuming
|
||||
# you don't change the #jobs with the --num-jobs option),
|
||||
# {1,2,3,4}.cmvn {1,2,3,4}.ali.gz, tree, final.mdl
|
||||
# and final.occs (the last three are just copied from the source directory).
|
||||
|
||||
|
||||
# Option to use precompiled graphs from last phase, if these
|
||||
# are available (i.e. if they were built with the same data).
|
||||
# These must be split into four pieces.
|
||||
|
||||
# Print the given message on stderr and terminate with exit status 1.
# The message goes through 'echo -e', so backslash escapes such as \n and \t
# in it are interpreted.
function error_exit () {
|
||||
echo -e "$@" >&2; exit 1;
|
||||
}
|
||||
|
||||
# Validate an integer option value and print it in canonical decimal form.
# Arguments: $1 - the value, either bare ("10") or as "--switch=10"
# Outputs:   the integer on stdout, without redundant leading zeros
# Errors:    calls error_exit when the value is not an optionally negative
#            string of decimal digits
function readint () {
  local retval=${1##*=};  # In case --switch=ARG format was used
  local int_re='^(-?)([0-9]+)$'
  [[ "$retval" =~ $int_re ]] \
    || error_exit "Argument \"$retval\" not an integer."
  local sign=${BASH_REMATCH[1]}
  # Force base 10 so that leading 0's are dropped rather than read as octal;
  # a value consisting only of zeros stays "0".
  local magnitude=$((10#${BASH_REMATCH[2]}))
  if [ "$magnitude" -eq 0 ]; then
    echo 0
  else
    echo "${sign}${magnitude}"
  fi
}
|
||||
|
||||
njobs=4 # Default number of jobs
|
||||
qcmd="" # Options for the submit_jobs.sh script
|
||||
oldgraphs=false
|
||||
|
||||
PROG=`basename $0`;
|
||||
usage="Usage: $PROG [options] <data-dir> <lang-dir> <src-dir> <exp-dir>\n
|
||||
e.g.: $PROG data/train data/lang exp/tri1 exp/tri1_ali\n\n
|
||||
Options:\n
|
||||
--help\t\tPrint this message and exit\n
|
||||
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
|
||||
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
|
||||
--use-graphs\tReuse older graphs\n
|
||||
";
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "${1# *}" in # ${1# *} strips any leading spaces from the arguments
|
||||
--help) echo -e $usage; exit 0 ;;
|
||||
--num-jobs)
|
||||
shift; njobs=`readint $1`;
|
||||
[ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
|
||||
shift ;;
|
||||
--qcmd)
|
||||
shift; qcmd=" --qcmd=${1}"; shift ;;
|
||||
--use-graphs)
|
||||
oldgraphs=true; shift ;;
|
||||
-*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
|
||||
*) break ;; # end of options: interpreted as the data-dir
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
error_exit $usage;
|
||||
fi
|
||||
|
||||
[ -f path.sh ] && . path.sh
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
srcdir=$3
|
||||
dir=$4
|
||||
|
||||
if [ -f $lang/oov.txt ]; then
|
||||
oov_opt="--map-oov '"`cat $lang/oov.txt`"'"
|
||||
else
|
||||
oov_opt='--ignore-oov'
|
||||
fi
|
||||
|
||||
mkdir -p $dir
|
||||
# Create copy of the tree and model and occs...
|
||||
cp $srcdir/{tree,final.mdl,final.occs} $dir || exit 1;
|
||||
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
|
||||
if [ ! -d $data/split$njobs -o $data/split$njobs -ot $data/feats.scp ]; then
|
||||
split_data.sh $data $njobs
|
||||
fi
|
||||
|
||||
echo "Computing cepstral mean and variance statistics"
|
||||
# for n in `get_splits.pl $njobs`; do # Do this locally; it's fast.
|
||||
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/cmvnTASK_ID.log \
|
||||
compute-cmvn-stats --spk2utt=ark:$data/split$njobs/TASK_ID/spk2utt \
|
||||
scp:$data/split$njobs/TASK_ID/feats.scp ark:$dir/TASK_ID.cmvn \
|
||||
|| error_exit "Computing CMN/CVN stats failed.";
|
||||
|
||||
|
||||
# Align all training data using the supplied model.
|
||||
echo "Aligning data from $data"
|
||||
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
# Run the alignment jobs.  With --use-graphs the compiled training graphs
# <n>.fsts.gz are read from $srcdir; otherwise they are compiled on the fly
# from the integer-mapped transcripts.  Either way the alignments are written
# to $dir/TASK_ID.ali.gz.
if $oldgraphs; then
  # for n in `get_splits.pl $njobs`; do
  # feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
  # Brace expansion is performed before parameter expansion, so a pattern
  # like {1..$njobs} is never expanded; check each graph archive explicitly.
  for ((n=1; n<=njobs; n++)); do
    [ -f "$srcdir/$n.fsts.gz" ] \
      || error_exit "Missing FSTs with --use-graphs option specified."
  done
  submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/alignTASK_ID.log \
    gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/final.mdl \
      "ark:gunzip -c $srcdir/TASK_ID.fsts.gz|" "$feats" "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
      || error_exit "Error doing alignment.";

else
  # for n in `get_splits.pl $njobs`; do
  # feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
  # compute integer form of transcripts.
  tra="ark:sym2int.pl $oov_opt --ignore-first-field $lang/words.txt $data/split$njobs/TASK_ID/text|";
  # We could just use gmm-align in the next line, but it's less efficient as
  # it compiles the training graphs one by one.
  # The pipe is escaped (\|) so that it is passed on to submit_jobs.sh as
  # part of the job command instead of piping submit_jobs.sh itself.
  submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/alignTASK_ID.log \
    compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \
      gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/final.mdl \
        ark:- "$feats" "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
        || error_exit "Error doing alignment.";
fi
|
||||
|
||||
echo "Done aligning data."
|
|
@ -0,0 +1,125 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Decoding script that works with a GMM model and delta-delta plus
|
||||
# cepstral mean subtraction features. Used, for example, to decode
|
||||
# mono/ and tri1/
|
||||
# This script just generates lattices for a single broken-up
|
||||
# piece of the data.
|
||||
|
||||
# Print the given message on stderr and terminate with exit status 1.
# The message goes through 'echo -e', so backslash escapes such as \n and \t
# in it are interpreted.
function error_exit () {
|
||||
echo -e "$@" >&2; exit 1;
|
||||
}
|
||||
|
||||
# Validate a real-valued option and print it unchanged.
# Arguments: $1 - the value, either bare ("20.0") or as "--switch=20.0"
# Outputs:   the value on stdout
# Errors:    calls error_exit unless the value is an optionally negative
#            decimal number: digits with an optional fraction ("20", "20.",
#            "20.0"), or a bare fraction (".5"), optionally followed by an
#            exponent ("1e-05")
function readfloat () {
  local retval=${1##*=};  # In case --switch=ARG format was used
  local float_re='^-?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  [[ "$retval" =~ $float_re ]] \
    || error_exit "Argument \"$retval\" not a real number."
  echo "$retval"
}
|
||||
|
||||
# Validate an integer option value and print it in canonical decimal form.
# Arguments: $1 - the value, either bare ("10") or as "--switch=10"
# Outputs:   the integer on stdout, without redundant leading zeros
# Errors:    calls error_exit when the value is not an optionally negative
#            string of decimal digits
function readint () {
  local retval=${1##*=};  # In case --switch=ARG format was used
  local int_re='^(-?)([0-9]+)$'
  [[ "$retval" =~ $int_re ]] \
    || error_exit "Argument \"$retval\" not an integer."
  local sign=${BASH_REMATCH[1]}
  # Force base 10 so that leading 0's are dropped rather than read as octal;
  # a value consisting only of zeros stays "0".
  local magnitude=$((10#${BASH_REMATCH[2]}))
  if [ "$magnitude" -eq 0 ]; then
    echo 0
  else
    echo "${sign}${magnitude}"
  fi
}
|
||||
|
||||
accwt=1.0
|
||||
beam=30.0
|
||||
latgen=0
|
||||
njobs=4
|
||||
qcmd="" # Options for the submit_jobs.sh script
|
||||
|
||||
PROG=`basename $0`;
|
||||
usage="Usage: $PROG [options] <graph-dir> <data-dir> <decode-dir>\n
|
||||
e.g.: $PROG exp/mono/graph_bg data/dev exp/mono/decode_dev_bg\n\n
|
||||
Options:\n
|
||||
--help\t\tPrint this message and exit\n
|
||||
--accwt FLOAT\tScaling for acoustic likelihoods (default=$accwt).\n
|
||||
--beam FLOAT\tDecoder beam (default=$beam)\n
|
||||
--latgen\tGenerate lattices (off by default)\n
|
||||
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
|
||||
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
|
||||
";
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "${1# *}" in # ${1# *} strips any leading spaces from the arguments
|
||||
--help) echo -e $usage; exit 0 ;;
|
||||
--accwt)
|
||||
shift; accwt=`readfloat $1`; shift ;;
|
||||
--beam)
|
||||
shift; beam=`readfloat $1`; shift ;;
|
||||
--latgen) shift; latgen=1 ;;
|
||||
--num-jobs)
|
||||
shift; njobs=`readint $1`;
|
||||
[ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
|
||||
shift ;;
|
||||
--qcmd)
|
||||
shift; qcmd="--qcmd=${1}"; shift ;;
|
||||
-*) error_exit "Unknown argument: $1, exiting\n$usage" ;;
|
||||
*) break ;; # end of options: interpreted as the data-dir
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
error_exit $usage;
|
||||
fi
|
||||
|
||||
[ -f path.sh ] && . path.sh
|
||||
|
||||
graphdir=$1
|
||||
data=$2
|
||||
dir=$3
|
||||
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
requirements="$data/feats.scp $srcdir/final.mdl $graphdir/HCLG.fst"
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_deltas.sh: no such file $f";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
# We only do one decoding pass, so there is no point caching the
|
||||
# CMVN stats-- we make them part of a pipe.
|
||||
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
if [ $njobs -gt 1 ]; then
|
||||
if [ ! -d $data/split$njobs -o $data/split$njobs -ot $data/feats.scp ]; then
|
||||
split_data.sh $data $njobs
|
||||
fi
|
||||
mydata=$data/split$njobs/TASK_ID
|
||||
feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
fi
|
||||
|
||||
if [ $latgen -eq 1 ]; then
|
||||
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/decode.TASK_ID.log \
|
||||
gmm-latgen-faster --max-active=7000 --beam=$beam --lattice-beam=6.0 \
|
||||
--acoustic-scale=$accwt --word-symbol-table=$graphdir/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" \
|
||||
"ark:|gzip -c > $dir/lat.TASK_ID.gz" || error_exit "Decoding failed.";
|
||||
else
|
||||
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/decode.TASK_ID.log \
|
||||
gmm-decode-faster --beam=$beam --acoustic-scale=$accwt \
|
||||
--word-symbol-table=$graphdir/words.txt $srcdir/final.mdl \
|
||||
$graphdir/HCLG.fst "$feats" ark,t:$dir/test.TASK_ID.tra \
|
||||
|| error_exit "Decoding failed.";
|
||||
fi
|
|
@ -0,0 +1,111 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from .. (one directory up from here)
|
||||
|
||||
# Print the given message on stderr and terminate with exit status 1.
# The message goes through 'echo -e', so backslash escapes such as \n and \t
# in it are interpreted.
function error_exit () {
|
||||
echo -e "$@" >&2; exit 1;
|
||||
}
|
||||
|
||||
# Validate an integer option value and print it in canonical decimal form.
# Arguments: $1 - the value, either bare ("10") or as "--switch=10"
# Outputs:   the integer on stdout, without redundant leading zeros
# Errors:    calls error_exit when the value is not an optionally negative
#            string of decimal digits
function readint () {
  local retval=${1##*=};  # In case --switch=ARG format was used
  local int_re='^(-?)([0-9]+)$'
  [[ "$retval" =~ $int_re ]] \
    || error_exit "Argument \"$retval\" not an integer."
  local sign=${BASH_REMATCH[1]}
  # Force base 10 so that leading 0's are dropped rather than read as octal;
  # a value consisting only of zeros stays "0".
  local magnitude=$((10#${BASH_REMATCH[2]}))
  if [ "$magnitude" -eq 0 ]; then
    echo 0
  else
    echo "${sign}${magnitude}"
  fi
}
|
||||
|
||||
njobs=4 # Default number of jobs
|
||||
stage=-4 # Default starting stage (start with calculating CMN/CVN stats)
|
||||
qcmd="" # Options for the submit_jobs.sh script
|
||||
|
||||
PROG=`basename $0`;
|
||||
usage="Usage: $PROG [options] <data-dir> <log-dir> <abs-path-to-mfccdir>\n\n
|
||||
Options:\n
|
||||
--help\t\tPrint this message and exit\n
|
||||
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
|
||||
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
|
||||
";
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "${1# *}" in # ${1# *} strips any leading spaces from the arguments
|
||||
--help) echo -e $usage; exit 0 ;;
|
||||
--num-jobs)
|
||||
shift; njobs=`readint $1`;
|
||||
[ $njobs -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive.";
|
||||
shift ;;
|
||||
--qcmd)
|
||||
shift; qcmd="--qcmd=${1}"; shift ;;
|
||||
-*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
|
||||
*) break ;; # end of options: interpreted as the data-dir
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
error_exit $usage;
|
||||
fi
|
||||
|
||||
[ -f path.sh ] && . path.sh
|
||||
|
||||
data=$1
|
||||
logdir=$2
|
||||
mfccdir=$3
|
||||
|
||||
# use "name" as part of name of the archive.
|
||||
name=`basename $data`
|
||||
|
||||
mkdir -p $mfccdir || exit 1;
|
||||
mkdir -p $logdir || exit 1;
|
||||
|
||||
scp=$data/wav.scp
|
||||
config=conf/mfcc.conf
|
||||
required="$scp $config"
|
||||
|
||||
for f in $required; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "make_mfcc.sh: no such file $f"
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
# note: in general, the double-parenthesis construct in bash "((" is "C-style
|
||||
# syntax" where we can get rid of the $ for variable names, and omit spaces.
|
||||
# The "for" loop in this style is a special construct.
|
||||
|
||||
split_scps=""
|
||||
for ((n=1; n<=njobs; n++)); do
|
||||
split_scps="$split_scps $logdir/wav$n.scp"
|
||||
done
|
||||
|
||||
split_scp.pl $scp $split_scps || exit 1;
|
||||
|
||||
rm -f $logdir/.error.$name 2>/dev/null
|
||||
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$logdir/make_mfcc.TASK_ID.log \
|
||||
compute-mfcc-feats --verbose=2 --config=$config scp:$logdir/wavTASK_ID.scp \
|
||||
ark,scp:$mfccdir/mfcc_$name.TASK_ID.ark,$mfccdir/mfcc_$name.TASK_ID.scp \
|
||||
|| error_exit "Error producing mfcc features for $name:"`tail $logdir/make_mfcc.*.log`
|
||||
|
||||
# concatenate the .scp files together.
|
||||
rm $data/feats.scp 2>/dev/null
|
||||
for ((n=1; n<=njobs; n++)); do
|
||||
cat $mfccdir/mfcc_$name.$n.scp >> $data/feats.scp
|
||||
done
|
||||
|
||||
# rm $logdir/wav*.scp
|
||||
|
||||
echo "Succeeded creating MFCC features for $name"
|
|
@ -0,0 +1,256 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
# Triphone model training, using (e.g. MFCC) + delta + acceleration features and
|
||||
# cepstral mean normalization. It starts from an existing directory (e.g.
|
||||
# exp/mono), supplied as an argument, which is assumed to be built using the same
|
||||
# type of features.
|
||||
#
|
||||
# This script starts from previously generated state-level alignments
|
||||
# (in $alidir), e.g. generated by a previous monophone or triphone
|
||||
# system. To build a context-dependent triphone system, we build
|
||||
# decision trees that map a 3-phone phonetic context window to a
|
||||
# pdf index. It's not really clear which is the right reference, but
|
||||
# one is "Tree-based state tying for high accuracy acoustic modelling"
|
||||
# by Steve Young et al.
|
||||
# In a typical approach, there are decision trees for
|
||||
# each monophone HMM-state (i.e. 3 per phone), and each one gets to
|
||||
# ask questions about the left and right phone. These questions
|
||||
# correspond to sets of phones, corresponding to phonetic classes
|
||||
# (e.g. vowel, consonant, liquid, solar, ... ). In Kaldi, we prefer
|
||||
# fully automatic algorithms, and anyway we're not sure where to get
|
||||
# these types of lists, so we just generate the classes automatically.
|
||||
# This is based on a top-down binary tree clustering of the phones
|
||||
# (see "cluster-phones"), where we take single-Gaussian statistics for
|
||||
# just the central state of each phone (assuming this to be more
|
||||
# representative of the phones), and we get a tree structure on the
|
||||
# phones; each class corresponds to a node of the tree (it contains all
|
||||
# the phones that are children of that node). Note: you could
|
||||
# replace questions.txt with something derived from manually written
|
||||
# questions.
|
||||
# Also, the roots of the tree correspond to classes of phones (typically
|
||||
# corresponding to "real phones", because the actual phones may contain
|
||||
# word-begin/end and stress information), and the tree gets to ask
|
||||
# questions also about the central phone, and about the state in the HMM.
|
||||
# After building the tree, we do a number of iterations of Gaussian
|
||||
# Mixture Model training; on selected iterations we redo the Viterbi
|
||||
# alignments (initially, these are taken from the previous system).
|
||||
# The Gaussian mixture splitting, whereby we go from a single Gaussian
|
||||
# per state to multiple Gaussians, is done on all iterations (although
|
||||
# we stop doing this a few iterations before the end). We don't have
|
||||
# a fixed number of Gaussians per state, but we have an overall target
|
||||
# #Gaussians that's specified on each iteration, and we allocate
|
||||
# the Gaussians among states according to a power-law where the #Gaussians
|
||||
# is proportional to the count to the power 0.2. The target
|
||||
# increases linearly during training [note: logarithmically seems more
|
||||
# natural but didn't work as well.]
|
||||
|
||||
# Print the given message on stderr and terminate with exit status 1.
# The message goes through 'echo -e', so backslash escapes such as \n and \t
# in it are interpreted.
function error_exit () {
|
||||
echo -e "$@" >&2; exit 1;
|
||||
}
|
||||
|
||||
# Parse an integer command-line value and print it in canonical decimal form.
#   $1 - the value, either bare ("4") or in --switch=ARG form ("--stage=-4");
#        everything up to and including the last '=' is discarded.
# Output: the integer on stdout, without leading zeros (so later shell
#   arithmetic cannot misread it as octal). Zero is accepted: the script
#   compares $stage against 0.
# Errors: calls error_exit when the value is not an integer. Inside a command
#   substitution that only terminates the subshell, so callers must check the
#   substitution's exit status.
function readint () {
  local value=${1/#*=/}  # In case --switch=ARG format was used
  [[ "$value" =~ ^-?[0-9]+$ ]] \
    || error_exit "Argument \"$value\" not an integer."
  local sign=""
  if [[ "$value" == -* ]]; then
    sign="-"
    value=${value#-}
  fi
  # Strip every leading zero but keep the last digit ("000" -> "0").
  while [[ "$value" == 0?* ]]; do
    value=${value#0}
  done
  if [[ "$value" == 0 ]]; then
    sign=""  # never print "-0"
  fi
  echo "$sign$value"
}
|
||||
|
||||
# Defaults for the command-line switches.
njobs=4    # Default number of jobs
stage=-4   # Default starting stage (start with tree building)
qcmd=""    # Options for the submit_jobs.sh script

PROG=$(basename "$0")
usage="Usage: $PROG [options] <num-leaves> <tot-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>\n
e.g.: $PROG 2000 10000 data/train_si84 data/lang exp/mono_ali exp/tri1\n\n
Options:\n
--help\t\tPrint this message and exit\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
--stage INT\tStarting stage (e.g. -4 for tree building; 2 for iter 2; default=$stage)\n
";

# Consume leading switches; the first non-switch argument ends option parsing.
# $usage is expanded unquoted on purpose: word splitting drops its literal
# newlines so that only the explicit \n escapes produce line breaks.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help)
      echo -e $usage; exit 0 ;;
    --num-jobs)
      shift
      # readint runs in a subshell, so its error_exit cannot stop this script;
      # propagate the failure explicitly (the message is already on stderr).
      njobs=$(readint "$1") || exit 1
      [ "$njobs" -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive."
      shift ;;
    --qcmd)
      shift; qcmd=" --qcmd=${1}"; shift ;;
    --stage)
      shift
      stage=$(readint "$1") || exit 1
      shift ;;
    -*)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *)
      break ;;  # end of options: interpreted as num-leaves
  esac
done

if [ $# != 6 ]; then
  error_exit $usage
fi
|
||||
|
||||
[ -f path.sh ] && . path.sh

# Positional arguments left after option parsing.
numleaves=$1
totgauss=$2
data=$3
lang=$4
alidir=$5
dir=$6

if [[ ! -f "$alidir/final.mdl" ]]; then
  echo "Error: alignment dir $alidir does not contain final.mdl"
  exit 1
fi

# Deliberately a single string: it is expanded unquoted so that each option
# becomes its own argument.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="10 20 30"
silphonelist=$(cat "$lang/silphones.csl")
numiters=35    # Number of iterations of training
maxiterinc=25  # Last iter to increase #Gauss on.
numgauss=$numleaves
incgauss=$(( ($totgauss - $numgauss) / $maxiterinc ))  # per-iter increment for #Gauss

# The option is embedded later inside a command string, hence the inner
# single quotes around the OOV word.
if [[ -f "$lang/oov.txt" ]]; then
  oov_opt="--map-oov '$(cat "$lang/oov.txt")'"
else
  oov_opt='--ignore-oov'
fi

mkdir -p "$dir/log"
# (Re)split the data when the split directory is missing or older than the
# feature list.
if [[ ! -d "$data/split$njobs" || "$data/split$njobs" -ot "$data/feats.scp" ]]; then
  split_data.sh "$data" "$njobs"
fi

# Per-job feature pipeline; TASK_ID is left literal here (presumably replaced
# with the job index by submit_jobs.sh -- confirm against that script).
featspart="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$njobs/TASK_ID/utt2spk ark:$alidir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
if [ $stage -le -3 ]; then
  # Silence phones are passed as context-independent (--ci-phones); this
  # assumes something about $lang/roots.txt, but it seems pretty safe.
  echo "Accumulating tree stats"
  if ! submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/acc_tree.TASK_ID.log \
         acc-tree-stats --ci-phones=$silphonelist $alidir/final.mdl "$featspart" \
         "ark:gunzip -c $alidir/TASK_ID.ali.gz|" $dir/TASK_ID.treeacc; then
    error_exit "Error accumulating tree stats"
  fi

  # Merge the per-job accumulators into $dir/treeacc, then drop the parts.
  if ! sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log; then
    error_exit "Error summing tree stats."
  fi
  rm $dir/*.treeacc
fi
|
||||
|
||||
if [ $stage -le -2 ]; then
  # Preparing questions, roots file...
  echo "Computing questions for tree clustering"
  # Each step exits the subshell on failure; otherwise only the status of the
  # last command would reach the error check below.
  (
    sym2int.pl "$lang/phones.txt" "$lang/phonesets_cluster.txt" \
      > "$dir/phonesets.txt" || exit 1
    cluster-phones "$dir/treeacc" "$dir/phonesets.txt" "$dir/questions.txt" \
      2> "$dir/log/questions.log" || exit 1
    # Extra questions are optional; their absence is not an error.
    if [ -f "$lang/extra_questions.txt" ]; then
      sym2int.pl "$lang/phones.txt" "$lang/extra_questions.txt" \
        >> "$dir/questions.txt" || exit 1
    fi
    compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" \
      2> "$dir/log/compile_questions.log" || exit 1
    sym2int.pl --ignore-oov "$lang/phones.txt" "$lang/roots.txt" \
      > "$dir/roots.txt" || exit 1
  ) || error_exit "Error in generating questions for tree clustering."

  echo "Building tree"
  submit_jobs.sh "$qcmd" --log="$dir/log/train_tree.log" \
    build-tree --verbose=1 --max-leaves="$numleaves" "$dir/treeacc" \
      "$dir/roots.txt" "$dir/questions.qst" "$lang/topo" "$dir/tree" \
    || error_exit "Error in building tree."

  gmm-init-model --write-occs="$dir/1.occs" \
    "$dir/tree" "$dir/treeacc" "$lang/topo" "$dir/1.mdl" \
    2> "$dir/log/init_model.log" \
    || error_exit "Error in initializing the model."

  # 1.mdl is both input and output of the mix-up step.
  gmm-mixup --mix-up="$numgauss" "$dir/1.mdl" "$dir/1.occs" "$dir/1.mdl" \
    2> "$dir/log/mixup.log" \
    || error_exit "Error mixing up to $numgauss Gaussians"

  rm "$dir/treeacc"
fi
|
||||
|
||||
|
||||
if [ $stage -le -1 ]; then
  # Convert alignments in $alidir, to use as initial alignments.
  # This assumes that $alidir was split in $njobs pieces, just like the
  # current dir. No "$qcmd" is passed here: the step is very fast, so it is
  # run locally.
  echo "Converting old alignments"
  if ! submit_jobs.sh --njobs=$njobs --log=$dir/log/convertTASK_ID.log \
         convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
         "ark:gunzip -c $alidir/TASK_ID.ali.gz|" \
         "ark:|gzip -c >$dir/TASK_ID.ali.gz"; then
    error_exit "Error converting old alignments."
  fi
fi
|
||||
|
||||
if [ $stage -le 0 ]; then
  # Make training graphs (this is split in $njobs parts). The transcripts are
  # mapped to integer word ids by sym2int.pl inside the input pipe.
  echo "Compiling training graphs"
  if ! submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/compile_graphsTASK_ID.log \
         compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
         "ark:sym2int.pl $oov_opt --ignore-first-field $lang/words.txt < $data/split$njobs/TASK_ID/text |" \
         "ark:|gzip -c >$dir/TASK_ID.fsts.gz"; then
    error_exit "Error compiling training graphs"
  fi
fi
|
||||
|
||||
# Main training loop. Pass x reads $dir/x.mdl and writes $dir/(x+1).mdl;
# passes below $stage are skipped, but the Gaussian target is still advanced
# so that a restarted run uses the same schedule.
x=1
while [ "$x" -lt "$numiters" ]; do
  echo Pass $x
  if [ "$stage" -le "$x" ]; then
    if echo $realign_iters | grep -w "$x" >/dev/null; then
      echo "Aligning data"
      # $scale_opts is intentionally unquoted: it holds several options.
      submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/align.$x.TASK_ID.log \
        gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 $dir/$x.mdl \
          "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
          "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
        || error_exit "Error aligning data on iteration $x"
    fi  # Realign iters

    submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/acc.$x.TASK_ID.log \
      gmm-acc-stats-ali $dir/$x.mdl "$featspart" \
        "ark,s,cs:gunzip -c $dir/TASK_ID.ali.gz|" $dir/$x.TASK_ID.acc \
      || error_exit "Error accumulating stats on iteration $x"

    submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log \
      gmm-est --write-occs=$dir/$(($x+1)).occs --mix-up=$numgauss $dir/$x.mdl \
        "gmm-sum-accs - $dir/$x.*.acc |" $dir/$(($x+1)).mdl \
      || error_exit "Error in pass $x estimation."

    # The next model exists now, so this pass's model, accumulators and
    # occupancies are no longer needed.
    rm -f $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs
  fi  # Completed a training stage.
  if [[ $x -le $maxiterinc ]]; then
    numgauss=$(($numgauss+$incgauss))
  fi
  x=$(($x+1))
done
|
||||
|
||||
# Point final.mdl / final.occs at the last model written by the loop ($x is
# the loop counter's final value). The subshell keeps the cd local; if the cd
# fails nothing is removed or linked in the wrong directory.
(
  cd "$dir" || exit 1
  rm -f final.{mdl,occs}
  ln -s $x.mdl final.mdl
  ln -s $x.occs final.occs
)

# Print out summary of the warning messages.
for logfile in "$dir"/log/*.log; do
  [ -f "$logfile" ] || continue  # glob matched nothing
  n=$(grep -c WARNING "$logfile")
  if [ "$n" -ne 0 ]; then echo $n warnings in $logfile; fi
done

echo Done
|
|
@ -0,0 +1,202 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
# Flat start and monophone training, with delta-delta features.
|
||||
# This script applies cepstral mean normalization (per speaker).
|
||||
|
||||
# Write the given message to stderr (echo -e, so backslash escapes such as \n
# and \t in the message are interpreted) and terminate with exit status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
|
||||
|
||||
# Parse an integer command-line value and print it in canonical decimal form.
#   $1 - the value, either bare ("4") or in --switch=ARG form ("--stage=-4");
#        everything up to and including the last '=' is discarded.
# Output: the integer on stdout, without leading zeros (so later shell
#   arithmetic cannot misread it as octal). Zero is accepted: the script
#   compares $stage against 0.
# Errors: calls error_exit when the value is not an integer. Inside a command
#   substitution that only terminates the subshell, so callers must check the
#   substitution's exit status.
function readint () {
  local value=${1/#*=/}  # In case --switch=ARG format was used
  [[ "$value" =~ ^-?[0-9]+$ ]] \
    || error_exit "Argument \"$value\" not an integer."
  local sign=""
  if [[ "$value" == -* ]]; then
    sign="-"
    value=${value#-}
  fi
  # Strip every leading zero but keep the last digit ("000" -> "0").
  while [[ "$value" == 0?* ]]; do
    value=${value#0}
  done
  if [[ "$value" == 0 ]]; then
    sign=""  # never print "-0"
  fi
  echo "$sign$value"
}
|
||||
|
||||
# Defaults for the command-line switches.
njobs=4    # Default number of jobs
stage=-4   # Default starting stage (start with calculating CMN/CVN stats)
qcmd=""    # Options for the submit_jobs.sh script

PROG=$(basename "$0")
usage="Usage: $PROG [options] <data-dir> <lang-dir> <exp-dir>\n
e.g.: $PROG data/train.1k data/lang exp/mono\n\n
Options:\n
--help\t\tPrint this message and exit\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
--stage INT\tStarting stage (e.g. -4 for CMN/CVN stats; 2 for iter 2; default=$stage)\n
";

# Consume leading switches; the first non-switch argument ends option parsing.
# $usage is expanded unquoted on purpose: word splitting drops its literal
# newlines so that only the explicit \n escapes produce line breaks.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help)
      echo -e $usage; exit 0 ;;
    --num-jobs)
      shift
      # readint runs in a subshell, so its error_exit cannot stop this script;
      # propagate the failure explicitly (the message is already on stderr).
      njobs=$(readint "$1") || exit 1
      [ "$njobs" -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive."
      shift ;;
    --qcmd)
      shift; qcmd="--qcmd=${1}"; shift ;;
    --stage)
      shift
      stage=$(readint "$1") || exit 1
      shift ;;
    -*)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *)
      break ;;  # end of options: interpreted as the data-dir
  esac
done

if [ $# != 3 ]; then
  error_exit $usage
fi
|
||||
|
||||
# Positional arguments left after option parsing.
data=$1
lang=$2
dir=$3

[ -f path.sh ] && . path.sh

# Configuration:
# scale_opts is deliberately a single string; it is expanded unquoted so that
# each option becomes its own argument.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
numiters=40    # Number of iterations of training
maxiterinc=30  # Last iter to increase #Gauss on.
numgauss=300   # Initial num-Gauss (must be more than #states=3*phones).
totgauss=1000  # Target #Gaussians.
incgauss=$(( ($totgauss - $numgauss) / $maxiterinc ))  # per-iter increment for #Gauss
realign_iters="1 2 3 4 5 6 7 8 9 10 12 14 16 18 20 23 26 29 32 35 38"

# The option is embedded later inside a command string, hence the inner
# single quotes around the OOV word.
if [[ -f "$lang/oov.txt" ]]; then
  oov_opt="--map-oov '$(cat "$lang/oov.txt")'"
else
  oov_opt='--ignore-oov'
fi

mkdir -p "$dir/log"
# (Re)split the data when the split directory is missing or older than the
# feature list.
if [[ ! -d "$data/split$njobs" || "$data/split$njobs" -ot "$data/feats.scp" ]]; then
  split_data.sh "$data" "$njobs"
fi
|
||||
|
||||
if [ $stage -le -3 ]; then
  # One statistics archive per job: $dir/TASK_ID.cmvn.
  echo "Computing cepstral mean and variance statistics"
  if ! submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/cmvnTASK_ID.log \
         compute-cmvn-stats --spk2utt=ark:$data/split$njobs/TASK_ID/spk2utt \
         scp:$data/split$njobs/TASK_ID/feats.scp ark:$dir/TASK_ID.cmvn; then
    error_exit "Computing CMN/CVN stats failed."
  fi
fi

# Feature pipeline over the whole data set (all per-job CMVN archives are
# concatenated).
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $dir/*.cmvn|\" scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# Per-job variant of the same pipeline; TASK_ID is kept literal here.
featspart="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$njobs/TASK_ID/utt2spk ark:$dir/TASK_ID.cmvn scp:$data/split$njobs/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
|
||||
|
||||
if [ $stage -le -2 ]; then
  echo "Initializing monophone system."
  # Start from an empty option so that a value inherited from the environment
  # cannot leak into the gmm-init-mono command line.
  shared_phones_opt=""
  if [ -f "$lang/phonesets_mono.txt" ]; then
    echo "Using shared phones from $lang/phonesets_mono.txt"
    # In recipes with stress and position markers, this pools together
    # the stats for the different versions of the same phone (also for
    # the various silence phones).
    sym2int.pl "$lang/phones.txt" "$lang/phonesets_mono.txt" \
      > "$dir/phonesets.int" \
      || error_exit "Error converting $lang/phonesets_mono.txt to integer ids."
    shared_phones_opt="--shared-phones=$dir/phonesets.int"
  fi

  # $shared_phones_opt is unquoted so that an empty value adds no argument.
  # Only the first 10 utterances (subset-feats --n=10) are used for the
  # initialization; 39 is presumably the feature dimension -- TODO confirm.
  gmm-init-mono $shared_phones_opt \
    "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" "$lang/topo" 39 \
    "$dir/0.mdl" "$dir/tree" 2> "$dir/log/init.log" \
    || error_exit "Monophone model initialization failed."
fi
|
||||
|
||||
if [ $stage -le -1 ]; then
  # One graph archive per job; transcripts are mapped to integer word ids by
  # sym2int.pl inside the input pipe.
  echo "Compiling training graphs"
  if ! submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/compile_graphsTASK_ID.log \
         compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
         "ark:sym2int.pl $oov_opt --ignore-first-field $lang/words.txt < $data/split$njobs/TASK_ID/text|" \
         "ark:|gzip -c >$dir/TASK_ID.fsts.gz"; then
    error_exit "Error compiling training graphs."
  fi
fi
|
||||
|
||||
if [ $stage -le 0 ]; then
  echo "Aligning data equally (pass 0)"
  # The escaped pipe (\|) is handed to submit_jobs.sh as an argument, so the
  # equal alignments are piped into the accumulator inside each job rather
  # than in this shell.
  if ! submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/align.0.TASK_ID.log \
         align-equal-compiled "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
         ark,t,f:- \| \
         gmm-acc-stats-ali --binary=true $dir/0.mdl "$featspart" \
         ark:- $dir/0.TASK_ID.acc; then
    error_exit "Error in pass 0 accumulation"
  fi

  # In the following steps, the --min-gaussian-occupancy=3 option is important,
  # otherwise we cannot est "rare" phones and later on, they never align properly.
  if ! gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss \
         $dir/0.mdl "gmm-sum-accs - $dir/0.*.acc|" $dir/1.mdl \
         2> $dir/log/update.0.log; then
    error_exit "Error in pass 0 estimation."
  fi

  rm $dir/0.*.acc
fi  # Finished 0'th training iteration.
|
||||
|
||||
# Main training loop. Pass x reads $dir/x.mdl and writes $dir/(x+1).mdl;
# passes below $stage are skipped, but the Gaussian target and the beam are
# still advanced so that a restarted run uses the same schedule.
beam=6  # will change to 10 below after 1st pass
x=1
while [ "$x" -lt "$numiters" ]; do
  echo "Pass $x"
  if [ "$stage" -le "$x" ]; then
    if echo $realign_iters | grep -w "$x" >/dev/null; then
      echo "Aligning data"
      # $scale_opts is intentionally unquoted: it holds several options.
      submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/align.$x.TASK_ID.log \
        gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$(($beam*4)) \
          $dir/$x.mdl "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
          "ark,t:|gzip -c >$dir/TASK_ID.ali.gz" \
        || error_exit "Error in pass $x alignment."
    fi  # Realign iters

    submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/log/acc.$x.TASK_ID.log \
      gmm-acc-stats-ali $dir/$x.mdl "$featspart" \
        "ark:gunzip -c $dir/TASK_ID.ali.gz|" $dir/$x.TASK_ID.acc \
      || error_exit "Error in pass $x accumulation."

    submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log \
      gmm-est --write-occs=$dir/$(($x+1)).occs --mix-up=$numgauss $dir/$x.mdl \
        "gmm-sum-accs - $dir/$x.*.acc|" $dir/$(($x+1)).mdl \
      || error_exit "Error in pass $x estimation."

    # The next model exists now, so this pass's model, accumulators and
    # occupancies are no longer needed.
    rm -f $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs
  fi  # Completed a training stage.
  if [ "$x" -le "$maxiterinc" ]; then
    numgauss=$(($numgauss+$incgauss))
  fi
  beam=10
  x=$(($x+1))
done
|
||||
|
||||
# Point final.mdl / final.occs at the last model written by the loop ($x is
# the loop counter's final value). The subshell keeps the cd local; if the cd
# fails nothing is removed or linked in the wrong directory.
(
  cd "$dir" || exit 1
  rm -f final.{mdl,occs}
  ln -s $x.mdl final.mdl
  ln -s $x.occs final.occs
)

# Print out summary of the warning messages.
for logfile in "$dir"/log/*.log; do
  [ -f "$logfile" ] || continue  # glob matched nothing
  n=$(grep -c WARNING "$logfile")
  if [ "$n" -ne 0 ]; then echo $n warnings in $logfile; fi
done

echo Done
|
||||
|
||||
# example of showing the alignments:
|
||||
# show-alignments data/lang/phones.txt $dir/30.mdl "ark:gunzip -c $dir/0.ali.gz|" | head -4
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Adds some specified number of disambig symbols to a symbol table.
|
||||
# Adds these as #1, #2, etc.
|
||||
# If the --include-zero option is specified, includes an extra one
|
||||
# #0.
|
||||
|
||||
# Optional leading switch: also emit a "#0" symbol.
my $add_zero = 0;
if ($ARGV[0] eq "--include-zero") {
  $add_zero = 1;
  shift @ARGV;
}

unless (@ARGV == 2) {
  die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
}

my ($symtab, $num_extra) = @ARGV;

open(F, "<$symtab") || die "Opening file $symtab";

# Copy the table through unchanged, remembering the id on its last line; the
# new symbols are numbered upwards from that id.
my $last_id;
while (my $line = <F>) {
  my @fields = split(" ", $line);
  @fields == 2 || die "Bad line $line";
  $last_id = $fields[1];
  print $line;
}

defined($last_id) || die "Empty symbol file?";

if ($add_zero) {
  $last_id++;
  print "#0 $last_id\n";
}

# Emit #1 .. #num_extra with consecutive ids after the last one used.
my $n = 1;
while ($n <= $num_extra) {
  my $id = $n + $last_id;
  print "#$n $id\n";
  $n++;
}
|
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Adds disambiguation symbols to a lexicon.
|
||||
# Outputs still in the normal lexicon format.
|
||||
# Disambig syms are numbered #1, #2, #3, etc. (#0
|
||||
# reserved for symbol in grammar).
|
||||
# Outputs the number of disambig syms to the standard output.
|
||||
|
||||
if (@ARGV != 2) {
  die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt ";
}

my $lexfn = shift @ARGV;
my $lexoutfn = shift @ARGV;

open(L, "<$lexfn") || die "Error opening lexicon $lexfn";

# (1) Read in the lexicon, normalizing each line to single-space separated
# fields ("word phone1 phone2 ...").
my @lexicon = ();
while (<L>) {
  my @A = split(" ", $_);
  push @lexicon, join(" ", @A);
}
close(L);

# (2) Work out the count of each phone-sequence in the lexicon.
my %count;
foreach my $l (@lexicon) {
  my @A = split(" ", $l);
  shift @A;  # Remove word.
  $count{join(" ", @A)}++;
}

# (3) For each left sub-sequence of each phone-sequence, note down
# that it exists (for identifying prefixes of longer strings). This includes
# the empty sequence for every non-empty pronunciation.
my %issubseq;
foreach my $l (@lexicon) {
  my @A = split(" ", $l);
  shift @A;  # Remove word.
  while (@A > 0) {
    pop @A;  # Remove last phone
    $issubseq{join(" ", @A)} = 1;
  }
}

# (4) For each entry in the lexicon:
#  if the phone sequence is unique and is not a
#  prefix of another word, no disambig symbol.
#  Else output #1, or #2, #3, ... if the same phone-seq
#  has already been assigned a disambig symbol.
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";

my $max_disambig = 0;  # highest disambig number handed out so far
my %reserved;          # numbers used for empty pronunciations
my %disambig_of;       # last number assigned to each phone sequence
foreach my $l (@lexicon) {
  my @A = split(" ", $l);
  my $word = shift @A;
  my $phnseq = join(" ", @A);
  if (defined $issubseq{$phnseq} || $count{$phnseq} != 1) {
    if ($phnseq eq "") {
      # Need disambig symbols for the empty string that are not used
      # anywhere else.
      $max_disambig++;
      $reserved{$max_disambig} = 1;
      $phnseq = "#$max_disambig";
    } else {
      my $curnumber = $disambig_of{$phnseq};
      if (!defined $curnumber) { $curnumber = 0; }
      $curnumber++;  # now 1 or 2, ...
      while (defined $reserved{$curnumber}) { $curnumber++; }  # skip over reserved symbols
      if ($curnumber > $max_disambig) {
        $max_disambig = $curnumber;
      }
      $disambig_of{$phnseq} = $curnumber;
      $phnseq = $phnseq . " #" . $curnumber;
    }
  }
  print O "$word\t$phnseq\n";
}
# Buffered write errors only surface at close.
close(O) || die "Error closing lexicon file $lexoutfn\n";

# The caller reads the number of disambig symbols from stdout.
print $max_disambig . "\n";
|
||||
|
|
@ -0,0 +1,145 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Write the given message to stderr (echo -e, so backslash escapes such as \n
# and \t in the message are interpreted) and terminate with exit status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
|
||||
|
||||
# Parse an integer command-line value and print it in canonical decimal form.
#   $1 - the value, either bare ("4") or in --switch=ARG form ("--num-jobs=4");
#        everything up to and including the last '=' is discarded.
# Output: the integer on stdout, without leading zeros (so later shell
#   arithmetic cannot misread it as octal); zero is accepted.
# Errors: calls error_exit when the value is not an integer. Inside a command
#   substitution that only terminates the subshell, so callers must check the
#   substitution's exit status.
function readint () {
  local value=${1/#*=/}  # In case --switch=ARG format was used
  [[ "$value" =~ ^-?[0-9]+$ ]] \
    || error_exit "Argument \"$value\" not an integer."
  local sign=""
  if [[ "$value" == -* ]]; then
    sign="-"
    value=${value#-}
  fi
  # Strip every leading zero but keep the last digit ("000" -> "0").
  while [[ "$value" == 0?* ]]; do
    value=${value#0}
  done
  if [[ "$value" == 0 ]]; then
    sign=""  # never print "-0"
  fi
  echo "$sign$value"
}
|
||||
|
||||
# Resolve a directory argument to an absolute path and print it.
#   $1 - directory name, either bare or in --switch=DIR form (everything up to
#        and including the last '=' is discarded).
# Output: the absolute path (as reported by pwd) on stdout.
# Errors: calls error_exit when the argument is not a directory or cannot be
#   entered. Inside a command substitution that only terminates the subshell,
#   so callers must check the substitution's exit status.
function read_dirname () {
  local dir_name=${1/#*=/}  # In case --switch=ARG format was used
  [ -d "$dir_name" ] || error_exit "Argument '$dir_name' not a directory"
  # Declaration and assignment are separate so that a failing cd is not
  # masked by the exit status of 'local'.
  local abs_path
  abs_path=$(cd -- "$dir_name" 2>/dev/null && pwd) \
    || error_exit "Cannot change to directory '$dir_name'"
  echo "$abs_path"
}
|
||||
|
||||
orig_args="$*"  # kept only for the error message if decoding fails
njobs=""  # Total number of jobs unset by default. Will set to #speakers (if
          # using a grid) or 4 (if not), unless specified by user.
lang=""   # Option for sclite scoring (off by default)
opts=""
qcmd=""   # Options for the submit_jobs.sh script

PROG=$(basename "$0")
usage="Usage: $PROG [options] <decode_script> <graph-dir> <data-dir> <decode-dir> [extra-args...]\n\n
Options:\n
--help\t\tPrint this message and exit\n
-l DIR\t\tDirectory to find L_align.fst (needed for sclite scoring)\n
--num-jobs INT\tNumber of parallel jobs to run (default=$njobs).\n
--opts STRING\tOptions for the decoder script\n
--qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
";

# Consume leading switches; the first non-switch argument ends option parsing.
# $usage is expanded unquoted on purpose: word splitting drops its literal
# newlines so that only the explicit \n escapes produce line breaks.
# readint / read_dirname run in subshells, so their error_exit cannot stop
# this script; their failure is propagated explicitly.
while [ $# -gt 0 ]; do
  case "${1# *}" in  # ${1# *} strips one leading space from the argument
    --help)
      echo -e $usage; exit 0 ;;
    -l)
      shift
      lang=$(read_dirname "$1") || exit 1
      if [[ ! -f "$lang/phones_disambig.txt" || ! -f "$lang/L_align.fst" ]]; then
        error_exit "Invalid argument to -l option; expected $lang/phones_disambig.txt and $lang/L_align.fst to exist."
      fi
      shift ;;
    --num-jobs)
      shift
      njobs=$(readint "$1") || exit 1
      [ "$njobs" -lt 1 ] && error_exit "--num-jobs arg '$njobs' not positive."
      shift ;;
    --opts)
      shift; opts="$1"; shift ;;
    --qcmd)
      shift; qcmd="--qcmd=${1}"; shift ;;
    --stage)
      # NOTE(review): accepted here but not listed in the usage text.
      shift
      stage=$(readint "$1") || exit 1
      shift ;;
    -*)
      echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    *)
      break ;;  # end of options: interpreted as the script to execute
  esac
done

if [ $# -lt 4 ]; then
  error_exit $usage
fi
|
||||
|
||||
# Positional arguments left after option parsing.
script=$1
graphdir=$2
data=$3
dir=$4
# Make "dir" an absolute pathname (relative paths are taken from $PWD).
[[ "$dir" == /* ]] || dir="$PWD/$dir"
mkdir -p "$dir" || exit 1
shift 4
# Remaining args will be supplied to decoding script (expanded unquoted
# there, so they are split on whitespace).
extra_args=$*

[ -f path.sh ] && . path.sh
|
||||
|
||||
# Everything that must already exist as a regular file.
# NOTE(review): $scp is not assigned in the visible part of this script;
# unless path.sh or the environment sets it, it expands to nothing.
for file in $script $scp $data/utt2spk; do
  if [ -f "$file" ]; then
    continue
  fi
  echo "decode.sh: no such file $file"
  exit 1
done

# Note: most scripts expect HCLG.fst in graphdir, but the
# "*_fromlats.sh" script(s) require(s) a "lang" dir in that
# position, hence G.fst is accepted as well.
if [ ! -f $graphdir/HCLG.fst ] && [ ! -f $graphdir/G.fst ]; then
  echo No such file: $graphdir/HCLG.fst or $graphdir/G.fst
  exit 1
fi

# Figure out num-jobs when the user did not specify it: one job per speaker
# when running on a queue, 4 otherwise.
if [ -z "$njobs" ]; then
  if [ -n "$qcmd" ]; then
    njobs=`utt2spk_to_spk2utt.pl $data/utt2spk | wc -l`
  else
    njobs=4
  fi
fi

echo "Decoding with num-jobs = $njobs"
if [[ $njobs -gt 1 || ! -d $data/split$njobs || \
      $data/split$njobs -ot $data/feats.scp ]]; then
  split_data.sh $data $njobs
fi
|
||||
|
||||
#for n in `get_splits.pl $njobs`; do
|
||||
# Run the decoding script once per job. $opts and $extra_args are expanded
# unquoted on purpose so that they split into separate arguments.
submit_jobs.sh "$qcmd" --njobs=$njobs --log=$dir/partTASK_ID.log \
  $script $opts -j $njobs TASK_ID $graphdir $data $dir $extra_args \
  || error_exit "Error in decoding script: command was decode.sh $orig_args"

# Score whatever the decoder produced: lattices take precedence over text
# hypotheses.
if ls $dir/lat.*.gz >&/dev/null; then
  if [ -n "$lang" ]; then
    # sclite scoring: $lang directory supplied only for this reason.
    [ ! -f $data/stm ] && \
      error_exit "Expected $data/stm to exist (-l only used for sclite scoring)"
    score_lats_ctm.sh $dir $lang $data || \
      error_exit "Error in scoring of lattices using sclite."
  else
    score_lats.sh $dir $graphdir/words.txt $data || \
      error_exit "Error in scoring of lattices."
  fi
elif ls $dir/*.txt >&/dev/null; then
  score_text.sh $dir $data || error_exit "Error in scoring of hypotheses."
else
  error_exit "No output found in $dir, not scoring."
fi
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script replaces epsilon with #0 on the input side only, of the G.fst
|
||||
# acceptor.
|
||||
|
||||
# Filter stdin/files to stdout: on lines that start with two integer fields,
# an <eps> in the third field is rewritten to #0; all other text is
# passed through untouched.
while (my $line = <>) {
  $line =~ s/^(\d+\s+\d+\s+)<eps>(\s+)/$1#0$2/;
  print $line;
}
|
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/perl -w
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script takes a list of utterance-ids or any file whose first field
|
||||
# of each line is an utterance-id, and filters an scp
|
||||
# file (or any file whose first field is an utterance id), printing
|
||||
# out only those lines whose first field is in id_list.
|
||||
|
||||
die "Usage: filter_scp.pl id_list [in.scp] > out.scp "
  if (@ARGV < 1 || @ARGV > 2);

my $idlist = shift @ARGV;
open(F, "<$idlist") || die "Could not open id-list file $idlist";

# Collect the first field of every id-list line.
my %wanted;
while (my $line = <F>) {
  my @fields = split(" ", $line);
  die "Invalid id-list file line $line" unless @fields >= 1;
  $wanted{$fields[0]} = 1;
}

# Echo the lines of the remaining input (file argument or stdin) whose first
# field was seen in the id list.
while (my $line = <>) {
  my @fields = split(" ", $line);
  die "Invalid scp file line $line" unless @fields > 0;
  print $line if $wanted{$fields[0]};
}
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Command-line handling for int2sym.pl.
#
# Options (may be given in any order, before the symbol table):
#   --ignore-noninteger   pass tokens that are not all digits through unchanged
#   --ignore-first-field  copy the first field of every line verbatim
#   --field N             translate only the N'th (one-based) field of each line
$ignore_noninteger = 0;
$ignore_first_field = 0;
$field = -1;   # -1 means "translate every field".
while (@ARGV > 0) {
  if ($ARGV[0] eq "--ignore-noninteger") {
    $ignore_noninteger = 1; shift @ARGV;
  } elsif ($ARGV[0] eq "--ignore-first-field") {
    $ignore_first_field = 1; shift @ARGV;
  } elsif ($ARGV[0] eq "--field") {
    shift @ARGV;
    defined $ARGV[0] || die "Missing argument to --field option";
    $field = $ARGV[0]+0; shift @ARGV;
    if ($field < 1) { die "Bad argument to --field option: $field"; }
  } else {
    last;  # first non-option argument: the symbol table.
  }
}

if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
$zfield = $field-1; # Change to zero-based indexing.

$symtab = shift @ARGV;
if(!defined $symtab) {
  die "Usage: int2sym.pl [--ignore-noninteger] [--ignore-first-field|--field N] symtab [input] > output\n";
}

# Load the symbol table; each line is "<symbol> <integer>".  The mapping is
# stored integer -> symbol in %int2sym.
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while(<F>) {
  @A = split(" ", $_);
  @A == 2 || die "bad line in symbol table file: $_";
  $int2sym{$A[1]} = $A[0];
}
close(F);
|
||||
|
||||
# int2sym($token, $pos): map one integer token to its symbol using %int2sym.
#   $token  the token to translate
#   $pos    zero-based position of the token (only used to pick the hint in
#           the error message)
# Returns the symbol.  A token that is not all digits is returned unchanged
# when $ignore_noninteger is set; otherwise the script dies.  Also dies if the
# integer is not present in the symbol table.
sub int2sym {
  my $a = shift @_;
  my $pos = shift @_;
  if($a !~ m:^\d+$:) { # not all digits..
    if($ignore_noninteger) {
      # Hand the token back to the caller instead of printing it here and
      # leaving the sub with "next": that relied on the caller's loop and
      # dropped the whole output line when used together with --field.
      return $a;
    } else {
      if($pos == 0) {
        die "int2sym.pl: found noninteger token $a (try --ignore-first-field)\n";
      } else {
        die "int2sym.pl: found noninteger token $a (try --ignore-noninteger if valid input)\n";
      }
    }
  }
  my $s = $int2sym{$a};
  if(!defined ($s)) {
    die "int2sym.pl: integer $a not in symbol table $symtab.";
  }
  return $s;
}
|
||||
|
||||
# Main loop: read lines from the files named on the command line (or stdin),
# translate the requested fields through int2sym() and print the result.
$error = 0;
while (<>) {
  @A = split(" ", $_);
  # With --ignore-first-field the leading token is echoed as-is and removed
  # from the list of tokens to translate.
  print((shift @A) . " ") if $ignore_first_field;
  if ($field == -1) {
    # Translate every token; each one is followed by a single space.
    foreach my $pos (0 .. $#A) {
      my $sym = int2sym($A[$pos], $pos);
      print $sym . " ";
    }
  } else {
    # Translate only the selected field (if this line has that many fields)
    # and print the line re-joined with single spaces.
    if ($zfield >= 0 && $zfield <= $#A) {
      $A[$zfield] = int2sym($A[$zfield], $zfield);
    }
    print join(" ", @A);
  }
  print "\n";
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# makes lexicon FST (no pron-probs involved).
|
||||
|
||||
# Argument handling.  The FST is written to stdout, so the accepted forms are
#   make_lexicon_fst.pl lexicon.txt
#   make_lexicon_fst.pl lexicon.txt silprob silphone
#   make_lexicon_fst.pl lexicon.txt silprob silphone sil_disambig_sym
if(@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n";
}

$lexfn = shift @ARGV;
if(@ARGV == 0) {
  $silprob = 0.0;   # no optional silence.
} elsif (@ARGV == 2){
  ($silprob,$silphone) = @ARGV;
} else {
  ($silprob,$silphone,$sildisambig) = @ARGV;
}
if($silprob != 0.0) {
  # Both costs below are -log of a probability, so silprob must lie strictly
  # between 0 and 1.
  $silprob > 0.0 || die "Sil prob cannot be negative";
  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
  $silcost = -log($silprob);
  $nosilcost = -log(1.0 - $silprob);
}

open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
|
||||
|
||||
|
||||
|
||||
sub is_sil {
  # Return true if the phone sequence passed in @_ means silence, i.e. if it
  # equals ( $silphone ) or has the form ( "#N", $silphone, "#M" ) where "#N"
  # and "#M" are disambiguation symbols ('#' followed by digits).
  # Reads the global $silphone.
  return ( (@_ == 1 && $_[0] eq $silphone) ||
           (@_ == 3 && $_[1] eq $silphone &&
            $_[0] =~ m/^\#\d+$/ &&
            $_[2] =~ m/^\#\d+$/) );  # was $_[0] again: the last symbol was never checked.
}
|
||||
|
||||
# Generate the lexicon FST in text form on stdout.  Arc lines are
#   <from-state> <to-state> <phone> <word-or-<eps>> [<cost>]
# and the final line is "<final-state> 0".  Each lexicon line read from L is
# "<word> <phone1> <phone2> ..."; the word label is emitted on the first arc of
# the pronunciation and <eps> on the remaining ones.
if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
  $loopstate = 0;
  $nextstate = 1; # next unallocated state.  (Was misspelled "$nexststate", which
                  # left $nextstate undefined so the first new state was 0.)
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;

    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;   # more phones follow: go to a fresh state.
      } else {
        $ns = $loopstate;     # last phone: return to the loop state.
      }
      print "$s\t$ns\t$p\t$word_or_eps\n";
      $word_or_eps = "<eps>";
      $s = $ns;
    }
  }
  print "$loopstate\t0\n"; # final-cost.
} else { # have silence probs.
  $startstate = 0;
  $loopstate = 1;
  $silstate = 2; # state from where we go to loopstate after emitting silence.
  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
  if (!defined $sildisambig) {
    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
    $nextstate = 3;
  } else {
    # Silence is followed by an extra arc carrying the disambiguation symbol.
    $disambigstate = 3;
    $nextstate = 4;
    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
  }
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;
    # Keep the complete pronunciation: @A is consumed by the loop below, so
    # testing is_sil(@A) at the last phone always saw an empty list.
    @pron = @A;

    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
        print "$s\t$ns\t$p\t$word_or_eps\n";
        $word_or_eps = "<eps>";
        $s = $ns;
      } else {
        if(!is_sil(@pron)){
          # This is non-deterministic but relatively compact,
          # and avoids epsilons.
          print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
          print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
        } else {
          # no point putting opt-sil after silence word.
          print "$s\t$loopstate\t$p\t$word_or_eps\n";
        }
        $word_or_eps = "<eps>";
      }
    }
  }
  print "$loopstate\t0\n"; # final-cost.
}
close(L);
|
|
@ -0,0 +1,134 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script creates a fully expanded decoding graph (HCLG) that represents
|
||||
# all the language-model, pronunciation dictionary (lexicon), context-dependency,
|
||||
# and HMM structure in our model. The output is a Finite State Transducer
|
||||
# that has word-ids on the output, and pdf-ids on the input (these are indexes
|
||||
# that resolve to Gaussian Mixture Models).
|
||||
# See
|
||||
# http://kaldi.sourceforge.net/graph_recipe_test.html
|
||||
# (this is compiled from this repository using Doxygen,
|
||||
# the source for this part is in src/doc/graph_recipe_test.dox)
|
||||
|
||||
|
||||
# Defaults for the context size (N) and central position (P) handed to
# fstcomposecontext; --mono switches them to N=1, P=0.  --clean removes the
# cached intermediate files in <test-lang-dir>/tmp before building.
N=3
P=1
clean=false

# Consume leading options in any order.  "$1" is matched inside a case so that
# an empty or missing argument cannot break the test (the old unquoted
# `[ $1 == "--mono" ]` printed "unary operator expected" once the arguments
# ran out).
while [ $# -gt 0 ]; do
  case "$1" in
    --mono)  N=1; P=0; shift ;;
    --clean) clean=true; shift ;;
    *)       break ;;
  esac
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh [--mono] [--clean] <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1;
fi
|
||||
|
||||
# Source the recipe's path.sh from the current directory.  The explicit "./"
# stops bash from searching $PATH for a file of the same name.
if [ -f ./path.sh ]; then . ./path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

# --clean: throw away cached intermediate FSTs (-f: no complaint if absent).
if $clean; then rm -rf "$lang/tmp"; fi

mkdir -p "$dir"

tscale=1.0      # passed as --transition-scale to make-h-transducer.
loopscale=0.1   # passed as --self-loop-scale to add-self-loops.

# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
# (note: the [[ ]] brackets make the || type operators work (inside [ ], we
# would have to use -o instead), -f means file exists, and -ot means older than).

# Fail early, naming the first missing input.
for f in "$lang/L.fst" "$lang/G.fst" "$lang/phones_disambig.txt" \
         "$lang/words.txt" "$lang/silphones.csl" "$model" "$tree"; do
  [ ! -f "$f" ] && echo "mkgraph.sh: expected $f to exist" && exit 1;
done

mkdir -p "$lang/tmp"
|
||||
# LG.fst: compose the lexicon (with disambiguation symbols) with the grammar,
# determinize and minimize.  Rebuilt only if missing or older than its inputs.
if [[ ! -f "$lang/tmp/LG.fst" || "$lang/tmp/LG.fst" -ot "$lang/G.fst" || \
      "$lang/tmp/LG.fst" -ot "$lang/L_disambig.fst" ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" | fstdeterminizestar --use-log=true | \
    fstminimizeencoded > "$lang/tmp/LG.fst" || exit 1;
  fstisstochastic "$lang/tmp/LG.fst" || echo "warning: LG not stochastic."
fi

if [ ! -f "$lang/phones_disambig.txt" ]; then
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1;
fi

# Second column of every line of phones_disambig.txt that contains '#'.
grep '#' "$lang/phones_disambig.txt" | awk '{print $2}' > "$lang/tmp/disambig_phones.list"


clg=$lang/tmp/CLG_${N}_${P}.fst

# CLG: add phonetic context to LG.  Also writes the ilabels file that
# make-h-transducer reads later.
if [[ ! -f "$clg" || "$clg" -ot "$lang/tmp/LG.fst" ]]; then
  # "|| exit 1" added: this was the only build step whose failure went unnoticed.
  fstcomposecontext --context-size=$N --central-position=$P \
    --read-disambig-syms="$lang/tmp/disambig_phones.list" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.list" \
    "$lang/tmp/ilabels_${N}_${P}" < "$lang/tmp/LG.fst" > "$clg" || exit 1;
  fstisstochastic "$clg" || echo "warning: CLG not stochastic."
fi
|
||||
|
||||
# Ha.fst: the H transducer, built by make-h-transducer from the ilabels file,
# the tree and the model.  Rebuilt if missing or older than the model/ilabels.
if [[ ! -f "$dir/Ha.fst" || "$dir/Ha.fst" -ot "$model" \
      || "$dir/Ha.fst" -ot "$lang/tmp/ilabels_${N}_${P}" ]]; then
  make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
    --transition-scale=$tscale "$lang/tmp/ilabels_${N}_${P}" "$tree" "$model" \
    > "$dir/Ha.fst" || exit 1;
fi

# HCLGa.fst: Ha composed with CLG, determinized, with the disambiguation
# symbols and local epsilons removed, then minimized.
if [[ ! -f "$dir/HCLGa.fst" || "$dir/HCLGa.fst" -ot "$dir/Ha.fst" || \
      "$dir/HCLGa.fst" -ot "$clg" ]]; then
  fsttablecompose "$dir/Ha.fst" "$clg" | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.list" | fstrmepslocal | \
    fstminimizeencoded > "$dir/HCLGa.fst" || exit 1;
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

# HCLG.fst: the final graph, HCLGa with self-loops added.
if [[ ! -f "$dir/HCLG.fst" || "$dir/HCLG.fst" -ot "$dir/HCLGa.fst" ]]; then
  add-self-loops --self-loop-scale=$loopscale --reorder=true \
    "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst" || exit 1;

  if [[ "$tscale" == 1.0 && "$loopscale" == 1.0 ]]; then
    # No point doing this test if transition-scale not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
  fi
fi
|
||||
|
||||
# keep a copy of the word symbol table and the list of silence phones with HCLG...
# this means we can decode without reference to the $lang directory.
# (A failed copy now aborts instead of being followed by the success message.)
cp "$lang/words.txt" "$dir/" || exit 1
cp "$lang/silphones.csl" "$dir/" || exit 1

# to make const fst:
# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst

echo "Finished making decoding graphs in $dir"
|
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script replaces <s> and </s> with <eps> (on both input and output sides),
|
||||
# for the G.fst acceptor.
|
||||
|
||||
# Filter FST text lines: on lines with at least four fields, a third or fourth
# field that is exactly <s> or </s> becomes <eps>.  Every line is written back
# with its fields joined by tabs.
while (defined(my $line = <>)) {
  my @fields = split(" ", $line);
  if (scalar(@fields) >= 4) {
    foreach my $idx (2, 3) {
      my $label = $fields[$idx];
      $fields[$idx] = "<eps>" if ($label eq "<s>" || $label eq "</s>");
    }
  }
  print join("\t", @fields) . "\n";
}
|
|
@ -0,0 +1,56 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Scores the lattices in <decode-dir> at inverse acoustic weights 1..7.
# For each weight the best path is extracted, mapped from integers to symbols,
# normalised with timit_norm_trans.pl and scored against <data-dir>/text;
# results go to <decode-dir>/wer_<inv_acwt>.
if [ -f ./path.sh ]; then . ./path.sh; fi

if [ $# -ne 4 ]; then
  echo "Usage: score_lats.sh <decode-dir> <word-symbol-table> <data-dir> <phone-map>"
  exit 1;
fi

dir=$1
symtab=$2
data=$3
phonemap=$4

if [ ! -f "$symtab" ]; then
  echo No such word symbol table file $symtab
  exit 1;
fi
if [ ! -f "$data/text" ]; then
  echo Could not find transcriptions in $data/text
  exit 1
fi


trans=$data/text
cp "$trans" "$dir/test.trans"

for inv_acwt in 1 2 3 4 5 6 7; do
  acwt=$(perl -e "print (1.0/$inv_acwt);")
  # NOTE: $dir is expanded inside the "ark:...|" pipe specifier, which is
  # passed to the tool as one string, so it cannot be quoted separately here.
  lattice-best-path --acoustic-scale=$acwt --word-symbol-table="$symtab" \
    "ark:gunzip -c $dir/lat.*.gz|" "ark,t:$dir/${inv_acwt}.tra" \
    2>"$dir/rescore_${inv_acwt}.log"

  int2sym.pl --ignore-first-field "$symtab" < "$dir/${inv_acwt}.tra" \
    | timit_norm_trans.pl -i - -m "$phonemap" -from 48 -to 39 \
    | compute-wer --text --mode=present "ark:$dir/test.trans" ark,p:- \
    >& "$dir/wer_$inv_acwt"
done
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Scores already-decoded integer transcripts (<decode-dir>/*.tra) against
# <data-dir>/text.  Writes the symbol-form hypotheses to <decode-dir>/text and
# the scoring output to <decode-dir>/wer, then prints the WER line(s).
if [ -f ./path.sh ]; then . ./path.sh; fi

if [ $# -ne 4 ]; then
  echo "Usage: score_text.sh <decode-dir> <word-symbol-table> <data-dir> <phone-map>"
  exit 1;
fi

dir=$1
symtab=$2
data=$3
phonemap=$4

# Same sanity check as score_lats.sh: without it a missing table only showed
# up as an int2sym.pl error buried in the pipeline below.
if [ ! -f "$symtab" ]; then
  echo No such word symbol table file $symtab
  exit 1;
fi
if [ ! -f "$data/text" ]; then
  echo Could not find transcriptions in $data/text
  exit 1
fi

trans=$data/text
sort -k1,1 "$trans" > "$dir/test.trans"

# We assume the transcripts are already in integer form.
cat "$dir"/*.tra | sort -k1,1 \
  | int2sym.pl --ignore-first-field "$symtab" \
  | timit_norm_trans.pl -i - -m "$phonemap" -from 48 -to 39 \
  > "$dir/text"

compute-wer --text --mode=present "ark:$dir/test.trans" "ark,p:$dir/text" \
  >& "$dir/wer"

grep WER "$dir/wer"
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# creates integer lists of silence and non-silence phones in files,
|
||||
# e.g. silphones.csl="1:2:3 \n"
|
||||
# and nonsilphones.csl="4:5:6:7:...:24\n";
|
||||
|
||||
# Arguments: phone symbol table, space-separated list of silence phones, and
# the two output files for the colon-separated integer lists.
unless (@ARGV == 4) {
  die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl";
}

($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;
open(S,"<$symtab") || die "Opening symbol table $symtab";

# Set of phone names to be treated as silence.
foreach my $name (split(" ", $sillist)) {
  $issil{$name} = 1;
}

# Walk the symbol table ("<phone> <integer>" per line) and sort each non-zero
# integer id into the silence or non-silence list, in file order.
@sil = ();
@nonsil = ();
while(<S>){
  @A = split(" ", $_);
  @A == 2 || die "Bad line $_ in phone-symbol-table file $symtab";
  ($sym, $int) = @A;
  next if $int == 0;   # id 0 is left out of both lists.
  if ($issil{$sym}) {
    push(@sil, $int);
    $seensil{$sym} = 1;
  } else {
    push(@nonsil, $int);
  }
}

# Every requested silence phone must have appeared in the symbol table.
foreach $k (keys %issil) {
  die "No such silence phone $k" unless $seensil{$k};
}

open(F, ">$silphones") || die "opening silphones file $silphones";
open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
print F join(":", @sil) . "\n";
print G join(":", @nonsil) . "\n";
close(F);
close(G);

if (scalar(@sil) == 0) { print STDERR "Warning: silphones.pl no silence phones.\n" }
if (scalar(@nonsil) == 0) { print STDERR "Warning: silphones.pl no non-silence phones.\n" }
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Splits a data directory into <num-to-split> parts under
# <data-dir>/split<num>/<n>/ (n = 1..num): utt2spk and feats.scp always,
# wav.scp and text if present, plus a derived spk2utt and (if present)
# spk2gender for every part.
set -o errexit

if [ $# != 2 ]; then
  echo "Usage: split_data.sh data-dir num-to-split"
  exit 1
fi

data=$1
numsplit=$2

if [ "$numsplit" -le 0 ]; then
  echo "Invalid num-split argument $numsplit";
  exit 1;
fi

# Per-part output file names, kept in arrays so that paths containing spaces
# survive being passed to split_scp.pl.
feats=()
wavs=()
utt2spks=()
texts=()

# Line counts; a missing file counts as 0 lines (cat fails, wc still prints 0).
nu=$(cat "$data/utt2spk" | wc -l)
nf=$(cat "$data/feats.scp" | wc -l)
nt=$(cat "$data/text" | wc -l)
if [ "$nu" -ne "$nf" ]; then
  echo "split_data.sh: warning, #lines is (utt2spk,feats.scp) is ($nu,$nf);"
  echo "this script may produce incorrectly split data."
  echo "use utils/fix_data_dir.sh to fix this."
fi
if [ "$nt" -ne 0 ] && [ "$nu" -ne "$nt" ]; then
  echo "split_data.sh: warning, #lines is (utt2spk,text) is ($nu,$nt);"
  echo "this script may produce incorrectly split data."
  echo "use utils/fix_data_dir.sh to fix this."
fi

# get_splits.pl returns "0 1 2 3" or "00 01 .. 18 19" or whatever.
# for n in `get_splits.pl $numsplit`; do
for n in $(seq 1 "$numsplit"); do # Changed this to usual number sequence -Arnab
  part=$data/split$numsplit/$n
  mkdir -p "$part"
  feats+=("$part/feats.scp")
  wavs+=("$part/wav.scp")
  texts+=("$part/text")
  utt2spks+=("$part/utt2spk")
done

split_scp.pl --utt2spk="$data/utt2spk" "$data/utt2spk" "${utt2spks[@]}"
split_scp.pl --utt2spk="$data/utt2spk" "$data/feats.scp" "${feats[@]}"
if [ -f "$data/wav.scp" ]; then
  split_scp.pl --utt2spk="$data/utt2spk" "$data/wav.scp" "${wavs[@]}"
fi
if [ -f "$data/text" ]; then
  split_scp.pl --utt2spk="$data/utt2spk" "$data/text" "${texts[@]}"
fi

# for n in `get_splits.pl $numsplit`; do
for n in $(seq 1 "$numsplit"); do # Changed this to usual number sequence -Arnab
  part=$data/split$numsplit/$n
  utt2spk_to_spk2utt.pl "$part/utt2spk" > "$part/spk2utt"
  # for completeness, also split the spk2gender file
  if [ -f "$data/spk2gender" ]; then
    filter_scp.pl "$part/spk2utt" "$data/spk2gender" > "$part/spk2gender"
  fi
done

exit 0
|
|
@ -0,0 +1,211 @@
|
|||
#!/usr/bin/perl -w
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
|
||||
# This program splits up any kind of .scp or archive-type file.
|
||||
# If there is no utt2spk option it will work on any text file and
|
||||
# will split it up with an approximately equal number of lines in
|
||||
# each output file.
|
||||
# With the --utt2spk option it will work on anything that has the
|
||||
# utterance-id as the first entry on each line; the utt2spk file is
|
||||
# of the form "utterance speaker" (on each line).
|
||||
# It splits it into equal size chunks as far as it can. If you use
|
||||
# the utt2spk option it will make sure these chunks coincide with
|
||||
# speaker boundaries. In this case, if there are more chunks
|
||||
# than speakers (and in some other circumstances), some of the
|
||||
# resulting chunks will be empty and it
|
||||
# will print a warning.
|
||||
# You will normally call this like:
|
||||
# split_scp.pl scp scp.1 scp.2 scp.3 ...
|
||||
# or
|
||||
# split_scp.pl --utt2spk=utt2spk scp scp.1 scp.2 scp.3 ...
|
||||
# Note that you can use this script to split the utt2spk file itself,
|
||||
# e.g. split_scp.pl --utt2spk=utt2spk utt2spk utt2spk.1 utt2spk.2 ...
|
||||
|
||||
# You can also call the scripts like:
|
||||
# split_scp.pl -j 3 0 scp scp.0
|
||||
# [note: with this option, it assumes zero-based indexing of the split parts,
|
||||
# i.e. the second number must be 0 <= n < num-jobs.]
|
||||
|
||||
# Option parsing.
#   -j <num-jobs> <job-id>   only job <job-id>'s part is kept (0-based); the
#                            other parts are written to /dev/null
#   --utt2spk=<file>         split on speaker boundaries using this map
$num_jobs = 0;       # 0 means "no -j option".
$job_id = 0;
$utt2spk_file = "";  # empty means "no --utt2spk option".

# Two passes so that the two options may be given in either order.  The
# @ARGV > 0 guards avoid "uninitialized value" warnings (the script runs under
# -w) when the arguments run out before the usage check below.
for ($x = 1; $x <= 2; $x++) {
  if (@ARGV > 0 && $ARGV[0] eq "-j") {
    shift @ARGV;
    $num_jobs = shift @ARGV;
    $job_id = shift @ARGV;
    if (!defined $num_jobs || !defined $job_id) {
      die "Option -j requires two arguments: num-jobs and job-id";
    }
    if ($num_jobs <= 0 || $job_id < 0 || $job_id >= $num_jobs) {
      die "Invalid num-jobs and job-id: $num_jobs and $job_id";
    }
  }
  # Anchored so that only an argument that *is* the option matches, not any
  # argument that merely contains the text "--utt2spk=".
  if (@ARGV > 0 && $ARGV[0] =~ m/^--utt2spk=(.+)$/) {
    $utt2spk_file=$1;
    shift @ARGV;
  }
}

if(($num_jobs == 0 && @ARGV < 2) || ($num_jobs > 0 && (@ARGV < 1 || @ARGV > 2))) {
  die "Usage: split_scp.pl [--utt2spk=<utt2spk_file>] in.scp out1.scp out2.scp ... \n" .
      " or: split_scp.pl -j num-jobs job-id [--utt2spk=<utt2spk_file>] in.scp [out.scp]\n" .
      " ... where 0 <= job-id < num-jobs.";
}

# Build @OUTPUTS, the list of output file names (one per part).
$inscp = shift @ARGV;
if ($num_jobs == 0) { # without -j option
  @OUTPUTS = @ARGV;
} else {
  # With -j there are num-jobs parts; only part job-id goes to the named
  # output (or "-", i.e. stdout), the rest are discarded.
  for ($j = 0; $j < $num_jobs; $j++) {
    if ($j == $job_id) {
      if (@ARGV > 0) { push @OUTPUTS, $ARGV[0]; }
      else { push @OUTPUTS, "-"; }
    } else {
      push @OUTPUTS, "/dev/null";
    }
  }
}
|
||||
|
||||
if ($utt2spk_file ne "") {  # We have the --utt2spk option...
  # Read the "utterance speaker" map.
  open(U, "<$utt2spk_file") || die "Failed to open utt2spk file $utt2spk_file";
  while(<U>) {
    @A = split;
    @A == 2 || die "Bad line $_ in utt2spk file $utt2spk_file";
    ($u,$s) = @A;
    $utt2spk{$u} = $s;
  }
  close(U);

  # Read the input, grouping its lines by speaker.  @spkrs keeps the speakers
  # in order of first appearance; %spk_data holds each speaker's lines and
  # %spk_count the number of them.
  open(I, "<$inscp") || die "Opening input scp file $inscp";
  @spkrs = ();
  while(<I>) {
    @A = split;
    if(@A == 0) { die "Empty or space-only line in scp file $inscp"; }
    $u = $A[0];
    $s = $utt2spk{$u};
    if(!defined $s) { die "No such utterance $u in utt2spk file $utt2spk_file"; }
    if(!defined $spk_count{$s}) {
      push @spkrs, $s;
      $spk_count{$s} = 0;
      $spk_data{$s} = "";
    }
    $spk_count{$s}++;
    $spk_data{$s} = $spk_data{$s} . $_;
  }
  close(I);

  # Now split as equally as possible ..
  # First allocate spks to files by allocating an approximately
  # equal number of speakers.
  $numspks = @spkrs;  # number of speakers.
  $numscps = @OUTPUTS; # number of output files.
  for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
    $scparray[$scpidx] = []; # [] is array reference.
    # Start every count at 0: outputs that receive no speaker used to stay
    # undefined and triggered "uninitialized value" warnings in the balancing
    # arithmetic below.
    $scpcount[$scpidx] = 0;
  }
  for ($spkidx = 0; $spkidx < $numspks; $spkidx++) {
    $scpidx = int(($spkidx*$numscps) / $numspks);
    $spk = $spkrs[$spkidx];
    push @{$scparray[$scpidx]}, $spk;
    $scpcount[$scpidx] += $spk_count{$spk};
  }

  # Now will try to reassign beginning + ending speakers
  # to different scp's and see if it gets more balanced.
  # Suppose objf we're minimizing is sum_i (num utts in scp[i] - average)^2.
  # We can show that if considering changing just 2 scp's, we minimize
  # this by minimizing the squared difference in sizes.  This is
  # equivalent to minimizing the absolute difference in sizes.  This
  # shows this method is bound to converge.

  $changed = 1;
  while($changed) {
    $changed = 0;
    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
      # First try to reassign ending spk of this scp.
      if($scpidx < $numscps-1) {
        $sz = @{$scparray[$scpidx]};
        if($sz > 0) {
          $spk = $scparray[$scpidx]->[$sz-1];
          $count = $spk_count{$spk};
          $nutt1 = $scpcount[$scpidx];
          $nutt2 = $scpcount[$scpidx+1];
          if( abs( ($nutt2+$count) - ($nutt1-$count))
              < abs($nutt2 - $nutt1))  { # Would decrease
            # size-diff by reassigning spk...
            $scpcount[$scpidx+1] += $count;
            $scpcount[$scpidx] -= $count;
            pop @{$scparray[$scpidx]};
            unshift @{$scparray[$scpidx+1]}, $spk;
            $changed = 1;
          }
        }
      }
      # Then try to move the first speaker of this scp to the previous one.
      if($scpidx > 0 && @{$scparray[$scpidx]} > 0) {
        $spk = $scparray[$scpidx]->[0];
        $count = $spk_count{$spk};
        $nutt1 = $scpcount[$scpidx-1];
        $nutt2 = $scpcount[$scpidx];
        if( abs( ($nutt2-$count) - ($nutt1+$count))
            < abs($nutt2 - $nutt1))  { # Would decrease
          # size-diff by reassigning spk...
          $scpcount[$scpidx-1] += $count;
          $scpcount[$scpidx] -= $count;
          shift @{$scparray[$scpidx]};
          push @{$scparray[$scpidx-1]}, $spk;
          $changed = 1;
        }
      }
    }
  }
  # Now print out the files...
  for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
    $scpfn = $OUTPUTS[$scpidx];
    open(F, ">$scpfn") || die "Could not open scp file $scpfn for writing.";
    $count = 0;
    if(@{$scparray[$scpidx]} == 0) {
      print STDERR "Warning: split_scp.pl producing empty .scp file $scpfn (too many splits and too few speakers?)\n";
    } else {
      foreach $spk ( @{$scparray[$scpidx]} ) {
        print F $spk_data{$spk};
        $count += $spk_count{$spk};
      }
      if($count != $scpcount[$scpidx]) { die "Count mismatch [code error]"; }
    }
    # Check the close (as the other branch does) so write errors are reported.
    close(F) || die "Closing scp file $scpfn";
  }
} else {
  # This block is the "normal" case where there is no --utt2spk
  # option and we just break into equal size chunks.

  open(I, "<$inscp") || die "Opening input scp file $inscp";

  $numscps = @OUTPUTS;  # size of array.
  @F = ();
  while(<I>) {
    push @F, $_;
  }
  close(I);
  $numlines = @F;
  if($numlines == 0) {
    print STDERR "split_scp.pl: warning: empty input scp file $inscp\n";
  }
  $linesperscp = int( ($numlines+($numscps-1)) / $numscps); # the +$(numscps-1) forces rounding up.
  # [just doing int() rounds down].
  for($scpidx = 0; $scpidx < @OUTPUTS; $scpidx++) {
    $scpfile = $OUTPUTS[$scpidx];
    open(O, ">$scpfile") || die "Opening output scp file $scpfile";
    # Output number $scpidx gets lines [linesperscp*scpidx, linesperscp*(scpidx+1)),
    # truncated at the end of the input.
    for($n = $linesperscp * $scpidx; $n < $numlines && $n < $linesperscp*($scpidx+1); $n++) {
      print O $F[$n];
    }
    close(O) || die "Closing scp file $scpfile";
  }
}
|
|
@ -0,0 +1,125 @@
|
|||
#!/bin/bash -u
|
||||
|
||||
# Copyright 2012 Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
|
||||
# Print all arguments to stderr (backslash escapes are interpreted, as with
# echo -e) and terminate the current shell with status 1.
function error_exit () {
  echo -e "$@" 1>&2
  exit 1
}
|
||||
|
||||
# readposint --option=VALUE
# Prints VALUE (the text after the first '=') with leading zeros removed.
# Calls error_exit if the result is not a positive integer.
function readposint () {
  local retval=""
  # Take everything after the first '='; empty if the argument has no '='.
  if [[ "$1" == *=* ]]; then
    retval=${1#*=}
  fi
  # Strip all leading 0's but keep the last character.  The previous
  # ${retval#0*} removed only a single zero, so "007" was rejected.
  while [[ "$retval" == 0?* ]]; do
    retval=${retval#0}
  done
  [[ "$retval" =~ ^[1-9][0-9]*$ ]] \
    || error_exit "Argument \"$retval\" not a positive integer."
  echo $retval
}
|
||||
|
||||
PROG=$(basename "$0")
usage="Usage: $PROG [options] --log=logfile command\n
Runs the supplied command and redirect the stdout & stderr to logfile.\n
With the --qcmd option, the command is submitted to a grid engine.\n
Any 'TASK_ID' in logfile or command is replaced with job number or \$SGE_TASK_ID (for SGE).\n\n
Required arguments:\n
--log=FILE\tOutput of command redirected to this file.\n\n
Options:\n
--njobs=INT\tNumber of jobs to run (default=1). Assumes split data exists.\n
--qcmd=STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
";

# NOTE: $usage is deliberately expanded unquoted wherever it is printed.  Word
# splitting turns the literal newlines of the string above into spaces, so
# only the explicit \n escapes produce line breaks.
if [ $# -lt 2 ]; then
  error_exit $usage;
fi

NJOBS=1 # Default number of jobs
QCMD="" # No grid usage by default
LOGF="" # Required; checked after option parsing (the script runs with -u)
# Options are read up to, but not including, the last argument: at least one
# argument must remain to form the command.
while [ $# -gt 1 ]; do
  case "${1# *}" in # ${1# *} strips one leading space from the argument
    --help) echo -e $usage; exit 0 ;;
    --qcmd=*)
      # Parameter expansion rather than expr: expr exits with status 1 on an
      # empty value, which aborted the script under errexit.
      QCMD=${1#*=}; shift ;;
    --njobs=*)
      NJOBS=$(readposint "$1"); shift ;;
    --log=*)
      LOGF=${1#*=}; shift ;;
    -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
    '') shift ;; # Handle any empty arguments
    *) break ;; # interpreted as the command to execute
  esac
done

# Without this check a missing --log ended in bash's "LOGF: unbound variable".
if [ -z "$LOGF" ]; then
  error_exit "$PROG: missing required argument --log=FILE\n" $usage;
fi

logfile_base=$(basename "$LOGF" .log)
logfile_dir=$(dirname "$LOGF")
mkdir -p "$logfile_dir";

# Now, parse the command to execute: re-assemble the remaining arguments into
# one string for eval / the job script, re-quoting arguments that contain
# double quotes or spaces.
exec_cmd="";
while [ $# -gt 0 ]; do
  case "$1" in
    *\"*) exec_cmd=$exec_cmd"'''$1''' "; shift ;;
    *\ *) exec_cmd=$exec_cmd"\"$1\" "; shift ;;
    *) exec_cmd=$exec_cmd"$1 "; shift ;;
  esac
done
|
||||
|
||||
#######################################
# Runs NJOBS copies of the command in the background on this machine and
# waits for all of them.
# Globals (read): NJOBS, exec_cmd, logfile_dir, logfile_base
# Each job n (1..NJOBS) gets 'TASK_ID' replaced by n in both the command and
# the log file name, and appends its stdout/stderr to
# "$logfile_dir/<logfile_base>.log".
# A failing job touches "$logfile_dir/.error"; if that marker exists once all
# jobs are done, it is removed and error_exit is called.
# Never returns: exits 0 on success.
#######################################
function run_locally {
  local error_flag="$logfile_dir/.error"
  local n this_logfile this_command

  rm -f "$error_flag"
  for ((n = 1; n <= NJOBS; n++)); do
    this_logfile="$logfile_dir/${logfile_base//TASK_ID/$n}.log"
    this_command=${exec_cmd//TASK_ID/$n}
    (
      echo -e "# Command:\n# $this_command"
      echo "# Running on: $(hostname)"
      echo "# Started at: $(date)"
      # Quoted so that eval sees the command string exactly as assembled;
      # unquoted, the shell would word-split and glob it first, collapsing
      # whitespace inside quoted arguments.
      eval "$this_command" || touch "$error_flag"
      echo "# Finished at: $(date)"
    ) >> "$this_logfile" 2>&1 &
  done
  wait

  if [ -f "$error_flag" ]; then
    rm -f "$error_flag"
    error_exit "One (or more) locally run jobs failed."
  fi
  exit 0
}
|
||||
|
||||
#######################################
# Writes a job script and submits it as an array job through $QCMD.
# Globals (read): QCMD, NJOBS, exec_cmd, logfile_dir, logfile_base, PWD
# 'TASK_ID' in the command and in the log file name is replaced by the literal
# text $SGE_TASK_ID, which is expanded when the generated script runs.
# The script is written to <qdir>/<logfile_base without TASK_ID>.sh, where
# <qdir> is logfile_dir with a trailing "log" replaced by "q"; submission
# output is appended to <qdir>/queue.log.
# Never returns: exits with the status of the submission command.
#######################################
function run_on_grid {
  local this_logfile=${logfile_base//TASK_ID/\$SGE_TASK_ID}
  this_logfile="${logfile_dir}/${this_logfile}.log"

  # If log files are in a separate 'log' directory, create the job submission
  # scripts one level up.
  local qdir=${logfile_dir/%log/q}
  mkdir -p "$qdir"
  local qlog="$qdir/queue.log"
  local this_command=${exec_cmd//TASK_ID/\$SGE_TASK_ID}

  # Removing TASK_ID from e.g. "align.TASK_ID" leaves "align..sh"; collapse the
  # doubled dot.  This is done on the file name only, so that ".." components
  # in the directory part (e.g. "../exp/q") are left intact.
  local script_name=${logfile_base//TASK_ID/}.sh
  script_name=${script_name//../.}
  local run_this="$qdir/$script_name"

  printf '#!/bin/bash\n#$ -S /bin/bash\n#$ -V -cwd -j y\n' > "$run_this"
  {
    printf 'set -e\n'
    printf '{ cd %s\n . path.sh\n echo Running on: `hostname`\n' "$PWD"
    # The command is passed as a %s argument, never as part of the format, so
    # '%' and backslashes in it reach the script unchanged.
    # NOTE(review): because of "set -e" a failing command ends the generated
    # script before "ret=$?" and the "Finished at" line are reached.
    printf ' echo Started at: `date`\n %s\n ret=$?\n' "$this_command"
    printf ' echo Finished at: `date`\n} >& %s\nexit $ret\n' "$this_logfile"
    printf '# Submitted with:\n'
    printf '# %s -sync y -o %s -t 1-%s %s >> %s 2>&1\n' \
      "$QCMD" "$qlog" "$NJOBS" "$run_this" "$qlog"
  } >> "$run_this"

  # $QCMD is intentionally unquoted: it may carry its own switches.
  $QCMD -sync y -o "$qlog" -t "1-${NJOBS}" "$run_this" >> "$qlog" 2>&1
  exit $?
}
|
||||
|
||||
# Choose the execution backend: with no --qcmd the jobs run on this machine,
# otherwise they are submitted through the given grid command.  Both
# functions terminate the script themselves.
case "$QCMD" in
  '') run_locally ;;
  *)  run_on_grid ;;
esac
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Replaces every symbol of the input transcriptions by its integer id from the
# symbol table.
#   --ignore-oov          print symbols missing from the table unmodified
#   --ignore-first-field  copy the first field of each line through untouched
#   --map-oov SYM         replace symbols missing from the table by SYM's id
# Without --ignore-oov / --map-oov an unknown symbol is fatal.

$ignore_oov = 0;
$ignore_first_field = 0;

# The options may be given in any order; three passes over the three tests
# are enough to consume all of them.
foreach $pass (1 .. 3) {
  # Note: it will just print OOVS unmodified if you specify --ignore-oov.
  # Else will complain and put nothing out.
  if ($ARGV[0] eq "--ignore-oov") { shift @ARGV; $ignore_oov = 1; }
  if ($ARGV[0] eq "--ignore-first-field") { shift @ARGV; $ignore_first_field = 1; }
  if ($ARGV[0] eq "--map-oov") { shift @ARGV; $map_oov = shift @ARGV; }
}

$symtab = shift @ARGV;
defined($symtab)
  || die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n";

# Load the table: one "<symbol> <integer>" pair per line.
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while (<F>) {
  @A = split(" ", $_);
  die "bad line in symbol table file: $_" unless @A == 2;
  ($sym, $id) = @A;
  $sym2int{$sym} = $id + 0;
}

$num_warning = 0;
$max_warning = 20;
$error = 0;

# Translate the transcriptions (files named on the command line, or stdin).
while (<>) {
  @A = split(" ", $_);
  die "Empty line in transcriptions input." if @A == 0;

  # The key is written before the symbols are looked up.
  print shift(@A) . " " if $ignore_first_field;

  @B = ();
  foreach $a (@A) {
    $i = $sym2int{$a};
    unless (defined($i)) {
      if (defined $map_oov) {
        defined($sym2int{$map_oov})
          || die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
        # Only the first $max_warning replacements are reported.
        if ($num_warning < $max_warning) {
          print STDERR "sym2int.pl: replacing $a with $map_oov\n";
          print STDERR "sym2int.pl: not warning for OOVs any more times\n"
            if $num_warning + 1 == $max_warning;
        }
        $num_warning++;
        $i = $sym2int{$map_oov};
      } elsif ($ignore_oov) {
        $i = $a; # just print them out unmodified..
      } else {
        die "sym2int.pl: undefined symbol $a\n";
      }
    }
    push @B, $i;
  }
  print join(" ", @B), "\n";
}

exit($error ? 1 : 0);
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# converts an utt2spk file to a spk2utt file.
|
||||
# Takes input from the stdin or from a file argument;
|
||||
# output goes to the standard out.
|
||||
|
||||
die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt" if @ARGV > 1;

# Collect the utterances of every speaker, remembering the order in which the
# speakers first appear so the output follows the input order.
while (<>) {
  @A = split(" ", $_);
  die "Invalid line in utt2spk file: $_" unless @A == 2;
  ($u, $s) = @A;
  push @spklist, $s unless exists $utts{$s};
  push @{ $utts{$s} }, $u;
}

# One output line per speaker: "<speaker> <utt1> <utt2> ...".
foreach $s (@spklist) {
  $l = join(" ", @{ $utts{$s} });
  print "$s $l\n";
}
|
|
@ -61,3 +61,4 @@ nnet_cpu: base util matrix
|
|||
rnn: base util matrix lat
|
||||
|
||||
|
||||
FSTROOT = /mnt/matylda5/iveselyk/DEVEL/kaldi/sandbox/karel/tools/openfst
|
||||
|
|
|
@ -31,16 +31,22 @@ ATLASROOT=`rel2abs ../tools/ATLAS/`
|
|||
FSTROOT=`rel2abs ../tools/openfst`
|
||||
|
||||
function usage {
|
||||
echo 'Usage: ./configure [--atlas-root=ATLASROOT] [--fst-root=FSTROOT]
|
||||
echo 'Usage: ./configure [--threaded-atlas={yes|no}] [--atlas-root=ATLASROOT] [--fst-root=FSTROOT]
|
||||
[--mkl-root=MKLROOT] [--mkl-libdir=MKLLIBDIR] [--mathlib=ATLAS|MKL|CLAPACK]';
|
||||
}
|
||||
|
||||
threaded_atlas=false # By default, use the un-threaded version of ATLAS.
|
||||
|
||||
while [ $# -gt 0 ];
|
||||
do
|
||||
case "$1" in
|
||||
--help) usage; exit 0 ;;
|
||||
--atlas-root=*)
|
||||
ATLASROOT=`read_dirname $1`; shift ;;
|
||||
--threaded-atlas=yes)
|
||||
threaded_atlas=true; shift ;;
|
||||
--threaded-atlas=no)
|
||||
threaded_atlas=false; shift ;;
|
||||
--fst-root=*)
|
||||
FSTROOT=`read_dirname $1`; shift ;;
|
||||
--mkl-root=*)
|
||||
|
@ -137,10 +143,12 @@ function linux_check_static {
|
|||
}
|
||||
|
||||
function linux_configure_static {
|
||||
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the first one below.
|
||||
if $threaded_atlas; then pt=pt; else pt=""; fi
|
||||
|
||||
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below.
|
||||
for dir in /usr{,/local}/lib{64,}{,/atlas,/atlas-sse2,/atlas-sse3} \
|
||||
`pwd`/../tools/ATLAS/build/install/lib/ $ATLASROOT/lib; do
|
||||
linux_check_static && ATLASLIBDIR=$dir && break
|
||||
linux_check_static && ATLASLIBDIR=$dir
|
||||
done
|
||||
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below.
|
||||
echo "Could not find libatlas.a in any of the obvious places... will try dynamic libraries."
|
||||
|
@ -168,13 +176,14 @@ function linux_configure_static {
|
|||
return ;
|
||||
fi
|
||||
|
||||
for x in libcblas.a libatlas.a libf77blas.a; do
|
||||
for x in lib${pt}cblas.a libatlas.a lib${pt}f77blas.a; do
|
||||
if [ ! -f $ATLASLIBDIR/$x ]; then
|
||||
echo "Configuring static ATLAS libraries failed: Could not find library $x in directory $ATLASLIBDIR"
|
||||
return 1;
|
||||
fi
|
||||
ATLASLIBS="$ATLASLIBS $ATLASLIBDIR/$x"
|
||||
done
|
||||
if $threaded_atlas; then ATLASLIBS="$ATLASLIBS"; fi
|
||||
|
||||
echo ATLASINC = $ATLASROOT/include >> kaldi.mk
|
||||
echo ATLASLIBS = $ATLASLIBS >> kaldi.mk
|
||||
|
@ -189,21 +198,24 @@ function linux_check_dynamic {
|
|||
# will exit with success if $dir seems to contain ATLAS libraries with
|
||||
# right architecture (compatible with default "nm")
|
||||
if [ -f $dir/libatlas.so ]; then # candidate...
|
||||
if nm $dir/libatlas.so 2>&1 | grep "File format not recognized" >/dev/null; then
|
||||
if nm --dynamic $dir/libatlas.so 2>&1 | grep "File format not recognized" >/dev/null; then
|
||||
echo "Directory $dir may contain dynamic ATLAS libraries but seems to be wrong architecture";
|
||||
return 1;
|
||||
fi
|
||||
echo "Atlas found in $dir";
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
echo "No libatlas.so in $dir";
|
||||
return 1;
|
||||
fi
|
||||
}
|
||||
|
||||
function linux_configure_dynamic {
|
||||
if $threaded_atlas; then pt=pt; else pt=""; fi
|
||||
if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below.
|
||||
for dir in /usr{,/local}/lib{,64}{,/atlas,/atlas-sse2,/atlas-sse3} \
|
||||
`pwd`/../tools/ATLAS/build/install/lib/ $ATLASROOT/lib; do
|
||||
linux_check_dyamic && ATLASLIBDIR=$dir
|
||||
linux_check_dynamic && ATLASLIBDIR=$dir
|
||||
done
|
||||
if [ -z $ATLASLIBDIR ]; then
|
||||
echo "Could not find libatlas.so in any of the obvious places."
|
||||
|
@ -221,7 +233,7 @@ function linux_configure_dynamic {
|
|||
# for all the names we have encountered.
|
||||
for libname in lapack lapack_atlas clapack; do
|
||||
if [ -f $ATLASLIBDIR/lib${libname}.so -a "$ATLASLIBS" == "" ]; then
|
||||
if nm $ATLASLIBDIR/lib${libname}.so | grep ATL_cgetrf >/dev/null; then
|
||||
if nm --dynamic $ATLASLIBDIR/lib${libname}.so | grep ATL_cgetrf >/dev/null; then
|
||||
ATLASLIBS="-L$ATLASLIBDIR -l${libname}"
|
||||
echo "Using library $ATLASLIBS as ATLAS's CLAPACK library."
|
||||
fi
|
||||
|
@ -232,13 +244,14 @@ function linux_configure_dynamic {
|
|||
return 1;
|
||||
fi
|
||||
|
||||
for x in cblas atlas f77blas; do
|
||||
for x in ${pt}cblas atlas ${pt}f77blas; do
|
||||
if [ ! -f $ATLASLIBDIR/lib$x.so ]; then
|
||||
echo "Configuring dynamic ATLAS libraries failed: Could not find library $x in directory $ATLASLIBDIR"
|
||||
return 1;
|
||||
fi
|
||||
ATLASLIBS="$ATLASLIBS -l$x"
|
||||
done
|
||||
if $threaded_atlas; then ATLASLIBS="$ATLASLIBS"; fi
|
||||
|
||||
echo ATLASINC = $ATLASROOT/include >> kaldi.mk
|
||||
echo ATLASLIBS = $ATLASLIBS >> kaldi.mk
|
||||
|
@ -274,6 +287,11 @@ fi
|
|||
|
||||
cp makefiles/common.mk kaldi.mk
|
||||
|
||||
# Removing any previously defined FSTROOT in Makefile.
# grep cannot filter a file in place, so the Makefile is copied aside first and
# the filtered result is written back over it.
cp Makefile Makefile.bak
grep -v '^FSTROOT =' Makefile.bak > Makefile
# The copy is only a scratch file.  The former
#   [ cmp Makefile Makefile.bak >&/dev/null ] || rm Makefile.bak
# never ran cmp: "[" received the three words as a (malformed) test expression,
# always failed silently, and so the backup was always removed.  Do that
# explicitly.
rm -f Makefile.bak
|
||||
|
||||
# Most of the OS-specific steps below will append to kaldi.mk
|
||||
echo "Doing OS specific configurations ..."
|
||||
|
||||
|
@ -289,6 +307,7 @@ if [ "`uname`" == "Darwin" ]; then
|
|||
failure "Static OpenFST library not found: See ../tools/INSTALL"
|
||||
fi
|
||||
echo FSTROOT = $FSTROOT >> kaldi.mk
|
||||
echo FSTROOT = $FSTROOT >> Makefile
|
||||
# posix_memalign and gcc -rdynamic options not present on OS X 10.5.*
|
||||
osx_ver=`sw_vers | grep ProductVersion | awk '{print $2}' | sed -e 's?\.[^.]*$??'`
|
||||
echo "Configuring for OS X version $osx_ver ..."
|
||||
|
@ -316,6 +335,8 @@ if [ "`uname -o`" == "Cygwin" ]; then
|
|||
if [ ! -f /usr/lib/lapack/cygblas-0.dll ]; then
|
||||
failure "please first install package liblapack0"
|
||||
fi
|
||||
echo FSTROOT = $FSTROOT >> kaldi.mk
|
||||
echo FSTROOT = $FSTROOT >> Makefile
|
||||
cat makefiles/cygwin.mk >> kaldi.mk
|
||||
echo "Configuration succeeded for platform cygwin"
|
||||
exit 0
|
||||
|
@ -326,6 +347,7 @@ if [ "`uname`" == "Linux" ]; then
|
|||
failure "Static OpenFST library not found: See ../tools/INSTALL"
|
||||
fi
|
||||
echo FSTROOT = $FSTROOT >> kaldi.mk
|
||||
echo FSTROOT = $FSTROOT >> Makefile
|
||||
|
||||
echo "On Linux: Checking for linear algebra header files ..."
|
||||
if [ $MATHLIB == "ATLAS" ]; then
|
||||
|
|
Загрузка…
Ссылка в новой задаче