Removing the egs/rm/s4 recipe. It's based on the older generation 's3' recipes, and now there are better examples using free data in Kaldi

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4654 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Vassil Panayotov 2014-11-25 12:26:58 +00:00
Родитель 970a5484d4
Коммит adfa50a60e
30 изменённых файлов: 0 добавлений и 1986 удалений

Просмотреть файл

@ -11,8 +11,6 @@ Each subdirectory of this directory contains the
scripts for a sequence of experiments.
s5 is the currently recommended setup.
s4: A recipe based on freely available subset of RM data, distributed by CMU
s5: This is the "new-new-style" recipe. It is now finished.
All further work will be on top of this style of recipe. Note:
unlike previous recipes, this now uses the same underlying

Просмотреть файл

@ -1,21 +0,0 @@
This recipe is using a publicly available subset of Resource Management data,
distributed by CMU.
To run the recipe, the data must be downloaded first; the ./getdata.sh script
can be used for this. The ./run.sh script can then be executed to perform all
steps automatically, or the commands can be run manually by copying and pasting them.
The script and data layout are based on egs/rm/s3 recipe, with several exceptions:
- because this recipe uses pre-extracted feature vectors, no conversion from .sph
to .wav format and no subsequent feature extraction are needed. The features are just
converted from CMU Sphinx feature files to Kaldi Tables.
- only one test set is available instead of several (e.g. mar87, oct87 and so on)
as in the original recipe
- no speaker-dependent processing
- only the steps up to tri2a stage are implemented
- on the plus side it requires less disk space (about 220MB)

Просмотреть файл

@ -1 +0,0 @@
--use-energy=false # only non-default option.

Просмотреть файл

@ -1,2 +0,0 @@
# No non-default options for now.

Просмотреть файл

@ -1,22 +0,0 @@
<Topology>
<TopologyEntry>
<ForPhones>
NONSILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
<TopologyEntry>
<ForPhones>
SILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
<State> 5 </State>
</TopologyEntry>
</Topology>

Просмотреть файл

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2012 Vassil Panayotov
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Fetches the data used by this recipe into $RM1_ROOT (expected to be set by
# path.sh):
#   - rm1_cepstra.tar.gz: CMU's feature files, tried from the CMU server first
#     and from the Kaldi SourceForge files area as a fallback, then unpacked
#     inside $RM1_ROOT;
#   - RM_G.fst: the G.fst graph produced from 'wp_gram.txt'.
# The script stops with exit status 1 at the first step that fails, so that a
# failed download is not followed by an attempt to unpack a missing archive.
source path.sh || exit 1

if [ -z "$RM1_ROOT" ]; then
  echo "$0: RM1_ROOT is not set (it should be exported by path.sh)" >&2
  exit 1
fi

# Download and extract CMU's feature files
mkdir -p "$RM1_ROOT" || exit 1
wget -P "$RM1_ROOT" http://www.speech.cs.cmu.edu/databases/rm1/rm1_cepstra.tar.gz ||
  wget -P "$RM1_ROOT" http://sourceforge.net/projects/kaldi/files/rm1_cepstra.tar.gz ||
  { echo "$0: could not download rm1_cepstra.tar.gz" >&2; exit 1; }
tar -C "$RM1_ROOT/" -xf "$RM1_ROOT/rm1_cepstra.tar.gz" || exit 1

# Download the G.fst graph produced from 'wp_gram.txt'
wget -P "$RM1_ROOT" http://sourceforge.net/projects/kaldi/files/RM_G.fst ||
  { echo "$0: could not download RM_G.fst" >&2; exit 1; }

Просмотреть файл

@ -1,40 +0,0 @@
#!/bin/bash
# Builds the decoding graph for a model directory, runs the supplied decoding
# script on the test set (as a background job that is waited for), and prints
# the WER lines found in the decoding directory.
# The interpretation of decode-dir-1 as input, output and so on, depends on
# the decoding script you call.
# It assumes the model directory is one level up from decode-dir-1.
#
# Arguments: [--mono] <decode-script> <decode-dir-1>
#   --mono   if given as the first argument, it is forwarded unchanged to
#            scripts/mkgraph.sh

mono_opt=
if [ "$1" == "--mono" ]; then
  mono_opt=$1
  shift
fi

# Check the argument count before deriving anything from the arguments, so
# that a bare invocation prints the usage text rather than a dirname error.
if [ $# -ne 2 ]; then
  echo "Usage: scripts/decode.sh <decode-script> <decode-dir-1>"
  exit 1
fi

script=$1
decode_dir_1=$2                 # e.g. exp/sgmm3b/decode
dir=$(dirname "$decode_dir_1")  # e.g. exp/sgmm3b

if [ ! -x "$script" ] || [ ! -d "$dir" ]; then
  echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
  exit 1
fi

# $mono_opt is deliberately left unquoted: when it is empty it has to expand
# to no argument at all.
# NOTE(review): the exit status of mkgraph.sh is not checked here (as before).
scripts/mkgraph.sh $mono_opt data/lang_test "$dir" "$dir/graph"

"$script" "$dir" data/test data/lang "$decode_dir_1/" &
wait

# The publicly available RM subset has just one test set (instead of mar87 etc.),
# so no averaging is needed
grep WER "$decode_dir_1"/wer* || echo "Error decoding $decode_dir_1: no WER results found."

Просмотреть файл

@ -1,69 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# usage: make_trans.pl prefix in.flist input.snr out.txt out.scp
# prefix is the first part of the database "key" given to every utterance.
# in.flist is a list of filenames, one per line; every line has to end in
#   .../<speaker>/<utterance>.mfc
# input.snr is an snr format file: lines starting with ';' are skipped, all
#   other lines must look like "word1 word2 ... (UTTERANCE-NAME)".
# out.txt is the output transcriptions in format "key word1 word\n"
# out.scp is the output scp file, which is as in.flist but has the
#   database-key first on each line.
# The key written for an utterance is "<prefix>_<speaker>_<utterance>" in
# lower case, with underscores removed from the speaker name.

if (@ARGV != 5) {
  die "usage: make_trans.pl prefix in.flist input.snr out.txt out.scp\n";
}
($prefix, $in_flist, $input_snr, $out_txt, $out_scp) = @ARGV;

# Pass 1: load the transcripts, indexed by the utterance name in parentheses.
open(F, "<$input_snr") || die "Opening SNOR file $input_snr";
while (<F>) {
  if (m/^;/) { next; }
  m/(.+) \((.+)\)/ || die "bad line $_";
  $T{$2} = $1;
}
close(F);

# Pass 2: walk the file list and emit one transcript line and one scp line
# per file.
open(G, "<$in_flist") || die "Opening file list $in_flist";
open(O, ">$out_txt") || die "Open output transcription file $out_txt";
open(P, ">$out_scp") || die "Open output scp file $out_scp";
while (<G>) {
  $_ =~ m:/(\w+)/(\w+)\.mfc\s+$:i || die "bad scp line $_";
  $spkname = $1;
  $uttname = $2;
  # Transcripts are looked up under the upper-cased utterance name.
  $uttname =~ tr/a-z/A-Z/;
  defined $T{$uttname} || die "no trans for sent $uttname";
  $spkname =~ s/_//g; # remove underscore from spk name to make key nicer.
  $key = $prefix . "_" . $spkname . "_" . $uttname;
  $key =~ tr/A-Z/a-z/; # Make it all lower case.
  print O "$key $T{$uttname}\n";
  print P "$key $_";   # $_ still carries its own line terminator
}
close(G);
close(O) || die "Closing output.";
close(P) || die "Closing output.";

Просмотреть файл

@ -1,92 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Copyright 2012 Vassil Panayotov
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from one directory above this script.
# Note: when creating your own data preparation scripts, it's a good idea
# to make sure that the speaker id (if present) is a prefix of the utterance
# id, that the output scp file is sorted on utterance id, and that the
# transcription file is exactly the same length as the scp file and is also
# sorted on utterance id (missing transcriptions should be removed from the
# scp file using e.g. scripts/filter_scp.pl)
# Prepares data/local from the downloaded RM subset:
#   train.flist / test.flist       lists of feature files
#   al_sents.snr                   transcripts in the form make_trans.pl reads
#   {train,test}_trans.txt, *.scp  sorted transcripts and scp files
#   G.fst                          copy of the pre-built grammar
#   lexicon.txt                    lexicon derived from CMU's dictionary
# Argument: the directory holding RM_G.fst and the unpacked rm1/ tree. It is
# used after "cd data/local", so it has to be an absolute path (or be relative
# to data/local).
# Exits with status 1 when the data is missing or a preparation step fails.
if [ $# != 1 ]; then
  echo "Usage: ../../local/RM_data_prep.sh /path/to/RM"
  exit 1;
fi

export LC_ALL=C

RMROOT=$1

mkdir -p data/local || exit 1
cd data/local || exit 1

if [ ! -f "$RMROOT/RM_G.fst" ] || [ ! -d "$RMROOT/rm1" ]; then
  echo "Required data is missing. You can download the data by running ./getdata.sh"
  exit 1;
fi

# Make a list of files
xargs -I_x_ echo "$RMROOT/rm1/feat/_x_.mfc" \
  < "$RMROOT/rm1/etc/rm1_train.fileids" > train.flist || exit 1
xargs -I_x_ echo "$RMROOT/rm1/feat/_x_.mfc" \
  < "$RMROOT/rm1/etc/rm1_test.fileids" > test.flist || exit 1

# make_trans.pl also creates the utterance id's and the kaldi-format scp file.
# al_sents.snr is rebuilt here, because the original "al_sents.snr" file is not
# available (and because CMU's train utterances have tags like '<sil>' added).
# The train transcripts are upper-cased and cleaned; the test transcripts are
# prepended unmodified.
tr '[a-z]' '[A-Z]' < "$RMROOT/rm1/etc/rm1_train.transcription" |
  sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's: +: :g' -e 's:^ +::' |
  cat "$RMROOT/rm1/etc/rm1_test.transcription" - \
  > al_sents.snr || exit 1

# training set
../../local/make_trans.pl trn train.flist al_sents.snr train_trans.txt train.scp || exit 1
# "sort -o" may name one of its own inputs, so no temporary file is needed.
sort -k 1 -o train_trans.txt train_trans.txt || exit 1
sort -k 1 -o train.scp train.scp || exit 1

# test set
../../local/make_trans.pl test test.flist al_sents.snr test_trans.txt test.scp || exit 1
sort -k 1 -o test_trans.txt test_trans.txt || exit 1
sort -k 1 -o test.scp test.scp || exit 1

# We already have the features, so sph2pipe step is skipped and
# given the limited data the speaker-dependent processing is also not used

# "wp_gram.txt" is no longer available from LDC's website, so we are just using a
# pre-built grammar WFST (G.fst). The word-pair grammar is a finite-state description
# of the allowed utterances, which just enumerates the words that can follow each word
# in the vocabulary. G.fst is constructed by adding output arcs to each node
# representing a word, one for each word that is allowed to follow, and the
# probability mass is distributed uniformly among all these arcs.
#../../scripts/make_rm_lm.pl $RMROOT/LDC93S3B/disc_1/doc/wp_gram.txt > G.txt || exit 1;
cp "$RMROOT/RM_G.fst" ./G.fst || exit 1

# Convert the CMU's lexicon to a form which the other scripts expect
# (leave only the first pronunciation variant and convert the phones to lower case).
# Entries containing '(' are dropped; \L in the sed replacement is a GNU sed
# extension.
grep -E -v '\(' "$RMROOT/rm1/etc/rm1.dic" |
  sed -e "s/^\([[:alnum:]-]\+\('[[:alpha:]]\+\)\?\)\(.*\)/\1\L\3/g" > lexicon.txt || exit 1

echo RM_data_prep succeeded.

Просмотреть файл

@ -1,128 +0,0 @@
#!/bin/bash
#
# Copyright 2012 Vassil Panayotov
# modified from:
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from one directory above this script.
# Builds data/lang (training) and data/lang_test (decoding) from the files in
# data/local: symbol tables, silence/non-silence phone lists, the lexicon FSTs
# (L.fst, L_align.fst, L_disambig.fst), the grammar G.fst and the HMM topology,
# and copies the per-set scp/transcript files to data/train and data/test.
# The FST sanity checks near the end only print a message when they fail; they
# do not stop the script.
if [ -f path.sh ]; then . path.sh; fi

data_list="train test"

for x in lang lang_test $data_list; do
  mkdir -p data/$x
done

# Copy stuff into its final location:
for x in $data_list; do
  cp data/local/${x}.scp data/$x/mfc.scp || exit 1;
  cp data/local/${x}_trans.txt data/$x/text || exit 1;
done

# We are not using make_words_symtab.pl for symbol table creation in this
# recipe, because CMU's lexicon have several words that are not in the
# word-pair grammar.
# Words are numbered by their line number in the lexicon; <eps> is 0 and
# !SIL gets the id after the last word.
cat data/local/lexicon.txt | \
  awk 'BEGIN{print "<eps>\t0";} {print $1 "\t" NR;} END{print "!SIL\t" NR+1;}' \
  > data/lang/words.txt

scripts/make_phones_symtab.pl < data/local/lexicon.txt > data/lang/phones.txt

cp data/lang/words.txt data/lang_test/words.txt

silphones="sil"; # This would in general be a space-separated list of all silence phones. E.g. "sil vn"
# Generate colon-separated lists of silence and non-silence phones.
scripts/silphones.pl data/lang/phones.txt "$silphones" data/lang/silphones.csl \
  data/lang/nonsilphones.csl

# add_lex_disambig.pl prints the number of disambiguation symbols it used.
ndisambig=$(scripts/add_lex_disambig.pl data/local/lexicon.txt data/local/lexicon_disambig.txt)
ndisambig=$((ndisambig+1)); # add one disambig symbol for silence in lexicon FST.
scripts/add_disambig.pl data/lang/phones.txt $ndisambig > data/lang_test/phones_disambig.txt
cp data/lang_test/phones_disambig.txt data/lang/ # needed for MMI.

silprob=0.5 # same prob as word
scripts/make_lexicon_fst.pl data/local/lexicon.txt $silprob sil | \
  fstcompile --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt \
  --keep_isymbols=false --keep_osymbols=false | \
  fstarcsort --sort_type=olabel > data/lang/L.fst

# Create L_align.fst, which is as L.fst but with alignment symbols (#1 and #2 at the
# beginning and end of words, on the input side)... useful if we
# ever need to e.g. create ctm's-- these are used to work out the
# word boundaries.
cat data/local/lexicon.txt | \
  awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
  scripts/make_lexicon_fst.pl - $silprob sil | \
  fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
  --keep_isymbols=false --keep_osymbols=false | \
  fstarcsort --sort_type=olabel > data/lang_test/L_align.fst

# L_disambig.fst has the disambiguation symbols (c.f. Mohri's papers)
scripts/make_lexicon_fst.pl data/local/lexicon_disambig.txt $silprob sil '#'$ndisambig | \
  fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
  --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel \
  > data/lang_test/L_disambig.fst

cp data/lang_test/L_disambig.fst data/lang/ # Needed for MMI training.

# Compilation is no longer needed, because we are using a pre-built G.fst
#fstcompile --isymbols=data/lang/words.txt --osymbols=data/lang/words.txt --keep_isymbols=false \
# --keep_osymbols=false data/local/G.txt > data/lang_test/G.fst
cp data/local/G.fst data/lang_test/

# Checking that G is stochastic [note, it wouldn't be for an Arpa]
fstisstochastic data/lang_test/G.fst || echo Error: G is not stochastic

# Checking that G.fst is determinizable.
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.

# Checking that L_disambig.fst is determinizable.
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.

# Checking that disambiguated lexicon times G is determinizable
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
  fstdeterminize >/dev/null || echo Error determinizing L_disambig composed with G.

# Checking that LG is stochastic:
fsttablecompose data/lang/L.fst data/lang_test/G.fst | \
  fstisstochastic || echo Error: LG is not stochastic.

# Checking that L_disambig.G is stochastic:
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
  fstisstochastic || echo Error: L_disambig.G is not stochastic.

## Check lexicon.
## just have a look and make sure it seems sane.
echo "First few lines of lexicon FST:"
fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head

# Fill the phone lists into the topology template. NONSILENCEPHONES has to be
# substituted first, because SILENCEPHONES is a substring of it.
silphonelist=$(cat data/lang/silphones.csl | sed 's/:/ /g')
nonsilphonelist=$(cat data/lang/nonsilphones.csl | sed 's/:/ /g')
cat conf/topo.proto | sed "s:NONSILENCEPHONES:$nonsilphonelist:" | \
  sed "s:SILENCEPHONES:$silphonelist:" > data/lang/topo

for x in phones.txt words.txt silphones.csl nonsilphones.csl topo; do
  cp data/lang/$x data/lang_test/$x || exit 1;
done

echo RM_format_data succeeded.

Просмотреть файл

@ -1,13 +0,0 @@
#!/bin/bash
# Environment shared by the recipe scripts.

# Kaldi's root directory, located three levels above the current directory.
root="$(pwd)/../../.."

# Put the Kaldi and OpenFst binary directories in front of PATH. They are
# prepended last-to-first, so src/bin ends up being searched first.
PATH="${root}/src/latbin:${PATH}"
PATH="${root}/src/lm:${PATH}"
PATH="${root}/src/sgmmbin:${PATH}"
PATH="${root}/src/fgmmbin:${PATH}"
PATH="${root}/src/featbin/:${PATH}"
PATH="${root}/src/gmmbin/:${PATH}"
PATH="${root}/src/fstbin/:${PATH}"
PATH="${root}/tools/openfst/bin:${PATH}"
PATH="${root}/src/bin:${PATH}"
export PATH

# Directory in which the subset of the RM corpus is stored.
RM1_ROOT="$(pwd)/data/download"
export RM1_ROOT

# Locale settings exported to every tool started by the recipe.
LC_ALL=C
LC_LOCALE_ALL=C
export LC_ALL LC_LOCALE_ALL

Просмотреть файл

@ -1,57 +0,0 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Top-level driver of the recipe: data preparation, feature conversion, then
# monophone, tri1 and tri2a training, each followed by decoding.
source ./path.sh

# Runs the given command line; if it fails, the whole recipe is aborted with
# exit status 1.
run_step() {
  "$@" || exit 1
}

# Data preparation; the first script is given the directory where the RM data is.
run_step local/rm_data_prep.sh $RM1_ROOT
run_step local/rm_format_data.sh

# Directory in which the MFCC features are stored.
mfcc_dir=data/rm_feats

# Convert the Sphinx feature files to Kaldi tables.
for dataset in train test; do
  run_step steps/make_mfcc.sh data/$dataset exp/make_mfcc/$dataset $mfcc_dir
done

run_step scripts/subset_data_dir.sh data/train 1000 data/train.1k

# Monophone system: train on the 1k subset, then decode.
run_step steps/train_mono.sh data/train.1k data/lang exp/mono
run_step local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode

# Alignments from the monophone system.
run_step steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali

# tri1 [first triphone pass]: train, then decode.
run_step steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1
run_step local/decode.sh steps/decode_deltas.sh exp/tri1/decode

# Align with tri1, reading the graphs from exp/tri1/graphs.fsts.gz.
run_step steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
  data/train data/lang exp/tri1 exp/tri1_ali

# tri2a [delta+delta-deltas]: train, then decode.
run_step steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a
run_step local/decode.sh steps/decode_deltas.sh exp/tri2a/decode

Просмотреть файл

@ -1,58 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Appends disambiguation symbols to a symbol table.
# The table is copied to standard output unchanged, followed by num_extra new
# entries "#1", "#2", ... whose ids continue after the id on the table's last
# line.  With --include-zero an entry "#0" is written before them.

unless (@ARGV == 2 || (@ARGV == 3 && $ARGV[0] eq "--include-zero")) {
  die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
}

# Three arguments means the option is present.
$include_zero = (@ARGV == 3) ? 1 : 0;
if ($include_zero) {
  $ARGV[0] eq "--include-zero" || die "Bad option/first argument $ARGV[0]";
  shift @ARGV;
}

($input, $nsyms) = @ARGV[0, 1];

# Echo the existing table, remembering the id found on its last line.
open(F, "<$input") || die "Opening file $input";
while ($line = <F>) {
  @fields = split(" ", $line);
  @fields == 2 || die "Bad line $line";
  $lastsym = $fields[1];
  print $line;
}

defined($lastsym) || die "Empty symbol file?";

if ($include_zero) {
  # "#0" takes the next free id, and the numbered symbols follow it.
  $lastsym++;
  print "#0 $lastsym\n";
}

for ($k = 1; $k <= $nsyms; $k++) {
  $id = $k + $lastsym;
  print "#$k $id\n";
}

Просмотреть файл

@ -1,101 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Adds disambiguation symbols to a lexicon.
# Outputs still in the normal lexicon format.
# Disambig syms are numbered #1, #2, #3, etc. (#0
# reserved for symbol in grammar).
# Outputs the number of disambig syms to the standard output.
#
# Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt
#   lexicon.txt           input, lines of the form "word phone1 phone2 ..."
#   lexicon_disambig.txt  output, lines of the form "word<TAB>phones [#N]"

if(@ARGV != 2) {
  die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n";
}

$lexfn = shift @ARGV;
$lexoutfn = shift @ARGV;

open(L, "<$lexfn") || die "Error opening lexicon $lexfn";

# (1) Read in the lexicon (each entry normalized to single-space separators).
@L = ( );
while(<L>) {
  @A = split(" ", $_);
  push @L, join(" ", @A);
}
close(L);

# (2) Work out the count of each phone-sequence in the
# lexicon.
foreach $l (@L) {
  @A = split(" ", $l);
  shift @A; # Remove word.
  $count{join(" ",@A)}++;
}

# (3) For each left sub-sequence of each phone-sequence, note down
# that exists (for identifying prefixes of longer strings).
# The empty sequence is always recorded as well.
foreach $l (@L) {
  @A = split(" ", $l);
  shift @A; # Remove word.
  while(@A > 0) {
    pop @A; # Remove last phone
    $issubseq{join(" ",@A)} = 1;
  }
}

# (4) For each entry in the lexicon:
# if the phone sequence is unique and is not a
# prefix of another word, no disambig symbol.
# Else output #1, or #2, #3, ... if the same phone-seq
# has already been assigned a disambig symbol.
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";

$max_disambig = 0;
foreach $l (@L) {
  @A = split(" ", $l);
  $word = shift @A;
  $phnseq = join(" ",@A);
  if(!defined $issubseq{$phnseq}
     && $count{$phnseq}==1) {
    ; # Do nothing.
  } else {
    if($phnseq eq "") { # need disambig symbols for the empty string
      # that are not used anywhere else.
      $max_disambig++;
      $reserved{$max_disambig} = 1;
      $phnseq = "#$max_disambig";
    } else {
      # Last symbol number handed out for this phone sequence, if any.
      $curnumber = $disambig_of{$phnseq};
      if(!defined $curnumber) { $curnumber = 0; }
      $curnumber++; # now 1 or 2, ...
      while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
      if($curnumber > $max_disambig) {
        $max_disambig = $curnumber;
      }
      $disambig_of{$phnseq} = $curnumber;
      $phnseq = $phnseq . " #" . $curnumber;
    }
  }
  print O "$word\t$phnseq\n";
}
close(O) || die "Closing lexicon file $lexoutfn.\n";

print $max_disambig . "\n";

Просмотреть файл

@ -1,40 +0,0 @@
#!/usr/bin/perl -w
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Filters an scp file (or any file whose first field is an utterance id)
# by a list of utterance-ids: only the lines whose first field occurs as a
# first field in id_list are printed.  The file to filter is read from the
# optional second argument, or from standard input when it is absent.

unless (@ARGV >= 1 && @ARGV <= 2) {
  die "Usage: filter_scp.pl id_list [in.scp] > out.scp ";
}

my $idlist = shift @ARGV;
my %seen;

# Collect the ids to keep.
open(F, "<$idlist") || die "Could not open id-list file $idlist";
while (my $line = <F>) {
  my @fields = split(" ", $line);
  die "Invalid id-list file line $line" unless @fields >= 1;
  $seen{$fields[0]} = 1;
}

# Pass through the wanted lines of the remaining input.
while (my $line = <>) {
  my @fields = split(" ", $line);
  die "Invalid scp file line $line" unless @fields > 0;
  print $line if $seen{$fields[0]};
}

Просмотреть файл

@ -1,90 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Replaces integer ids in the input by the symbols they stand for in a symbol
# table (lines of the form "symbol integer").
#
# Usage: int2sym.pl [options] symtab [input] > output
# Options:
#   --ignore-noninteger   copy tokens that are not all digits unchanged
#   --ignore-first-field  copy the first field of every line unchanged
#   --field N             convert only the N-th (one-based) field of each line
# --ignore-first-field and --field cannot be combined.

$ignore_noninteger = 0;
$ignore_first_field = 0;
$field = -1;
for($x = 0; $x < 2; $x++) {
  if(@ARGV > 0 && $ARGV[0] eq "--ignore-noninteger") { $ignore_noninteger = 1; shift @ARGV; }
  if(@ARGV > 0 && $ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
  if(@ARGV > 0 && $ARGV[0] eq "--field") {
    shift @ARGV; $field = $ARGV[0]+0; shift @ARGV;
    if ($field < 1) { die "Bad argument to --field option: $field"; }
  }
}

if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
$zfield = $field-1; # Change to zero-based indexing.

$symtab = shift @ARGV;
if(!defined $symtab) {
  die "Usage: int2sym.pl symtab [input] > output\n";
}
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while(<F>) {
  @A = split(" ", $_);
  @A == 2 || die "bad line in symbol table file: $_";
  $int2sym{$A[1]} = $A[0];
}
close(F);

# Returns the symbol for the token $tok found at (zero-based) position $pos.
# A token that is not all digits is returned unchanged when
# --ignore-noninteger is in effect and is a fatal error otherwise; an integer
# that is missing from the symbol table is always a fatal error.
sub int2sym {
  my $tok = shift @_;
  my $pos = shift @_;
  if($tok !~ m:^\d+$:) { # not all digits..
    if($ignore_noninteger) {
      # Hand the token back to the caller instead of leaving the sub with
      # "next", which would jump into whatever loop the caller is running.
      return $tok;
    } else {
      if($pos == 0) {
        die "int2sym.pl: found noninteger token $tok (try --ignore-first-field)\n";
      } else {
        die "int2sym.pl: found noninteger token $tok (try --ignore-noninteger if valid input)\n";
      }
    }
  }
  my $sym = $int2sym{$tok};
  if(!defined ($sym)) {
    die "int2sym.pl: integer $tok not in symbol table $symtab.";
  }
  return $sym;
}

while(<>) {
  @A = split(" ", $_);
  if($ignore_first_field) {
    $key = shift @A;
    print $key . " ";
  }
  if ($field != -1) {
    # Only the selected field is converted; lines too short to have it are
    # printed as they are.
    if ($zfield <= $#A && $zfield >= 0) {
      $A[$zfield] = int2sym($A[$zfield], $zfield);
    }
    print join(" ", @A);
  } else {
    # Every token is converted and followed by a single space.
    for ($pos = 0; $pos <= $#A; $pos++) {
      print int2sym($A[$pos], $pos) . " ";
    }
  }
  print "\n";
}

Просмотреть файл

@ -1,122 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# makes lexicon FST (no pron-probs involved).
#
# Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] > lexiconfst.txt
#
# Reads a lexicon with lines of the form "word phone1 phone2 ..." and prints
# the arcs of the lexicon FST as text, one per line:
#   source-state <TAB> dest-state <TAB> phone <TAB> word-or-<eps> [<TAB> cost]
# followed by a line naming the final state.  The word is emitted on the first
# arc of each pronunciation.  When silprob is non-zero, optional silence
# (silphone) is allowed at the start and after every word, with costs
# -log(silprob) and -log(1 - silprob).

if(@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] > lexiconfst.txt\n";
}

$lexfn = shift @ARGV;
if(@ARGV == 0) {
  $silprob = 0.0;
} elsif (@ARGV == 2){
  ($silprob,$silphone) = @ARGV;
} else {
  ($silprob,$silphone,$sildisambig) = @ARGV;
}
if($silprob != 0.0) {
  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
  $silcost = -log($silprob);
  $nosilcost = -log(1.0 - $silprob);
}

# The two-argument form of open is kept on purpose: with it, a lexicon name
# of "-" reads the lexicon from standard input.
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";

sub is_sil {
  # Return true (1) if provided with a phone-sequence
  # that means silence.
  # @_ is the parameters of the function
  # This function returns true if @_ equals ( $silphone )
  # or something of the form ( "#0", $silphone, "#1" )
  # where the "#0" and "#1" are disambiguation symbols.
  return ( (@_ == 1 && $_[0] eq $silphone) ||
           (@_ == 3 && $_[1] eq $silphone &&
            $_[0] =~ m/^\#\d+$/ &&
            $_[2] =~ m/^\#\d+$/));
}

if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
  $loopstate = 0;
  $nextstate = 1; # next unallocated state.
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;

    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
      } else {
        $ns = $loopstate; # last phone: go back to the loop state.
      }
      print "$s\t$ns\t$p\t$word_or_eps\n";
      $word_or_eps = "<eps>";
      $s = $ns;
    }
  }
  print "$loopstate\t0\n"; # final-cost.
} else { # have silence probs.
  $startstate = 0;
  $loopstate = 1;
  $silstate = 2; # state from where we go to loopstate after emitting silence.
  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
  if (!defined $sildisambig) {
    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
    $nextstate = 3;
  } else {
    $disambigstate = 3;
    $nextstate = 4;
    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
    print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
  }
  while(<L>) {
    @A = split(" ", $_);
    $w = shift @A;

    # Decide whether this entry is a silence word while the whole phone
    # sequence is still available; the loop below consumes @A.
    $sil_word = is_sil(@A);

    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if(@A > 0) {
        $ns = $nextstate++;
        print "$s\t$ns\t$p\t$word_or_eps\n";
        $word_or_eps = "<eps>";
        $s = $ns;
      } else {
        if(!$sil_word){
          # This is non-deterministic but relatively compact,
          # and avoids epsilons.
          print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
          print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
        } else {
          # no point putting opt-sil after silence word.
          print "$s\t$loopstate\t$p\t$word_or_eps\n";
        }
        $word_or_eps = "<eps>";
      }
    }
  }
  print "$loopstate\t0\n"; # final-cost.
}

Просмотреть файл

@ -1,37 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# make_phones_symtab.pl < lexicon.txt > phones.txt
#
# Writes a phone symbol table ("symbol<TAB>id" lines) for the lexicon read from
# standard input (or from the files named on the command line).  <eps> gets
# id 0, the phones found in the lexicon follow in sorted order starting at 1,
# and "sil" is always written last.

while(<>) {
  @A = split(" ", $_);
  # $A[0] is the word; every following field is a phone, the first one
  # included.
  for ($i=1; $i<@A; $i++) {
    $P{$A[$i]} = 1; # seen it.
  }
}

print "<eps>\t0\n";
$n = 1;
foreach $p (sort keys %P) {
  # "sil" is left out here because it is appended unconditionally below;
  # writing it twice would give one symbol two ids.
  if($p ne "<eps>" && $p ne "sil") {
    print "$p\t$n\n";
    $n++;
  }
}

print "sil\t$n\n";

Просмотреть файл

@ -1,119 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Yanmin Qian Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This file takes as input the file wp_gram.txt that comes with the RM
# distribution, and creates the language model as an acceptor in FST form.
# make_rm_lm.pl wp_gram.txt > G.txt
#
# Input handling, as implemented below: every line has its last character
# removed and all spaces deleted.  A line starting with '>' opens the entry of
# a new word; each later line is recorded as a word that may follow the word
# of the current entry.  Lines before the first '>' line are ignored.
# Output: one arc per line, "src dst ilabel olabel cost", then one line
# "state 0" for the final state.  All arcs leaving a word share the cost
# -log(1/number-of-followers).
# NOTE(review): the usage message is printed to standard output and the script
# exits with status 0 when the argument count is wrong.
if (@ARGV != 1) {
print "usage: make_rm_lm.pl wp_gram.txt > G.txt\n";
exit(0);
}
unless (open(IN_FILE, "@ARGV[0]")) {
die ("can't open @ARGV[0]");
}
# $flag becomes 1 once the first '>' line has been seen.
$flag = 0;
# Number of word entries read so far.
$count_wrd = 0;
# Number of follower lines containing SENTENCE-END (only counted, not used
# further down).
$cnt_ends = 0;
# Word of the entry currently being read.
$init = "";
while ($line = <IN_FILE>)
{
# Drop the last character of the line (presumably the newline) and all spaces.
chop($line);
$line =~ s/ //g;
if(($line =~ /^>/))
{
if($flag == 0)
{
$flag = 1;
}
$line =~ s/>//g;
# Store the follower count of the previous entry before starting a new one.
$hashcnt{$init} = $i;
$init = $line;
$i = 0;
$count_wrd++;
# @LineArray keeps the words in the order their entries appear.
@LineArray[$count_wrd - 1] = $init;
# 0 in %hashwrd means "no state number assigned yet".
$hashwrd{$init} = 0;
}
elsif($flag != 0)
{
# $hash{word}[k] is the k-th word allowed after "word".
$hash{$init}[$i] = $line;
$i++;
if($line =~ /SENTENCE-END/)
{
$cnt_ends++;
}
}
else
{}
}
# Follower count of the last entry in the file.
$hashcnt{$init} = $i;
$num = 0;
$weight = 0;
$init_wrd = "SENTENCE-END";
# SENTENCE-END gets the number of word entries as its state number.
$hashwrd{$init_wrd} = @LineArray;
# Arcs out of state 0: one for each word listed after SENTENCE-END; these
# words get the state numbers 1, 2, ... in listing order.
for($i = 0; $i < $hashcnt{$init_wrd}; $i++)
{
$weight = -log(1/$hashcnt{$init_wrd});
$hashwrd{$hash{$init_wrd}[$i]} = $i + 1;
print "0 $hashwrd{$hash{$init_wrd}[$i]} $hash{$init_wrd}[$i] $hash{$init_wrd}[$i] $weight\n";
}
# Highest state number handed out so far.
$num = $i;
# Arcs out of every other word, in entry order.
for($i = 0; $i < @LineArray; $i++)
{
if(@LineArray[$i] eq 'SENTENCE-END')
{}
else
{
# Give the source word a state number if it does not have one yet.
if($hashwrd{@LineArray[$i]} == 0)
{
$num++;
$hashwrd{@LineArray[$i]} = $num;
}
for($j = 0; $j < $hashcnt{@LineArray[$i]}; $j++)
{
$weight = -log(1/$hashcnt{@LineArray[$i]});
# Same for the follower word.
if($hashwrd{$hash{@LineArray[$i]}[$j]} == 0)
{
$num++;
$hashwrd{$hash{@LineArray[$i]}[$j]} = $num;
}
# The arc into SENTENCE-END carries <eps> labels; every other arc carries the
# follower word as both input and output label.
if($hash{@LineArray[$i]}[$j] eq 'SENTENCE-END')
{
print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} <eps> <eps> $weight\n"
}
else
{
print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $hash{@LineArray[$i]}[$j] $hash{@LineArray[$i]}[$j] $weight\n";
}
}
}
}
# The state of SENTENCE-END is the final state, with cost 0.
print "$hashwrd{$init_wrd} 0\n";
close(IN_FILE);

Просмотреть файл

@ -1,102 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Written by Dan Povey 9/21/2010. Apache 2.0 License.
# This version of make_roots.pl is specialized for RM.
# This script creates the file roots.txt which is an input to train-tree.cc. It
# specifies how the trees are built.
#
# Usage:
#   make_roots.pl [--separate] phones.txt silence-phone-list \
#       shared|not-shared split|not-split > roots.txt
#
# phones.txt is a "symbol id" table; the entry with id 0 is ignored.
# silence-phone-list is a colon-separated list of integer phone ids.  Each
# silence phone is written on its own line as "not-shared not-split <id>".
# All remaining phones get the shared/split settings given on the command
# line: with --separate each one gets its own line (its own tree root),
# otherwise they are all lumped together on a single line.
#
# Meaning of the two settings:
# if shared=="shared" then we share the tree-root between different HMM-positions
# (0,1,2). If split=="split" then we actually do decision tree splitting on
# that root, otherwise we forbid decision-tree splitting. (The main reason we
# might forbid splitting is for silence, when we want to ensure that the
# HMM-positions will remain with a single PDF id.)

$separate = 0;
if (@ARGV > 0 && $ARGV[0] eq "--separate") {
  $separate = 1;
  shift @ARGV;
}
if (@ARGV != 4) {
  die "Usage: make_roots.pl [--separate] phones.txt silence-phone-list[integer,colon-separated] shared|not-shared split|not-split > roots.txt\n";
}
($phonesfile, $silphones, $shared, $split) = @ARGV;
if ($shared ne "shared" && $shared ne "not-shared") {
  die "Third argument must be \"shared\" or \"not-shared\"\n";
}
if ($split ne "split" && $split ne "not-split") {
  die "Fourth argument must be \"split\" or \"not-split\"\n";
}

# Read the phone table, remembering every phone whose id is not 0.
open(F, "<$phonesfile") || die "Opening file $phonesfile";
while (<F>) {
  @A = split(" ", $_);
  if (@A != 2) {
    die "Bad line in phones symbol file: ".$_;
  }
  if ($A[1] != 0) {
    $id2symbol{$A[1]} = $A[0];
  }
}
close(F);

# String comparison: a numeric "==" would treat any non-numeric list as 0.
if ($silphones eq "") {
  die "Empty silence phone list in make_roots.pl";
}
foreach $silphoneid (split(":", $silphones)) {
  defined $id2symbol{$silphoneid} || die "No such silence phone id $silphoneid";
  # Give each silence phone its own separate pdfs in each state, but
  # no sharing (in this recipe; WSJ is different.. in this recipe there
  # is only one silence phone anyway.)
  $issil{$silphoneid} = 1;
  print "not-shared not-split $silphoneid\n";
}

# Non-silence phones in ascending id order, so that roots.txt is identical
# from run to run instead of following Perl's hash ordering.
@otherphones = grep { !defined $issil{$_} } sort { $a <=> $b } keys %id2symbol;

if ($separate) {
  foreach $phoneid (@otherphones) {
    print "$shared $split $phoneid\n";
  }
} else {
  # Single line; every id is followed by one space, as before.
  print "$shared $split ";
  foreach $phoneid (@otherphones) {
    print "$phoneid ";
  }
  print "\n";
}

Просмотреть файл

@ -1,112 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Builds <graphdir>/HCLG.fst from <test-lang-dir>/{L_disambig,G}.fst and
# <model-dir>/{tree,final.mdl}.  Intermediate FSTs are cached (LG and CLG
# under <test-lang-dir>/tmp, Ha and HCLGa under <graphdir>) and are only
# rebuilt when missing or older than their inputs.
#
# Options (accepted in either order, before the positional arguments):
#   --mono   use context size 1 / central position 0 instead of 3 / 1
#   --clean  delete <test-lang-dir>/tmp before starting

N=3
P=1
clean=false
# "${1:-}" keeps the test well-formed once the arguments are exhausted.
for _pass in 1 2 3; do
  if [ "${1:-}" == "--mono" ]; then
    N=1
    P=0
    shift
  fi
  if [ "${1:-}" == "--clean" ]; then
    clean=true
    shift
  fi
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

# ${lang:?} aborts instead of touching /tmp if the lang dir argument is empty.
if $clean; then rm -rf -- "${lang:?}/tmp"; fi

mkdir -p "$dir"

tscale=1.0
loopscale=0.1

# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
# (-f means file exists, and -ot means older than).
mkdir -p "$lang/tmp"
if [[ ! -f "$lang/tmp/LG.fst" || "$lang/tmp/LG.fst" -ot "$lang/G.fst" || \
      "$lang/tmp/LG.fst" -ot "$lang/L_disambig.fst" ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" \
    | fstdeterminizestar --use-log=true \
    | fstminimizeencoded > "$lang/tmp/LG.fst" || exit 1;
  fstisstochastic "$lang/tmp/LG.fst" || echo "warning: LG not stochastic."
fi

if [ ! -f "$lang/phones_disambig.txt" ]; then
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1;
fi

# Integer ids of the disambiguation symbols (the entries containing '#').
grep '#' "$lang/phones_disambig.txt" | awk '{print $2}' > "$lang/tmp/disambig_phones.list"

clg=$lang/tmp/CLG_${N}_${P}.fst

if [[ ! -f "$clg" || "$clg" -ot "$lang/tmp/LG.fst" ]]; then
  # Stop here on failure, like every other build step in this script.
  fstcomposecontext --context-size="$N" --central-position="$P" \
    --read-disambig-syms="$lang/tmp/disambig_phones.list" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.list" \
    "$lang/tmp/ilabels_${N}_${P}" < "$lang/tmp/LG.fst" > "$clg" || exit 1;
  fstisstochastic "$clg" || echo "warning: CLG not stochastic."
fi

if [[ ! -f "$dir/Ha.fst" || "$dir/Ha.fst" -ot "$model" ]]; then
  make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
    --transition-scale="$tscale" "$lang/tmp/ilabels_${N}_${P}" "$tree" "$model" \
    > "$dir/Ha.fst" || exit 1;
fi

if [[ ! -f "$dir/HCLGa.fst" || "$dir/HCLGa.fst" -ot "$dir/Ha.fst" || \
      "$dir/HCLGa.fst" -ot "$clg" ]]; then
  fsttablecompose "$dir/Ha.fst" "$clg" \
    | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.list" \
    | fstrmepslocal \
    | fstminimizeencoded > "$dir/HCLGa.fst" || exit 1;
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

if [[ ! -f "$dir/HCLG.fst" || "$dir/HCLG.fst" -ot "$dir/HCLGa.fst" ]]; then
  add-self-loops --self-loop-scale="$loopscale" --reorder=true \
    "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst" || exit 1;

  if [[ "$tscale" == "1.0" && "$loopscale" == "1.0" ]]; then
    # No point doing this test if transition-scale not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
  fi
fi

# to make const fst:
# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst

Просмотреть файл

@ -1,57 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Splits the integer phone ids of a symbol table into two colon-separated
# lists, each written as one line to its own file:
#   silphones.csl     e.g. "1:2:3\n"
#   nonsilphones.csl  e.g. "4:5:6:7:...:24\n"
# The silence phones are named, space-separated, in the second argument.
# The symbol with id 0 goes into neither list.

die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl"
  unless @ARGV == 4;

my ($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;

open(S,"<$symtab") || die "Opening symbol table $symtab";

# Names of the phones that count as silence.
my %issil = map { $_ => 1 } split(" ", $sillist);

my @sil = ();       # ids of silence phones, in symbol-table order
my @nonsil = ();    # ids of all other phones, in symbol-table order
my %seensil;        # silence names that really occur in the table

while (my $row = <S>) {
  my @fields = split(" ", $row);
  @fields == 2 || die "Bad line $row in phone-symbol-table file $symtab";
  my ($sym, $int) = @fields;
  next if $int == 0;
  if ($issil{$sym}) {
    push @sil, $int;
    $seensil{$sym} = 1;
  } else {
    push @nonsil, $int;
  }
}

# Every requested silence phone has to exist in the symbol table.
foreach my $name (keys %issil) {
  die "No such silence phone $name" unless $seensil{$name};
}

open(F, ">$silphones") || die "opening silphones file $silphones";
open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
print F join(":", @sil) . "\n";
print G join(":", @nonsil) . "\n";
close(F);
close(G);

print STDERR "Warning: silphones.pl no silence phones.\n" if @sil == 0;
print STDERR "Warning: silphones.pl no non-silence phones.\n" if @nonsil == 0;

Просмотреть файл

@ -1,99 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
# feats.scp   (required)
# wav.scp     (copied only with --per-spk)
# mfc.scp     (copied only without --per-spk)
# spk2utt
# utt2spk
# text
# spk2gender
# It creates a subset of that data, consisting of some specified
# number of utterances. (The selected utterances are distributed
# evenly throughout the file, by the program ./subset_scp.pl).
# If you give the --per-spk option, it will attempt to select
# the supplied number of utterances for each speaker (typically
# you would supply a much smaller number in this case).

perspk=false
if [ "$1" == "--per-spk" ]; then
  perspk=true;
  shift;
fi

if [ $# != 3 ]; then
  echo "Usage: subset_data_dir.sh [--per-spk] <srcdir> <num-utt> <destdir>"
  exit 1;
fi

srcdir=$1
numutt=$2
destdir=$3

if [ ! -f "$srcdir/feats.scp" ]; then
  echo "subset_data_dir.sh: no such file $srcdir/feats.scp"
  exit 1;
fi

## scripting note: $perspk evaluates to true or false
## so this becomes the command true or false.
if $perspk; then
  mkdir -p "$destdir"
  # For every speaker keep at most n utterances, picked with a fixed stride
  # so that they are spread over the speaker's utterance list.  n is passed
  # with -v rather than pasted into the program text.
  awk -v n="$numutt" '{ printf("%s ",$1); skip=1; while(n*(skip+1) <= NF-1) { skip++; }
     for(x=2; x<=NF && x <= n*skip; x += skip) { printf("%s ", $x); }
     printf("\n"); }' <"$srcdir/spk2utt" >"$destdir/spk2utt"
  scripts/spk2utt_to_utt2spk.pl < "$destdir/spk2utt" > "$destdir/utt2spk"
  scripts/filter_scp.pl "$destdir/utt2spk" <"$srcdir/feats.scp" >"$destdir/feats.scp"
  if [ -f "$srcdir/wav.scp" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/wav.scp" >"$destdir/wav.scp"
  fi
  if [ -f "$srcdir/text" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"
  fi
  if [ -f "$srcdir/spk2gender" ]; then
    scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"
  fi
  srcutts=$(wc -l < "$srcdir/utt2spk")
  destutts=$(wc -l < "$destdir/utt2spk")
  echo "Retained $numutt utterances per speaker from data-dir $srcdir and put it in $destdir, reducing #utt from $srcutts to $destutts"
  exit 0;
else
  if [ "$numutt" -gt "$(wc -l < "$srcdir/feats.scp")" ]; then
    echo "subset_data_dir.sh: cannot subset to more utterances than you originally had."
    exit 1;
  fi
  mkdir -p "$destdir" || exit 1;

  # create feats.scp
  scripts/subset_scp.pl "$numutt" "$srcdir/feats.scp" > "$destdir/feats.scp" || exit 1;

  # Test for the file that is actually filtered (mfc.scp).  The earlier
  # check looked for wav.scp, so an existing mfc.scp was never subset when
  # wav.scp was absent.
  if [ -f "$srcdir/mfc.scp" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/mfc.scp" > "$destdir/mfc.scp" || exit 1;
  else
    # Do not leave a stale list behind from an earlier run.
    rm -f "$destdir/mfc.scp"
  fi
  if [ -f "$srcdir/utt2spk" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/utt2spk" > "$destdir/utt2spk" || exit 1;
    scripts/utt2spk_to_spk2utt.pl "$destdir/utt2spk" > "$destdir/spk2utt" || exit 1;
  fi
  if [ -f "$srcdir/text" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"
  fi
  if [ -f "$srcdir/spk2gender" ]; then
    scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"
  fi
  echo "Created a $numutt-utterance subset of $srcdir and put it in $destdir."
  exit 0;
fi

Просмотреть файл

@ -1,59 +0,0 @@
#!/usr/bin/perl -w
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Picks N lines out of an scp file and writes them to standard output.
# The picks are spread evenly over the whole file (rather than taking the
# first N lines) so that no single speaker is over-represented.

die "Usage: subset_scp.pl N in.scp " if @ARGV < 2;

my $N = shift @ARGV;
die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""
  if $N == 0;

my $inscp = shift @ARGV;
open(I, "<$inscp") || die "Opening input scp file $inscp";

my @entries = <I>;             # all lines of the scp, newlines kept
my $numlines = scalar(@entries);

die "You requested from subset_scp.pl more elements than available: $N > $numlines"
  if $N > $numlines;

# select_n(start, end, num_needed): print num_needed lines taken from the
# half-open index range [start, end).  The range and the quota are both cut
# in half recursively until a range holds a single line, which is printed
# when its quota is non-zero.
sub select_n {
  my ($start, $end, $num_needed) = @_;
  my $span = $end - $start;
  die "select_n: code error" if $num_needed > $span;
  if ($span != 1) {
    my $left_span   = int($span/2);
    my $left_needed = int($num_needed/2);
    select_n($start, $start+$left_span, $left_needed);
    select_n($start+$left_span, $end, $num_needed - $left_needed);
    return;
  }
  if ($num_needed > 0) {
    print $entries[$start];
  }
}

select_n(0, $numlines, $N);

Просмотреть файл

@ -1,82 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Replaces every symbol of the input transcriptions by its integer id from
# a symbol table.
#   --ignore-first-field  copy the first field of each line through untouched
#   --ignore-oov          print symbols missing from the table unmodified
#   --map-oov <sym>       replace symbols missing from the table by <sym>'s id
# Without --ignore-oov / --map-oov an unknown symbol is a fatal error.

my $ignore_oov = 0;
my $ignore_first_field = 0;
my $map_oov;    # stays undefined unless --map-oov is given

# Three passes over the three option checks, so the options may be given in
# any order.
foreach my $pass (1 .. 3) {
  if ($ARGV[0] eq "--ignore-oov") {
    $ignore_oov = 1;
    shift @ARGV;
  }
  if ($ARGV[0] eq "--ignore-first-field") {
    $ignore_first_field = 1;
    shift @ARGV;
  }
  if ($ARGV[0] eq "--map-oov") {
    shift @ARGV;
    $map_oov = shift @ARGV;
  }
}

my $symtab = shift @ARGV;
defined $symtab
  or die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n";

# Load the table; "+ 0" stores the id as a number.
my %sym2int;
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while (my $row = <F>) {
  my @pair = split(" ", $row);
  @pair == 2 || die "bad line in symbol table file: $row";
  $sym2int{$pair[0]} = $pair[1] + 0;
}

my $num_warning = 0;
my $max_warning = 20;   # at most this many OOV replacements are reported
my $error = 0;

while (my $utt = <>) {
  my @tokens = split(" ", $utt);
  die "Empty line in transcriptions input." unless @tokens;

  if ($ignore_first_field) {
    my $key = shift @tokens;
    print $key . " ";
  }

  my @ids = ();
  foreach my $tok (@tokens) {
    my $id = $sym2int{$tok};
    unless (defined $id) {
      if (defined $map_oov) {
        # The replacement symbol is only validated when it is first needed.
        defined $sym2int{$map_oov}
          or die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
        if ($num_warning++ < $max_warning) {
          print STDERR "sym2int.pl: replacing $tok with $map_oov\n";
          if ($num_warning == $max_warning) {
            print STDERR "sym2int.pl: not warning for OOVs any more times\n";
          }
        }
        $id = $sym2int{$map_oov};
      } elsif ($ignore_oov) {
        $id = $tok;   # pass the symbol through as it is
      } else {
        die "sym2int.pl: undefined symbol $tok\n";
      }
    }
    push @ids, $id;
  }
  print join(" ", @ids) . "\n";
}

exit($error ? 1 : 0);

Просмотреть файл

@ -1,78 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# This script does training-data alignment given a model built using
# CMN + delta + delta-delta features. Its output, all in its own
# experimental directory, is cmvn.ark, ali, tree, final.mdl and final.occs
# (the last three are just copied from the source directory).
# Option to use precompiled graphs from last phase, if these
# are available (i.e. if they were built with the same data):
#   --graphs <graphs-rspecifier>

graphs=
if [ "$1" == --graphs ]; then
  shift;
  if [ $# -eq 0 ]; then
    echo "steps/align_deltas.sh: --graphs requires an argument"
    exit 1;
  fi
  graphs=$1
  shift
fi

if [ $# != 4 ]; then
  echo "Usage: steps/align_deltas.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/align_deltas.sh data/train data/lang exp/tri1 exp/tri1_ali"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
srcdir=$3
dir=$4

mkdir -p "$dir"
cp "$srcdir"/{tree,final.mdl,final.occs} "$dir" || exit 1; # Create copy of the tree and model and occs...

# Kept as an array so every option stays a separate word when expanded.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)

echo "Computing cepstral mean and variance statistics"
compute-cmvn-stats "scp:$data/feats.scp" \
  "ark:$dir/cmvn.ark" 2>"$dir/cmvn.log" || exit 1;

# Feature pipeline: mean normalisation with the stats just computed, then
# delta features.
feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# Align all training data using the supplied model.
echo "Aligning all training data"
if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
  # compute integer form of transcripts.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
    || exit 1;
  gmm-align "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/tree" "$dir/final.mdl" "$lang/L.fst" \
    "$feats" "ark:$dir/train.tra" "ark:$dir/ali" 2> "$dir/align.log" || exit 1;
  rm "$dir/train.tra"
else
  gmm-align-compiled "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/final.mdl" \
    "$graphs" "$feats" "ark:$dir/ali" 2> "$dir/align.log" || exit 1;
fi

echo "Done."

Просмотреть файл

@ -1,77 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and delta-delta plus
# cepstral mean subtraction features. Used, for example, to decode
# mono/ and tri1/
# This script generates lattices and rescores them with different
# acoustic weights, in order to explore a range of different
# weights.
#
# Reads:  <model-dir>/final.mdl, <model-dir>/graph/HCLG.fst,
#         <data-dir>/feats.scp, <data-dir>/text, <lang-dir>/words.txt
# Writes: <decode-dir>/lat.gz, test.tra, test.ali, decode.log and, for each
#         inverse acoustic weight K in 4..10, K.tra, rescore_K.log, wer_K

if [ $# != 4 ]; then
  echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

mkdir -p "$dir"

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo "No model file $srcdir/final.mdl"
  exit 1;
fi

if [[ ! -f "$graphdir/HCLG.fst" || "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" "ark:|gzip -c > $dir/lat.gz" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1;

# In this setup there are no non-scored words, so
# scoring is simple.

# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
  acwt=$(perl -e "print (1.0/$inv_acwt);")
  lattice-best-path --acoustic-scale="$acwt" --word-symbol-table="$lang/words.txt" \
    "ark:gunzip -c $dir/lat.gz|" "ark,t:$dir/${inv_acwt}.tra" \
    2>"$dir/rescore_${inv_acwt}.log"

  # stdout and stderr of compute-wer both end up in the wer_K file.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" | \
    compute-wer --mode=present ark:- "ark,p:$dir/${inv_acwt}.tra" \
    > "$dir/wer_${inv_acwt}" 2>&1
done

Просмотреть файл

@ -1,48 +0,0 @@
#!/bin/bash
# Copyright 2012 Vassil Panayotov
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from .. (one directory up from here)
#
# Converts the feature files listed in <data-dir>/mfc.scp (read with
# copy-feats --sphinx-in=true) into one archive
# <abs-path-to-mfccdir>/raw_mfcc_<name>.ark and writes the matching index to
# <data-dir>/feats.scp.  <name> is the basename of <data-dir>.  The stderr of
# copy-feats goes to <log-dir>/make_mfcc.log.

if [ $# != 3 ]; then
  echo "usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>";
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
logdir=$2
mfccdir=$3

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$mfccdir" || exit 1;
mkdir -p "$logdir" || exit 1;

scp=$data/mfc.scp
if [ ! -f "$scp" ]; then
  # Name the file that is really missing (this used to print an unset $f).
  echo "make_mfcc.sh: no such file $scp";
  exit 1;
fi

log=$logdir/make_mfcc.log

# Report success only if the conversion actually succeeded.
if ! copy-feats --sphinx-in=true \
  "scp:$scp" "ark,scp:$mfccdir/raw_mfcc_$name.ark,$data/feats.scp" 2>"$log"; then
  echo "make_mfcc.sh: error creating MFCC features for $name, see $log";
  exit 1;
fi

echo "Succeeded creating MFCC features for $name"

Просмотреть файл

@ -1,126 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Triphone model training, using delta-delta features and cepstral
# mean normalization. It starts from an existing directory (e.g.
# exp/mono), supplied as an argument, which is assumed to be built using
# the same type of features.
#
# Steps: accumulate tree statistics from the given alignments, cluster the
# phones into questions, build the tree, initialise and mix up the model,
# convert the alignments to the new model, compile the training graphs, then
# iterate accumulate/update (realigning on the iterations in realign_iters).
# The last model and occupancy file are linked as final.mdl / final.occs.

if [ $# != 4 ]; then
  echo "Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
  echo " e.g.: steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [[ ! -f "$alidir/final.mdl" || ! -f "$alidir/ali" ]]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1;
fi

# Kept as an array so every option stays a separate word when expanded.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)
realign_iters="5 10 15 20";
silphonelist=$(cat "$lang/silphones.csl")
numiters=25    # Number of iterations of training
maxiterinc=15  # Last iter to increase #Gauss on.
numleaves=1800 # target num-leaves in tree building.
numgauss=$((numleaves + numleaves/2)) # starting num-Gauss.
     # Initially mix up to avg. 1.5 Gauss/state ( a bit more
     # than this, due to state clustering... then slowly mix
     # up to final amount.
totgauss=9000  # Target #Gaussians
incgauss=$(((totgauss - numgauss) / maxiterinc)) # per-iter increment for #Gauss

mkdir -p "$dir"

feats="ark:apply-cmvn --norm-vars=false ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

echo "Accumulating tree stats"
acc-tree-stats --ci-phones="$silphonelist" "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1;

echo "Computing questions for tree clustering"
# Integer ids of all phones except id 0.
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" 2> "$dir/questions.log" || exit 1;
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" 2>"$dir/compile_questions.log" || exit 1;

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2>"$dir/roots.log" || exit 1;

echo "Building tree"
build-tree --verbose=1 --max-leaves="$numleaves" \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" 2> "$dir/train_tree.log" || exit 1;

gmm-init-model --write-occs="$dir/1.occs" \
  "$dir/tree" "$dir/treeacc" "$lang/topo" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1;

gmm-mixup --mix-up="$numgauss" "$dir/1.mdl" "$dir/1.occs" "$dir/1.mdl" \
  2>"$dir/mixup.log" || exit 1;

#rm $dir/treeacc

# Convert alignments generated from monophone model, to use as initial alignments.
# A failure here must stop the run; otherwise training would start without
# valid alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" "ark:$alidir/ali" "ark:$dir/cur.ali" \
  2>"$dir/convert.log" || exit 1;

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" "ark:$dir/cur.ali" ark:- \
  2>/dev/null | cmp - "$alidir/ali" || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" \
  "ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2>"$dir/compile_graphs.log" || exit 1;

x=1
while [ "$x" -lt "$numiters" ]; do
  echo "Pass $x"
  if echo "$realign_iters" | grep -w "$x" >/dev/null; then
    echo "Aligning data"
    gmm-align-compiled "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" "$dir/$x.acc" \
    2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x+1)).occs" --mix-up="$numgauss" "$dir/$x.mdl" "$dir/$x.acc" \
    "$dir/$((x+1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # Only the newest model / occupancy files are kept.
  rm "$dir/$x.mdl" "$dir/$x.acc"
  rm "$dir/$x.occs"
  if [[ "$x" -le "$maxiterinc" ]]; then
    numgauss=$((numgauss + incgauss))
  fi
  x=$((x+1))
done

# Point final.{mdl,occs} at the last iteration.  Both old links are removed
# first so a rerun in the same directory works, and nothing is touched if
# the cd fails.
( cd "$dir" && rm -f final.mdl final.occs \
    && ln -s "$x.mdl" final.mdl && ln -s "$x.occs" final.occs ) || exit 1;

echo Done

Просмотреть файл

@ -1,105 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Flat start and monophone training, with delta-delta features.
# This script applies cepstral mean normalization (per speaker),
# unlike the corresponding script in s1/
# Exactly three positional arguments are required; otherwise print the usage
# text and stop.
if [[ $# -ne 3 ]]; then
  printf '%s\n' \
    "Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>" \
    " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
  exit 1
fi

data="$1"   # training data directory (feats.scp and text are read from it)
lang="$2"   # language directory (topo, words.txt and L.fst are read from it)
dir="$3"    # experiment directory that receives models, alignments and logs

# Source path.sh when one is present in the current directory.
if [[ -f path.sh ]]; then
  . path.sh
fi
# Configuration:
# Scaling options passed to gmm-align-compiled on every realignment pass.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
# Number of iterations of training
numiters=30
# Last iter to increase #Gauss on.
maxiterinc=20
# Initial num-Gauss (must be more than #states=3*phones).
numgauss=250
# Target #Gaussians.
totgauss=1000
# per-iter increment for #Gauss (integer division)
incgauss=$(( (totgauss - numgauss) / maxiterinc ))
# Iterations on which the training data is realigned.
realign_iters="1 2 3 4 5 6 7 8 9 10 12 15 20 25"
# Make sure the experiment directory exists before anything is written to it.
mkdir -p $dir
echo "Computing cepstral mean and variance statistics"
# Statistics are computed from $data/feats.scp and stored in $dir/cmvn.ark.
compute-cmvn-stats scp:$data/feats.scp ark:$dir/cmvn.ark 2>$dir/cmvn.log || exit 1;
# Feature pipeline used by every later step: apply the CMVN statistics with
# --norm-vars=false, then append deltas with add-deltas. The trailing "|"
# makes the whole string a piped input for the Kaldi tools, and lets a caller
# append further pipeline stages to it.
feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# compute integer form of transcripts.
# Words in $data/text are mapped through $lang/words.txt; the first field of
# each line (presumably the utterance id) is left as is. Output: $dir/train.tra.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
|| exit 1;
echo "Initializing monophone system."
# The feature pipeline in $feats is extended with subset-feats --n=10
# (presumably so that only the first 10 utterances are used for
# initialization -- confirm against the tool's usage). The literal 39 is
# presumably the feature dimension after add-deltas -- TODO confirm.
# Outputs: the initial model $dir/0.mdl and $dir/tree.
gmm-init-mono "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo 39 \
$dir/0.mdl $dir/tree 2> $dir/init.log || exit 1;
echo "Compiling training graphs"
# Graphs are compiled from the integer transcripts in $dir/train.tra and
# stored gzip-compressed in $dir/graphs.fsts.gz.
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
ark:$dir/train.tra "ark:|gzip -c >$dir/graphs.fsts.gz" \
2>$dir/compile_graphs.log || exit 1
echo Pass 0
# Pass 0 has no model-based alignment yet: align-equal-compiled writes
# alignments as a text archive to stdout, which is piped straight into
# gmm-acc-stats-ali to accumulate statistics for 0.mdl into $dir/0.acc.
# NOTE(review): only the exit status of the last pipeline stage reaches
# "|| exit 1" unless pipefail is enabled elsewhere (e.g. by path.sh).
align-equal-compiled "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark,t,f:- 2>$dir/align.0.log | \
gmm-acc-stats-ali --binary=true $dir/0.mdl "$feats" ark:- \
$dir/0.acc 2> $dir/acc.0.log || exit 1;
# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
# we fail to est "rare" phones and later on, they never align properly.
# This first update turns 0.mdl into 1.mdl, mixing up to $numgauss Gaussians.
gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss \
$dir/0.mdl $dir/0.acc $dir/1.mdl 2> $dir/update.0.log || exit 1;
# The pass-0 statistics are not needed any more.
rm $dir/0.acc
# Main training loop: iterations 1 .. $numiters-1. Iteration $x reads
# $dir/$x.mdl and produces $dir/$((x+1)).mdl plus the matching .occs file.
beam=4 # will change to 8 below after 1st pass
x=1
while [ $x -lt $numiters ]; do
  echo "Pass $x"
  # Realign only on the iterations listed in $realign_iters. Iteration 1 is
  # in that list, so $dir/cur.ali exists before statistics are first
  # accumulated from it. The retry beam is four times the normal beam.
  if echo $realign_iters | grep -w $x >/dev/null; then
    echo "Aligning data"
    gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$((beam*4)) $dir/$x.mdl \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" t,ark:$dir/cur.ali \
      2> $dir/align.$x.log || exit 1;
  fi
  # Accumulate statistics for the current alignments, then re-estimate the
  # model, mixing up to the current Gaussian target.
  gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc \
    2> $dir/acc.$x.log || exit 1;
  gmm-est --write-occs=$dir/$((x+1)).occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc \
    $dir/$((x+1)).mdl 2> $dir/update.$x.log || exit 1;
  # Drop the previous iteration's files. 1.occs is never written (the pass-0
  # update does not use --write-occs), hence -f; unlike the former
  # "2>/dev/null" this still reports genuine removal errors.
  rm -f $dir/$x.mdl $dir/$x.acc $dir/$x.occs
  # Raise the Gaussian target only up to iteration $maxiterinc.
  if [ $x -le $maxiterinc ]; then
    numgauss=$((numgauss+incgauss))
  fi
  beam=8
  x=$((x+1))
done
# Point final.mdl / final.occs at the last iteration. Both links are removed
# first: previously only final.mdl was, so on a rerun "ln -s" failed for
# final.occs and the old link was silently kept.
( cd $dir && rm -f final.mdl final.occs && \
  ln -s $x.mdl final.mdl && ln -s $x.occs final.occs ) || exit 1
# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4