зеркало из https://github.com/mozilla/kaldi.git
Removing the egs/rm/s4 recipe. It's based on the older generation 's3' recipes, and now there are better examples using free data in Kaldi
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4654 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
970a5484d4
Коммит
adfa50a60e
|
@ -11,8 +11,6 @@ Each subdirectory of this directory contains the
|
|||
scripts for a sequence of experiments.
|
||||
s5 is the currently recommended setup.
|
||||
|
||||
s4: A recipe based on a freely available subset of RM data, distributed by CMU
|
||||
|
||||
s5: This is the "new-new-style" recipe. It is now finished.
|
||||
All further work will be on top of this style of recipe. Note:
|
||||
unlike previous recipes, this now uses the same underlying
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
This recipe is using a publicly available subset of Resource Management data,
|
||||
distributed by CMU.
|
||||
|
||||
To run the recipe the data should be downloaded first, for which ./getdata.sh
|
||||
command can be used. Then ./run.sh script can be executed to automatically perform
|
||||
all steps or the commands can be started manually by copy/pasting them.
|
||||
|
||||
The script and data layout are based on egs/rm/s3 recipe, with several exceptions:
|
||||
|
||||
- because this recipe uses pre-extracted feature vectors no conversion from .sph
|
||||
to .wav format and consequent feature extraction is needed. The features are just
|
||||
converted from CMU Sphinx feature files to Kaldi Tables.
|
||||
|
||||
- only one test set is available instead of several (e.g. mar87, oct87 and so on)
|
||||
as in the original recipe
|
||||
|
||||
- no speaker-dependent processing
|
||||
|
||||
- only the steps up to tri2a stage are implemented
|
||||
|
||||
- on the plus side it requires less disk space (about 220MB)
|
|
@ -1 +0,0 @@
|
|||
--use-energy=false # only non-default option.
|
|
@ -1,2 +0,0 @@
|
|||
# No non-default options for now.
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
<Topology>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
NONSILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
|
||||
<State> 3 </State>
|
||||
</TopologyEntry>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
SILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
|
||||
<State> 5 </State>
|
||||
</TopologyEntry>
|
||||
</Topology>
|
|
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
source path.sh
|
||||
|
||||
# Download and extract CMU's feature files
|
||||
mkdir -p $RM1_ROOT
|
||||
wget -P $RM1_ROOT http://www.speech.cs.cmu.edu/databases/rm1/rm1_cepstra.tar.gz ||
|
||||
wget -P $RM1_ROOT http://sourceforge.net/projects/kaldi/files/rm1_cepstra.tar.gz
|
||||
tar -C $RM1_ROOT/ -xf $RM1_ROOT/rm1_cepstra.tar.gz
|
||||
|
||||
# Download the G.fst graph produced from 'wp_gram.txt'
|
||||
wget -P $RM1_ROOT http://sourceforge.net/projects/kaldi/files/RM_G.fst
|
|
@ -1,40 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This script basically calls the supplied decoding script
|
||||
# once for each test set (in parallel on the same machine),
|
||||
# and then averages the resulting WERs.
|
||||
# The interpretation of the decode-dir-1, etc., as inputs,
|
||||
# outputs and so on, depends on the decoding script you call.
|
||||
|
||||
# It assumes the model directory is one level up from decode-dir-1.
|
||||
|
||||
mono_opt=
|
||||
|
||||
if [ "$1" == "--mono" ]; then
|
||||
mono_opt=$1;
|
||||
shift;
|
||||
fi
|
||||
|
||||
script=$1
|
||||
decode_dir_1=$2 # e.g. exp/sgmm3b/decode
|
||||
decode_dir_2=$3
|
||||
decode_dir_3=$4
|
||||
dir=`dirname $decode_dir_1` # e.g. exp/sgmm3b
|
||||
|
||||
if [ $# -ne 2 ]; then
|
||||
echo "Usage: scripts/decode.sh <decode-script> <decode-dir-1>"
|
||||
exit 1;
|
||||
fi
|
||||
if [ ! -x $script -o ! -d $dir ]; then
|
||||
echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
scripts/mkgraph.sh $mono_opt data/lang_test $dir $dir/graph
|
||||
|
||||
$script $dir data/test data/lang $decode_dir_1/ &
|
||||
wait
|
||||
|
||||
# The publicly available RM subset has just one test set(instead of mar87 etc.),
|
||||
# so no averaging is needed
|
||||
grep WER "$decode_dir_1"/wer* || echo "Error decoding $decode_dir_1: no WER results found."  # name the directory that was actually decoded
|
|
@ -1,69 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# usage: make_trans.pl prefix in.flist input.snr out.txt out.scp
|
||||
|
||||
# prefix is first letters of the database "key" (rest are numeric)
|
||||
|
||||
# in.flist is just a list of filenames, probably of .sph files.
|
||||
# input.snr is an snr format file from the RM dataset.
|
||||
# out.txt is the output transcriptions in format "key word1 word\n"
|
||||
# out.scp is the output scp file, which is as in.scp but has the
|
||||
# database-key first on each line.
|
||||
|
||||
# Reads from first argument e.g. $rootdir/rm1_audio1/rm1/doc/al_sents.snr
|
||||
# and second argument train_wav.scp
|
||||
# Writes to standard output trans.txt
|
||||
|
||||
if(@ARGV != 5) {
|
||||
die "usage: make_trans.pl prefix in.flist input.snr out.txt out.scp\n";
|
||||
}
|
||||
($prefix, $in_flist, $input_snr, $out_txt, $out_scp) = @ARGV;
|
||||
|
||||
open(F, "<$input_snr") || die "Opening SNOR file $input_snr";
|
||||
|
||||
while(<F>) {
|
||||
if(m/^;/) { next; }
|
||||
m/(.+) \((.+)\)/ || die "bad line $_";
|
||||
$T{$2} = $1;
|
||||
}
|
||||
|
||||
close(F);
|
||||
open(G, "<$in_flist") || die "Opening file list $in_flist";
|
||||
|
||||
open(O, ">$out_txt") || die "Open output transcription file $out_txt";
|
||||
|
||||
open(P, ">$out_scp") || die "Open output scp file $out_scp";
|
||||
|
||||
while(<G>) {
|
||||
$_ =~ m:/(\w+)/(\w+)\.mfc\s+$:i || die "bad scp line $_";
|
||||
$spkname = $1;
|
||||
$uttname = $2;
|
||||
$uttname =~ tr/a-z/A-Z/;
|
||||
defined $T{$uttname} || die "no trans for sent $uttname";
|
||||
$spkname =~ s/_//g; # remove underscore from spk name to make key nicer.
|
||||
$key = $prefix . "_" . $spkname . "_" . $uttname;
|
||||
$key =~ tr/A-Z/a-z/; # Make it all lower case.
|
||||
# to make the numerical and string-sorted orders the same.
|
||||
print O "$key $T{$uttname}\n";
|
||||
print P "$key $_";
|
||||
$n++;
|
||||
}
|
||||
close(O) || die "Closing output.";
|
||||
close(P) || die "Closing output.";
|
||||
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
# Note: when creating your own data preparation scripts, it's a good idea
|
||||
# to make sure that the speaker id (if present) is a prefix of the utterance
|
||||
# id, that the output scp file is sorted on utterance id, and that the
|
||||
# transcription file is exactly the same length as the scp file and is also
|
||||
# sorted on utterance id (missing transcriptions should be removed from the
|
||||
# scp file using e.g. scripts/filter_scp.pl)
|
||||
|
||||
if [ $# != 1 ]; then
|
||||
echo "Usage: ../../local/RM_data_prep.sh /path/to/RM"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
export LC_ALL=C
|
||||
|
||||
RMROOT=$1
|
||||
|
||||
mkdir -p data/local
|
||||
cd data/local
|
||||
|
||||
if [ ! -f $RMROOT/RM_G.fst -o ! -d $RMROOT/rm1 ]; then
|
||||
echo "Required data is missing. You can download the data by running ./getdata.sh"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
# Make a list of files
|
||||
cat $RMROOT/rm1/etc/rm1_train.fileids | \
|
||||
xargs -I_x_ echo $RMROOT/rm1/feat/_x_.mfc > train.flist
|
||||
cat $RMROOT/rm1/etc/rm1_test.fileids | \
|
||||
xargs -I_x_ echo $RMROOT/rm1/feat/_x_.mfc > test.flist
|
||||
|
||||
# make_trans.pl also creates the utterance id's and the kaldi-format scp file.
|
||||
|
||||
# this is needed, because the original "al_sents.snr" file is not available
|
||||
# (and because CMU's train utterances have tags like '<sil>' added)
|
||||
cat $RMROOT/rm1/etc/rm1_train.transcription |\
|
||||
tr '[a-z]' '[A-Z]' |\
|
||||
sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's: +: :g' -e 's:^ +::' |\
|
||||
cat $RMROOT/rm1/etc/rm1_test.transcription - \
|
||||
> al_sents.snr
|
||||
|
||||
# training set
|
||||
../../local/make_trans.pl trn train.flist al_sents.snr train_trans.txt train.scp
|
||||
mv train_trans.txt tmp; sort -k 1 tmp > train_trans.txt
|
||||
mv train.scp tmp; sort -k 1 tmp > train.scp
|
||||
rm tmp
|
||||
|
||||
# test set
|
||||
../../local/make_trans.pl test test.flist al_sents.snr test_trans.txt test.scp
|
||||
mv test_trans.txt tmp; sort -k 1 tmp > test_trans.txt
|
||||
mv test.scp tmp; sort -k 1 tmp > test.scp
|
||||
rm tmp
|
||||
|
||||
# We already have the features, so sph2pipe step is skipped and
|
||||
# given the limited data the speaker-dependent processing is also not used
|
||||
|
||||
# "wp_gram.txt" is no longer available from LDC's website, so we are just using a
|
||||
# pre-built grammar WFST (G.fst). The word-pair grammar is a finite-state description
|
||||
# of the allowed utterances, which just enumerates the words that can follow each word
|
||||
# in the vocabulary. G.fst is constructed by adding output arcs to each node
|
||||
# representing a word, one for each word that is allowed to follow, and the
|
||||
# probability mass is distributed uniformly among all these arcs.
|
||||
#../../scripts/make_rm_lm.pl $RMROOT/LDC93S3B/disc_1/doc/wp_gram.txt > G.txt || exit 1;
|
||||
cp $RMROOT/RM_G.fst ./G.fst
|
||||
|
||||
# Convert the CMU's lexicon to a form which the other scripts expect
|
||||
# (leave only the first pronunciation variant and convert the phones to lower case)
|
||||
cat $RMROOT/rm1/etc/rm1.dic | \
|
||||
egrep -v '\(' | \
|
||||
sed -e "s/^\([[:alnum:]-]\+\('[[:alpha:]]\+\)\?\)\(.*\)/\1\L\3/g" > lexicon.txt
|
||||
|
||||
|
||||
echo RM_data_prep succeeded.
|
|
@ -1,128 +0,0 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2012 Vassil Panayotov
|
||||
# modified from:
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
data_list="train test"
|
||||
|
||||
for x in lang lang_test $data_list; do
|
||||
mkdir -p data/$x
|
||||
done
|
||||
|
||||
# Copy stuff into its final location:
|
||||
|
||||
for x in $data_list; do
|
||||
cp data/local/${x}.scp data/$x/mfc.scp || exit 1;
|
||||
cp data/local/${x}_trans.txt data/$x/text || exit 1;
|
||||
done
|
||||
|
||||
# We are not using make_words_symtab.pl for symbol table creation in this
|
||||
# recipe, because CMU's lexicon has several words that are not in the
|
||||
# word-pair grammar
|
||||
cat data/local/lexicon.txt | \
|
||||
awk 'BEGIN{print "<eps>\t0";} {print $1 "\t" NR;} END{print "!SIL\t" NR+1;}' \
|
||||
> data/lang/words.txt
|
||||
scripts/make_phones_symtab.pl < data/local/lexicon.txt > data/lang/phones.txt
|
||||
cp data/lang/words.txt data/lang_test/words.txt
|
||||
|
||||
silphones="sil"; # This would in general be a space-separated list of all silence phones. E.g. "sil vn"
|
||||
# Generate colon-separated lists of silence and non-silence phones.
|
||||
scripts/silphones.pl data/lang/phones.txt "$silphones" data/lang/silphones.csl \
|
||||
data/lang/nonsilphones.csl
|
||||
|
||||
ndisambig=`scripts/add_lex_disambig.pl data/local/lexicon.txt data/local/lexicon_disambig.txt`
|
||||
ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
|
||||
scripts/add_disambig.pl data/lang/phones.txt $ndisambig > data/lang_test/phones_disambig.txt
|
||||
cp data/lang_test/phones_disambig.txt data/lang/ # needed for MMI.
|
||||
|
||||
silprob=0.5 # same prob as word
|
||||
scripts/make_lexicon_fst.pl data/local/lexicon.txt $silprob sil | \
|
||||
fstcompile --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt \
|
||||
--keep_isymbols=false --keep_osymbols=false | \
|
||||
fstarcsort --sort_type=olabel > data/lang/L.fst
|
||||
|
||||
# Create L_align.fst, which is as L.fst but with alignment symbols (#1 and #2 at the
|
||||
# beginning and end of words, on the input side)... useful if we
|
||||
# ever need to e.g. create ctm's-- these are used to work out the
|
||||
# word boundaries.
|
||||
|
||||
|
||||
cat data/local/lexicon.txt | \
|
||||
awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
|
||||
scripts/make_lexicon_fst.pl - 0.5 sil | \
|
||||
fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
|
||||
--keep_isymbols=false --keep_osymbols=false | \
|
||||
fstarcsort --sort_type=olabel > data/lang_test/L_align.fst
|
||||
|
||||
# L_disambig.fst has the disambiguation symbols (c.f. Mohri's papers)
|
||||
|
||||
scripts/make_lexicon_fst.pl data/local/lexicon_disambig.txt $silprob sil '#'$ndisambig | \
|
||||
fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
|
||||
--keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel \
|
||||
> data/lang_test/L_disambig.fst
|
||||
|
||||
cp data/lang_test/L_disambig.fst data/lang/ # Needed for MMI training.
|
||||
|
||||
# Compilation is no longer needed, because we are using a pre-built G.fst
|
||||
#fstcompile --isymbols=data/lang/words.txt --osymbols=data/lang/words.txt --keep_isymbols=false \
|
||||
# --keep_osymbols=false data/local/G.txt > data/lang_test/G.fst
|
||||
cp data/local/G.fst data/lang_test/
|
||||
|
||||
# Checking that G is stochastic [note, it wouldn't be for an Arpa]
|
||||
fstisstochastic data/lang_test/G.fst || echo Error: G is not stochastic
|
||||
|
||||
# Checking that G.fst is determinizable.
|
||||
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.
|
||||
|
||||
# Checking that L_disambig.fst is determinizable.
|
||||
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.
|
||||
|
||||
# Checking that disambiguated lexicon times G is determinizable
|
||||
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
|
||||
fstdeterminize >/dev/null || echo Error
|
||||
|
||||
# Checking that LG is stochastic:
|
||||
fsttablecompose data/lang/L.fst data/lang_test/G.fst | \
|
||||
fstisstochastic || echo Error: LG is not stochastic.
|
||||
|
||||
# Checking that L_disambig.G is stochastic:
|
||||
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
|
||||
fstisstochastic || echo Error: L_disambig.G is not stochastic.
|
||||
|
||||
|
||||
## Check lexicon.
|
||||
## just have a look and make sure it seems sane.
|
||||
echo "First few lines of lexicon FST:"
|
||||
fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head
|
||||
|
||||
|
||||
silphonelist=`cat data/lang/silphones.csl | sed 's/:/ /g'`
|
||||
nonsilphonelist=`cat data/lang/nonsilphones.csl | sed 's/:/ /g'`
|
||||
cat conf/topo.proto | sed "s:NONSILENCEPHONES:$nonsilphonelist:" | \
|
||||
sed "s:SILENCEPHONES:$silphonelist:" > data/lang/topo
|
||||
|
||||
for x in phones.txt words.txt silphones.csl nonsilphones.csl topo; do
|
||||
cp data/lang/$x data/lang_test/$x || exit 1;
|
||||
done
|
||||
|
||||
echo RM_format_data succeeded.
|
|
@ -1,13 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# path to Kaldi's root directory
|
||||
root=`pwd`/../../..
|
||||
|
||||
export PATH=${root}/src/bin:${root}/tools/openfst/bin:${root}/src/fstbin/:${root}/src/gmmbin/:${root}/src/featbin/:${root}/src/fgmmbin:${root}/src/sgmmbin:${root}/src/lm:${root}/src/latbin:$PATH
|
||||
|
||||
# path to the directory in which the subset of RM corpus is stored
|
||||
export RM1_ROOT=`pwd`/data/download
|
||||
|
||||
export LC_ALL=C
|
||||
export LC_LOCALE_ALL=C
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
source ./path.sh
|
||||
|
||||
# call the next line with the directory where the RM data is
|
||||
local/rm_data_prep.sh $RM1_ROOT || exit 1;
|
||||
|
||||
local/rm_format_data.sh || exit 1;
|
||||
|
||||
# the directory, where you want to store MFCC features.
|
||||
featdir=data/rm_feats
|
||||
|
||||
# convert the Sphinx feature files to Kaldi tables
|
||||
for x in train test; do
|
||||
steps/make_mfcc.sh data/$x exp/make_mfcc/$x $featdir || exit 1;
|
||||
done
|
||||
|
||||
scripts/subset_data_dir.sh data/train 1000 data/train.1k || exit 1;
|
||||
|
||||
# train monophone system.
|
||||
steps/train_mono.sh data/train.1k data/lang exp/mono || exit 1;
|
||||
|
||||
# monophone decoding
|
||||
local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode || exit 1;
|
||||
|
||||
# Get alignments from monophone system.
|
||||
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali || exit 1;
|
||||
|
||||
# train tri1 [first triphone pass]
|
||||
steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1 || exit 1;
|
||||
|
||||
# decode tri1
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri1/decode || exit 1;
|
||||
|
||||
# align tri1
|
||||
steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
|
||||
data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
|
||||
|
||||
# train tri2a [delta+delta-deltas]
|
||||
steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a || exit 1;
|
||||
|
||||
# decode tri2a
|
||||
local/decode.sh steps/decode_deltas.sh exp/tri2a/decode || exit 1;
|
|
@ -1,58 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Adds some specified number of disambig symbols to a symbol table.
|
||||
# Adds these as #1, #2, etc.
|
||||
# If the --include-zero option is specified, includes an extra one
|
||||
# #0.
|
||||
if(!(@ARGV == 2 || (@ARGV ==3 && $ARGV[0] eq "--include-zero"))) {
|
||||
die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
|
||||
}
|
||||
|
||||
if(@ARGV == 3) {
|
||||
$include_zero = 1;
|
||||
$ARGV[0] eq "--include-zero" || die "Bad option/first argument $ARGV[0]";
|
||||
shift @ARGV;
|
||||
} else {
|
||||
$include_zero = 0;
|
||||
}
|
||||
|
||||
$input = $ARGV[0];
|
||||
$nsyms = $ARGV[1];
|
||||
|
||||
open(F, "<$input") || die "Opening file $input";
|
||||
|
||||
while(<F>) {
|
||||
@A = split(" ", $_);
|
||||
@A == 2 || die "Bad line $_";
|
||||
$lastsym = $A[1];
|
||||
print;
|
||||
}
|
||||
|
||||
if(!defined($lastsym)){
|
||||
die "Empty symbol file?";
|
||||
}
|
||||
|
||||
if($include_zero) {
|
||||
$lastsym++;
|
||||
print "#0 $lastsym\n";
|
||||
}
|
||||
|
||||
for($n = 1; $n <= $nsyms; $n++) {
|
||||
$y = $n + $lastsym;
|
||||
print "#$n $y\n";
|
||||
}
|
|
@ -1,101 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Adds disambiguation symbols to a lexicon.
|
||||
# Outputs still in the normal lexicon format.
|
||||
# Disambig syms are numbered #1, #2, #3, etc. (#0
|
||||
# reserved for symbol in grammar).
|
||||
# Outputs the number of disambig syms to the standard output.
|
||||
|
||||
if(@ARGV != 2) {
|
||||
die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt "
|
||||
}
|
||||
|
||||
|
||||
$lexfn = shift @ARGV;
|
||||
$lexoutfn = shift @ARGV;
|
||||
|
||||
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
|
||||
|
||||
# (1) Read in the lexicon.
|
||||
@L = ( );
|
||||
while(<L>) {
|
||||
@A = split(" ", $_);
|
||||
push @L, join(" ", @A);
|
||||
}
|
||||
|
||||
# (2) Work out the count of each phone-sequence in the
|
||||
# lexicon.
|
||||
|
||||
foreach $l (@L) {
|
||||
@A = split(" ", $l);
|
||||
shift @A; # Remove word.
|
||||
$count{join(" ",@A)}++;
|
||||
}
|
||||
|
||||
# (3) For each left sub-sequence of each phone-sequence, note down
|
||||
# that exists (for identifying prefixes of longer strings).
|
||||
|
||||
foreach $l (@L) {
|
||||
@A = split(" ", $l);
|
||||
shift @A; # Remove word.
|
||||
while(@A > 0) {
|
||||
pop @A; # Remove last phone
|
||||
$issubseq{join(" ",@A)} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
# (4) For each entry in the lexicon:
|
||||
# if the phone sequence is unique and is not a
|
||||
# prefix of another word, no disambig symbol.
|
||||
# Else output #1, or #2, #3, ... if the same phone-seq
|
||||
# has already been assigned a disambig symbol.
|
||||
|
||||
|
||||
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";
|
||||
|
||||
$max_disambig = 0;
|
||||
foreach $l (@L) {
|
||||
@A = split(" ", $l);
|
||||
$word = shift @A;
|
||||
$phnseq = join(" ",@A);
|
||||
if(!defined $issubseq{$phnseq}
|
||||
&& $count{$phnseq}==1) {
|
||||
; # Do nothing.
|
||||
} else {
|
||||
if($phnseq eq "") { # need disambig symbols for the empty string
|
||||
# that are not used anywhere else.
|
||||
$max_disambig++;
|
||||
$reserved{$max_disambig} = 1;
|
||||
$phnseq = "#$max_disambig";
|
||||
} else {
|
||||
$curnumber = $disambig_of{$phnseq};
|
||||
if(!defined $curnumber) { $curnumber = 0; } # no symbol assigned to this phone sequence yet
|
||||
$curnumber++; # now 1 or 2, ...
|
||||
while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
|
||||
if($curnumber > $max_disambig) {
|
||||
$max_disambig = $curnumber;
|
||||
}
|
||||
$disambig_of{$phnseq} = $curnumber;
|
||||
$phnseq = $phnseq . " #" . $curnumber;
|
||||
}
|
||||
}
|
||||
print O "$word\t$phnseq\n";
|
||||
}
|
||||
|
||||
print $max_disambig . "\n";
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script takes a list of utterance-ids and filters an scp
|
||||
# file (or any file whose first field is an utterance id), printing
|
||||
# out only those lines whose first field is in id_list.
|
||||
|
||||
if(@ARGV < 1 || @ARGV > 2) {
|
||||
die "Usage: filter_scp.pl id_list [in.scp] > out.scp ";
|
||||
}
|
||||
|
||||
$idlist = shift @ARGV;
|
||||
open(F, "<$idlist") || die "Could not open id-list file $idlist";
|
||||
while(<F>) {
|
||||
@A = split;
|
||||
@A>=1 || die "Invalid id-list file line $_";
|
||||
$seen{$A[0]} = 1;
|
||||
}
|
||||
|
||||
while(<>) {
|
||||
@A = split;
|
||||
@A > 0 || die "Invalid scp file line $_";
|
||||
if($seen{$A[0]}) {
|
||||
print $_;
|
||||
}
|
||||
}
|
|
@ -1,90 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
$ignore_noninteger = 0;
|
||||
$ignore_first_field = 0;
|
||||
$field = -1;
|
||||
for($x = 0; $x < 2; $x++) {
|
||||
if($ARGV[0] eq "--ignore-noninteger") { $ignore_noninteger = 1; shift @ARGV; }
|
||||
if($ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
|
||||
if($ARGV[0] eq "--field") {
|
||||
shift @ARGV; $field = $ARGV[0]+0; shift @ARGV;
|
||||
if ($field < 1) { die "Bad argument to --field option: $field"; }
|
||||
}
|
||||
}
|
||||
|
||||
if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
|
||||
$zfield = $field-1; # Change to zero-based indexing.
|
||||
|
||||
$symtab = shift @ARGV;
|
||||
if(!defined $symtab) {
|
||||
die "Usage: int2sym.pl symtab [input] > output\n";
|
||||
}
|
||||
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
|
||||
while(<F>) {
|
||||
@A = split(" ", $_);
|
||||
@A == 2 || die "bad line in symbol table file: $_";
|
||||
$int2sym{$A[1]} = $A[0];
|
||||
}
|
||||
|
||||
# Maps one token through the integer-to-symbol table %int2sym.
#   $a   - the token to translate.
#   $pos - zero-based position of the token; only used to choose the hint
#          shown in the error message for a non-integer token.
# Returns the symbol stored for $a.  A token that is not all digits is
# returned unchanged when $ignore_noninteger is set; otherwise the script
# dies.  Also dies when the integer has no entry in the symbol table.
sub int2sym {
  my $a = shift @_;
  my $pos = shift @_;
  if($a !~ m:^\d+$:) { # not all digits..
    if($ignore_noninteger) {
      # Hand the token back to the caller rather than printing it here and
      # leaving the sub with "next": that jumped into the caller's loop and,
      # when a single field was being translated, skipped printing the rest
      # of the line and its newline.
      return $a;
    } else {
      if($pos == 0) {
        die "int2sym.pl: found noninteger token $a (try --ignore-first-field)\n";
      } else {
        die "int2sym.pl: found noninteger token $a (try --ignore-noninteger if valid input)\n";
      }
    }
  }
  my $s = $int2sym{$a};
  if(!defined ($s)) {
    die "int2sym.pl: integer $a not in symbol table $symtab.";
  }
  return $s;
}
|
||||
|
||||
$error = 0;
|
||||
while(<>) {
|
||||
@A = split(" ", $_);
|
||||
if($ignore_first_field) {
|
||||
$key = shift @A;
|
||||
print $key . " ";
|
||||
}
|
||||
if ($field != -1) {
|
||||
if ($zfield <= $#A && $zfield >= 0) {
|
||||
$a = $A[$zfield];
|
||||
$A[$zfield] = int2sym($a, $zfield);
|
||||
}
|
||||
print join(" ", @A);
|
||||
} else {
|
||||
for ($pos = 0; $pos <= $#A; $pos++) {
|
||||
$a = $A[$pos];
|
||||
$s = int2sym($a, $pos);
|
||||
print $s . " ";
|
||||
}
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1,122 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# makes lexicon FST (no pron-probs involved).
|
||||
|
||||
# Accepted forms (the FST is written to stdout, not to a named file):
#   make_lexicon_fst.pl lexicon.txt
#   make_lexicon_fst.pl lexicon.txt silprob silphone
#   make_lexicon_fst.pl lexicon.txt silprob silphone sil_disambig_sym
if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt"
}

$lexfn = shift @ARGV;
if (@ARGV == 0) {
  $silprob = 0.0;
} elsif (@ARGV == 2) {
  ($silprob, $silphone) = @ARGV;
} else {
  ($silprob, $silphone, $sildisambig) = @ARGV;
}

# Costs are negative log probabilities of taking / not taking the optional
# silence; they are only defined when a non-zero silence probability is given.
if ($silprob != 0.0) {
  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
  $silcost = -log($silprob);
  $nosilcost = -log(1.0 - $silprob);
}

open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
|
||||
|
||||
|
||||
|
||||
# Returns true if the phone sequence passed in @_ means silence, i.e. it is
# either ( $silphone ) or ( "#N", $silphone, "#M" ) where "#N" and "#M" are
# disambiguation symbols ("#" followed by digits).  $silphone is the global
# set from the command line.
sub is_sil {
  return ( @_ == 1 && $_[0] eq $silphone ||
           (@_ == 3 && $_[1] eq $silphone &&
            $_[0] =~ m/^\#\d+$/ &&
            # The trailing symbol is element 2; the old code tested element 0
            # twice, so anything was accepted in the last position.
            $_[2] =~ m/^\#\d+$/));
}
|
||||
|
||||
# Writes the lexicon FST in text form to stdout.  Every arc is
#   src <TAB> dest <TAB> phone <TAB> word-or-<eps> [<TAB> cost]
# and the last line gives the final state with cost 0.  Each lexicon line is
# "word phone1 phone2 ..."; the word label is put on the first arc of the path
# and <eps> on the rest.
if ($silprob == 0.0) {  # No optional silences: just have one (loop+final) state which is numbered zero.
  $loopstate = 0;
  # Was misspelled "$nexststate", which left $nextstate undefined; the first
  # "$nextstate++" then yielded 0 and reused the loop state.
  $nextstate = 1;  # next unallocated state.
  while (<L>) {
    @A = split(" ", $_);
    $w = shift @A;

    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if (@A > 0) {
        $ns = $nextstate++;
      } else {
        $ns = $loopstate;  # last phone: return to the loop state.
      }
      print "$s\t$ns\t$p\t$word_or_eps\n";
      $word_or_eps = "<eps>";
      $s = $ns;
    }
  }
  print "$loopstate\t0\n";  # final-cost.
} else {  # have silence probs.
  $startstate = 0;
  $loopstate = 1;
  $silstate = 2;  # state from where we go to loopstate after emitting silence.
  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n";  # no silence.
  if (!defined $sildisambig) {
    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n";  # silence.
    print "$silstate\t$loopstate\t$silphone\t<eps>\n";  # no cost.
    $nextstate = 3;
  } else {
    $disambigstate = 3;
    $nextstate = 4;
    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n";  # silence.
    print "$silstate\t$disambigstate\t$silphone\t<eps>\n";  # no cost.
    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n";  # silence disambiguation symbol.
  }
  while (<L>) {
    @A = split(" ", $_);
    $w = shift @A;
    # Keep the complete pronunciation: @A is consumed phone by phone below and
    # is empty by the time the silence test is made, so testing @A there (as
    # the old code did) could never succeed.
    @pron = @A;

    $s = $loopstate;
    $word_or_eps = $w;
    while (@A > 0) {
      $p = shift @A;
      if (@A > 0) {
        $ns = $nextstate++;
        print "$s\t$ns\t$p\t$word_or_eps\n";
        $word_or_eps = "<eps>";
        $s = $ns;
      } else {
        if (!is_sil(@pron)) {
          # This is non-deterministic but relatively compact,
          # and avoids epsilons.
          print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
          print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
        } else {
          # no point putting opt-sil after silence word.
          print "$s\t$loopstate\t$p\t$word_or_eps\n";
        }
        $word_or_eps = "<eps>";
      }
    }
  }
  print "$loopstate\t0\n";  # final-cost.
}
|
|
@ -1,37 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# make_phones_symtab.pl < lexicon.txt > phones.txt
|
||||
|
||||
|
||||
# Collect every distinct token found from the third whitespace-separated
# field onwards of each input line (fields 0 and 1 are skipped --
# NOTE(review): presumably they hold the word and one other non-phone item;
# confirm against the lexicon format).
my %seen;
while (my $entry = <>) {
  my @cols = split(" ", $entry);
  $seen{$_} = 1 foreach @cols[2 .. $#cols];
}

# <eps> is always symbol 0; the collected tokens follow in sorted order,
# numbered from 1, and "sil" is appended with the next free number.
print "<eps>\t0\n";
my $next_id = 1;
foreach my $phone (grep { $_ ne "<eps>" } sort keys %seen) {
  print "$phone\t$next_id\n";
  $next_id++;
}

print "sil\t$next_id\n";
|
||||
|
|
@ -1,119 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Copyright 2010-2011 Yanmin Qian Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This file takes as input the file wp_gram.txt that comes with the RM
|
||||
# distribution, and creates the language model as an acceptor in FST form.
|
||||
|
||||
# make_rm_lm.pl wp_gram.txt > G.txt
|
||||
|
||||
if (@ARGV != 1) {
  print "usage: make_rm_lm.pl wp_gram.txt > G.txt\n";
  exit(0);
}
unless (open(IN_FILE, "$ARGV[0]")) {
  die ("can't open $ARGV[0]");
}

# ---- Pass 1: read the grammar. ----
# A line starting with ">" opens the successor list of a word; each following
# line (until the next ">") names one word allowed to follow it.  Everything
# before the first ">" line is ignored.
#   @LineArray     - header words in file order
#   $hash{w}[k]    - k-th successor of word w
#   $hashcnt{w}    - number of successors of w
#   $hashwrd{w}    - FST state assigned to w (0 = not assigned yet)
$flag = 0;       # becomes 1 once the first ">" header has been seen
$count_wrd = 0;  # number of ">" headers read so far
$cnt_ends = 0;   # number of successor lines mentioning SENTENCE-END
$init = "";      # word whose successor list is currently being read

while ($line = <IN_FILE>) {
  # chomp, not chop: chop would cut a real character off the last line when
  # the file does not end in a newline.
  chomp($line);
  $line =~ s/ //g;

  if ($line =~ /^>/) {
    $flag = 1;
    $line =~ s/>//g;
    $hashcnt{$init} = $i;  # close the previous word's list
    $init = $line;
    $i = 0;
    $count_wrd++;
    $LineArray[$count_wrd - 1] = $init;
    $hashwrd{$init} = 0;
  } elsif ($flag != 0) {
    $hash{$init}[$i] = $line;
    $i++;
    if ($line =~ /SENTENCE-END/) {
      $cnt_ends++;
    }
  }
}
$hashcnt{$init} = $i;  # close the last word's list

# ---- Pass 2: write the acceptor. ----
# State 0 is the start state.  The words listed under SENTENCE-END are the
# ones reached directly from state 0, and SENTENCE-END itself is given the
# state numbered by the count of header words, which is made final below.
# All successors of a word share the same cost, -log(1/#successors).
$num = 0;
$weight = 0;
$init_wrd = "SENTENCE-END";
$hashwrd{$init_wrd} = @LineArray;
for ($i = 0; $i < $hashcnt{$init_wrd}; $i++) {
  $weight = -log(1/$hashcnt{$init_wrd});
  $hashwrd{$hash{$init_wrd}[$i]} = $i + 1;
  print "0 $hashwrd{$hash{$init_wrd}[$i]} $hash{$init_wrd}[$i] $hash{$init_wrd}[$i] $weight\n";
}
$num = $i;  # highest state number handed out so far

for ($i = 0; $i < @LineArray; $i++) {
  $wrd = $LineArray[$i];
  next if $wrd eq 'SENTENCE-END';  # its arcs were written above

  if ($hashwrd{$wrd} == 0) {
    $num++;
    $hashwrd{$wrd} = $num;
  }
  for ($j = 0; $j < $hashcnt{$wrd}; $j++) {
    $weight = -log(1/$hashcnt{$wrd});
    $nxt = $hash{$wrd}[$j];
    if ($hashwrd{$nxt} == 0) {
      $num++;
      $hashwrd{$nxt} = $num;
    }
    if ($nxt eq 'SENTENCE-END') {
      # Reaching the end of the sentence consumes no symbol.
      print "$hashwrd{$wrd} $hashwrd{$nxt} <eps> <eps> $weight\n"
    } else {
      print "$hashwrd{$wrd} $hashwrd{$nxt} $nxt $nxt $weight\n";
    }
  }
}

print "$hashwrd{$init_wrd} 0\n";  # final state
close(IN_FILE);
|
||||
|
||||
|
|
@ -1,102 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Written by Dan Povey 9/21/2010. Apache 2.0 License.
|
||||
|
||||
# This version of make_roots.pl is specialized for RM.
|
||||
|
||||
# This script creates the file roots.txt which is an input to train-tree.cc. It
|
||||
# specifies how the trees are built. The input file phone-sets.txt is a partial
|
||||
# version of roots.txt in which phones are represented by their spelled form, not
|
||||
# their symbol id's. E.g. at input, phone-sets.txt might contain;
|
||||
# shared not-split sil
|
||||
# Any phones not specified in phone-sets.txt but present in phones.txt will
|
||||
# be given a default treatment. If the --separate option is given, we create
|
||||
# a separate tree root for each of them, otherwise they are all lumped in one set.
|
||||
# The arguments shared|not-shared and split|not-split are needed if any
|
||||
# phones are not specified in phone-sets.txt. What they mean is as follows:
|
||||
# if shared=="shared" then we share the tree-root between different HMM-positions
|
||||
# (0,1,2). If split=="split" then we actually do decision tree splitting on
|
||||
# that root, otherwise we forbid decision-tree splitting. (The main reason we
# might forbid splitting is for silence, when we want to ensure that each
# HMM-position remains with a single PDF id.)
|
||||
|
||||
|
||||
# Writes tree-root specification lines to stdout.  Every silence phone gets
# its own "not-shared not-split <id>" line.  The remaining phones (all
# non-zero ids in phones.txt) are written with the requested shared/split
# settings, either one per line (--separate) or all on a single line.
$separate = 0;
if (@ARGV > 0 && $ARGV[0] eq "--separate") {
  $separate = 1;
  shift @ARGV;
}

if (@ARGV != 4) {
  die "Usage: make_roots.pl [--separate] phones.txt silence-phone-list[integer,colon-separated] shared|not-shared split|not-split > roots.txt\n";
}

($phonesfile, $silphones, $shared, $split) = @ARGV;
if ($shared ne "shared" && $shared ne "not-shared") {
  die "Third argument must be \"shared\" or \"not-shared\"\n";
}
if ($split ne "split" && $split ne "not-split") {
  # This is the fourth argument; the message used to say "Third".
  die "Fourth argument must be \"split\" or \"not-split\"\n";
}

# phones.txt holds "<symbol> <id>" pairs; id 0 is skipped.
open(F, "<$phonesfile") || die "Opening file $phonesfile";
while (<F>) {
  @A = split(" ", $_);
  if (@A != 2) {
    die "Bad line in phones symbol file: ".$_;
  }
  if ($A[1] != 0) {
    $symbol2id{$A[0]} = $A[1];
    $id2symbol{$A[1]} = $A[0];
  }
}
close(F);

# String comparison: the old numeric "==" treated any non-numeric list
# (e.g. a phone name instead of an id) as if it were empty.
if ($silphones eq "") {
  die "Empty silence phone list in make_roots.pl";
}
foreach $silphoneid (split(":", $silphones)) {
  defined $id2symbol{$silphoneid} || die "No such silence phone id $silphoneid";
  # Give each silence phone its own separate pdfs in each state, but
  # no sharing (in this recipe; WSJ is different.. in this recipe there
  # is only one silence phone anyway.)
  $issil{$silphoneid} = 1;
  print "not-shared not-split $silphoneid\n";
}

# Sort the ids so that the output does not depend on hash ordering.
@nonsil = grep { !defined $issil{$_} } sort { $a <=> $b } keys %id2symbol;

if ($separate) {
  foreach $id (@nonsil) {
    print "$shared $split $id\n";
  }
} else {
  print "$shared $split ";
  foreach $id (@nonsil) {
    print "$id ";
  }
  print "\n";
}
|
|
@ -1,112 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Builds the decoding graph <graphdir>/HCLG.fst from <test-lang-dir> and the
# tree and final.mdl found in <model-dir>.  Intermediate FSTs are kept
# (<test-lang-dir>/tmp for LG and CLG, <graphdir> for Ha and HCLGa) and are
# only rebuilt when missing or older than their inputs.
#   --mono   use context size 1 / central position 0 instead of 3 / 1
#   --clean  remove <test-lang-dir>/tmp first
N=3
P=1
clean=false

# Up to three passes so the options are accepted in either order.
for x in 1 2 3; do
  # "$1" must be quoted: once the arguments run out, an unquoted empty $1
  # turns the test into `[ == "--mono" ]`, which is a syntax error.
  if [ "$1" == "--mono" ]; then
    N=1;
    P=0;
    shift;
  fi
  if [ "$1" == "--clean" ]; then
    clean=true
    shift;
  fi
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

if $clean; then rm -r "$lang/tmp"; fi

mkdir -p "$dir" || exit 1;

tscale=1.0
loopscale=0.1

# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
# (note: the [[ ]] brackets make the || type operators work (inside [ ], we
# would have to use -o instead), -f means file exists, and -ot means older than).

mkdir -p "$lang/tmp" || exit 1;
if [[ ! -f "$lang/tmp/LG.fst" || "$lang/tmp/LG.fst" -ot "$lang/G.fst" || \
      "$lang/tmp/LG.fst" -ot "$lang/L_disambig.fst" ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" | fstdeterminizestar --use-log=true | \
    fstminimizeencoded > "$lang/tmp/LG.fst" || exit 1;
  fstisstochastic "$lang/tmp/LG.fst" || echo "warning: LG not stochastic."
fi

if [ ! -f "$lang/phones_disambig.txt" ]; then
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1;
fi

# Integer ids (second column) of the symbols whose line contains '#'.
grep '#' "$lang/phones_disambig.txt" | awk '{print $2}' > "$lang/tmp/disambig_phones.list"

clg=$lang/tmp/CLG_${N}_${P}.fst

if [[ ! -f "$clg" || "$clg" -ot "$lang/tmp/LG.fst" ]]; then
  # Stop on failure so that a broken CLG is not composed further below.
  fstcomposecontext --context-size=$N --central-position=$P \
    --read-disambig-syms="$lang/tmp/disambig_phones.list" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.list" \
    "$lang/tmp/ilabels_${N}_${P}" < "$lang/tmp/LG.fst" > "$clg" || exit 1;
  fstisstochastic "$clg" || echo "warning: CLG not stochastic."
fi

if [[ ! -f "$dir/Ha.fst" || "$dir/Ha.fst" -ot "$model" ]]; then
  make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
    --transition-scale=$tscale "$lang/tmp/ilabels_${N}_${P}" "$tree" "$model" \
    > "$dir/Ha.fst" || exit 1;
fi

if [[ ! -f "$dir/HCLGa.fst" || "$dir/HCLGa.fst" -ot "$dir/Ha.fst" || \
      "$dir/HCLGa.fst" -ot "$clg" ]]; then
  fsttablecompose "$dir/Ha.fst" "$clg" | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.list" | fstrmepslocal | \
    fstminimizeencoded > "$dir/HCLGa.fst" || exit 1;
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

if [[ ! -f "$dir/HCLG.fst" || "$dir/HCLG.fst" -ot "$dir/HCLGa.fst" ]]; then
  add-self-loops --self-loop-scale=$loopscale --reorder=true \
    "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst" || exit 1;

  if [[ "$tscale" == "1.0" && "$loopscale" == "1.0" ]]; then
    # No point doing this test if transition-scale not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
  fi
fi
|
||||
|
||||
|
||||
# to make const fst:
|
||||
# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# creates integer lists of silence and non-silence phones in files,
|
||||
# e.g. silphones.csl="1:2:3 \n"
|
||||
# and nonsilphones.csl="4:5:6:7:...:24\n";
|
||||
|
||||
die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl"
  unless @ARGV == 4;

($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;
open(S,"<$symtab") || die "Opening symbol table $symtab";

# Set of phone names that the caller declared to be silence.
%issil = map { $_ => 1 } split(" ", $sillist);

# Split the integer ids of the symbol table ("<symbol> <integer>" per line)
# into silence and non-silence lists; id 0 belongs to neither.
@sil = ();
@nonsil = ();
while (defined($entry = <S>)) {
  @A = split(" ", $entry);
  die "Bad line $entry in phone-symbol-table file $symtab" unless @A == 2;
  ($sym, $int) = @A;
  next if $int == 0;
  if ($issil{$sym}) {
    $seensil{$sym} = 1;
    push @sil, $int;
  } else {
    push @nonsil, $int;
  }
}

# Every declared silence phone must actually occur in the symbol table.
foreach $k (keys %issil) {
  die "No such silence phone $k" unless $seensil{$k};
}

# Each list is written as a single colon-separated line.
open(F, ">$silphones") || die "opening silphones file $silphones";
open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
print F join(":", @sil) . "\n";
print G join(":", @nonsil) . "\n";
close(F);
close(G);

print STDERR "Warning: silphones.pl no silence phones.\n" if @sil == 0;
print STDERR "Warning: silphones.pl no non-silence phones.\n" if @nonsil == 0;
|
||||
|
|
@ -1,99 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script operates on a directory, such as in data/train/,
|
||||
# that contains some subset of the following files:
|
||||
# feats.scp
|
||||
# wav.scp
|
||||
# spk2utt
|
||||
# utt2spk
|
||||
# text
|
||||
# It creates a subset of that data, consisting of some specified
|
||||
# number of utterances. (The selected utterances are distributed
|
||||
# evenly throughout the file, by the program ./subset_scp.pl).
|
||||
|
||||
# If you give the --per-spk option, it will attempt to select
|
||||
# the supplied number of utterances for each speaker (typically
|
||||
# you would supply a much smaller number in this case).
|
||||
|
||||
perspk=false
if [ "$1" == "--per-spk" ]; then
  perspk=true;
  shift;
fi

if [ $# != 3 ]; then
  echo "Usage: subset_data_dir.sh [--per-spk] <srcdir> <num-utt> <destdir>"
  exit 1;
fi

srcdir=$1
numutt=$2
destdir=$3

if [ ! -f "$srcdir/feats.scp" ]; then
  echo "subset_data_dir.sh: no such file $srcdir/feats.scp"
  exit 1;
fi

## scripting note: $perspk evaluates to true or false
## so this becomes the command true or false.
if $perspk; then
  mkdir -p "$destdir"
  # For each speaker line of spk2utt ("spk utt1 utt2 ..."), keep at most n
  # utterances, taking every skip-th one so they are spread over the line.
  # n is handed to awk with -v rather than spliced into the program text.
  awk -v n="$numutt" '{ printf("%s ",$1); skip=1; while(n*(skip+1) <= NF-1) { skip++; }
     for(x=2; x<=NF && x <= n*skip; x += skip) { printf("%s ", $x); }
     printf("\n"); }' <"$srcdir/spk2utt" >"$destdir/spk2utt"
  scripts/spk2utt_to_utt2spk.pl < "$destdir/spk2utt" > "$destdir/utt2spk"
  scripts/filter_scp.pl "$destdir/utt2spk" <"$srcdir/feats.scp" >"$destdir/feats.scp"
  [ -f "$srcdir/wav.scp" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/wav.scp" >"$destdir/wav.scp"
  [ -f "$srcdir/text" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"
  [ -f "$srcdir/spk2gender" ] && scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"
  srcutts=$(wc -l < "$srcdir/utt2spk")
  destutts=$(wc -l < "$destdir/utt2spk")
  echo "Retained $numutt utterances per speaker from data-dir $srcdir and put it in $destdir, reducing #utt from $srcutts to $destutts"
  exit 0;
else
  if [ "$numutt" -gt "$(wc -l < "$srcdir/feats.scp")" ]; then
    echo "subset_data_dir.sh: cannot subset to more utterances than you originally had."
    exit 1;
  fi

  mkdir -p "$destdir" || exit 1;

  # create feats.scp
  scripts/subset_scp.pl "$numutt" "$srcdir/feats.scp" > "$destdir/feats.scp" || exit 1;

  # The file tested for must be the file that is filtered: this branch used
  # to check for wav.scp but then read and write mfc.scp.
  if [ -f "$srcdir/wav.scp" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/wav.scp" > "$destdir/wav.scp" || exit 1;
  else
    # Drop any stale copy from an earlier run; it is fine if there is none.
    rm "$destdir/wav.scp" 2>/dev/null
  fi

  if [ -f "$srcdir/utt2spk" ]; then
    scripts/filter_scp.pl "$destdir/feats.scp" "$srcdir/utt2spk" > "$destdir/utt2spk" || exit 1;
    scripts/utt2spk_to_spk2utt.pl "$destdir/utt2spk" > "$destdir/spk2utt" || exit 1;
  fi

  [ -f "$srcdir/text" ] && scripts/filter_scp.pl "$destdir/feats.scp" <"$srcdir/text" >"$destdir/text"

  [ -f "$srcdir/spk2gender" ] && scripts/filter_scp.pl "$destdir/spk2utt" <"$srcdir/spk2gender" >"$destdir/spk2gender"

  echo "Created a $numutt-utterance subset of $srcdir and put it in $destdir."

  exit 0;
fi
|
|
@ -1,59 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This program selects a subset of N elements in the scp.
|
||||
# It selects them evenly from throughout the scp, in order to
|
||||
# avoid selecting too many from the same speaker.
|
||||
# It prints them on the standard output.
|
||||
|
||||
die "Usage: subset_scp.pl N in.scp " if @ARGV < 2;

# N is the number of lines to keep; zero (which is also what a non-numeric
# argument compares equal to) is rejected.
$N = shift @ARGV;
die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""
  if $N == 0;

# Read the whole scp into @F, one element per line, for select_n() to use.
$inscp = shift @ARGV;
open(I, "<$inscp") || die "Opening input scp file $inscp";
@F = <I>;
$numlines = scalar(@F);
die "You requested from subset_scp.pl more elements than available: $N > $numlines"
  if $N > $numlines;
|
||||
|
||||
# Prints $want of the lines $F[$lo] .. $F[$hi-1] (global @F), picked by
# halving: the range is cut in the middle, the first half is asked for
# int($want/2) lines and the second half for the remainder, down to single
# lines.  Dies if more lines are requested than the range holds.
sub select_n {
  my ($lo, $hi, $want) = @_;
  my $span = $hi - $lo;
  die "select_n: code error" if $want > $span;

  if ($span != 1) {
    my $mid = $lo + int($span / 2);
    my $first_share = int($want / 2);
    select_n($lo, $mid, $first_share);
    select_n($mid, $hi, $want - $first_share);
    return;
  }

  # A single line is left: print it if it is wanted.
  if ($want > 0) {
    print $F[$lo];
  }
}
|
||||
select_n(0, $numlines, $N);
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
$ignore_oov = 0;
$ignore_first_field = 0;

# Three passes over the leading arguments, so that up to three options can be
# given in any order.
foreach my $pass (1 .. 3) {
  # Note: it will just print OOVS unmodified if you specify --ignore-oov.
  # Else will complain and put nothing out.
  if ($ARGV[0] eq "--ignore-oov") {
    shift @ARGV;
    $ignore_oov = 1;
  }
  if ($ARGV[0] eq "--ignore-first-field") {
    shift @ARGV;
    $ignore_first_field = 1;
  }
  if ($ARGV[0] eq "--map-oov") {
    shift @ARGV;
    $map_oov = shift @ARGV;
  }
}

$symtab = shift @ARGV;
die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n"
  unless defined $symtab;

# Symbol table: "<symbol> <integer>" per line; the integer is stored as a
# number.
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while (my $entry = <F>) {
  my @pair = split(" ", $entry);
  die "bad line in symbol table file: $entry" unless @pair == 2;
  $sym2int{$pair[0]} = $pair[1] + 0;
}

$num_warning = 0;
$max_warning = 20;
$error = 0;

# Map every word of every input line.  Unknown words are replaced by the
# --map-oov symbol (with a limited number of warnings), passed through
# unchanged under --ignore-oov, or are fatal.
while (my $line = <>) {
  my @words = split(" ", $line);
  die "Empty line in transcriptions input." if @words == 0;

  # The key column is echoed verbatim and excluded from the mapping.
  print shift(@words) . " " if $ignore_first_field;

  my @ids = ();
  foreach my $word (@words) {
    my $id = $sym2int{$word};
    if (defined $id) {
      push @ids, $id;
      next;
    }
    if (defined $map_oov) {
      defined $sym2int{$map_oov}
        || die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
      if ($num_warning++ < $max_warning) {
        print STDERR "sym2int.pl: replacing $word with $map_oov\n";
        print STDERR "sym2int.pl: not warning for OOVs any more times\n"
          if $num_warning == $max_warning;
      }
      push @ids, $sym2int{$map_oov};
    } elsif ($ignore_oov) {
      push @ids, $word;  # just print them out unmodified..
    } else {
      die "sym2int.pl: undefined symbol $word\n";
    }
  }
  print join(" ", @ids), "\n";
}

exit($error ? 1 : 0);
|
||||
|
||||
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# To be run from ..
|
||||
|
||||
# This script does training-data alignment given a model built using
|
||||
# CMN + delta + delta-delta features. Its output, all in its own
|
||||
# experimental directory, is cmvn.ark, ali, tree, and final.mdl
|
||||
# (the last two are just copied from the source directory).
|
||||
|
||||
# Option to use precompiled graphs from last phase, if these
|
||||
# are available (i.e. if they were built with the same data).
|
||||
|
||||
# Optional "--graphs <rspecifier>": precompiled training graphs to align with.
graphs=
if [ "$1" == --graphs ]; then
  shift;
  graphs=$1
  shift
fi

if [ $# != 4 ]; then
  echo "Usage: steps/align_deltas.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/align_deltas.sh data/train data/lang exp/tri1 exp/tri1_ali"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
srcdir=$3
dir=$4

mkdir -p "$dir" || exit 1;
cp "$srcdir"/{tree,final.mdl,final.occs} "$dir" || exit 1; # Create copy of the tree and model and occs...

# Kept as an array so that each option is passed as its own word without
# relying on word splitting of an unquoted string.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)

echo "Computing cepstral mean and variance statistics"
compute-cmvn-stats "scp:$data/feats.scp" \
  "ark:$dir/cmvn.ark" 2>"$dir/cmvn.log" || exit 1;

# Feature pipeline handed to the aligner as a single argument: mean
# normalisation with the statistics computed above, then delta features.
feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# Align all training data using the supplied model.

echo "Aligning all training data"
if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
  # compute integer form of transcripts.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
    || exit 1;
  gmm-align "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/tree" "$dir/final.mdl" "$lang/L.fst" \
    "$feats" "ark:$dir/train.tra" "ark:$dir/ali" 2> "$dir/align.log" || exit 1;
  rm -- "$dir/train.tra"
else
  gmm-align-compiled "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/final.mdl" \
    "$graphs" "$feats" "ark:$dir/ali" 2> "$dir/align.log" || exit 1;
fi

echo "Done."
|
|
@ -1,77 +0,0 @@
|
|||
#!/bin/bash

# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Decoding script that works with a GMM model and delta-delta plus
# cepstral mean subtraction features.  Used, for example, to decode
# mono/ and tri1/
# This script generates lattices and rescores them with different
# acoustic weights, in order to explore a range of different
# weights.
#
# Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>
#   <model-dir>   reads final.mdl and graph/HCLG.fst
#   <data-dir>    reads feats.scp and text
#   <lang-dir>    reads words.txt
#   <decode-dir>  created if needed; receives lat.gz, test.tra, test.ali,
#                 decode.log and, per inverse acoustic weight N in 4..10,
#                 N.tra, rescore_N.log and wer_N
# Uses the relative path scripts/sym2int.pl and sources ./path.sh when present,
# so it must be started from the recipe directory.
# Exits with status 1 on bad usage, on missing/outdated inputs, or when any
# decoding or rescoring command fails.

if [ $# != 4 ]; then
  echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

mkdir -p "$dir" || exit 1

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo "No model file $srcdir/final.mdl"
  exit 1;
fi

# The graph must exist and must not be older than the model it is used with.
if [[ ! -f "$graphdir/HCLG.fst" || "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.

gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 \
  --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" \
  "ark:|gzip -c > $dir/lat.gz" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1;

# In this setup there are no non-scored words, so
# scoring is simple.

# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
  acwt=$(perl -e "print (1.0/$inv_acwt);")

  # Stop on failure: scoring a missing or stale .tra file would produce a
  # misleading WER for this acoustic scale.
  lattice-best-path --acoustic-scale="$acwt" \
    --word-symbol-table="$lang/words.txt" \
    "ark:gunzip -c $dir/lat.gz|" "ark,t:$dir/${inv_acwt}.tra" \
    2> "$dir/rescore_${inv_acwt}.log" || exit 1

  # Both stdout and stderr of compute-wer end up in the wer_N file.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" | \
    compute-wer --mode=present ark:- "ark,p:$dir/${inv_acwt}.tra" \
    > "$dir/wer_${inv_acwt}" 2>&1 || exit 1
done
|
|
@ -1,48 +0,0 @@
|
|||
#!/bin/bash
# Copyright 2012 Vassil Panayotov

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# To be run from .. (one directory up from here)
#
# Reads the feature files listed in <data-dir>/mfc.scp with
# "copy-feats --sphinx-in=true" and writes them as a single archive
# <abs-path-to-mfccdir>/raw_mfcc_<name>.ark, indexed by <data-dir>/feats.scp,
# where <name> is the last path component of <data-dir>.
# Messages from copy-feats go to <log-dir>/make_mfcc.log.
#
# Usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>
# Exits with status 1 on bad usage, when a directory cannot be created,
# when mfc.scp is missing, or when the conversion itself fails.

if [ $# != 3 ]; then
  echo "usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>";
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
logdir=$2
mfccdir=$3

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$mfccdir" || exit 1;
mkdir -p "$logdir" || exit 1;

scp=$data/mfc.scp
if [ ! -f "$scp" ]; then
  # Report the path that was actually tested.
  echo "make_mfcc.sh: no such file $scp";
  exit 1;
fi

log=$logdir/make_mfcc.log

# Do not announce success (or return 0) when the conversion failed.
if ! copy-feats --sphinx-in=true \
    "scp:$scp" "ark,scp:$mfccdir/raw_mfcc_$name.ark,$data/feats.scp" 2> "$log"; then
  echo "make_mfcc.sh: error creating MFCC features for $name, see $log"
  exit 1
fi

echo "Succeeded creating MFCC features for $name"
|
||||
|
|
@ -1,126 +0,0 @@
|
|||
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# To be run from ..
# Triphone model training, using delta-delta features and cepstral
# mean normalization.  It starts from an existing directory (e.g.
# exp/mono), supplied as an argument, which is assumed to be built using
# the same type of features.
#
# Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>
#   <data-dir>  reads feats.scp and text
#   <lang-dir>  reads silphones.csl, phones.txt, topo, L.fst and words.txt
#   <ali-dir>   reads final.mdl, ali, tree and cmvn.ark
#   <exp-dir>   created if needed; ends up with tree, the last-iteration
#               model/occupancies and the links final.mdl / final.occs
# Exits with status 1 as soon as a required step fails.

if [ $# != 4 ]; then
  echo "Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
  echo " e.g.: steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [[ ! -f "$alidir/final.mdl" || ! -f "$alidir/ali" ]]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1;
fi

# Kept as an array so the three options expand to three separate arguments.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)
realign_iters="5 10 15 20";
silphonelist=$(cat "$lang/silphones.csl") || exit 1
numiters=25    # Number of iterations of training
maxiterinc=15  # Last iter to increase #Gauss on.
numleaves=1800 # target num-leaves in tree building.
numgauss=$((numleaves + numleaves / 2)); # starting num-Gauss.
     # Initially mix up to avg. 1.5 Gauss/state ( a bit more
     # than this, due to state clustering... then slowly mix
     # up to final amount.
totgauss=9000  # Target #Gaussians
incgauss=$(((totgauss - numgauss) / maxiterinc)) # per-iter increment for #Gauss

mkdir -p "$dir" || exit 1

# CMVN statistics are taken from the alignment directory, not recomputed.
feats="ark:apply-cmvn --norm-vars=false ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

echo "Accumulating tree stats"
acc-tree-stats --ci-phones="$silphonelist" "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1;

echo "Computing questions for tree clustering"

# Last field of every phones.txt line, dropping the entry whose id is 0.
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" \
  2> "$dir/questions.log" || exit 1;
# Symbolic copy of the questions; nothing below reads it.
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" \
  2> "$dir/compile_questions.log" || exit 1;

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2> "$dir/roots.log" || exit 1;

echo "Building tree"
build-tree --verbose=1 --max-leaves="$numleaves" \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" 2> "$dir/train_tree.log" || exit 1;

gmm-init-model --write-occs="$dir/1.occs" \
  "$dir/tree" "$dir/treeacc" "$lang/topo" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1;

gmm-mixup --mix-up="$numgauss" "$dir/1.mdl" "$dir/1.occs" "$dir/1.mdl" \
  2> "$dir/mixup.log" || exit 1;

#rm $dir/treeacc

# Convert alignments generated from monophone model, to use as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" \
  "ark:$alidir/ali" "ark:$dir/cur.ali" 2> "$dir/convert.log" || exit 1;
# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" "ark:$dir/cur.ali" ark:- \
  2>/dev/null | cmp - "$alidir/ali" || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" \
  "ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2> "$dir/compile_graphs.log" || exit 1;

x=1
while [ "$x" -lt "$numiters" ]; do
  echo "Pass $x"
  # Realign only on the iterations listed in realign_iters.
  if [[ " $realign_iters " == *" $x "* ]]; then
    echo "Aligning data"
    gmm-align-compiled "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" \
    "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x + 1)).occs" --mix-up="$numgauss" \
    "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x + 1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # Only the newest model and occupancies are kept.
  rm "$dir/$x.mdl" "$dir/$x.acc"
  rm "$dir/$x.occs"
  if [ "$x" -le "$maxiterinc" ]; then
    numgauss=$((numgauss + incgauss));
  fi
  x=$((x + 1));
done

# Point final.mdl / final.occs at the last iteration.  Both links are removed
# first so that re-running into an existing directory does not make "ln -s"
# fail on a link left over from the previous run.
(
  cd "$dir" || exit 1
  rm -f final.mdl final.occs
  ln -s "$x.mdl" final.mdl && ln -s "$x.occs" final.occs
) || exit 1

echo Done
|
|
@ -1,105 +0,0 @@
|
|||
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# To be run from ..
# Flat start and monophone training, with delta-delta features.
# This script applies cepstral mean normalization,
# unlike the corresponding script in s1/
# NOTE(review): compute-cmvn-stats is invoked below without --spk2utt, so the
# statistics are presumably per utterance rather than per speaker -- confirm.
#
# Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>
#   <data-dir>  reads feats.scp and text
#   <lang-dir>  reads words.txt, topo and L.fst
#   <exp-dir>   created if needed; ends up with cmvn.ark, tree, train.tra,
#               graphs.fsts.gz, cur.ali, the last-iteration model/occupancies
#               and the links final.mdl / final.occs
# Exits with status 1 as soon as a required step fails.

if [ $# != 3 ]; then
  echo "Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>"
  echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
  exit 1;
fi


data=$1
lang=$2
dir=$3

if [ -f path.sh ]; then . path.sh; fi

# Configuration:
# Kept as an array so the three options expand to three separate arguments.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)
numiters=30    # Number of iterations of training
maxiterinc=20 # Last iter to increase #Gauss on.
numgauss=250 # Initial num-Gauss (must be more than #states=3*phones).
totgauss=1000 # Target #Gaussians.
incgauss=$(((totgauss - numgauss) / maxiterinc)) # per-iter increment for #Gauss
realign_iters="1 2 3 4 5 6 7 8 9 10 12 15 20 25";

mkdir -p "$dir" || exit 1
echo "Computing cepstral mean and variance statistics"

compute-cmvn-stats "scp:$data/feats.scp" "ark:$dir/cmvn.ark" 2> "$dir/cmvn.log" || exit 1;

feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
  || exit 1;

echo "Initializing monophone system."

# The model is initialised from the first 10 feature matrices only
# (subset-feats --n=10); 39 is the feature dimension handed to gmm-init-mono.
gmm-init-mono "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" "$lang/topo" 39 \
  "$dir/0.mdl" "$dir/tree" 2> "$dir/init.log" || exit 1;


echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/0.mdl" "$lang/L.fst" \
  "ark:$dir/train.tra" "ark:|gzip -c >$dir/graphs.fsts.gz" \
  2> "$dir/compile_graphs.log" || exit 1

echo Pass 0

# Pass 0 has no model-based alignment yet: align-equal-compiled output is
# piped straight into the statistics accumulation.
align-equal-compiled "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
  ark,t,f:- 2> "$dir/align.0.log" | \
  gmm-acc-stats-ali --binary=true "$dir/0.mdl" "$feats" ark:- \
    "$dir/0.acc" 2> "$dir/acc.0.log" || exit 1;

# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
# we fail to est "rare" phones and later on, they never align properly.
# (Only this initial update passes the option; the updates in the loop do not.)

gmm-est --min-gaussian-occupancy=3 --mix-up="$numgauss" \
  "$dir/0.mdl" "$dir/0.acc" "$dir/1.mdl" 2> "$dir/update.0.log" || exit 1;

rm "$dir/0.acc"

beam=4 # will change to 8 below after 1st pass
x=1
while [ "$x" -lt "$numiters" ]; do
  echo "Pass $x"
  # Realign only on the iterations listed in realign_iters.
  if [[ " $realign_iters " == *" $x "* ]]; then
    echo "Aligning data"
    gmm-align-compiled "${scale_opts[@]}" --beam="$beam" --retry-beam=$((beam * 4)) "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" "t,ark:$dir/cur.ali" \
      2> "$dir/align.$x.log" || exit 1;
  fi
  gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" \
    "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;
  gmm-est --write-occs="$dir/$((x + 1)).occs" --mix-up="$numgauss" \
    "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x + 1)).mdl" 2> "$dir/update.$x.log" || exit 1;
  # Errors are silenced because 1.occs is never written (the pass-0 update
  # above has no --write-occs), so the first removal would otherwise complain.
  rm "$dir/$x.mdl" "$dir/$x.acc" "$dir/$x.occs" 2>/dev/null
  if [ "$x" -le "$maxiterinc" ]; then
    numgauss=$((numgauss + incgauss));
  fi
  beam=8
  x=$((x + 1))
done

# Point final.mdl / final.occs at the last iteration.  This is the last
# command of the script, so its status is the script's exit status: both
# links are removed first, otherwise a final.occs left by an earlier run
# makes "ln -s" fail and a successful training is reported as a failure.
(
  cd "$dir" || exit 1
  rm -f final.mdl final.occs
  ln -s "$x.mdl" final.mdl && ln -s "$x.occs" final.occs
) || exit 1

# example of showing the alignments:
# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4
|
||||
|
Загрузка…
Ссылка в новой задаче