Removing the egs/rm/s4 recipe. It is based on the older-generation 's3' recipes, and Kaldi now has better examples that use freely available data.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4654 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2014-11-25 12:26:58 +00:00 · 2014-11-25 12:26:58 +00:00 · adfa50a60e
--- a/egs/rm/README.txt
+++ b/egs/rm/README.txt
@@ -11,8 +11,6 @@ Each subdirectory of this directory contains the
 scripts for a sequence of experiments. 
 s5 is the currently recommmended setup.

-  s4: A recipe based on freely available subset of RM data, distributed by CMU
-
  s5: This is the "new-new-style" recipe.  It is now finished.
      All further work will be on top of this style of recipe.  Note: 
      unlike previous recipes, this now uses the same underlying
--- a/egs/rm/s4/README.txt
+++ b/egs/rm/s4/README.txt
@@ -1,21 +0,0 @@
-This recipe is using a publicly available subset of Resource Management data, 
-distributed by CMU.
-
-To run the recipe the data should be downloaded first, for which ./getdata.sh
-command can be used. Then ./run.sh script can be executed to automatically perform
-all steps or the commands can be started manually by copy/pasting them. 
-
-The script and data layout are based on egs/rm/s3 recipe, with several exceptions:
-
- because this recipe uses pre-extracted feature vectors no conversion from .sph
-to .wav format and consequent feature extraction is needed. The features are just
-converted from CMU Sphinx feature files to Kaldi Tables.
-
- only one test set is available instead of several (e.g. mar87, oct87 and so on)
-as in the original recipe
-
- no speaker-dependent processing
-
- only the steps up to tri2a stage are implemented
-
- on the plus side it requires less disk space (about 220MB)
--- a/egs/rm/s4/conf/mfcc.conf
+++ b/egs/rm/s4/conf/mfcc.conf
@@ -1 +0,0 @@
--use-energy=false   # only non-default option.
--- a/egs/rm/s4/conf/plp.conf
+++ b/egs/rm/s4/conf/plp.conf
@@ -1,2 +0,0 @@
-# No non-default options for now.
-
--- a/egs/rm/s4/conf/topo.proto
+++ b/egs/rm/s4/conf/topo.proto
@@ -1,22 +0,0 @@
-<Topology> 
-<TopologyEntry> 
-<ForPhones>
-NONSILENCEPHONES
-</ForPhones> 
-<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State> 
-<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State> 
-<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State> 
-<State> 3 </State>
-</TopologyEntry> 
-<TopologyEntry> 
-<ForPhones>
-SILENCEPHONES
-</ForPhones> 
-<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State> 
-<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State> 
-<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State> 
-<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State> 
-<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State> 
-<State> 5 </State>
-</TopologyEntry> 
-</Topology> 
--- a/egs/rm/s4/getdata.sh
+++ b/egs/rm/s4/getdata.sh
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-# Copyright 2012 Vassil Panayotov
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-source path.sh
-
-# Download and extract CMU's feature files
-mkdir -p $RM1_ROOT
-wget -P $RM1_ROOT http://www.speech.cs.cmu.edu/databases/rm1/rm1_cepstra.tar.gz ||
- wget -P $RM1_ROOT http://sourceforge.net/projects/kaldi/files/rm1_cepstra.tar.gz
-tar -C $RM1_ROOT/ -xf $RM1_ROOT/rm1_cepstra.tar.gz
-
-# Download the G.fst graph produced from 'wp_gram.txt'
-wget -P $RM1_ROOT http://sourceforge.net/projects/kaldi/files/RM_G.fst
--- a/egs/rm/s4/local/decode.sh
+++ b/egs/rm/s4/local/decode.sh
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-# This script basically calls the supplied decoding script
-# once for each test set (in parallel on the same machine),
-# and then averages the resulting WERs.
-# The interpretation of the decode-dir-1, etc., as inputs,
-# outputs and so on, depends on the decoding script you call.
-
-# It assumes the model directory is one level of from decode-dir-1.
-
-mono_opt=
-
-if [ "$1" == "--mono" ]; then
-   mono_opt=$1;
-   shift;
-fi
-
-script=$1
-decode_dir_1=$2 # e.g. exp/sgmm3b/decode
-decode_dir_2=$3
-decode_dir_3=$4
-dir=`dirname $decode_dir_1` # e.g. exp/sgmm3b
-
-if [ $# -ne 2 ]; then 
-  echo "Usage: scripts/decode.sh <decode-script> <decode-dir-1>"
-  exit 1;
-fi
-if [ ! -x $script -o ! -d $dir ]; then
-  echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
-  exit 1;
-fi
-
-scripts/mkgraph.sh $mono_opt data/lang_test $dir $dir/graph
-
-$script $dir data/test data/lang $decode_dir_1/ &
-wait
-
-# The publicly available RM subset has just one test set(instead of mar87 etc.),
-# so no averaging is needed
-grep WER $decode_dir_1/wer* || echo "Error decoding $decode_dir: no WER results found."
--- a/egs/rm/s4/local/make_trans.pl
+++ b/egs/rm/s4/local/make_trans.pl
@@ -1,69 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# usage:  make_trans.sh prefix in.flist input.snr out.txt out.scp
-
-# prefix is first letters of the database "key" (rest are numeric)
-
-# in.flist is just a list of filenames, probably of .sph files.
-# input.snr is an snr format file from the RM dataset.  
-# out.txt is the output transcriptions in format "key word1 word\n"
-# out.scp is the output scp file, which is as in.scp but has the
-# database-key first on each line.
-
-# Reads from first argument e.g. $rootdir/rm1_audio1/rm1/doc/al_sents.snr
-# and second argument train_wav.scp 
-# Writes to standard output trans.txt
-
-if(@ARGV != 5) {
-    die "usage:  make_trans.sh prefix in.flist input.snr out.txt out.scp\n";
-}
-($prefix, $in_flist, $input_snr, $out_txt, $out_scp) = @ARGV;
-
-open(F, "<$input_snr") || die "Opening SNOR file $input_snr";
-
-while(<F>) {
-    if(m/^;/) { next; }
-    m/(.+) \((.+)\)/ || die "bad line $_";
-    $T{$2} = $1;
-}
-
-close(F);
-open(G, "<$in_flist") || die "Opening file list $in_flist";
-
-open(O, ">$out_txt") || die "Open output transcription file $out_txt";
-
-open(P, ">$out_scp") || die "Open output scp file $out_scp";
-
-while(<G>) {
-    $_ =~ m:/(\w+)/(\w+)\.mfc\s+$:i || die "bad scp line $_";
-    $spkname = $1;
-    $uttname = $2;
-    $uttname  =~ tr/a-z/A-Z/;
-    defined $T{$uttname} || die "no trans for sent $uttname";
-    $spkname =~ s/_//g; # remove underscore from spk name to make key nicer.
-    $key = $prefix . "_" . $spkname . "_" . $uttname;
-    $key =~ tr/A-Z/a-z/; # Make it all lower case.
-     # to make the numerical and string-sorted orders the same.
-    print O "$key $T{$uttname}\n";
-    print P "$key $_";
-    $n++;
-} 
-close(O) || die "Closing output.";
-close(P) || die "Closing output.";
-
-
--- a/egs/rm/s4/local/rm_data_prep.sh
+++ b/egs/rm/s4/local/rm_data_prep.sh
@@ -1,92 +0,0 @@
-#!/bin/bash
-
-# Copyright 2010-2011 Microsoft Corporation
-# Copyright 2012 Vassil Panayotov
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# To be run from one directory above this script.
-
-# Note: when creating your own data preparation scripts, it's a good idea
-# to make sure that the speaker id (if present) is a prefix of the utterance
-# id, that the output scp file is sorted on utterance id, and that the 
-# transcription file is exactly the same length as the scp file and is also
-# sorted on utterance id (missing transcriptions should be removed from the
-# scp file using e.g. scripts/filter_scp.pl)
-
-if [ $# != 1 ]; then
-  echo "Usage: ../../local/RM_data_prep.sh /path/to/RM"
-  exit 1; 
-fi 
-
-export LC_ALL=C
-
-RMROOT=$1
-
-mkdir -p data/local
-cd data/local
-
-if [ ! -f $RMROOT/RM_G.fst -o ! -d $RMROOT/rm1 ]; then
-  echo "Required data is missing. You can download the data by running ./getdata.sh"
-  exit 1; 
-fi
-
-# Make a list of files
-cat $RMROOT/rm1/etc/rm1_train.fileids | \
-    xargs -I_x_ echo $RMROOT/rm1/feat/_x_.mfc > train.flist
-cat $RMROOT/rm1/etc/rm1_test.fileids | \
-    xargs -I_x_ echo $RMROOT/rm1/feat/_x_.mfc > test.flist
-
-# make_trans.pl also creates the utterance id's and the kaldi-format scp file.
-
-# this is needed, because the original "al_sents.snr" file is not available
-# (and because CMU's train utterances have tags like '<sil>' added)
-cat $RMROOT/rm1/etc/rm1_train.transcription |\
- tr '[a-z]' '[A-Z]' |\
- sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's:  +: :g' -e 's:^ +::' |\
- cat $RMROOT/rm1/etc/rm1_test.transcription - \
- > al_sents.snr
-
-# training set
-../../local/make_trans.pl trn train.flist al_sents.snr train_trans.txt train.scp
-mv train_trans.txt tmp; sort -k 1 tmp > train_trans.txt
-mv train.scp tmp; sort -k 1 tmp > train.scp
-rm tmp
-
-# test set
-../../local/make_trans.pl test test.flist al_sents.snr test_trans.txt test.scp
-mv test_trans.txt tmp; sort -k 1 tmp > test_trans.txt
-mv test.scp tmp; sort -k 1 tmp > test.scp
-rm tmp
-
-# We already have the features, so sph2pipe step is skipped and
-# given the limited data the speaker-dependent processing is also not used 
-
-# "wp_gram.txt" is no longer available from LDC's website, so we are just using a
-# pre-built grammar WFST (G.fst). The word-pair grammar is a finite-state description
-# of the allowed utterances, which just enumerates the words that can follow each word
-# in the vocabulary. G.fst is constructed by adding output arcs to each node 
-# representing a word, one for each word that is allowed to follow, and the 
-# probability mass is distributed uniformly among all these arcs.
-#../../scripts/make_rm_lm.pl $RMROOT/LDC93S3B/disc_1/doc/wp_gram.txt  > G.txt || exit 1;
-cp $RMROOT/RM_G.fst ./G.fst
-
-# Convert the CMU's lexicon to a form which the other scripts expect
-# (leave only the first pronunciation variant and convert the phones to lower case)
-cat $RMROOT/rm1/etc/rm1.dic | \
-  egrep -v '\(' | \
-  sed -e "s/^\([[:alnum:]-]\+\('[[:alpha:]]\+\)\?\)\(.*\)/\1\L\3/g" > lexicon.txt
-
-
-echo RM_data_prep succeeded.
--- a/egs/rm/s4/local/rm_format_data.sh
+++ b/egs/rm/s4/local/rm_format_data.sh
@@ -1,128 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2012 Vassil Panayotov
-# modified from:
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# To be run from one directory above this script.
-
-
-
-if [ -f path.sh ]; then . path.sh; fi
-
-data_list="train test"
-
-for x in lang lang_test $data_list; do
-  mkdir -p data/$x
-done
-
-# Copy stuff into its final location:
-
-for x in $data_list; do
-  cp data/local/${x}.scp data/$x/mfc.scp || exit 1;
-  cp data/local/${x}_trans.txt data/$x/text || exit 1;
-done
-
-# We are not using make_words_symtab.pl for symbol table creation in this
-# recipe, because CMU's lexicon have several words that are not in the 
-# word-pair grammar
-cat data/local/lexicon.txt | \
- awk 'BEGIN{print "<eps>\t0";} {print $1 "\t" NR;} END{print "!SIL\t" NR+1;}' \
- > data/lang/words.txt
-scripts/make_phones_symtab.pl < data/local/lexicon.txt > data/lang/phones.txt
-cp data/lang/words.txt data/lang_test/words.txt
-
-silphones="sil"; # This would in general be a space-separated list of all silence phones.  E.g. "sil vn"
-# Generate colon-separated lists of silence and non-silence phones.
-scripts/silphones.pl data/lang/phones.txt "$silphones" data/lang/silphones.csl \
-  data/lang/nonsilphones.csl
-
-ndisambig=`scripts/add_lex_disambig.pl data/local/lexicon.txt data/local/lexicon_disambig.txt`
-ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
-scripts/add_disambig.pl data/lang/phones.txt $ndisambig > data/lang_test/phones_disambig.txt
-cp data/lang_test/phones_disambig.txt data/lang/ # needed for MMI.
-
-silprob=0.5  # same prob as word
-scripts/make_lexicon_fst.pl data/local/lexicon.txt $silprob sil  | \
-  fstcompile --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt \
-   --keep_isymbols=false --keep_osymbols=false | \
-   fstarcsort --sort_type=olabel > data/lang/L.fst
-
-# Create L_align.fst, which is as L.fst but with alignment symbols (#1 and #2 at the
-# beginning and end of words, on the input side)... useful if we
-# ever need to e.g. create ctm's-- these are used to work out the
-# word boundaries.
-
-
-cat data/local/lexicon.txt | \
- awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
- scripts/make_lexicon_fst.pl - 0.5 sil | \
- fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
-  --keep_isymbols=false --keep_osymbols=false | \
- fstarcsort --sort_type=olabel > data/lang_test/L_align.fst
-
-# L_disambig.fst has the disambiguation symbols (c.f. Mohri's papers)
-
-scripts/make_lexicon_fst.pl data/local/lexicon_disambig.txt $silprob sil '#'$ndisambig | \
-   fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
-   --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel \
-    > data/lang_test/L_disambig.fst
-
-cp data/lang_test/L_disambig.fst data/lang/  # Needed for MMI training.
-
-# Compilation is no longer needed, because we are using a pre-built G.fst
-#fstcompile --isymbols=data/lang/words.txt --osymbols=data/lang/words.txt --keep_isymbols=false \
-#    --keep_osymbols=false data/local/G.txt > data/lang_test/G.fst
-cp data/local/G.fst data/lang_test/
-
-# Checking that G is stochastic [note, it wouldn't be for an Arpa]
-fstisstochastic data/lang_test/G.fst || echo Error: G is not stochastic
-
-# Checking that G.fst is determinizable.
-fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.
-
-# Checking that L_disambig.fst is determinizable.
-fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.
-
-# Checking that disambiguated lexicon times G is determinizable
-fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
-   fstdeterminize >/dev/null || echo Error
-
-# Checking that LG is stochastic:
-fsttablecompose data/lang/L.fst data/lang_test/G.fst | \
-   fstisstochastic || echo Error: LG is not stochastic.
-
-# Checking that L_disambig.G is stochastic:
-fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
-   fstisstochastic || echo Error: LG is not stochastic.
-
-
-## Check lexicon.
-## just have a look and make sure it seems sane.
-echo "First few lines of lexicon FST:"
-fstprint   --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst  | head
-
-
-silphonelist=`cat data/lang/silphones.csl | sed 's/:/ /g'`
-nonsilphonelist=`cat data/lang/nonsilphones.csl | sed 's/:/ /g'`
-cat conf/topo.proto | sed "s:NONSILENCEPHONES:$nonsilphonelist:" | \
-   sed "s:SILENCEPHONES:$silphonelist:" > data/lang/topo 
-
-for x in phones.txt words.txt silphones.csl nonsilphones.csl topo; do
-   cp data/lang/$x data/lang_test/$x || exit 1;
-done
-
-echo RM_format_data succeeded.
--- a/egs/rm/s4/path.sh
+++ b/egs/rm/s4/path.sh
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-# path to Kaldi's root directory
-root=`pwd`/../../..
-
-export PATH=${root}/src/bin:${root}/tools/openfst/bin:${root}/src/fstbin/:${root}/src/gmmbin/:${root}/src/featbin/:${root}/src/fgmmbin:${root}/src/sgmmbin:${root}/src/lm:${root}/src/latbin:$PATH  
-
-# path to the directory in which the subset of RM corpus is stored
-export RM1_ROOT=`pwd`/data/download
-
-export LC_ALL=C
-export LC_LOCALE_ALL=C
-
--- a/egs/rm/s4/run.sh
+++ b/egs/rm/s4/run.sh
@@ -1,57 +0,0 @@
-#!/bin/bash
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-source ./path.sh
-
-# call the next line with the directory where the RM data is
-local/rm_data_prep.sh $RM1_ROOT || exit 1;
-
-local/rm_format_data.sh || exit 1;
-
-# the directory, where you want to store MFCC features.
-featdir=data/rm_feats
-
-# convert the Sphinx feature files to Kaldi tables
-for x in train test; do
- steps/make_mfcc.sh data/$x exp/make_mfcc/$x $featdir  || exit 1;
-done
-
-scripts/subset_data_dir.sh data/train 1000 data/train.1k  || exit 1;
-
-# train monophone system.
-steps/train_mono.sh data/train.1k data/lang exp/mono  || exit 1;
-
-# monophone decoding
-local/decode.sh --mono steps/decode_deltas.sh exp/mono/decode || exit 1;
-
-# Get alignments from monophone system.
-steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali || exit 1;
-
-# train tri1 [first triphone pass]
-steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1 || exit 1;
-
-# decode tri1
-local/decode.sh steps/decode_deltas.sh exp/tri1/decode || exit 1;
-
-# align tri1
-steps/align_deltas.sh --graphs "ark,s,cs:gunzip -c exp/tri1/graphs.fsts.gz|" \
-    data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
-
-# train tri2a [delta+delta-deltas]
-steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a || exit 1;
-
-# decode tri2a
-local/decode.sh steps/decode_deltas.sh exp/tri2a/decode || exit 1;
--- a/egs/rm/s4/scripts/add_disambig.pl
+++ b/egs/rm/s4/scripts/add_disambig.pl
@@ -1,58 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Adds some specified number of disambig symbols to a symbol table.
-# Adds these as #1, #2, etc.
-# If the --include-zero option is specified, includes an extra one
-# #0.
-if(!(@ARGV == 2 || (@ARGV ==3 && $ARGV[0] eq "--include-zero"))) {
-    die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
-}
-
-if(@ARGV  == 3) {
-    $include_zero = 1;
-    $ARGV[0] eq "--include-zero" || die "Bad option/first argument $ARGV[0]";
-    shift @ARGV;
-} else {
-    $include_zero = 0;
-}
-
-$input = $ARGV[0];
-$nsyms = $ARGV[1];
-
-open(F, "<$input") || die "Opening file $input";
-
-while(<F>) {
-    @A = split(" ", $_);
-    @A == 2 || die "Bad line $_";
-    $lastsym = $A[1];
-    print;
-}
-
-if(!defined($lastsym)){
- die "Empty symbol file?";
-}
-
-if($include_zero) {
-    $lastsym++;
-    print "#0  $lastsym\n";
-}
-
-for($n = 1; $n <= $nsyms; $n++) {
-    $y = $n + $lastsym;
-    print "#$n  $y\n";
-}
--- a/egs/rm/s4/scripts/add_lex_disambig.pl
+++ b/egs/rm/s4/scripts/add_lex_disambig.pl
@@ -1,101 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Adds disambiguation symbols to a lexicon.
-# Outputs still in the normal lexicon format.
-# Disambig syms are numbered #1, #2, #3, etc. (#0 
-# reserved for symbol in grammar).
-# Outputs the number of disambig syms to the standard output.
-
-if(@ARGV != 2) {
-    die "Usage: add_lex_disambig.pl [ --sil silphone ] lexicon.txt lexicon_disambig.txt "
-}
-
-
-$lexfn = shift @ARGV;
-$lexoutfn = shift @ARGV;
-
-open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
-
-# (1)  Read in the lexicon.
-@L = ( );
-while(<L>) {
-    @A = split(" ", $_);
-    push @L, join(" ", @A);
-}
-
-# (2) Work out the count of each phone-sequence in the
-# lexicon.
-
-foreach $l (@L) {
-    @A = split(" ", $l);
-    shift @A; # Remove word.
-    $count{join(" ",@A)}++;
-}
-
-# (3) For each left sub-sequence of each phone-sequence, note down
-# that exists (for identifying prefixes of longer strings).
-
-foreach $l (@L) {
-    @A = split(" ", $l);
-    shift @A; # Remove word.
-    while(@A > 0) {
-        pop @A;  # Remove last phone
-        $issubseq{join(" ",@A)} = 1;
-    }
-}
-
-# (4) For each entry in the lexicon:
-#  if the phone sequence is unique and is not a
-#  prefix of another word, no diambig symbol.
-#  Else output #1, or #2, #3, ... if the same phone-seq
-#  has already been assigned a disambig symbol.
-
-
-open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";
-
-$max_disambig = 0;
-foreach $l (@L) {
-    @A = split(" ", $l);
-    $word = shift @A;
-    $phnseq = join(" ",@A);
-    if(!defined $issubseq{$phnseq}
-       && $count{$phnseq}==1) {
-        ; # Do nothing.
-    } else {
-        if($phnseq eq "") { # need disambig symbols for the empty string
-            # that are not used anywhere else.
-            $max_disambig++;
-            $reserved{$max_disambig} = 1;
-            $phnseq = "#$max_disambig";
-        } else {
-            $curnumber = $disambig_of{$phnseq};
-            if(!defined{$curnumber}) { $curnumber = 0; }
-            $curnumber++; # now 1 or 2, ... 
-            while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
-            if($curnumber > $max_disambig) {
-                $max_disambig = $curnumber;
-            }
-            $disambig_of{$phnseq} = $curnumber;
-            $phnseq = $phnseq . " #" . $curnumber;
-         }
-    }
-    print O "$word\t$phnseq\n";
-}
-
-print $max_disambig . "\n";
-
--- a/egs/rm/s4/scripts/filter_scp.pl
+++ b/egs/rm/s4/scripts/filter_scp.pl
@@ -1,40 +0,0 @@
-#!/usr/bin/perl -w
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This script takes a list of utterance-ids and filters an scp
-# file (or any file whose first field is an utterance id), printing
-# out only those lines whose first field is in id_list.
-
-if(@ARGV < 1 || @ARGV > 2) {
-    die "Usage: filter_scp.pl id_list [in.scp] > out.scp ";
-}
-
-$idlist = shift @ARGV;
-open(F, "<$idlist") || die "Could not open id-list file $idlist";
-while(<F>) {
-    @A = split;
-    @A>=1 || die "Invalid id-list file line $_";
-    $seen{$A[0]} = 1;
-}
-
-while(<>) {
-    @A = split;
-    @A > 0 || die "Invalid scp file line $_";
-    if($seen{$A[0]}) {
-        print $_;
-    }
-}
--- a/egs/rm/s4/scripts/int2sym.pl
+++ b/egs/rm/s4/scripts/int2sym.pl
@@ -1,90 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-$ignore_noninteger = 0;
-$ignore_first_field = 0;
-$field = -1;
-for($x = 0; $x < 2; $x++) {
-    if($ARGV[0] eq "--ignore-noninteger") { $ignore_noninteger = 1; shift @ARGV; }
-    if($ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
-    if($ARGV[0] eq "--field") { 
-       shift @ARGV; $field = $ARGV[0]+0; shift @ARGV;
-       if ($field < 1) { die "Bad argument to --field option: $field"; }
-    }
-}
-
-if ($ignore_first_field && $field > 0) { die "Incompatible options ignore-first-field and field"; }
-$zfield = $field-1; # Change to zero-based indexing.
-
-$symtab = shift @ARGV;
-if(!defined $symtab) {
-    die "Usage: sym2int.pl symtab [input] > output\n";
-}
-open(F, "<$symtab") || die "Error opening symbol table file $symtab";
-while(<F>) {
-    @A = split(" ", $_);
-    @A == 2 || die "bad line in symbol table file: $_";
-    $int2sym{$A[1]} = $A[0];
-}
-
-sub int2sym {
-    my $a = shift @_;
-    my $pos = shift @_;
-    if($a !~  m:^\d+$:) { # not all digits..
-        if($ignore_noninteger) {
-            print $a . " ";
-            next;
-        } else {
-            if($pos == 0) {
-                die "int2sym.pl: found noninteger token $a (try --ignore-first-field)\n";
-            } else {
-                die "int2sym.pl: found noninteger token $a (try --ignore-noninteger if valid input)\n";
-            }
-        }
-    }
-    $s = $int2sym{$a};
-    if(!defined ($s)) {
-        die "int2sym.pl: integer $a not in symbol table $symtab.";
-    }
-    return $s;
-}
-
-$error = 0;
-while(<>) {
-    @A = split(" ", $_);
-    if($ignore_first_field) {
-        $key = shift @A;
-        print $key . " ";
-    }
-    if ($field != -1) {
-        if ($zfield <= $#A && $zfield >= 0) {
-            $a = $A[$zfield];
-            $A[$zfield] = int2sym($a, $zfield);
-        }
-        print join(" ", @A);
-    } else {
-        for ($pos = 0; $pos <= $#A; $pos++) {
-            $a = $A[$pos];
-            $s = int2sym($a, $pos);
-            print $s . " ";
-        }
-    }
-    print "\n";
-}
-
-
-
--- a/egs/rm/s4/scripts/make_lexicon_fst.pl
+++ b/egs/rm/s4/scripts/make_lexicon_fst.pl
@@ -1,122 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# makes lexicon FST (no pron-probs involved).
-
-if(@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
-    die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] lexiconfst.txt"
-}
-
-$lexfn = shift @ARGV;
-if(@ARGV == 0) {
-    $silprob = 0.0;
-} elsif (@ARGV == 2){ 
-    ($silprob,$silphone) = @ARGV;
-} else {
-    ($silprob,$silphone,$sildisambig) = @ARGV;
-}
-if($silprob != 0.0) {
-    $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
-    $silcost = -log($silprob);
-    $nosilcost = -log(1.0 - $silprob);
-}
-
-
-open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
-
-
-
-sub is_sil {
-    # Return true (1) if provided with a phone-sequence
-    # that means silence.
-    # @_ is the parameters of the function
-    # This function returns true if @_ equals ( $silphone )
-    # or something of the form ( "#0", $silphone, "#1" )
-    # where the "#0" and "#1" are disambiguation symbols.
-    return ( @_ == 1 && $_[0] eq $silphone ||
-             (@_ == 3 && $_[1] eq $silphone &&
-              $_[0] =~ m/^\#\d+$/ &&
-              $_[0] =~ m/^\#\d+$/));
-}
-
-if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
-    $loopstate = 0;
-    $nexststate = 1; # next unallocated state.
-    while(<L>) {
-        @A = split(" ", $_);
-        $w = shift @A;
-
-        $s = $loopstate;
-        $word_or_eps = $w;
-        while (@A > 0) {
-            $p = shift @A;
-            if(@A > 0) {
-                $ns = $nextstate++;
-            } else {
-                $ns = $loopstate;
-            }
-            print "$s\t$ns\t$p\t$word_or_eps\n";
-            $word_or_eps = "<eps>";
-            $s = $ns;
-        }
-    }
-    print "$loopstate\t0\n"; # final-cost.
-} else { # have silence probs.
-    $startstate = 0;
-    $loopstate = 1;
-    $silstate = 2; # state from where we go to loopstate after emitting silence.
-    print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
-    if (!defined $sildisambig) {
-        print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
-        print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
-        $nextstate = 3;
-    } else {
-        $disambigstate = 3;
-        $nextstate = 4;
-        print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
-        print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
-        print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
-    }
-    while(<L>) {
-        @A = split(" ", $_);
-        $w = shift @A;
-
-        $s = $loopstate;
-        $word_or_eps = $w;
-        while (@A > 0) {
-            $p = shift @A;
-            if(@A > 0) {
-                $ns = $nextstate++;
-                print "$s\t$ns\t$p\t$word_or_eps\n";
-                $word_or_eps = "<eps>";
-                $s = $ns;
-            } else {
-                if(!is_sil(@A)){
-                    # This is non-deterministic but relatively compact,
-                    # and avoids epsilons.
-                    print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
-                    print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
-                } else {
-                    # no point putting opt-sil after silence word.
-                    print "$s\t$loopstate\t$p\t$word_or_eps\n";
-                }
-                $word_or_eps = "<eps>";
-            }
-        }            
-    }
-    print "$loopstate\t0\n"; # final-cost.
-}
--- a/egs/rm/s4/scripts/make_phones_symtab.pl
+++ b/egs/rm/s4/scripts/make_phones_symtab.pl
@ -1,37 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# make_phones_symtab.pl < lexicon.txt > phones.txt
-
-
-# Collect every phone symbol: the fields from the third one onward on each
-# input line.
-while (my $line = <>) {
-    my @fields = split(" ", $line);
-    $P{$_} = 1 foreach @fields[2 .. $#fields];
-}
-
-# <eps> always gets id 0; the phones that were seen are numbered from 1 in
-# sorted order.
-print "<eps>\t0\n";
-$n = 1;
-foreach my $phone (grep { $_ ne "<eps>" } sort keys %P) {
-    print "$phone\t$n\n";
-    $n++;
-}
-
-# sil is written last, with the next free id.
-print "sil\t$n\n";
-
--- a/egs/rm/s4/scripts/make_rm_lm.pl
+++ b/egs/rm/s4/scripts/make_rm_lm.pl
@ -1,119 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright 2010-2011 Yanmin Qian  Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This file takes as input the file wp_gram.txt that comes with the RM
-# distribution, and creates the language model as an acceptor in FST form.
-
-# make_rm_lm.pl   wp_gram.txt > G.txt
-
-# Exactly one argument is expected: the word-pair grammar file (wp_gram.txt).
-if (@ARGV != 1) {
-    print "usage: make_rm_lm.pl  wp_gram.txt > G.txt\n";
-    exit(0);
-}
-# Use the scalar element $ARGV[0] (not the one-element slice @ARGV[0]) and the
-# three-argument form of open, so the file name is never parsed as an open mode.
-unless (open(IN_FILE, "<", $ARGV[0])) {
-    die ("can't open $ARGV[0]");
-}
-
-
-$flag = 0;       # becomes 1 once the first ">WORD" header line has been seen
-$count_wrd = 0;  # number of ">WORD" header lines read so far
-$cnt_ends = 0;   # number of successor lines that match SENTENCE-END
-$init = "";      # header word whose successor list is currently being read
-
-while ($line = <IN_FILE>)
-{
-	# chomp rather than chop: strip only a trailing newline, so a last line
-	# that lacks one does not lose its final character.
-	chomp($line);
-	$line =~ s/ //g;
-
-	if ($line =~ /^>/)
-	{
-		# Header line: close the previous word's list and start a new one.
-		$flag = 1;
-		$line =~ s/>//g;
-		$hashcnt{$init} = $i;               # successor count of the previous word
-		$init = $line;
-		$i = 0;
-		$count_wrd++;
-		$LineArray[$count_wrd - 1] = $init; # header words, in file order
-		$hashwrd{$init} = 0;                # 0 means no FST state assigned yet
-	}
-	elsif ($flag != 0)
-	{
-		# Successor line: append it to the current word's list.
-		$hash{$init}[$i] = $line;
-		$i++;
-		if ($line =~ /SENTENCE-END/)
-		{
-			$cnt_ends++;
-		}
-	}
-	# Lines before the first header are ignored.
-}
-
-# Record the successor count of the last word in the file.
-$hashcnt{$init} = $i;
-
-$num = 0;
-$weight = 0;
-$init_wrd = "SENTENCE-END";
-$hashwrd{$init_wrd} = @LineArray; # array in scalar context: the number of ">" header words read
-for($i = 0; $i < $hashcnt{$init_wrd}; $i++)
-{
-	$weight = -log(1/$hashcnt{$init_wrd}); # each successor equally likely, written as a negative log
-	$hashwrd{$hash{$init_wrd}[$i]} = $i + 1; # successors of SENTENCE-END get states 1, 2, ...
-	print "0    $hashwrd{$hash{$init_wrd}[$i]}    $hash{$init_wrd}[$i]    $hash{$init_wrd}[$i]    $weight\n";
-}
-$num = $i; # last state number assigned by the loop above
-
-for($i = 0; $i < @LineArray; $i++)
-{
-	if(@LineArray[$i] eq 'SENTENCE-END')
-	{}
-	else
-	{
-		if($hashwrd{@LineArray[$i]} == 0) # this word has no state yet
-		{
-			$num++;
-			$hashwrd{@LineArray[$i]} = $num;
-		}
-		for($j = 0; $j < $hashcnt{@LineArray[$i]}; $j++)
-		{
-			$weight = -log(1/$hashcnt{@LineArray[$i]});
-			if($hashwrd{$hash{@LineArray[$i]}[$j]} == 0) # this successor has no state yet
-			{
-				$num++;
-				$hashwrd{$hash{@LineArray[$i]}[$j]} = $num;
-			}
-			if($hash{@LineArray[$i]}[$j] eq 'SENTENCE-END')
-			{
-				print "$hashwrd{@LineArray[$i]}    $hashwrd{$hash{@LineArray[$i]}[$j]}    <eps>    <eps>    $weight\n"
-                }
-			else
-			{
-				print "$hashwrd{@LineArray[$i]}    $hashwrd{$hash{@LineArray[$i]}[$j]}    $hash{@LineArray[$i]}[$j]    $hash{@LineArray[$i]}[$j]    $weight\n";
-			}
-		}
-	}
-}
-
-print "$hashwrd{$init_wrd}    0\n"; # state line for SENTENCE-END with cost 0 (presumably the final state)
-close(IN_FILE);
-
-
--- a/egs/rm/s4/scripts/make_roots.pl
+++ b/egs/rm/s4/scripts/make_roots.pl
@ -1,102 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Written by Dan Povey 9/21/2010.  Apache 2.0 License.
-
-# This version of make_roots.pl is specialized for RM.
-
-# This script creates the file roots.txt which is an input to train-tree.cc.  It
-# specifies how the trees are built.  The input file phone-sets.txt is a partial
-# version of roots.txt in which phones are represented by their spelled form, not
-# their symbol id's.  E.g. at input, phone-sets.txt might contain;
-#  shared not-split  sil
-# Any phones not specified in phone-sets.txt but present in phones.txt will
-# be given a default treatment.  If the --separate option is given, we create
-# a separate tree root for each of them, otherwise they are all lumped in one set.
-# The arguments shared|not-shared and split|not-split are needed if any
-# phones are not specified in phone-sets.txt.  What they mean is as follows:
-# if shared=="shared" then we share the tree-root between different HMM-positions
-# (0,1,2).  If split=="split" then we actually do decision tree splitting on
-# that root, otherwise we forbid decision-tree splitting.  (The main reason we
-# might choose "not-split" is for silence, when we want to ensure that each
-# HMM-position remains with a single PDF id.)
-
-
-# Optional leading flag: --separate gives every non-silence phone its own root.
-$separate = 0;
-if(@ARGV > 0 && $ARGV[0] eq "--separate") {
-    $separate = 1;
-    shift @ARGV;
-}
-
-if(@ARGV != 4) {
-    die "Usage: make_roots.pl [--separate] phones.txt silence-phone-list[integer,colon-separated] shared|not-shared split|not-split > roots.txt\n";
-}
-
-
-($phonesfile, $silphones, $shared, $split) = @ARGV;
-if($shared ne "shared" && $shared ne "not-shared") {
-    die "Third argument must be \"shared\" or \"not-shared\"\n";
-}
-# $split is the fourth positional argument, so the message names it as such.
-if($split ne "split" && $split ne "not-split") {
-    die "Fourth argument must be \"split\" or \"not-split\"\n";
-}
-
-
-
-# Read the phone symbol table ("symbol id" on each line); id 0 is skipped.
-open(F, "<$phonesfile") || die "Opening file $phonesfile";
-
-while(<F>) {
-    @A = split(" ", $_);
-    if(@A != 2) {
-        die "Bad line in phones symbol file: ".$_;
-    }
-    if($A[1] != 0) {
-        $symbol2id{$A[0]} = $A[1];
-        $id2symbol{$A[1]} = $A[0];
-    }
-}
-close(F);
-
-# String comparison: a numeric == would also treat any list whose numeric
-# value is 0 (for example a non-numeric one) as if it were empty.
-if($silphones eq ""){ 
-    die "Empty silence phone list in make_roots.pl";
-}
-foreach $silphoneid (split(":", $silphones)) {
-    defined $id2symbol{$silphoneid} || die "No such silence phone id $silphoneid";
-    # Give each silence phone its own separate pdfs in each state, but
-    # no sharing (in this recipe; WSJ is different.. in this recipe there
-    # is only one silence phone anyway.)
-    $issil{$silphoneid} = 1;
-    print "not-shared not-split $silphoneid\n";
-}
-
-# Ids of all non-silence phones, in numeric order so that the output is the
-# same on every run (hash key order is not stable).
-@nonsil_ids = sort { $a <=> $b } grep { !defined $issil{$_} } keys %id2symbol;
-
-if($separate){
-    # One root line per phone.
-    foreach $id (@nonsil_ids) {
-        print "$shared $split $id\n";
-    }
-} else {
-    # A single root line covering all of them; every id is followed by a space.
-    print "$shared $split ";
-    foreach $id (@nonsil_ids) {
-        print "$id ";
-    }
-    print "\n";
-}
--- a/egs/rm/s4/scripts/mkgraph.sh
+++ b/egs/rm/s4/scripts/mkgraph.sh
@ -1,112 +0,0 @@
-#!/bin/bash
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-N=3          # passed to fstcomposecontext as --context-size
-P=1          # passed to fstcomposecontext as --central-position
-clean=false  # set by --clean: remove $lang/tmp before building
-
-# Accept --mono and --clean in either order.  "$1" is quoted so the test stays
-# well-formed when no arguments are left.
-for x in 1 2 3; do 
-  if [ "$1" == "--mono" ]; then
-    N=1;
-    P=0;
-    shift;
-  fi
-  if [ "$1" == "--clean" ]; then
-    clean=true
-    shift;
-  fi
-
-done
-
-if [ $# != 3 ]; then
-   echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
-   echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
-   exit 1;
-fi
-
-if [ -f path.sh ]; then . path.sh; fi
-
-lang=$1
-tree=$2/tree
-model=$2/final.mdl
-dir=$3
-
-# --clean: discard cached intermediate FSTs.  -f keeps this quiet when the
-# directory does not exist yet; ${lang:?} aborts rather than touching /tmp
-# if the lang directory argument is empty.
-if $clean; then rm -rf -- "${lang:?}/tmp"; fi
-
-mkdir -p $dir
-
-tscale=1.0     # used as --transition-scale for make-h-transducer
-loopscale=0.1  # used as --self-loop-scale for add-self-loops
-
-# Rebuild $lang/tmp/LG.fst if it is missing or older than either of its sources
-# (G.fst, L_disambig.fst).  Note: [[ ]] lets us use || inside the test (inside
-# [ ] we would need -o); -f means "file exists" and -ot means "older than".
-
-mkdir -p $lang/tmp
-if [[ ! -f $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \
-      $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then
-  fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \
-    fstminimizeencoded  > $lang/tmp/LG.fst || exit 1;
-  fstisstochastic $lang/tmp/LG.fst || echo "warning: LG not stochastic."
-fi
-
-if [ ! -f $lang/phones_disambig.txt ]; then
-  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
-  exit 1;
-fi
-
-grep '#' $lang/phones_disambig.txt | awk '{print $2}' > $lang/tmp/disambig_phones.list  # second column of every line containing '#'
-
-
-clg=$lang/tmp/CLG_${N}_${P}.fst
-
-if [[ ! -f $clg || $clg -ot $lang/tmp/LG.fst ]]; then
-  fstcomposecontext --context-size=$N --central-position=$P \
-   --read-disambig-syms=$lang/tmp/disambig_phones.list \
-   --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.list \
-    $lang/tmp/ilabels_${N}_${P} < $lang/tmp/LG.fst >$clg
-  fstisstochastic $clg  || echo "warning: CLG not stochastic."
-fi
-
-if [[ ! -f $dir/Ha.fst || $dir/Ha.fst -ot $model ]]; then  # rebuild Ha.fst if missing or older than the model
-  make-h-transducer --disambig-syms-out=$dir/disambig_tid.list \
-    --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \
-     > $dir/Ha.fst  || exit 1;
-fi
-
-if [[ ! -f $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \
-      $dir/HCLGa.fst -ot $clg ]]; then  # rebuild HCLGa.fst if missing or older than Ha.fst or CLG
-  fsttablecompose $dir/Ha.fst $clg | fstdeterminizestar --use-log=true \
-    | fstrmsymbols $dir/disambig_tid.list | fstrmepslocal | \
-     fstminimizeencoded > $dir/HCLGa.fst || exit 1;
-  fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic"  # warning only; does not stop the script
-fi
-
-if [[ ! -f $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then  # rebuild HCLG.fst if missing or older than HCLGa.fst
-  add-self-loops --self-loop-scale=$loopscale --reorder=true \
-    $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1;
-
-  if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then
-    # No point doing this test unless both the transition scale and the
-    # self-loop scale are 1, as it is bound to fail otherwise.
-    fstisstochastic $dir/HCLG.fst || echo "Final HCLG is not stochastic."
-  fi
-fi
-
-
-# to make const fst:
-# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst
-
--- a/egs/rm/s4/scripts/silphones.pl
+++ b/egs/rm/s4/scripts/silphones.pl
@ -1,57 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# creates integer lists of silence and non-silence phones in files,
-# e.g. silphones.csl="1:2:3 \n"
-# and nonsilphones.csl="4:5:6:7:...:24\n";
-
-if (scalar(@ARGV) != 4) {
-    die "Usage: silphones.pl phones.txt \"sil1 sil2 sil3\" silphones.csl nonsilphones.csl";
-}
-
-($symtab, $sillist, $silphones, $nonsilphones) = @ARGV;
-open(S,"<$symtab") || die "Opening symbol table $symtab";
-
-# Mark every phone named in the space-separated silence list.
-%issil = map { $_ => 1 } split(" ", $sillist);
-
-# Walk the symbol table and sort each non-zero id into one of two lists.
-@sil = ();
-@nonsil = ();
-while (my $entry = <S>) {
-    my @cols = split(" ", $entry);
-    @cols == 2 || die "Bad line $entry in phone-symbol-table file $symtab";
-    my ($sym, $int) = @cols;
-    next if $int == 0;
-    if ($issil{$sym}) {
-        push @sil, $int;
-        $seensil{$sym} = 1;
-    } else {
-        push @nonsil, $int;
-    }
-}
-
-# Every requested silence phone must have appeared in the symbol table.
-foreach my $name (keys %issil) {
-    die "No such silence phone $name" unless $seensil{$name};
-}
-
-# Write the two colon-separated id lists, one line per file.
-open(F, ">$silphones") || die "opening silphones file $silphones";
-open(G, ">$nonsilphones") || die "opening nonsilphones file $nonsilphones";
-print F join(":", @sil) . "\n";
-print G join(":", @nonsil) . "\n";
-close(F);
-close(G);
-
-# An empty list is only warned about, after the files have been written.
-print STDERR "Warning: silphones.pl no silence phones.\n" if @sil == 0;
-print STDERR "Warning: silphones.pl no non-silence phones.\n" if @nonsil == 0;
-
--- a/egs/rm/s4/scripts/subset_data_dir.sh
+++ b/egs/rm/s4/scripts/subset_data_dir.sh
@ -1,99 +0,0 @@
-#!/bin/bash
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# This script operates on a directory, such as in data/train/,
-# that contains some subset of the following files:
-#  feats.scp
-#  wav.scp
-#  spk2utt
-#  utt2spk
-#  text
-# It creates a subset of that data, consisting of some specified
-# number of utterances.  (The selected utterances are distributed
-# evenly throughout the file, by the program ./subset_scp.pl).
-
-# If you give the --per-spk option, it will attempt to select
-# the supplied number of utterances for each speaker (typically
-# you would supply a much smaller number in this case).
-
-perspk=false
-if [ "$1" == "--per-spk" ]; then
-  perspk=true;
-  shift;
-fi
-
-if [ $# != 3 ]; then
-  echo "Usage: subset_data_dir.sh [--per-spk] <srcdir> <num-utt> <destdir>"
-  exit 1;
-fi
-
-srcdir=$1
-numutt=$2
-destdir=$3
-
-
-if [ ! -f $srcdir/feats.scp ]; then
-  echo "subset_data_dir.sh: no such file $srcdir/feats.scp" 
-  exit 1;
-fi
-
-
-## scripting note: $perspk holds the string "true" or "false", so "if $perspk"
-## runs the command of that name.
-if $perspk; then
-  mkdir -p $destdir
-  # For each line of spk2utt (speaker followed by its utterances): grow the
-  # stride "skip" while n*(skip+1) utterances are still available, then print
-  # the speaker and at most n utterances taken skip apart.
-  awk '{ n='$numutt'; printf("%s ",$1); skip=1; while(n*(skip+1) <= NF-1) { skip++; }
-         for(x=2; x<=NF && x <= n*skip; x += skip) { printf("%s ", $x); } 
-         printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt
-  scripts/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk
-  scripts/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
-  [ -f $srcdir/wav.scp ] && scripts/filter_scp.pl $destdir/feats.scp <$srcdir/wav.scp >$destdir/wav.scp
-  [ -f $srcdir/text ] && scripts/filter_scp.pl $destdir/feats.scp <$srcdir/text >$destdir/text
-  [ -f $srcdir/spk2gender ] && scripts/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
-  srcutts=`cat $srcdir/utt2spk | wc -l`
-  destutts=`cat $destdir/utt2spk | wc -l`
-  echo "Retained $numutt utterances per speaker from data-dir $srcdir and put it in $destdir, reducing #utt from $srcutts to $destutts"
-  exit 0;
-else
-  if [ $numutt -gt `cat $srcdir/feats.scp | wc -l` ]; then
-    echo "subset_data_dir.sh: cannot subset to more utterances than you originally had."
-    exit 1;
-  fi 
-
-  mkdir -p $destdir || exit 1;
-
-  # create feats.scp
-  scripts/subset_scp.pl $numutt $srcdir/feats.scp > $destdir/feats.scp || exit 1;
- 
-  # create mfc.scp.  Test for mfc.scp itself: the file being filtered is
-  # mfc.scp, so checking for wav.scp would drop mfc.scp (or fail) depending on
-  # an unrelated file.
-  if [ -f $srcdir/mfc.scp ]; then
-    scripts/filter_scp.pl $destdir/feats.scp $srcdir/mfc.scp > $destdir/mfc.scp || exit 1;
-  else
-    rm -f $destdir/mfc.scp
-  fi
-
-  if [ -f $srcdir/utt2spk ]; then
-    scripts/filter_scp.pl $destdir/feats.scp $srcdir/utt2spk > $destdir/utt2spk|| exit 1;
-    scripts/utt2spk_to_spk2utt.pl $destdir/utt2spk > $destdir/spk2utt || exit 1;
-  fi
-
-  [ -f $srcdir/text ] && scripts/filter_scp.pl $destdir/feats.scp <$srcdir/text >$destdir/text
-
-  [ -f $srcdir/spk2gender ] && scripts/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
-
-  echo "Created a $numutt-utterance subset of $srcdir and put it in $destdir."
-
-  exit 0;
-fi
--- a/egs/rm/s4/scripts/subset_scp.pl
+++ b/egs/rm/s4/scripts/subset_scp.pl
@ -1,59 +0,0 @@
-#!/usr/bin/perl -w
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This program selects a subset of N elements in the scp.
-# It selects them evenly from throughout the scp, in order to
-# avoid selecting too many from the same speaker.
-# It prints them on the standard output.
-
-# Two positional arguments are required: the subset size and the input scp.
-die "Usage: subset_scp.pl N in.scp " if @ARGV < 2;
-
-# A size that is numerically zero is rejected.
-$N = shift @ARGV;
-die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\"" if $N == 0;
-
-$inscp = shift @ARGV;
-open(I, "<$inscp") || die "Opening input scp file $inscp";
-
-# Read the whole scp; each element of @F is one line, newline included.
-@F = <I>;
-$numlines = scalar(@F);
-die "You requested from subset_scp.pl more elements than available: $N > $numlines" if $N > $numlines;
-
-# select_n($start, $end, $num_needed): print $num_needed lines of @F chosen
-# from the half-open index range [$start, $end).  The range is halved
-# recursively and the quota is divided between the halves, the first half
-# receiving the rounded-down share.
-sub select_n {
-    my ($lo, $hi, $want) = @_;
-    my $span = $hi - $lo;
-    die "select_n: code error" if $want > $span;
-    if ($span != 1) {
-        my $mid = $lo + int($span/2);
-        my $first_share = int($want/2);
-        select_n($lo, $mid, $first_share);
-        select_n($mid, $hi, $want - $first_share);
-        return;
-    }
-    # A single line remains: print it only if one is still wanted.
-    print $F[$lo] if $want > 0;
-}
-select_n(0, $numlines, $N);
-
--- a/egs/rm/s4/scripts/sym2int.pl
+++ b/egs/rm/s4/scripts/sym2int.pl
@ -1,82 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-$ignore_oov = 0;
-$ignore_first_field = 0;
-# Three passes over the front of @ARGV, so that the three options may be
-# given in any order.
-foreach $pass (1 .. 3) {
-    # Note: it will just print OOVS unmodified if you specify --ignore-oov.
-    # Else will complain and put nothing out.
-    if($ARGV[0] eq "--ignore-oov") { $ignore_oov = 1; shift @ARGV; } 
-    if($ARGV[0] eq "--ignore-first-field") { $ignore_first_field = 1; shift @ARGV; }
-    if($ARGV[0] eq "--map-oov") { shift @ARGV; $map_oov = shift @ARGV; }
-}
-
-# The first remaining argument is the symbol table; anything after it is left
-# in @ARGV.
-$symtab = shift @ARGV;
-die "Usage: sym2int.pl symtab [input transcriptions] > output transcriptions\n" unless defined $symtab;
-
-# Load the table: each line is "symbol integer"; the integer is stored numerically.
-open(F, "<$symtab") || die "Error opening symbol table file $symtab";
-while (my $entry = <F>) {
-    my @pair = split(" ", $entry);
-    @pair == 2 || die "bad line in symbol table file: $entry";
-    my ($sym, $id) = @pair;
-    $sym2int{$sym} = $id + 0;
-}
-
-$num_warning = 0;
-$max_warning = 20; # at most this many OOV replacements are reported on stderr
-$error = 0; # never changed below, so the script exits with status 0 after the loop
-while(<>) {
-    @A = split(" ", $_);
-    if(@A == 0) {
-        die "Empty line in transcriptions input.";
-    }
-    if($ignore_first_field) {
-        $key = shift @A; # first field is copied through unchanged
-        print $key . " ";
-    }
-    @B = ();
-    foreach $a (@A) {
-        $i = $sym2int{$a};
-        if(!defined ($i)) { # symbol is not in the table
-            if (defined $map_oov) {
-                if (!defined $sym2int{$map_oov}) {
-                    die "sym2int.pl: invalid map-oov option $map_oov (symbol not defined in $symtab)";
-                }
-                if ($num_warning++ < $max_warning) {
-                    print STDERR "sym2int.pl: replacing $a with $map_oov\n";
-                    if ($num_warning == $max_warning) {
-                        print STDERR "sym2int.pl: not warning for OOVs any more times\n";
-                    }
-                }
-                $i = $sym2int{$map_oov};
-            } elsif($ignore_oov) {
-                $i = $a; # just print them out unmodified..
-            } else {
-                die "sym2int.pl: undefined symbol $a\n";
-            }
-        }
-        push @B, $i;
-    }
-    print join(" ", @B);
-    print "\n";
-}
-
-if($error) { exit(1); }
-else { exit(0); }
-
-
-
--- a/egs/rm/s4/steps/align_deltas.sh
+++ b/egs/rm/s4/steps/align_deltas.sh
@ -1,78 +0,0 @@
-#!/bin/bash
-# Copyright 2010-2011 Microsoft Corporation  Arnab Ghoshal
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# To be run from ..
-
-# This script does training-data alignment given a model built using 
-# CMN + delta + delta-delta features.  Its output, all in its own
-# experimental directory, is cmvn.ark, ali, tree, and final.mdl 
-# (the last two are just copied from the source directory). 
-
-# Option to use precompiled graphs from last phase, if these
-# are available (i.e. if they were built with the same data).
-
-graphs=  # stays empty unless --graphs <value> is given as the first option
-if [ "$1" == --graphs ]; then
-   shift;
-   graphs=$1  # passed unchanged to gmm-align-compiled below
-   shift
-fi
-
-
-if [ $# != 4 ]; then
-   echo "Usage: steps/align_deltas.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
-   echo " e.g.: steps/align_deltas.sh data/train data/lang exp/tri1 exp/tri1_ali"
-   exit 1;
-fi
-
-if [ -f path.sh ]; then . path.sh; fi
-
-data=$1
-lang=$2
-srcdir=$3
-dir=$4
-
-
-
-mkdir -p $dir
-cp $srcdir/{tree,final.mdl,final.occs} $dir || exit 1;  # Create copy of the tree and model and occs...
-
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"  # used by both alignment commands below
-
-
-
-echo "Computing cepstral mean and variance statistics"
-compute-cmvn-stats scp:$data/feats.scp \
-     ark:$dir/cmvn.ark 2>$dir/cmvn.log || exit 1;
-
-feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"  # feature pipeline: apply CMVN, then add deltas
-
-# Align all training data using the supplied model.
-
-echo "Aligning all training data"
-if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
-  # compute integer form of transcripts.
-  scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
-    || exit 1;
-  gmm-align $scale_opts --beam=8 --retry-beam=40 $dir/tree $dir/final.mdl $lang/L.fst \
-   "$feats" ark:$dir/train.tra ark:$dir/ali 2> $dir/align.log || exit 1;
-  rm $dir/train.tra  # the integer transcripts were only needed by gmm-align
-else
-  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/final.mdl \
-   "$graphs" "$feats" ark:$dir/ali 2> $dir/align.log || exit 1;
-fi
-
-echo "Done."
--- a/egs/rm/s4/steps/decode_deltas.sh
+++ b/egs/rm/s4/steps/decode_deltas.sh
@ -1,77 +0,0 @@
-#!/bin/bash
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# Decoding script that works with a GMM model and delta-delta plus
-# cepstral mean subtraction features.  Used, for example, to decode
-# mono/ and tri1/
-# This script generates lattices and rescores them with different
-# acoustic weights, in order to explore a range of different
-# weights.
-
-if [ $# != 4 ]; then
-   echo "Usage: steps/decode_deltas.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
-   echo " e.g.: steps/decode_deltas.sh exp/mono data/test_feb89 data/lang_test exp/mono/decode/feb89"
-   exit 1;
-fi
-
-srcdir=$1
-data=$2
-lang=$3
-dir=$4
-graphdir=$srcdir/graph  # the decoding graph is expected under the model directory
-
-mkdir -p $dir
-
-if [ -f path.sh ]; then . path.sh; fi
-
-if [ ! -f $srcdir/final.mdl ]; then
-   echo No model file $srcdir/final.mdl
-   exit 1;
-fi
-
-if [[ ! -f $graphdir/HCLG.fst || $graphdir/HCLG.fst -ot $srcdir/final.mdl ]]; then
-   echo "Graph $graphdir/HCLG.fst does not exist or is too old."
-   exit 1;
-fi
-
-# We only do one decoding pass, so there is no point caching the
-# CMVN stats-- we make them part of a pipe.
-feats="ark:compute-cmvn-stats scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false  ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
-
-# For Resource Management, we use beam of 20 and acwt of 1/10.
-# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
-# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
-
-gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
-  $srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
-  ark,t:$dir/test.tra ark,t:$dir/test.ali \
-     2> $dir/decode.log || exit 1;
-
-# In this setup there are no non-scored words, so
-# scoring is simple.
-
-# Now rescore lattices with various acoustic scales, and compute the WER.
-for inv_acwt in 4 5 6 7 8 9 10; do
-  acwt=`perl -e "print (1.0/$inv_acwt);"`  # acoustic scale = 1 / inv_acwt
-  lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
-     "ark:gunzip -c $dir/lat.gz|" ark,t:$dir/${inv_acwt}.tra \
-     2>$dir/rescore_${inv_acwt}.log
-
-  scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
-   compute-wer --mode=present ark:-  ark,p:$dir/${inv_acwt}.tra \
-    >& $dir/wer_${inv_acwt}
-done
--- a/egs/rm/s4/steps/make_mfcc.sh
+++ b/egs/rm/s4/steps/make_mfcc.sh
@ -1,48 +0,0 @@
-#!/bin/bash 
-# Copyright 2012 Vassil Panayotov
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# To be run from .. (one directory up from here)
-
-if [ $# != 3 ]; then
-   echo "usage: make_mfcc.sh <data-dir> <log-dir> <abs-path-to-mfccdir>";
-   exit 1;
-fi
-
-if [ -f path.sh ]; then . path.sh; fi
-
-data=$1
-logdir=$2
-mfccdir=$3
-
-# use "name" as part of name of the archive.
-name=`basename $data`
-
-mkdir -p $mfccdir || exit 1;
-mkdir -p $logdir || exit 1;
-
-# Input list read by copy-feats with --sphinx-in=true.
-scp=$data/mfc.scp
-if [ ! -f $scp ]; then
-   # Report the file that is actually missing ($scp).
-   echo "make_mfcc.sh: no such file $scp";
-   exit 1;
-fi
-
-log=$logdir/make_mfcc.log
-
-# Write the archive and its feats.scp index.  Stop if copy-feats fails, so
-# that success is only reported when the conversion really succeeded.
-copy-feats --sphinx-in=true \
- scp:$scp ark,scp:$mfccdir/raw_mfcc_$name.ark,$data/feats.scp 2>$log \
- || { echo "make_mfcc.sh: copy-feats failed, see $log"; exit 1; }
-
-echo "Succeeded creating MFCC features for $name"
-
--- a/egs/rm/s4/steps/train_deltas.sh
+++ b/egs/rm/s4/steps/train_deltas.sh
@ -1,126 +0,0 @@
-#!/bin/bash
-# Copyright 2010-2011 Microsoft Corporation  Arnab Ghoshal
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# To be run from ..
-# Triphone model training, using delta-delta features and cepstral
-# mean normalization.  It starts from an existing directory (e.g.
-# exp/mono), supplied as an argument, which is assumed to be built using
-# the same type of features.
-
-if [ $# != 4 ]; then
-   echo "Usage: steps/train_deltas.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
-   echo " e.g.: steps/train_deltas.sh data/train data/lang exp/mono_ali exp/tri1"
-   exit 1;
-fi
-
-if [ -f path.sh ]; then . path.sh; fi
-
-data=$1
-lang=$2
-alidir=$3
-dir=$4
-
-if [ ! -f $alidir/final.mdl -o ! -f $alidir/ali ]; then
-  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
-  exit 1;
-fi
-
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"  # passed to gmm-align-compiled when realigning
-realign_iters="5 10 15 20";  # iterations on which the data is realigned
-silphonelist=`cat $lang/silphones.csl`  # passed to acc-tree-stats (--ci-phones) and to make_roots.pl
-numiters=25    # Number of iterations of training
-maxiterinc=15 # Last iter to increase #Gauss on.
-numleaves=1800 # target num-leaves in tree building.
-numgauss=$[$numleaves + $numleaves/2];  # starting num-Gauss.
-     # Initially mix up to avg. 1.5 Gauss/state ( a bit more
-     # than this, due to state clustering... then slowly mix 
-     # up to final amount.
-totgauss=9000 # Target #Gaussians
-incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
-
-mkdir -p $dir
-
-
-feats="ark:apply-cmvn --norm-vars=false ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
-
-
-
-echo "Accumulating tree stats"
-acc-tree-stats  --ci-phones=$silphonelist $alidir/final.mdl "$feats" \
-   ark:$alidir/ali $dir/treeacc 2> $dir/acc.tree.log  || exit 1;
-
-
-echo "Computing questions for tree clustering"
-
-cat $lang/phones.txt | awk '{print $NF}' | grep -v -w 0 > $dir/phones.list
-cluster-phones $dir/treeacc $dir/phones.list $dir/questions.txt 2> $dir/questions.log || exit 1;
-scripts/int2sym.pl $lang/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
-compile-questions $lang/topo $dir/questions.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
-
-# Have to make silence root not-shared because we will not split it.
-scripts/make_roots.pl --separate $lang/phones.txt $silphonelist shared split \
-    > $dir/roots.txt 2>$dir/roots.log || exit 1;
-
-
-echo "Building tree"
-build-tree --verbose=1 --max-leaves=$numleaves \
-    $dir/treeacc $dir/roots.txt \
-    $dir/questions.qst $lang/topo $dir/tree  2> $dir/train_tree.log || exit 1;
-
-gmm-init-model  --write-occs=$dir/1.occs  \
-    $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/init_model.log || exit 1;
-
-gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
-   2>$dir/mixup.log || exit 1;
-
-#rm $dir/treeacc
-
-# Convert the alignments from $alidir to the new tree and model, to use as
-# initial alignments.  Stop if the conversion fails.
-
-convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree ark:$alidir/ali ark:$dir/cur.ali 2>$dir/convert.log \
-  || exit 1;
-  # Debug step only: convert back and check they're the same.
-  convert-ali $dir/1.mdl $alidir/final.mdl $alidir/tree ark:$dir/cur.ali ark:- \
-   2>/dev/null | cmp - $alidir/ali || exit 1; 
-
-# Make training graphs
-echo "Compiling training graphs"
-compile-train-graphs $dir/tree $dir/1.mdl  $lang/L.fst  \
-  "ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
-  "ark:|gzip -c >$dir/graphs.fsts.gz"  2>$dir/compile_graphs.log  || exit 1;
-
-# Training passes 1 .. numiters-1.  Pass x reads $x.mdl and writes the model
-# and occupancies for pass x+1.
-x=1
-while [ $x -lt $numiters ]; do
-   echo Pass $x
-   # Realign only on the iterations listed in $realign_iters.
-   if echo $realign_iters | grep -w $x >/dev/null; then
-     echo "Aligning data"
-     gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
-             "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
-             ark:$dir/cur.ali 2> $dir/align.$x.log || exit 1;
-   fi
-   gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc 2> $dir/acc.$x.log  || exit 1;
-   gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
-   # The inputs of this pass are no longer needed.
-   rm $dir/$x.mdl $dir/$x.acc
-   rm $dir/$x.occs 
-   # Raise the Gaussian target up to and including pass $maxiterinc.
-   if [[ $x -le $maxiterinc ]]; then 
-      numgauss=$[$numgauss+$incgauss];
-   fi
-   x=$[$x+1];
-done
-
-# Point final.mdl and final.occs at the last model.  Both old links are
-# removed first so that a rerun does not fail on an existing final.occs, and
-# nothing is touched if the directory cannot be entered.
-( cd $dir || exit 1; rm -f final.mdl final.occs; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )
-
-echo Done
--- a/egs/rm/s4/steps/train_mono.sh
+++ b/egs/rm/s4/steps/train_mono.sh
@ -1,105 +0,0 @@
-#!/bin/bash
-# Copyright 2010-2011 Microsoft Corporation  Arnab Ghoshal
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# To be run from ..
-# Flat start and monophone training, with delta-delta features.
-# This script applies cepstral mean normalization (per speaker),
-# unlike the corresponding script in s1/
-
-if [ $# != 3 ]; then
-   echo "Usage: steps/train_mono.sh <data-dir> <lang-dir> <exp-dir>"
-   echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
-   exit 1;
-fi
-
-
-data=$1
-lang=$2
-dir=$3
-
-if [ -f path.sh ]; then . path.sh; fi
-
-# Configuration:
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
-numiters=30    # Number of iterations of training
-maxiterinc=20 # Last iter to increase #Gauss on.
-numgauss=250 # Initial num-Gauss (must be more than #states=3*phones).
-totgauss=1000 # Target #Gaussians.  
-incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
-realign_iters="1 2 3 4 5 6 7 8 9 10 12 15 20 25";
-
-mkdir -p $dir
-echo "Computing cepstral mean and variance statistics"
-
-compute-cmvn-stats  scp:$data/feats.scp ark:$dir/cmvn.ark 2>$dir/cmvn.log || exit 1;
-
-feats="ark:apply-cmvn --norm-vars=false ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
-
-# compute integer form of transcripts.
-scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
-  || exit 1;
-
-echo "Initializing monophone system."
-
-gmm-init-mono "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo 39  \
-   $dir/0.mdl $dir/tree 2> $dir/init.log || exit 1;
-
-
-echo "Compiling training graphs"
-compile-train-graphs $dir/tree $dir/0.mdl  $lang/L.fst \
-  ark:$dir/train.tra  "ark:|gzip -c >$dir/graphs.fsts.gz"  \
-  2>$dir/compile_graphs.log || exit 1 
-
-echo Pass 0
-
-align-equal-compiled "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
-   ark,t,f:-  2>$dir/align.0.log | \
- gmm-acc-stats-ali --binary=true $dir/0.mdl "$feats" ark:- \
-     $dir/0.acc 2> $dir/acc.0.log  || exit 1;
-
-# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
-# we fail to est "rare" phones and later on, they never align properly.
-
-gmm-est --min-gaussian-occupancy=3  --mix-up=$numgauss \
-    $dir/0.mdl $dir/0.acc $dir/1.mdl 2> $dir/update.0.log || exit 1;
-
-rm $dir/0.acc
-
-beam=4 # will change to 8 below after 1st pass
-x=1
-while [ $x -lt $numiters ]; do
-  echo "Pass $x"
-  if echo $realign_iters | grep -w $x >/dev/null; then
-    echo "Aligning data"
-    gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$[$beam*4] $dir/$x.mdl \
-        "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" t,ark:$dir/cur.ali \
-        2> $dir/align.$x.log || exit 1;
-  fi
-  gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc 2> $dir/acc.$x.log  || exit 1;
-  gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
-  rm $dir/$x.mdl $dir/$x.acc $dir/$x.occs 2>/dev/null
-  if [ $x -le $maxiterinc ]; then
-     numgauss=$[$numgauss+$incgauss];
-  fi
-  beam=8
-  x=$[$x+1]
-done
-
-( cd $dir; rm final.mdl 2>/dev/null; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )
-
-# example of showing the alignments:
-# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4
-
				`@ -1 +0,0 @@`
				`--use-energy=false # only non-default option.`