From 6042ff8d14fae5529ead40665a6da6cb57422246 Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Sat, 26 Mar 2016 22:25:23 -0400 Subject: [PATCH 01/10] proof of concept for review --- egs/ami/s5/path.sh | 3 ++- egs/rm/s5/path.sh | 3 ++- src/path.sh | 25 +++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 src/path.sh diff --git a/egs/ami/s5/path.sh b/egs/ami/s5/path.sh index bf1acb48d..ad81f59de 100644 --- a/egs/ami/s5/path.sh +++ b/egs/ami/s5/path.sh @@ -1,6 +1,7 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/nnet3bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/chainbin:$KALDI_ROOT/src/lmbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +. $KALDI_ROOT/src/path.sh export LC_ALL=C LMBIN=$KALDI_ROOT/tools/irstlm/bin diff --git a/egs/rm/s5/path.sh b/egs/rm/s5/path.sh index c3be1ca9d..2e7165f91 100755 --- a/egs/rm/s5/path.sh +++ b/egs/rm/s5/path.sh @@ -1,3 +1,4 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/src/path.sh b/src/path.sh new file mode 100644 index 000000000..0d16ba77f --- /dev/null +++ b/src/path.sh @@ -0,0 +1,25 @@ +# we are using BASH_SOURCE[0], because its set correctly even when the file +# is sourced. +# The formatting of the path export command is intentionally weird, because +# this allows for easy diff'ing +this_script_path=$(readlink -f "${BASH_SOURCE[0]}") +my_kaldi_src=$(dirname $this_script_path) +export PATH=\ +$my_kaldi_src/bin:\ +$my_kaldi_src/chainbin:\ +$my_kaldi_src/featbin:\ +$my_kaldi_src/fgmmbin:\ +$my_kaldi_src/fstbin:\ +$my_kaldi_src/gmmbin:\ +$my_kaldi_src/ivectorbin:\ +$my_kaldi_src/kwsbin:\ +$my_kaldi_src/latbin:\ +$my_kaldi_src/lmbin:\ +$my_kaldi_src/nnet2bin:\ +$my_kaldi_src/nnet3bin:\ +$my_kaldi_src/nnetbin:\ +$my_kaldi_src/online2bin:\ +$my_kaldi_src/onlinebin:\ +$my_kaldi_src/sgmm2bin:\ +$my_kaldi_src/sgmmbin:\ +$PATH From 6353fed090ecc09ec095802e360934168577000a Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Sat, 26 Mar 2016 23:43:26 -0400 Subject: [PATCH 02/10] small issue in the arpa2fst code --- src/lm/arpa-file-parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lm/arpa-file-parser.cc b/src/lm/arpa-file-parser.cc index 02f90a1b9..3dc39e62f 100644 --- a/src/lm/arpa-file-parser.cc +++ b/src/lm/arpa-file-parser.cc @@ -218,7 +218,7 @@ void ArpaFileParser::Read(std::istream &is, bool binary) { } } if (ngram_count > ngram_counts_[cur_order - 1]) { - PARSE_ERR << "Header said there would be " << ngram_counts_[cur_order] + PARSE_ERR << "Header said there would be " << ngram_counts_[cur_order - 1] << " n-grams of order " << cur_order << ", but we saw " << ngram_count; } From 9c0391c1ca624e9c4878d7d7f6c672a734c18932 Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Sat, 26 Mar 2016 23:46:43 -0400 Subject: [PATCH 03/10] all other path.sh fixed --- egs/ami/s5/path.sh | 3 ++- egs/aspire/s5/path.sh | 4 +++- egs/aurora4/s5/path.sh | 4 +++- egs/bn_music_speech/v1/path.sh | 6 ++++-- egs/callhome_egyptian/s5/path.sh | 4 +++- egs/chime1/s5/path.sh | 4 +++- egs/chime2/s5/path.sh | 4 +++- egs/chime3/s5/path.sh | 4 +++- egs/csj/s5/path.sh | 5 +++-- egs/farsdat/s5/path.sh | 6 ++++-- egs/fisher_callhome_spanish/s5/path.sh | 4 +++- egs/fisher_english/s5/path.sh | 4 +++- egs/fisher_swbd/s5/path.sh | 4 +++- egs/gale_arabic/s5/path.sh | 4 +++- egs/gale_mandarin/s5/path.sh | 4 +++- egs/hkust/s5/path.sh | 4 +++- egs/librispeech/s5/path.sh | 4 +++- egs/lre/v1/path.sh | 6 ++++-- egs/lre07/v1/path.sh | 6 ++++-- egs/reverb/s5/path.sh | 4 +++- egs/rm/s5/path.sh | 1 + egs/sprakbanken/s5/path.sh | 6 ++++-- egs/sre08/v1/path.sh | 6 ++++-- egs/sre10/v1/path.sh | 6 ++++-- egs/sre10/v2/path.sh | 6 ++++-- egs/swahili/s5/path.sh | 9 +++++++-- egs/swbd/s5/path.sh | 4 +++- egs/swbd/s5b/path.sh | 6 ++++-- egs/swbd/s5c/path.sh | 4 +++- egs/tedlium/s5/path.sh | 5 ++++- egs/thchs30/s5/path.sh | 6 +++--- egs/tidigits/s5/path.sh | 5 ++++- egs/timit/s5/path.sh | 6 ++++-- egs/voxforge/s5/path.sh | 5 ++++- egs/vystadial_cz/s5/path.sh | 7 +++++-- egs/vystadial_en/s5/path.sh | 9 +++++---- egs/wsj/s5/path.sh | 6 ++++-- egs/yesno/s5/path.sh | 9 +++++++-- 38 files changed, 137 insertions(+), 57 deletions(-) diff --git a/egs/ami/s5/path.sh b/egs/ami/s5/path.sh index ad81f59de..d401e773c 100644 --- a/egs/ami/s5/path.sh +++ b/egs/ami/s5/path.sh @@ -1,6 +1,7 @@ export KALDI_ROOT=`pwd`/../../.. -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/aspire/s5/path.sh b/egs/aspire/s5/path.sh index e93eb33f2..e8c2b01b0 100755 --- a/egs/aspire/s5/path.sh +++ b/egs/aspire/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/aurora4/s5/path.sh b/egs/aurora4/s5/path.sh index fee0b9b0c..3f50635df 100755 --- a/egs/aurora4/s5/path.sh +++ b/egs/aurora4/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/bn_music_speech/v1/path.sh b/egs/bn_music_speech/v1/path.sh index 7cf73af8c..66560e36b 100755 --- a/egs/bn_music_speech/v1/path.sh +++ b/egs/bn_music_speech/v1/path.sh @@ -1,3 +1,5 @@ -export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/callhome_egyptian/s5/path.sh b/egs/callhome_egyptian/s5/path.sh index c2ed6ab72..e8c2b01b0 100755 --- a/egs/callhome_egyptian/s5/path.sh +++ b/egs/callhome_egyptian/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/nnet3bin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnet-cpubin/:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/chime1/s5/path.sh b/egs/chime1/s5/path.sh index 59966f91a..e8c2b01b0 100755 --- a/egs/chime1/s5/path.sh +++ b/egs/chime1/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/chime2/s5/path.sh b/egs/chime2/s5/path.sh index fee0b9b0c..3f50635df 100755 --- a/egs/chime2/s5/path.sh +++ b/egs/chime2/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/chime3/s5/path.sh b/egs/chime3/s5/path.sh index 577b1544a..335d400c0 100755 --- a/egs/chime3/s5/path.sh +++ b/egs/chime3/s5/path.sh @@ -1,6 +1,8 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/tools/kaldi_lm/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/tools/kaldi_lm/:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt-3.51 diff --git a/egs/csj/s5/path.sh b/egs/csj/s5/path.sh index edf752674..db6a2eafe 100644 --- a/egs/csj/s5/path.sh +++ b/egs/csj/s5/path.sh @@ -1,7 +1,8 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$PWD:$PATH -#$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$KALDI_ROOT/tools/srilm/bin/i686:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export PATH=$PATH:/usr/local/cuda/bin export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:/usr/local/cuda/bin/nvcc diff --git a/egs/farsdat/s5/path.sh b/egs/farsdat/s5/path.sh index 1e48f21b3..8ff4ca4ce 100755 --- a/egs/farsdat/s5/path.sh +++ b/egs/farsdat/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/fisher_callhome_spanish/s5/path.sh b/egs/fisher_callhome_spanish/s5/path.sh index 423d1dd00..e8c2b01b0 100755 --- a/egs/fisher_callhome_spanish/s5/path.sh +++ b/egs/fisher_callhome_spanish/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/src/nnet:$KALDI_ROOT/src/nnet2:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnet-cpubin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/fisher_english/s5/path.sh b/egs/fisher_english/s5/path.sh index bee653150..e8c2b01b0 100755 --- a/egs/fisher_english/s5/path.sh +++ b/egs/fisher_english/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/fisher_swbd/s5/path.sh b/egs/fisher_swbd/s5/path.sh index 3b05dc5e2..e8d8f9c62 100755 --- a/egs/fisher_swbd/s5/path.sh +++ b/egs/fisher_swbd/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../../ export PWD=`pwd` -export PATH=$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/src/onlinebin:$KALDI_ROOT/src/online2bin:$PWD/stanford-utils:$KALDI_ROOT/src/stanford-bin/:$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet-cpubin/:$KALDI_ROOT/src/kwsbin:$PWD:$KALDI_ROOT/tools/kaldi_lm:$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$PATH +export PATH=$KALDI_ROOT/src/ivectorbin:$PWD/stanford-utils:$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$KALDI_ROOT/tools/kaldi_lm:$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/gale_arabic/s5/path.sh b/egs/gale_arabic/s5/path.sh index db21a99a7..e47d4940c 100755 --- a/egs/gale_arabic/s5/path.sh +++ b/egs/gale_arabic/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=$(pwd)/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/gale_mandarin/s5/path.sh b/egs/gale_mandarin/s5/path.sh index db21a99a7..e47d4940c 100755 --- a/egs/gale_mandarin/s5/path.sh +++ b/egs/gale_mandarin/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=$(pwd)/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/hkust/s5/path.sh b/egs/hkust/s5/path.sh index e49bed09e..87c501ce5 100755 --- a/egs/hkust/s5/path.sh +++ b/egs/hkust/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. #export KALDI_ROOT=/home/dpovey/kaldi-trunk-test -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/librispeech/s5/path.sh b/egs/librispeech/s5/path.sh index 74b6e31ad..eab6a3625 100755 --- a/egs/librispeech/s5/path.sh +++ b/egs/librispeech/s5/path.sh @@ -1,5 +1,7 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C # we use this both in the (optional) LM training and the G2P-related scripts diff --git a/egs/lre/v1/path.sh b/egs/lre/v1/path.sh index 7cf73af8c..66560e36b 100755 --- a/egs/lre/v1/path.sh +++ b/egs/lre/v1/path.sh @@ -1,3 +1,5 @@ -export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/lre07/v1/path.sh b/egs/lre07/v1/path.sh index 7cf73af8c..66560e36b 100755 --- a/egs/lre07/v1/path.sh +++ b/egs/lre07/v1/path.sh @@ -1,3 +1,5 @@ -export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/reverb/s5/path.sh b/egs/reverb/s5/path.sh index eea6b7a82..e8c2b01b0 100644 --- a/egs/reverb/s5/path.sh +++ b/egs/reverb/s5/path.sh @@ -1,3 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/rm/s5/path.sh b/egs/rm/s5/path.sh index 2e7165f91..e8c2b01b0 100755 --- a/egs/rm/s5/path.sh +++ b/egs/rm/s5/path.sh @@ -1,4 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/sprakbanken/s5/path.sh b/egs/sprakbanken/s5/path.sh index 9df7df54e..3f50635df 100755 --- a/egs/sprakbanken/s5/path.sh +++ b/egs/sprakbanken/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/sre08/v1/path.sh b/egs/sre08/v1/path.sh index 7cf73af8c..66560e36b 100755 --- a/egs/sre08/v1/path.sh +++ b/egs/sre08/v1/path.sh @@ -1,3 +1,5 @@ -export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/sre10/v1/path.sh b/egs/sre10/v1/path.sh index 7cf73af8c..66560e36b 100755 --- a/egs/sre10/v1/path.sh +++ b/egs/sre10/v1/path.sh @@ -1,3 +1,5 @@ -export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/sre10/v2/path.sh b/egs/sre10/v2/path.sh index 7cf73af8c..66560e36b 100755 --- a/egs/sre10/v2/path.sh +++ b/egs/sre10/v2/path.sh @@ -1,3 +1,5 @@ -export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/swahili/s5/path.sh b/egs/swahili/s5/path.sh index 3dc94fa83..fedef4517 100755 --- a/egs/swahili/s5/path.sh +++ b/egs/swahili/s5/path.sh @@ -1,11 +1,16 @@ #!/bin/bash +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +#export PATH=$PWD/utils/:$PWD/steps/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh +export LC_ALL=C DATA_DIR=$PWD/data LEXICON=$DATA_DIR/local/dict/lexicon.txt EXP_DIR="dev test" TRAIN_DIR="train" -export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$PWD/steps/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$PWD:$PATH export LC_ALL=C diff --git a/egs/swbd/s5/path.sh b/egs/swbd/s5/path.sh index e1d916917..d127b54fc 100755 --- a/egs/swbd/s5/path.sh +++ b/egs/swbd/s5/path.sh @@ -1,6 +1,8 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh #export KALDI_ROOT=/home/dpovey/kaldi-trunk-test -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C export MKL_NUM_THREADS=16 diff --git a/egs/swbd/s5b/path.sh b/egs/swbd/s5b/path.sh index db666cc10..3f50635df 100755 --- a/egs/swbd/s5b/path.sh +++ b/egs/swbd/s5b/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH -#$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$KALDI_ROOT/tools/srilm/bin/i686:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/swbd/s5c/path.sh b/egs/swbd/s5c/path.sh index c6b8450c8..955962c60 100755 --- a/egs/swbd/s5c/path.sh +++ b/egs/swbd/s5c/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/chainbin:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/tedlium/s5/path.sh b/egs/tedlium/s5/path.sh index dcefaea23..c3203f90b 100755 --- a/egs/tedlium/s5/path.sh +++ b/egs/tedlium/s5/path.sh @@ -1,3 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH:$KALDI_ROOT/tools/sph2pipe_v2.5 +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/tools/sph2pipe_v2.5 +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/thchs30/s5/path.sh b/egs/thchs30/s5/path.sh index bc199673f..61999be08 100755 --- a/egs/thchs30/s5/path.sh +++ b/egs/thchs30/s5/path.sh @@ -1,7 +1,7 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh - -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$PWD:$PATH - +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/tidigits/s5/path.sh b/egs/tidigits/s5/path.sh index 3ee460789..3f50635df 100755 --- a/egs/tidigits/s5/path.sh +++ b/egs/tidigits/s5/path.sh @@ -1,3 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/timit/s5/path.sh b/egs/timit/s5/path.sh index 1e48f21b3..8ff4ca4ce 100755 --- a/egs/timit/s5/path.sh +++ b/egs/timit/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/voxforge/s5/path.sh b/egs/voxforge/s5/path.sh index 373d8e329..7234dfbf5 100755 --- a/egs/voxforge/s5/path.sh +++ b/egs/voxforge/s5/path.sh @@ -1,5 +1,8 @@ export KALDI_ROOT=`pwd`/../../.. -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/onlinebin/:$PWD:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh # VoxForge data will be stored in: export DATA_ROOT="/home/dpovey/kaldi-clean/egs/voxforge/s5/voxforge" # e.g. something like /media/secondary/voxforge diff --git a/egs/vystadial_cz/s5/path.sh b/egs/vystadial_cz/s5/path.sh index 98bd2fab4..20db6f37d 100755 --- a/egs/vystadial_cz/s5/path.sh +++ b/egs/vystadial_cz/s5/path.sh @@ -1,9 +1,12 @@ # Needed for "correct" sorting +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils:$PWD/steps:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C -export KALDI_ROOT=../../.. # adding Kaldi binaries to path -export PATH=$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PWD/utils:$PWD/steps:$PATH diff --git a/egs/vystadial_en/s5/path.sh b/egs/vystadial_en/s5/path.sh index d34cd4cbe..70953bfd8 100755 --- a/egs/vystadial_en/s5/path.sh +++ b/egs/vystadial_en/s5/path.sh @@ -1,9 +1,10 @@ -# Needed for "correct" sorting +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$PWD:$PWD/utils:$PWD/steps:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C -export KALDI_ROOT=../../.. -# adding Kaldi binaries to path -export PATH=$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PWD/utils:$PWD/steps:$PATH srilm_bin=$KALDI_ROOT/tools/srilm/bin/ diff --git a/egs/wsj/s5/path.sh b/egs/wsj/s5/path.sh index fba716c0a..3f50635df 100755 --- a/egs/wsj/s5/path.sh +++ b/egs/wsj/s5/path.sh @@ -1,4 +1,6 @@ export KALDI_ROOT=`pwd`/../../.. -[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/nnet3bin/:$KALDI_ROOT/src/ctcbin/:$PWD:$PATH +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C diff --git a/egs/yesno/s5/path.sh b/egs/yesno/s5/path.sh index 708524a55..c01898056 100644 --- a/egs/yesno/s5/path.sh +++ b/egs/yesno/s5/path.sh @@ -1,3 +1,8 @@ - -export PATH=$PWD/utils/:$PWD/../../../src/bin:$PWD/../../../tools/openfst/bin:$PWD/../../../src/fstbin/:$PWD/../../../src/gmmbin/:$PWD/../../../src/featbin/:$PWD/../../../src/lm/:$PWD/../../../src/sgmmbin/:$PWD/../../../src/fgmmbin/:$PWD/../../../src/latbin/:$PWD:$PATH +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/src/path.sh ] && echo >&2 "The standard file $KALDI_ROOT/src/path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/src/path.sh export LC_ALL=C + + From 99b69bb813c137bca18f8973e54545aa0a0c5447 Mon Sep 17 00:00:00 2001 From: Jan Trmal Date: Mon, 28 Mar 2016 14:12:16 -0400 Subject: [PATCH 04/10] yesno eg fixes --- egs/yesno/s5/input/task.arpabo | 2 +- egs/yesno/s5/run.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/yesno/s5/input/task.arpabo b/egs/yesno/s5/input/task.arpabo index 415391c98..5c6b525b9 100644 --- a/egs/yesno/s5/input/task.arpabo +++ b/egs/yesno/s5/input/task.arpabo @@ -1,6 +1,6 @@ \data\ -ngram 1=3 +ngram 1=4 \1-grams: -1 NO diff --git a/egs/yesno/s5/run.sh b/egs/yesno/s5/run.sh index 3e5d59a96..12b00273f 100755 --- a/egs/yesno/s5/run.sh +++ b/egs/yesno/s5/run.sh @@ -26,6 +26,7 @@ local/prepare_lm.sh for x in train_yesno test_yesno; do steps/make_mfcc.sh --nj 1 data/$x exp/make_mfcc/$x mfcc steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc + utils/fix_data_dir.sh data/$x done # Mono training From a90af6e5e591d4c4684dc7c7460ef212393ea8b3 Mon Sep 17 00:00:00 2001 From: BogdanvL Date: Tue, 29 Mar 2016 16:39:18 +0200 Subject: [PATCH 05/10] Update run.sh Extended version of GP recipe --- egs/gp/s5/run.sh | 297 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 211 insertions(+), 86 deletions(-) diff --git a/egs/gp/s5/run.sh b/egs/gp/s5/run.sh index ed345efef..933d3a4f5 100755 --- a/egs/gp/s5/run.sh +++ b/egs/gp/s5/run.sh @@ -2,6 +2,16 @@ # Copyright 2012 Arnab Ghoshal +# +# Copyright 2016 by Idiap Research Institute, http://www.idiap.ch +# +# See the file COPYING for the licence associated with this software. +# +# Author(s): +# Bogdan Vlasenko, February 2016 +# + + # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -22,47 +32,37 @@ echo "This shell script may run as-is on your system, but it is recommended that you run the commands one by one by copying and pasting into the shell." #exit 1; -[ -f cmd.sh ] && source ./cmd.sh \ - || echo "cmd.sh not found. Jobs may not execute properly." +[ -f cmd.sh ] && source ./cmd.sh || echo "cmd.sh not found. Jobs may not execute properly." # CHECKING FOR AND INSTALLING REQUIRED TOOLS: # This recipe requires shorten (3.6.1) and sox (14.3.2). # If they are not found, the local/gp_install.sh script will install them. -local/gp_check_tools.sh $PWD path.sh +#local/gp_check_tools.sh $PWD path.sh || exit 1; . path.sh || { echo "Cannot source path.sh"; exit 1; } # Set the locations of the GlobalPhone corpus and language models -GP_CORPUS=/mnt/matylda2/data/GLOBALPHONE -# GP_LM=/mnt/matylda6/ijanda/GLOBALPHONE_LM +GP_CORPUS=/idiap/resource/database/GLOBALPHONE GP_LM=$PWD/language_models # Set the languages that will actually be processed -# export GP_LANGUAGES="CZ FR GE PL PO RU SP VN" -export GP_LANGUAGES="CZ FR GE PL PO SP" +export GP_LANGUAGES="FR GE RU" # The following data preparation step actually converts the audio files from # shorten to WAV to take out the empty files and those with compression errors. -local/gp_data_prep.sh --config-dir=$PWD/conf --corpus-dir=$GP_CORPUS \ - --languages="$GP_LANGUAGES" -local/gp_dict_prep.sh --config-dir $PWD/conf $GP_CORPUS $GP_LANGUAGES -# # Use the following to map to X-SAMPA phoneset -# local/gp_dict_prep.sh --config-dir $PWD/conf --map-dir $PWD/conf/xsampa_map \ -# $GP_CORPUS $GP_LANGUAGES +local/gp_data_prep.sh --config-dir=$PWD/conf --corpus-dir=$GP_CORPUS --languages="$GP_LANGUAGES" || exit 1; +local/gp_dict_prep.sh --config-dir $PWD/conf $GP_CORPUS $GP_LANGUAGES || exit 1; for L in $GP_LANGUAGES; do - utils/prepare_lang.sh --position-dependent-phones true \ - data/$L/local/dict "" data/$L/local/lang_tmp data/$L/lang \ - >& data/$L/prepare_lang.log || exit 1; + utils/prepare_lang.sh --position-dependent-phones true \ + data/$L/local/dict "" data/$L/local/lang_tmp data/$L/lang \ + >& data/$L/prepare_lang.log || exit 1; done # Convert the different available language models to FSTs, and create separate # decoding configurations for each. for L in $GP_LANGUAGES; do - # $highmem_cmd data/$L/format_lm.log \ - # local/gp_format_lm.sh --filter-vocab-sri false $GP_LM $L & - $highmem_cmd data/$L/format_lm.log \ - local/gp_format_lm.sh --filter-vocab-sri true $GP_LM $L & + local/gp_format_lm.sh --filter-vocab-sri true $GP_LM $L & done wait @@ -72,13 +72,14 @@ for L in $GP_LANGUAGES; do for x in train dev eval; do ( steps/make_mfcc.sh --nj 6 --cmd "$train_cmd" data/$L/$x \ - exp/$L/make_mfcc/$x $mfccdir; + exp/$L/make_mfcc/$x $mfccdir; steps/compute_cmvn_stats.sh data/$L/$x exp/$L/make_mfcc/$x $mfccdir; ) & done done wait; + for L in $GP_LANGUAGES; do mkdir -p exp/$L/mono; steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ @@ -86,86 +87,107 @@ for L in $GP_LANGUAGES; do done wait; + for L in $GP_LANGUAGES; do for lm_suffix in tgpr_sri; do ( graph_dir=exp/$L/mono/graph_${lm_suffix} mkdir -p $graph_dir - $highmem_cmd $graph_dir/mkgraph.log \ - utils/mkgraph.sh --mono data/$L/lang_test_${lm_suffix} exp/$L/mono \ - $graph_dir + utils/mkgraph.sh --mono data/$L/lang_test_${lm_suffix} exp/$L/mono \ + $graph_dir steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ - exp/$L/mono/decode_dev_${lm_suffix} + exp/$L/mono/decode_dev_${lm_suffix} + steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/eval \ + exp/$L/mono/decode_eval_${lm_suffix} ) & done done - +# Train tri1, which is first triphone pass for L in $GP_LANGUAGES; do ( mkdir -p exp/$L/mono_ali steps/align_si.sh --nj 10 --cmd "$train_cmd" \ - data/$L/train data/$L/lang exp/$L/mono exp/$L/mono_ali \ - >& exp/$L/mono_ali/align.log + data/$L/train data/$L/lang exp/$L/mono exp/$L/mono_ali \ + >& exp/$L/mono_ali/align.log num_states=$(grep "^$L" conf/tri.conf | cut -f2) num_gauss=$(grep "^$L" conf/tri.conf | cut -f3) mkdir -p exp/$L/tri1 - steps/train_deltas.sh --cmd "$train_cmd" --cluster-thresh 100 \ - $num_states $num_gauss data/$L/train data/$L/lang exp/$L/mono_ali \ - exp/$L/tri1 >& exp/$L/tri1/train.log - ) & + steps/train_deltas.sh --cmd "$train_cmd" \ + --cluster-thresh 100 $num_states $num_gauss data/$L/train data/$L/lang \ + exp/$L/mono_ali exp/$L/tri1 >& exp/$L/tri1/train.log + ) & done wait; - +# Decode tri1 for L in $GP_LANGUAGES; do for lm_suffix in tgpr_sri; do ( graph_dir=exp/$L/tri1/graph_${lm_suffix} mkdir -p $graph_dir - $highmem_cmd $graph_dir/mkgraph.log \ - utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/tri1 $graph_dir + utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/tri1 \ + $graph_dir steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ - exp/$L/tri1/decode_dev_${lm_suffix} + exp/$L/tri1/decode_dev_${lm_suffix} + steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/eval \ + exp/$L/tri1/decode_eval_${lm_suffix} ) & done done -# SAT-trained triphone systems: MFCC feats + +# Train tri2a, which is deltas + delta-deltas for L in $GP_LANGUAGES; do ( - mkdir -p exp/$L/tri1_ali_fmllr - steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \ - data/$L/train data/$L/lang exp/$L/tri1 exp/$L/tri1_ali_fmllr \ - >& exp/$L/tri1_ali_fmllr/align.log || exit 1; + mkdir -p exp/$L/tri1_ali + steps/align_si.sh --nj 10 --cmd "$train_cmd" \ + data/$L/train data/$L/lang exp/$L/tri1 exp/$L/tri1_ali \ + >& exp/$L/tri1_ali/tri1_ali.log num_states=$(grep "^$L" conf/tri.conf | cut -f2) num_gauss=$(grep "^$L" conf/tri.conf | cut -f3) mkdir -p exp/$L/tri2a - steps/train_sat.sh --cmd "$train_cmd" --cluster-thresh 100 \ - $num_states $num_gauss data/$L/train data/$L/lang exp/$L/tri1_ali_fmllr \ - exp/$L/tri2a >& exp/$L/tri2a/train.log + steps/train_deltas.sh --cmd "$train_cmd" \ + --cluster-thresh 100 $num_states $num_gauss data/$L/train data/$L/lang \ + exp/$L/tri1_ali exp/$L/tri2a >& exp/$L/tri2a/train.log ) & done wait; +# Decode tri2a for L in $GP_LANGUAGES; do for lm_suffix in tgpr_sri; do ( graph_dir=exp/$L/tri2a/graph_${lm_suffix} mkdir -p $graph_dir - $highmem_cmd $graph_dir/mkgraph.log \ - utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/tri2a $graph_dir + utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/tri2a \ + $graph_dir - steps/decode_fmllr.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ - exp/$L/tri2a/decode_dev_${lm_suffix} + steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ + exp/$L/tri2a/decode_dev_${lm_suffix} + steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/eval \ + exp/$L/tri2a/decode_eval_${lm_suffix} ) & done done +# Train tri2b, which is LDA+MLLT +for L in $GP_LANGUAGES; do + ( + num_states=$(grep "^$L" conf/tri.conf | cut -f2) + num_gauss=$(grep "^$L" conf/tri.conf | cut -f3) + mkdir -p exp/$L/tri2b + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" $num_states $num_gauss data/$L/train \ + data/$L/lang exp/$L/tri1_ali exp/$L/tri2b >& exp/$L/tri2b/tri2_ali.log + ) & +done +wait; + # for L in $GP_LANGUAGES; do # mode=4 # # Doing this only for the LMs whose vocabs were limited using SRILM, since the @@ -175,19 +197,152 @@ done # exp/$L/tri2a/decode_dev_tgpr_sri exp/$L/tri2a/decode_dev_tg_sri$mode # done +# Decode tri2b +for L in $GP_LANGUAGES; do + for lm_suffix in tgpr_sri; do + ( + graph_dir=exp/$L/tri2b/graph_${lm_suffix} + mkdir -p $graph_dir + utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/tri2b \ + $graph_dir + + steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ + exp/$L/tri2b/decode_dev_${lm_suffix} + steps/decode.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/eval \ + exp/$L/tri2b/decode_eval_${lm_suffix} + ) & + done +done +wait; + +# Train tri3b, which is LDA+MLLT+SAT. +for L in $GP_LANGUAGES; do + ( + mkdir -p exp/$L/tri2b_ali + steps/align_si.sh --nj 10 --cmd "$train_cmd" \ + --use-graphs true data/$L/train data/$L/lang exp/$L/tri2b exp/$L/tri2b_ali \ + >& exp/$L/tri2b_ali/align.log + + num_states=$(grep "^$L" conf/tri.conf | cut -f2) + num_gauss=$(grep "^$L" conf/tri.conf | cut -f3) + mkdir -p exp/$L/tri3b + steps/train_sat.sh --cmd "$train_cmd" \ + --cluster-thresh 100 $num_states $num_gauss data/$L/train data/$L/lang \ + exp/$L/tri2b_ali exp/$L/tri3b >& exp/$L/tri3b/train.log + ) & +done +wait; + +# Decode 3b +for L in $GP_LANGUAGES; do + for lm_suffix in tgpr_sri; do + ( + graph_dir=exp/$L/tri3b/graph_${lm_suffix} + mkdir -p $graph_dir + utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/tri3b \ + $graph_dir + + mkdir -p exp/$L/tri3b/decode_dev_${lm_suffix} + steps/decode_fmllr.sh --nj 5 --cmd "$decode_cmd" \ + $graph_dir data/$L/dev exp/$L/tri3b/decode_dev_${lm_suffix} + steps/decode_fmllr.sh --nj 5 --cmd "$decode_cmd" \ + $graph_dir data/$L/eval exp/$L/tri3b/decode_eval_${lm_suffix} + ) & +done +done +wait; + +## Train sgmm2b, which is SGMM on top of LDA+MLLT+SAT features. +for L in $GP_LANGUAGES; do + ( + mkdir -p exp/$L/tri3b_ali + steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \ + data/$L/train data/$L/lang exp/$L/tri3b exp/$L/tri3b_ali + + num_states=$(grep "^$L" conf/sgmm.conf | cut -f2) + num_substates=$(grep "^$L" conf/sgmm.conf | cut -f3) + mkdir -p exp/$L/ubm4a + steps/train_ubm.sh --cmd "$train_cmd" \ + 600 data/$L/train data/$L/lang exp/$L/tri3b_ali exp/$L/ubm4a + + mkdir -p exp/$L/sgmm2_4a + steps/train_sgmm2.sh --cmd "$train_cmd" \ + $num_states $num_substates data/$L/train data/$L/lang exp/$L/tri3b_ali \ + exp/$L/ubm4a/final.ubm exp/$L/sgmm2_4a + ) & +done +wait; + +## Decode sgmm2_4a +for L in $GP_LANGUAGES; do + for lm_suffix in tgpr_sri; do + ( + graph_dir=exp/$L/sgmm2_4a/graph_${lm_suffix} + mkdir -p $graph_dir + utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/sgmm2_4a \ + $graph_dir + + steps/decode_sgmm2.sh --use-fmllr true --nj 5 --cmd "$decode_cmd" \ + --transform-dir exp/$L/tri3b/decode_dev_${lm_suffix} $graph_dir data/$L/dev \ + exp/$L/sgmm2_4a/decode_dev_${lm_suffix} + steps/decode_sgmm2.sh --use-fmllr true --nj 5 --cmd "$decode_cmd" \ + --transform-dir exp/$L/tri3b/decode_eval_${lm_suffix} $graph_dir data/$L/eval \ + exp/$L/sgmm2_4a/decode_eval_${lm_suffix} + ) + done +done +wait; + + +# Now we'll align the SGMM system to prepare for discriminative training MMI +for L in $GP_LANGUAGES; do + for lm_suffix in tgpr_sri; do + ( + mkdir -p exp/$L/sgmm2_4a_ali + steps/align_sgmm2.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir exp/$L/tri3b_ali --use-graphs true --use-gselect true data/$L/train \ + data/$L/lang exp/$L/sgmm2_4a exp/$L/sgmm2_4a_ali + + mkdir -p exp/$L/sgmm2_4a_denlats + steps/make_denlats_sgmm2.sh --nj 10 --sub-split 10 --cmd "$decode_cmd" \ + --transform-dir exp/$L/tri3b_ali data/$L/train data/$L/lang \ + exp/$L/sgmm2_4a_ali exp/$L/sgmm2_4a_denlats + mkdir -p exp/$L/sgmm2_4a_mmi_b0.1 + steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" \ + --transform-dir exp/$L/tri3b_ali --boost 0.1 data/$L/train data/$L/lang \ + exp/$L/sgmm2_4a_ali exp/$L/sgmm2_4a_denlats exp/$L/sgmm2_4a_mmi_b0.1 + ) & + done +done +wait; + +# decode sgmm2_4a-mmi_b0.1 +for L in $GP_LANGUAGES; do + for lm_suffix in tgpr_sri; do + ( + graph_dir=exp/$L/sgmm2_4a/graph_${lm_suffix} + for iter in 1 2 3 4; do + for test in dev eval; do + steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" \ + --iter $iter --transform-dir exp/$L/tri3b/decode_${test}_${lm_suffix} data/$L/lang_test_${lm_suffix} \ + data/$L/${test} exp/$L/sgmm2_4a/decode_${test}_${lm_suffix} \ + exp/$L/sgmm2_4a_mmi_b0.1/decode_${test}_${lm_suffix}_it$iter + done + done + ) & + done +done +wait; + + # SGMMs starting from non-SAT triphone system, both with and without # speaker vectors. for L in $GP_LANGUAGES; do ( - mkdir -p exp/$L/tri1_ali - steps/align_si.sh --nj 10 --cmd "$train_cmd" \ - data/$L/train data/$L/lang exp/$L/tri1 exp/$L/tri1_ali \ - >& exp/$L/tri1_ali/align.log - mkdir -p exp/$L/ubm2a - steps/train_ubm.sh --cmd "$train_cmd" \ - 400 data/$L/train data/$L/lang exp/$L/tri1_ali exp/$L/ubm2a \ - >& exp/$L/ubm2a/train.log || exit 1; + steps/train_ubm.sh --cmd "$train_cmd" \ + 400 data/$L/train data/$L/lang exp/$L/tri1_ali exp/$L/ubm2a \ + >& exp/$L/ubm2a/train.log num_states=$(grep "^$L" conf/sgmm.conf | cut -f2) num_substates=$(grep "^$L" conf/sgmm.conf | cut -f3) @@ -222,35 +377,5 @@ for L in $GP_LANGUAGES; do done # loop over model with and without speaker vecs done # loop over languages -# Train SGMMs using SAT features -for L in $GP_LANGUAGES; do - ( - mkdir -p exp/$L/ubm2c - steps/train_ubm.sh --cmd "$train_cmd" \ - 400 data/$L/train data/$L/lang exp/$L/tri1_ali_fmllr exp/$L/ubm2c \ - >& exp/$L/ubm2c/train.log || exit 1; - num_states=$(grep "^$L" conf/tri.conf | cut -f2) - num_gauss=$(grep "^$L" conf/tri.conf | cut -f3) - mkdir -p exp/$L/sgmm2c - steps/train_sgmm.sh --cmd "$train_cmd" --cluster-thresh 100 \ - $num_states $num_gauss data/$L/train data/$L/lang exp/$L/tri1_ali_fmllr \ - exp/$L/ubm2c/final.ubm exp/$L/sgmm2c >& exp/$L/sgmm2c/train.log - ) & -done -wait -for L in $GP_LANGUAGES; do - for lm_suffix in tgpr_sri; do - ( - graph_dir=exp/$L/sgmm2c/graph_${lm_suffix} - mkdir -p $graph_dir - $highmem_cmd $graph_dir/mkgraph.log \ - utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/sgmm2c $graph_dir - - steps/decode_sgmm.sh --nj 5 --cmd "$decode_cmd" \ - --transform-dir exp/$L/tri2a/decode_dev_${lm_suffix} \ - $graph_dir data/$L/dev exp/$L/sgmm2c/decode_dev_${lm_suffix} - ) & - done -done From 98aed44aede9892add6c832dc9d6c26e5929ffb1 Mon Sep 17 00:00:00 2001 From: BogdanvL Date: Tue, 29 Mar 2016 16:42:12 +0200 Subject: [PATCH 06/10] Results obtained with extended version of GP recipe --- egs/gp/s5/RESULTS | 99 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 17 deletions(-) diff --git a/egs/gp/s5/RESULTS b/egs/gp/s5/RESULTS index 760545cf5..804f4ba01 100644 --- a/egs/gp/s5/RESULTS +++ b/egs/gp/s5/RESULTS @@ -1,20 +1,85 @@ -$ for L in $GP_LANGUAGES; do grep WER exp/$L/mono/decode_dev_tgpr_sri/wer_* | ./utils/best_wer.sh ; doneexp/CZ/mono/decode_dev_tgpr_sri/wer_9:%WER 35.13 [ 5820 / 16568, 486 ins, 1116 del, 4218 sub ] -exp/FR/mono/decode_dev_tgpr_sri/wer_9:%WER 45.69 [ 10192 / 22306, 533 ins, 2323 del, 7336 sub ] -exp/GE/mono/decode_dev_tgpr_sri/wer_11:%WER 27.48 [ 4228 / 15387, 278 ins, 974 del, 2976 sub ] -exp/PL/mono/decode_dev_tgpr_sri/wer_9:%WER 36.45 [ 6437 / 17660, 607 ins, 1228 del, 4602 sub ] -exp/PO/mono/decode_dev_tgpr_sri/wer_11:%WER 44.63 [ 5815 / 13030, 283 ins, 1861 del, 3671 sub ] -exp/RU/mono/decode_dev_tgpr_sri/wer_9:%WER 51.89 [ 9840 / 18962, 427 ins, 2457 del, 6956 sub ] -exp/SP/mono/decode_dev_tgpr_sri/wer_9:%WER 43.56 [ 8320 / 19098, 379 ins, 2581 del, 5360 sub ] -qghoshal@merlin:[~/globalphone/a1.1] +#!/bin/bash -$ for L in $GP_LANGUAGES; do grep WER exp/$L/tri1/decode_dev_tgpr_sri/wer_* | ./utils/best_wer.sh ; done -exp/CZ/tri1/decode_dev_tgpr_sri/wer_16:%WER 20.68 [ 3427 / 16568, 606 ins, 521 del, 2300 sub ] -exp/FR/tri1/decode_dev_tgpr_sri/wer_14:%WER 28.61 [ 6381 / 22306, 744 ins, 779 del, 4858 sub ] -exp/GE/tri1/decode_dev_tgpr_sri/wer_17:%WER 15.08 [ 2321 / 15387, 273 ins, 440 del, 1608 sub ] -exp/PL/tri1/decode_dev_tgpr_sri/wer_17:%WER 21.34 [ 3768 / 17660, 548 ins, 646 del, 2574 sub ] -exp/PO/tri1/decode_dev_tgpr_sri/wer_14:%WER 26.56 [ 3461 / 13030, 477 ins, 795 del, 2189 sub ] -exp/RU/tri1/decode_dev_tgpr_sri/wer_14:%WER 33.89 [ 6427 / 18962, 575 ins, 1612 del, 4240 sub ] -exp/SP/tri1/decode_dev_tgpr_sri/wer_16:%WER 26.73 [ 5105 / 19098, 551 ins, 1313 del, 3241 sub ] -qghoshal@merlin:[~/globalphone/a1.1] +# this RESULTS file was obtained by Bogdan Vlasenko in February 2016. +for x in exp/*/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +# Monophone, MFCC+delta+accel +%WER 45.16 [ 10073 / 22306, 684 ins, 2010 del, 7379 sub ] exp/FR/mono/decode_dev_tgpr_sri/wer_8 +%WER 26.96 [ 4149 / 15387, 285 ins, 933 del, 2931 sub ] exp/GE/mono/decode_dev_tgpr_sri/wer_11 +%WER 52.95 [ 10040 / 18962, 588 ins, 2182 del, 7270 sub ] exp/RU/mono/decode_dev_tgpr_sri/wer_8 + +%WER 41.80 [ 9071 / 21700, 513 ins, 1876 del, 6682 sub ] exp/FR/mono/decode_eval_tgpr_sri/wer_9 +%WER 44.71 [ 5347 / 11959, 399 ins, 1024 del, 3924 sub ] exp/GE/mono/decode_eval_tgpr_sri/wer_9 +%WER 51.55 [ 9416 / 18266, 533 ins, 1975 del, 6908 sub ] exp/RU/mono/decode_eval_tgpr_sri/wer_9 + +# First triphone build. +%WER 28.44 [ 6343 / 22306, 751 ins, 742 del, 4850 sub ] exp/FR/tri1/decode_dev_tgpr_sri/wer_14 +%WER 15.29 [ 2353 / 15387, 288 ins, 388 del, 1677 sub ] exp/GE/tri1/decode_dev_tgpr_sri/wer_18 +%WER 36.40 [ 6903 / 18962, 578 ins, 1876 del, 4449 sub ] exp/RU/tri1/decode_dev_tgpr_sri/wer_13 + +%WER 26.26 [ 5699 / 21700, 697 ins, 526 del, 4476 sub ] exp/FR/tri1/decode_eval_tgpr_sri/wer_13 +%WER 22.80 [ 2727 / 11959, 386 ins, 329 del, 2012 sub ] exp/GE/tri1/decode_eval_tgpr_sri/wer_15 +%WER 34.69 [ 6336 / 18266, 547 ins, 1470 del, 4319 sub ] exp/RU/tri1/decode_eval_tgpr_sri/wer_14 + +# tri2a is delta+delta-delta features. +%WER 28.45 [ 6345 / 22306, 771 ins, 725 del, 4849 sub ] exp/FR/tri2a/decode_dev_tgpr_sri/wer_14 +%WER 15.13 [ 2328 / 15387, 320 ins, 345 del, 1663 sub ] exp/GE/tri2a/decode_dev_tgpr_sri/wer_17 +%WER 36.62 [ 6944 / 18962, 526 ins, 2083 del, 4335 sub ] exp/RU/tri2a/decode_dev_tgpr_sri/wer_14 + +%WER 26.18 [ 5681 / 21700, 694 ins, 542 del, 4445 sub ] exp/FR/tri2a/decode_eval_tgpr_sri/wer_14 +%WER 22.52 [ 2693 / 11959, 341 ins, 363 del, 1989 sub ] exp/GE/tri2a/decode_eval_tgpr_sri/wer_17 +%WER 34.37 [ 6278 / 18266, 594 ins, 1378 del, 4306 sub ] exp/RU/tri2a/decode_eval_tgpr_sri/wer_14 + +# LDA+MLLT. +%WER 27.76 [ 6192 / 22306, 723 ins, 824 del, 4645 sub ] exp/FR/tri2b/decode_dev_tgpr_sri/wer_17 +%WER 13.78 [ 2121 / 15387, 300 ins, 313 del, 1508 sub ] exp/GE/tri2b/decode_dev_tgpr_sri/wer_18 +%WER 34.68 [ 6576 / 18962, 521 ins, 1872 del, 4183 sub ] exp/RU/tri2b/decode_dev_tgpr_sri/wer_15 + +%WER 25.43 [ 5519 / 21700, 724 ins, 532 del, 4263 sub ] exp/FR/tri2b/decode_eval_tgpr_sri/wer_15 +%WER 21.26 [ 2542 / 11959, 307 ins, 372 del, 1863 sub ] exp/GE/tri2b/decode_eval_tgpr_sri/wer_17 +%WER 32.83 [ 5997 / 18266, 522 ins, 1431 del, 4044 sub ] exp/RU/tri2b/decode_eval_tgpr_sri/wer_16 + +# LDA+MLLT+SAT. +%WER 25.62 [ 5714 / 22306, 746 ins, 634 del, 4334 sub ] exp/FR/tri3b/decode_dev_tgpr_sri/wer_18 +%WER 11.01 [ 1694 / 15387, 311 ins, 205 del, 1178 sub ] exp/GE/tri3b/decode_dev_tgpr_sri/wer_20 +%WER 32.48 [ 6159 / 18962, 556 ins, 1534 del, 4069 sub ] exp/RU/tri3b/decode_dev_tgpr_sri/wer_17 + +%WER 23.82 [ 5169 / 21700, 685 ins, 478 del, 4006 sub ] exp/FR/tri3b/decode_eval_tgpr_sri/wer_17 +%WER 17.72 [ 2119 / 11959, 329 ins, 248 del, 1542 sub ] exp/GE/tri3b/decode_eval_tgpr_sri/wer_18 +%WER 31.24 [ 5706 / 18266, 657 ins, 1046 del, 4003 sub ] exp/RU/tri3b/decode_eval_tgpr_sri/wer_16 + +# Some "SGMM2" experiments. +%WER 24.76 [ 5524 / 22306, 716 ins, 623 del, 4185 sub ] exp/FR/sgmm2_4a/decode_dev_tgpr_sri/wer_12 +%WER 9.61 [ 1478 / 15387, 253 ins, 174 del, 1051 sub ] exp/GE/sgmm2_4a/decode_dev_tgpr_sri/wer_13 +%WER 30.27 [ 5740 / 18962, 505 ins, 1301 del, 3934 sub ] exp/RU/sgmm2_4a/decode_dev_tgpr_sri/wer_12 + +%WER 22.88 [ 4965 / 21700, 675 ins, 430 del, 3860 sub ] exp/FR/sgmm2_4a/decode_eval_tgpr_sri/wer_11 +%WER 16.03 [ 1917 / 11959, 267 ins, 224 del, 1426 sub ] exp/GE/sgmm2_4a/decode_eval_tgpr_sri/wer_12 +%WER 29.06 [ 5309 / 18266, 494 ins, 1107 del, 3708 sub ] exp/RU/sgmm2_4a/decode_eval_tgpr_sri/wer_13 + +%WER 24.16 [ 5389 / 22306, 733 ins, 559 del, 4097 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it1/wer_12 +%WER 24.02 [ 5359 / 22306, 733 ins, 534 del, 4092 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it2/wer_12 +%WER 24.23 [ 5405 / 22306, 754 ins, 532 del, 4119 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it3/wer_12 +%WER 24.50 [ 5464 / 22306, 727 ins, 574 del, 4163 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it4/wer_13 +%WER 9.22 [ 1418 / 15387, 266 ins, 146 del, 1006 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it1/wer_12 +%WER 9.17 [ 1411 / 15387, 253 ins, 153 del, 1005 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it2/wer_13 +%WER 9.18 [ 1412 / 15387, 264 ins, 150 del, 998 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it3/wer_13 +%WER 9.31 [ 1432 / 15387, 271 ins, 150 del, 1011 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it4/wer_13 +%WER 29.96 [ 5681 / 18962, 465 ins, 1549 del, 3667 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it1/wer_11 +%WER 30.39 [ 5762 / 18962, 500 ins, 1669 del, 3593 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it2/wer_10 +%WER 31.00 [ 5879 / 18962, 420 ins, 1864 del, 3595 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it3/wer_11 +%WER 31.50 [ 5973 / 18962, 433 ins, 1926 del, 3614 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_dev_tgpr_sri_it4/wer_11 + +%WER 22.51 [ 4885 / 21700, 672 ins, 423 del, 3790 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it1/wer_12 +%WER 22.56 [ 4896 / 21700, 702 ins, 380 del, 3814 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it2/wer_11 +%WER 22.70 [ 4925 / 21700, 670 ins, 398 del, 3857 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it3/wer_12 +%WER 22.83 [ 4954 / 21700, 681 ins, 400 del, 3873 sub ] exp/FR/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it4/wer_12 +%WER 15.28 [ 1827 / 11959, 291 ins, 178 del, 1358 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it1/wer_11 +%WER 15.22 [ 1820 / 11959, 271 ins, 190 del, 1359 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it2/wer_12 +%WER 15.35 [ 1836 / 11959, 281 ins, 187 del, 1368 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it3/wer_12 +%WER 15.38 [ 1839 / 11959, 252 ins, 205 del, 1382 sub ] exp/GE/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it4/wer_13 +%WER 28.31 [ 5172 / 18266, 496 ins, 1127 del, 3549 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it1/wer_11 +%WER 28.64 [ 5232 / 18266, 446 ins, 1321 del, 3465 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it2/wer_11 +%WER 28.96 [ 5289 / 18266, 458 ins, 1334 del, 3497 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it3/wer_10 +%WER 29.55 [ 5398 / 18266, 421 ins, 1477 del, 3500 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it4/wer_11 From c6468a60c78306a2f4782758539791e0f9d0a56d Mon Sep 17 00:00:00 2001 From: BogdanvL Date: Wed, 30 Mar 2016 17:21:13 +0200 Subject: [PATCH 07/10] Previous results from Arnab Ghoshal were added --- egs/gp/s5/RESULTS | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/egs/gp/s5/RESULTS b/egs/gp/s5/RESULTS index 804f4ba01..c7f83351e 100644 --- a/egs/gp/s5/RESULTS +++ b/egs/gp/s5/RESULTS @@ -83,3 +83,26 @@ for x in exp/*/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* %WER 28.64 [ 5232 / 18266, 446 ins, 1321 del, 3465 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it2/wer_11 %WER 28.96 [ 5289 / 18266, 458 ins, 1334 del, 3497 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it3/wer_10 %WER 29.55 [ 5398 / 18266, 421 ins, 1477 del, 3500 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it4/wer_11 + + +# these RESULTS were obtained with Arnab Ghoshal version established in 2012. +$ for L in $GP_LANGUAGES; do grep WER exp/$L/mono/decode_dev_tgpr_sri/wer_* | ./utils/best_wer.sh ; done + +exp/CZ/mono/decode_dev_tgpr_sri/wer_9:%WER 35.13 [ 5820 / 16568, 486 ins, 1116 del, 4218 sub ] +exp/FR/mono/decode_dev_tgpr_sri/wer_9:%WER 45.69 [ 10192 / 22306, 533 ins, 2323 del, 7336 sub ] +exp/GE/mono/decode_dev_tgpr_sri/wer_11:%WER 27.48 [ 4228 / 15387, 278 ins, 974 del, 2976 sub ] +exp/PL/mono/decode_dev_tgpr_sri/wer_9:%WER 36.45 [ 6437 / 17660, 607 ins, 1228 del, 4602 sub ] +exp/PO/mono/decode_dev_tgpr_sri/wer_11:%WER 44.63 [ 5815 / 13030, 283 ins, 1861 del, 3671 sub ] +exp/RU/mono/decode_dev_tgpr_sri/wer_9:%WER 51.89 [ 9840 / 18962, 427 ins, 2457 del, 6956 sub ] +exp/SP/mono/decode_dev_tgpr_sri/wer_9:%WER 43.56 [ 8320 / 19098, 379 ins, 2581 del, 5360 sub ] +qghoshal@merlin:[~/globalphone/a1.1] + +$ for L in $GP_LANGUAGES; do grep WER exp/$L/tri1/decode_dev_tgpr_sri/wer_* | ./utils/best_wer.sh ; done +exp/CZ/tri1/decode_dev_tgpr_sri/wer_16:%WER 20.68 [ 3427 / 16568, 606 ins, 521 del, 2300 sub ] +exp/FR/tri1/decode_dev_tgpr_sri/wer_14:%WER 28.61 [ 6381 / 22306, 744 ins, 779 del, 4858 sub ] +exp/GE/tri1/decode_dev_tgpr_sri/wer_17:%WER 15.08 [ 2321 / 15387, 273 ins, 440 del, 1608 sub ] +exp/PL/tri1/decode_dev_tgpr_sri/wer_17:%WER 21.34 [ 3768 / 17660, 548 ins, 646 del, 2574 sub ] +exp/PO/tri1/decode_dev_tgpr_sri/wer_14:%WER 26.56 [ 3461 / 13030, 477 ins, 795 del, 2189 sub ] +exp/RU/tri1/decode_dev_tgpr_sri/wer_14:%WER 33.89 [ 6427 / 18962, 575 ins, 1612 del, 4240 sub ] +exp/SP/tri1/decode_dev_tgpr_sri/wer_16:%WER 26.73 [ 5105 / 19098, 551 ins, 1313 del, 3241 sub ] +qghoshal@merlin:[~/globalphone/a1.1] From dcbca115247b2f20c13d31ada4a527b55d0b10ad Mon Sep 17 00:00:00 2001 From: BogdanvL Date: Wed, 30 Mar 2016 17:21:56 +0200 Subject: [PATCH 08/10] Update RESULTS --- egs/gp/s5/RESULTS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/gp/s5/RESULTS b/egs/gp/s5/RESULTS index c7f83351e..297ef23d2 100644 --- a/egs/gp/s5/RESULTS +++ b/egs/gp/s5/RESULTS @@ -85,7 +85,7 @@ for x in exp/*/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* %WER 29.55 [ 5398 / 18266, 421 ins, 1477 del, 3500 sub ] exp/RU/sgmm2_4a_mmi_b0.1/decode_eval_tgpr_sri_it4/wer_11 -# these RESULTS were obtained with Arnab Ghoshal version established in 2012. +# these RESULTS were obtained with Arnab Ghoshal version of the script established in 2012. $ for L in $GP_LANGUAGES; do grep WER exp/$L/mono/decode_dev_tgpr_sri/wer_* | ./utils/best_wer.sh ; done exp/CZ/mono/decode_dev_tgpr_sri/wer_9:%WER 35.13 [ 5820 / 16568, 486 ins, 1116 del, 4218 sub ] From e82ca8596f93b11c061c7235b426eb3bca5ed41e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 31 Mar 2016 00:55:49 -0400 Subject: [PATCH 09/10] documentation change (regarding acknowledgements) --- src/doc/README | 2 +- src/doc/history.dox | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/doc/README b/src/doc/README index fc5763a5c..566f0d0bf 100644 --- a/src/doc/README +++ b/src/doc/README @@ -38,7 +38,7 @@ if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then scp html.tar.gz newrelay:/var/www/kaldi-asr echo 'cd /var/www/kaldi-asr/; rm -rf html doc.old; - tar -xzf html.tar.gz; mv doc2 doc.old; mv html doc2; rm -rf doc.old; rm html.tar.gz' \ + tar -xzf html.tar.gz; mv doc doc.old; mv html doc; rm -rf doc.old; rm html.tar.gz' \ | ssh newrelay bash fi diff --git a/src/doc/history.dox b/src/doc/history.dox index a3cb6d6fe..bf114a3a9 100644 --- a/src/doc/history.dox +++ b/src/doc/history.dox @@ -55,13 +55,13 @@ and documentation); we were visited by Michael Riley (who helped us to understand OpenFst and gave some lectures on FSTs), and would like to acknowledge the help of Honza Cernocky (for allowing us to have the workshop and helping to organize it), - Renata Kohlova (administration), and Tomas Kasparek (system administration). + Renata Kohlova (administration), and Tomas Kasparek (system administration). It is possible that this list of contributors contains oversights; any important omissions are unlikely to be intentional. A lot of code was written during the summer of 2010 but we still did not have a complete working system. Some of the participants of the 2010 workshop - continued working to complete the toolkit and get a working set of training scripts. + continued working to complete the toolkit and get a working set of training scripts. The code was released on May 14th, 2011. Since the initial release, Kaldi has been maintained and developed to a large @@ -95,9 +95,15 @@ for his help in organizing the JHU'09 workshop and with the Wall Street Journal recipe. We would also like to acknowledge the help of faculty and staff at Johns Hopkins University's Center for Language and - Speech Processing during the JHU'09 workshop: particularly + Speech Processing during the JHU'09 workshop: particularly Sanjeev Khudanpur, Desiree Cleves and the late Fred Jelinek. + Since 2012, Kaldi development has received significant support from IARPA's + BABEL program (IARPA-BAA-11-02) and from the Human Language Technology + Center of Excellence (HLTCOE); and since 2015, from the NSF computing + research infrastructure (CRI) award ``CI-EN: Enhancements for the Kaldi Speech + Recognition Toolkit''. + Sanjeev Khudanpur deserves special mention for creating the conditions for the Kaldi project to succeed, first at the JHU'09 workshop where in his role as workshop organizer he was instrumental in putting the team together From e14f1f7b72f2e5118bb501a41626b18183c90903 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 31 Mar 2016 00:56:24 -0400 Subject: [PATCH 10/10] various minor bug-fixes to bugs flagged by Coverity scan (thanks to Yenda and Vassil for finding them) --- src/matrix/kaldi-matrix.cc | 3 ++ src/nnet2bin/nnet1-to-raw-nnet.cc | 3 +- src/sgmm2/estimate-am-sgmm2-test.cc | 7 ++--- src/sgmm2/estimate-am-sgmm2.h | 47 ++++++++++++----------------- 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/matrix/kaldi-matrix.cc b/src/matrix/kaldi-matrix.cc index 7dfe161ca..3fa909f36 100644 --- a/src/matrix/kaldi-matrix.cc +++ b/src/matrix/kaldi-matrix.cc @@ -68,6 +68,9 @@ void MatrixBase::Invert(Real *log_det, Real *det_sign, if (log_det) *log_det = -std::numeric_limits::infinity(); if (det_sign) *det_sign = 0; delete[] pivot; +#ifndef HAVE_ATLAS + KALDI_MEMALIGN_FREE(p_work); +#endif return; } } diff --git a/src/nnet2bin/nnet1-to-raw-nnet.cc b/src/nnet2bin/nnet1-to-raw-nnet.cc index 964f2e570..54f04f520 100644 --- a/src/nnet2bin/nnet1-to-raw-nnet.cc +++ b/src/nnet2bin/nnet1-to-raw-nnet.cc @@ -166,8 +166,7 @@ nnet2::Nnet *ConvertNnet1ToNnet2(const nnet1::Nnet &nnet1, nnet2::Nnet *res = new nnet2::Nnet(); res->Init(components); - // not de-allocate the memory for components - // since the nnet takes the ownership + delete components; return res; } diff --git a/src/sgmm2/estimate-am-sgmm2-test.cc b/src/sgmm2/estimate-am-sgmm2-test.cc index 9093c1108..bfdb161d9 100644 --- a/src/sgmm2/estimate-am-sgmm2-test.cc +++ b/src/sgmm2/estimate-am-sgmm2-test.cc @@ -58,7 +58,6 @@ void TestSgmm2AccsIO(const AmSgmm2 &sgmm, accs.CommitStatsForSpk(sgmm, empty); kaldi::MleAmSgmm2Options update_opts; - update_opts.check_v = (Rand()%2 == 0); AmSgmm2 *sgmm1 = new AmSgmm2(); sgmm1->CopyFromSgmm2(sgmm, false, false); kaldi::MleAmSgmm2Updater updater(update_opts); @@ -66,7 +65,7 @@ void TestSgmm2AccsIO(const AmSgmm2 &sgmm, sgmm1->ComputeDerivedVars(); std::vector gselect; Sgmm2LikelihoodCache like_cache(sgmm.NumGroups(), sgmm.NumPdfs()); - + sgmm1->GaussianSelection(sgmm_config, feats.Row(0), &gselect); sgmm1->ComputePerFrameVars(feats.Row(0), gselect, empty, &frame_vars); BaseFloat loglike1 = sgmm1->LogLikelihood(frame_vars, 0, &like_cache, &empty); @@ -119,7 +118,7 @@ void TestSgmm2AccsIO(const AmSgmm2 &sgmm, delete accs2; delete sgmm2; delete sgmm3; - + unlink("tmpf"); unlink("tmpfb"); } @@ -136,7 +135,7 @@ void UnitTestEstimateSgmm2() { pdf2group.push_back(0); sgmm.InitializeFromFullGmm(full_gmm, pdf2group, dim+1, dim, false, 0.9); // TODO-- make this true! sgmm.ComputeNormalizers(); - + kaldi::Matrix feats; { // First, generate random means and variances diff --git a/src/sgmm2/estimate-am-sgmm2.h b/src/sgmm2/estimate-am-sgmm2.h index 928027d90..1e2f0ed3d 100644 --- a/src/sgmm2/estimate-am-sgmm2.h +++ b/src/sgmm2/estimate-am-sgmm2.h @@ -48,19 +48,12 @@ struct MleAmSgmm2Options { /// Should probably be related to numerical properties of machine /// or BaseFloat type. BaseFloat max_cond; - /// Set check_v to true if you want to use the "checking" version of the update - /// for the v's, in which it checks the "real" objective function value and - /// backtracks if necessary; - bool check_v; bool renormalize_V; // Renormalize the phonetic space. bool renormalize_N; // Renormalize the speaker space. /// Number of iters when re-estimating weight projections "w". int weight_projections_iters; - /// The "sequential" weight update that checks each i in turn. - /// (if false, uses the "parallel" one). - bool use_sequential_weight_update; BaseFloat epsilon; ///< very small value used to prevent SVD crashing. BaseFloat max_impr_u; ///< max improvement per frame allowed in update of u. @@ -149,7 +142,7 @@ class MleAmSgmm2Accs { void Check(const AmSgmm2 &model, bool show_properties = true) const; /// Resizes the accumulators to the correct sizes given the model. The flags - /// argument controls which accumulators to resize. + /// argument controls which accumulators to resize. void ResizeAccumulators(const AmSgmm2 &model, SgmmUpdateFlagsType flags, bool have_spk_vecs); @@ -174,7 +167,7 @@ class MleAmSgmm2Accs { /// speaker's data. void CommitStatsForSpk(const AmSgmm2 &model, const Sgmm2PerSpkDerivedVars &spk_vars); - + /// Accessors void GetStateOccupancies(Vector *occs) const; int32 FeatureDim() const { return feature_dim_; } @@ -218,17 +211,17 @@ class MleAmSgmm2Accs { /// which is the same thing but for purposes of computing /// the speaker-vector v^{(s)}. Vector a_s_; - + /// the U_i quantities from the less-exact version of the SSGMM update for the /// speaker weight projections. Dimension is [I][T][T] std::vector > U_; - + /// Sub-state occupancies gamma_{jm}^{(c)} for each sub-state. In the /// SCTM version of the SGMM, for compactness we store two separate /// sets of gamma statistics, one to estimate the v_{jm} quantities /// and one to estimate the sub-state weights c_{jm}. std::vector< Vector > gamma_c_; - + /// gamma_{i}^{(s)}. Per-speaker counts for each Gaussian. Dimension is [I] /// Needed for stats R_. This can be viewed as a temporary variable; it /// does not form part of the stats that we eventually dump to disk. @@ -261,7 +254,7 @@ class MleAmSgmm2Updater { void Update(const MleAmSgmm2Accs &accs, AmSgmm2 *model, SgmmUpdateFlagsType flags); - + private: friend class UpdateWClass; friend class UpdatePhoneVectorsClass; @@ -279,7 +272,7 @@ class MleAmSgmm2Updater { friend class EbwAmSgmm2Updater; MleAmSgmm2Options options_; - + // Called from UpdatePhoneVectors; updates a subset of states // (relates to multi-threading). void UpdatePhoneVectorsInternal(const MleAmSgmm2Accs &accs, @@ -289,7 +282,7 @@ class MleAmSgmm2Updater { double *auxf_impr, int32 num_threads, int32 thread_id) const; - + double UpdatePhoneVectors(const MleAmSgmm2Accs &accs, const std::vector > &H, const std::vector > &log_a, @@ -303,7 +296,7 @@ class MleAmSgmm2Updater { void RenormalizeV(const MleAmSgmm2Accs &accs, AmSgmm2 *model, const Vector &gamma_i, const std::vector > &H); - + double UpdateN(const MleAmSgmm2Accs &accs, const Vector &gamma_i, AmSgmm2 *model); void RenormalizeN(const MleAmSgmm2Accs &accs, const Vector &gamma_i, @@ -330,15 +323,15 @@ class MleAmSgmm2Updater { Matrix *F_i, Matrix *g_i, double *tot_like, - int32 num_threads, + int32 num_threads, int32 thread_id); - + double UpdateSubstateWeights(const MleAmSgmm2Accs &accs, AmSgmm2 *model); static void ComputeLogA(const MleAmSgmm2Accs &accs, std::vector > *log_a); // [SSGMM] - + void ComputeMPrior(AmSgmm2 *model); // TODO(arnab): Maybe make this static? double MapUpdateM(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix > &Q, @@ -373,7 +366,7 @@ class MleSgmm2SpeakerAccs { int32 pdf_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk_vars); - + /// Accumulate statistics, given posteriors. Returns total /// count accumulated, which may differ from posteriors.Sum() /// due to randomized pruning. @@ -382,7 +375,7 @@ class MleSgmm2SpeakerAccs { const Matrix &posteriors, int32 pdf_index, Sgmm2PerSpkDerivedVars *spk_vars); - + /// Update speaker vector. If v_s was empty, will assume it started as zero /// and will resize it to the speaker-subspace size. void Update(const AmSgmm2 &model, @@ -390,7 +383,7 @@ class MleSgmm2SpeakerAccs { Vector *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out); - + private: // Update without speaker-dependent weights (vectors u_i), // i.e. not symmetric SGMM (SSGMM) @@ -402,8 +395,8 @@ class MleSgmm2SpeakerAccs { Vector *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out); - - + + /// Statistics for speaker adaptation (vectors), stored per-speaker. /// Per-speaker stats for vectors, y^{(s)}. Dimension [T]. Vector y_s_; @@ -416,7 +409,7 @@ class MleSgmm2SpeakerAccs { /// relates to the speaker subspace. /// Eq. (82): H_{i}^{spk} = N_{i}^T \Sigma_{i}^{-1} N_{i} std::vector< SpMatrix > H_spk_; - + /// N_i^T \Sigma_{i}^{-1}. Needed for y^{(s)} std::vector< Matrix > NtransSigmaInv_; @@ -444,13 +437,13 @@ class UpdateWClass: public MultiThreadable { F_i_.Resize(F_i->NumRows(), F_i->NumCols()); g_i_.Resize(g_i->NumRows(), g_i->NumCols()); } - + ~UpdateWClass() { F_i_ptr_->AddMat(1.0, F_i_, kNoTrans); g_i_ptr_->AddMat(1.0, g_i_, kNoTrans); *tot_like_ptr_ += tot_like_; } - + inline void operator() () { // Note: give them local copy of the sums we're computing, // which will be propagated to the total sums in the destructor.