trunk: various script changes to better support Mac OS X: avoiding the '-T' option to 'cp' and the '>/dev/stderr' syntax in awk (which is only supported by GNU awk), and providing an alternative to 'du -b' for getting file sizes that falls back to 'stat' on Mac.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4877 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2015-02-11 19:40:55 +00:00
Родитель 2884f22c48
Коммит 2bd491d16e
45 изменённых файлов: 122 добавлений и 71 удалений

Просмотреть файл

@ -38,7 +38,8 @@ echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
cp -rT data/lang $test
mkdir -p $test
cp -r data/lang/* $test
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt

Просмотреть файл

@ -47,8 +47,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -11,7 +11,8 @@ steps/train_raw_sat.sh --cmd "$train_cmd" \
mfccdir=mfcc
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
cp -rT data/$x data/$y
mkdir -p data/$y
cp data/$x/* data/$y || true
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;

Просмотреть файл

@ -8,7 +8,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause

Просмотреть файл

@ -38,7 +38,8 @@ echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
cp -rT data/lang $test
mkdir -p $test
cp -r data/lang/* $test
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt

Просмотреть файл

@ -9,7 +9,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause

Просмотреть файл

@ -8,7 +8,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause

Просмотреть файл

@ -8,7 +8,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause

Просмотреть файл

@ -8,7 +8,8 @@ mkdir -p data/lang_test_fsh
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test_fsh
mkdir -p data/lang_test_fsh
cp -r data/lang/* data/lang_test_fsh
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause

Просмотреть файл

@ -33,7 +33,10 @@ function check_and_download () {
echo "Downloading file '$fname' into '$dst_dir'..."
expect_size="${sizes["$fname"]}"
if [[ -s $dst_dir/$fname ]]; then
fsize=$(du -b $dst_dir/$fname | awk '{print $1}')
# In the following statement, the first version works on Linux, and the part
# after '||' works on Mac.
f=$dst_dir/$fname
fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
if [[ "$fsize" -eq "$expect_size" ]]; then
echo "'$fname' already exists and appears to be complete"
return 0
@ -45,7 +48,10 @@ function check_and_download () {
echo "Error while trying to download $fname!"
return 1
}
fsize=$(du -b $dst_dir/$fname | awk '{print $1}')
f=$dst_dir/$fname
# In the following statement, the first version works on Linux, and the part after '||'
# works on Mac.
fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
[[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; }
return 0
}

Просмотреть файл

@ -47,7 +47,8 @@ for lm_suffix in tgsmall tgmed; do
# tglarge is prepared by a separate command, called from run.sh; we don't
# want to compile G.fst for tglarge, as it takes a while.
test=${src_dir}_test_${lm_suffix}
cp -rT ${src_dir} $test
mkdir -p $test
cp -r ${src_dir}/* $test
gunzip -c $lm_dir/lm_${lm_suffix}.arpa.gz |\
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt || exit 1

Просмотреть файл

@ -54,8 +54,8 @@ if [ $stage -le 3 ]; then
fbank_conf=conf/fbank_40.conf
echo "--num-mel-bins=40" > $fbank_conf
for x in test_mar87 test_oct87 test_feb89 test_oct89 test_feb91 test_sep92 train; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "run.pl" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -62,8 +62,8 @@ if [ $stage -le 3 ]; then
fbank_conf=conf/fbank_40.conf
echo "--num-mel-bins=40" > $fbank_conf
for x in test_mar87 test_oct87 test_feb89 test_oct89 test_feb91 test_sep92 train; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "run.pl" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -10,8 +10,8 @@ tmpdir=data/local/tmp
. ./path.sh || exit 1; # for KALDI_ROOT
cp -rT data/lang data/lang_ug
mkdir -p data/lang_ug
cp -r data/lang/* data/lang_ug
rm -rf data/lang_ug/tmp
cat data/train/text | \

Просмотреть файл

@ -11,7 +11,8 @@ set -e
# train linear vtln
steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 \
data/train data/lang exp/tri2a exp/tri3d
cp -rT data/train data/train_vtln
mkdir -p data/train_vtln
cp data/train/* data/train_vtln || true
cp exp/tri3d/final.warp data/train_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
@ -19,7 +20,8 @@ steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3d/graph data/test exp/tri3d/decode
cp -rT data/test data/test_vtln
mkdir -p data/test_vtln
cp data/test/* data/test_vtln || true
cp exp/tri3d/decode/final.warp data/test_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/test_vtln exp/make_mfcc/test_vtln $featdir
steps/compute_cmvn_stats.sh data/test_vtln exp/make_mfcc/test_vtln $featdir

Просмотреть файл

@ -9,7 +9,8 @@ set -e
steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 \
data/train data/lang exp/tri2b exp/tri3e
cp -rT data/train data/train_vtln
mkdir -p data/train_vtln
cp data/train/* data/train_vtln || true
cp exp/tri3e/final.warp data/train_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
@ -17,7 +18,8 @@ steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3e/graph data/test exp/tri3e/decode
cp -rT data/test data/test_vtln
mkdir -p data/test_vtln
cp data/test/* data/test_vtln || true
cp exp/tri3e/decode/final.warp data/test_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/test_vtln exp/make_mfcc/test_vtln $featdir
steps/compute_cmvn_stats.sh data/test_vtln exp/make_mfcc/test_vtln $featdir

Просмотреть файл

@ -47,8 +47,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -72,8 +72,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -11,7 +11,8 @@ steps/train_raw_sat.sh --cmd "$train_cmd" \
mfccdir=mfcc
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
cp -rT data/$x data/$y
mkdir -p data/$y
cp data/$x/* data/$y || true
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;

Просмотреть файл

@ -49,8 +49,7 @@ idngram2lm -linear -idngram $lmdir/sprak.idngram -vocab \
test=data/lang_test_${lm_suffix}
mkdir -p $test
cp -rT data/lang $test
cp -r data/lang/* $test
cat $lmdir/sprak.arpa | \
utils/find_arpa_oovs.pl $test/words.txt > $lmdir/oovs_${lm_suffix}.txt

Просмотреть файл

@ -49,9 +49,9 @@ wait
test=data/lang_test_${lm_suffix}
mkdir -p $test
cp -rT $srcdir $test
mkdir -p $test
cp -r $srcdir/* $test
cat $lmdir/train${ngram}.arpa | \
utils/find_arpa_oovs.pl $test/words.txt > $lmdir/oovs_${lm_suffix}.txt

Просмотреть файл

@ -79,8 +79,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -30,7 +30,8 @@ steps/make_phone_graph.sh data/lang exp/tri3b_ali_all exp/tri4b_seg || exit 1;
mkdir -p data_reseg
for data in train eval2000; do
cp -rT data/${data} data_reseg/${data}_orig; rm -r data_reseg/${data}_orig/split*
mkdir -p data_reseg/${data}_orig
cp data/${data}/* data_reseg/${data}_orig || true
for f in text utt2spk spk2utt feats.scp cmvn.scp segments; do rm data_reseg/${data}_orig/$f; done
cat data_reseg/${data}_orig/wav.scp | awk '{print $1, $1;}' | \
tee data_reseg/${data}_orig/spk2utt > data_reseg/${data}_orig/utt2spk

Просмотреть файл

@ -12,7 +12,8 @@ logdet_scale=0.0
steps/train_lvtln.sh --cmd "$train_cmd" \
--logdet-scale $logdet_scale $num_leaves $num_gauss \
data/train_30k_nodup data/lang exp/tri2 exp/tri2c || exit 1
cp -rT data/train_30k_nodup data/train_30k_nodup_vtln || exit 1
mkdir -p data/train_30k_nodup_vtln
cp data/train_30k_nodup/* data/train_30k_nodup_vtln
cp exp/tri2c/final.warp data/train_30k_nodup_vtln/spk2warp || exit 1
steps/make_mfcc.sh --compress true --nj 20 --cmd "$train_cmd" data/train_30k_nodup_vtln exp/make_mfcc/train_30k_nodup_vtln ${featdir} || exit 1
steps/compute_cmvn_stats.sh data/train_30k_nodup_vtln exp/make_mfcc/train_30k_nodup_vtln ${featdir} || exit 1
@ -22,7 +23,8 @@ utils/mkgraph.sh data/lang_sw1_tg exp/tri2c exp/tri2c/graph_sw1_tg || exit 1
steps/decode_lvtln.sh --config conf/decode.config --nj 30 --cmd "$decode_cmd" --logdet-scale $logdet_scale \
exp/tri2c/graph_sw1_tg data/eval2000 exp/tri2c/decode_eval2000_sw1_tg || exit 1
cp -rT data/eval2000 data/eval2000_vtln || exit 1
mkdir -p data/eval2000_vtln
cp data/eval2000/* data/eval2000_vtln
cp exp/tri2c/decode_eval2000_sw1_tg/final.warp data/eval2000_vtln/spk2warp || exit 1
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/eval2000_vtln exp/make_mfcc/eval2000_vtln ${featdir} || exit 1
steps/compute_cmvn_stats.sh data/eval2000_vtln exp/make_mfcc/eval2000_vtln ${featdir} || exit 1
@ -30,7 +32,8 @@ utils/fix_data_dir.sh data/eval2000_vtln || exit 1 # remove segments with probl
steps/align_lvtln.sh --nj 30 --cmd "$train_cmd" --logdet-scale $logdet_scale \
data/train_100k_nodup data/lang exp/tri2c exp/tri2c_ali_100k_nodup || exit 1
cp -rT data/train_100k_nodup data/train_100k_nodup_vtln || exit 1
mkdir data/train_100k_nodup_vtln
cp data/train_100k_nodup/* data/train_100k_nodup_vtln
cp exp/tri2c_ali_100k_nodup/final.warp data/train_100k_nodup_vtln/spk2warp || exit 1
steps/train_lda_mllt.sh --cmd "$train_cmd" \

Просмотреть файл

@ -31,7 +31,8 @@ steps/make_phone_graph.sh data/lang exp/tri3_ali_train exp/tri4_seg || exit 1;
mkdir -p data_reseg
for data in train eval2000; do
cp -rT data/${data} data_reseg/${data}_orig;
mkdir -p data_reseg/${data}_orig
cp data/${data}/* data_reseg/${data}_orig
rm -r data_reseg/${data}_orig/split*
for f in text utt2spk spk2utt feats.scp cmvn.scp segments; do
rm data_reseg/${data}_orig/$f;

Просмотреть файл

@ -12,12 +12,22 @@
#export cuda_cmd=run.pl
#b) BUT cluster options
queue="all.q@@blade,all.q@@speech,all.q@dellgpu*,all.q@supergpu*"
export train_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,matylda5=0.5"
export decode_cmd="queue.pl -q $queue -l ram_free=3000M,mem_free=3000M,matylda5=0.1"
export mkgraph_cmd="queue.pl -q $queue -l ram_free=4G,mem_free=4G,matylda5=3"
export cuda_cmd="queue.pl -q long.q@pcspeech-gpu,long.q@dellgpu1,long.q@pcgpu*,long.q@supergpu1 -l gpu=1"
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
export train_cmd="queue.pl -l arch=*64*"
export decode_cmd="queue.pl -l arch=*64*,ram_free=3G,mem_free=3G"
export mkgraph_cmd="queue.pl -l arch=*64*,ram_free=4G,mem_free=4G"
export cuda_cmd="queue.pl -l gpu=1"
elif [[ $(hostname -f) == *.fit.vutbr.cz ]]; then
#b) BUT cluster options
queue="all.q@@blade,all.q@@speech,all.q@dellgpu*,all.q@supergpu*"
export train_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,matylda5=0.5"
export decode_cmd="queue.pl -q $queue -l ram_free=3000M,mem_free=3000M,matylda5=0.1"
export mkgraph_cmd="queue.pl -q $queue -l ram_free=4G,mem_free=4G,matylda5=3"
export cuda_cmd="queue.pl -q long.q@pcspeech-gpu,long.q@dellgpu1,long.q@pcgpu*,long.q@supergpu1 -l gpu=1"
else
echo "$0: you need to define options for your cluster."
exit 1;
fi
#c) run locally...
#export train_cmd=run.pl

Просмотреть файл

@ -47,7 +47,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r data/${x}_fbank
cp -r data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -78,7 +78,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r data/${x}_fbank
cp -r data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;

Просмотреть файл

@ -11,7 +11,8 @@ steps/train_raw_sat.sh --cmd "$train_cmd" \
mfccdir=mfcc
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
cp -rT data/$x data/$y
mkdir -p data/$y
cp -r data/$x/* data/$y
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;

Просмотреть файл

@ -6,24 +6,24 @@ num_leaves=2500
num_gauss=15000
if false; then #TEMP
# train linear vtln
steps/train_lvtln.sh --cmd "$train_cmd" $num_leaves $num_gauss \
data/train_si84 data/lang exp/tri2a exp/tri2c || exit 1
cp -rT data/train_si84 data/train_si84_vtln || exit 1
mkdir -p data/train_si84_vtln
cp -r data/train_si84/* data/train_si84_vtln || exit 1
cp exp/tri2c/final.warp data/train_si84_vtln/spk2warp || exit 1
utils/mkgraph.sh data/lang_test_bg_5k exp/tri2c exp/tri2c/graph_bg_5k || exit 1;
utils/mkgraph.sh data/lang_test_tgpr exp/tri2c exp/tri2c/graph_tgpr || exit 1;
fi #TEMP
for t in eval93 dev93 eval92; do
nj=10
[ $t == eval92 ] && nj=8
steps/decode_lvtln.sh --nj $nj --cmd "$decode_cmd" \
exp/tri2c/graph_bg_5k data/test_$t exp/tri2c/decode_${t}_bg_5k || exit 1
cp -rT data/test_$t data/test_${t}_vtln || exit 1
mkdir -p data/test_${t}_vtln
cp -r data/test_$t/* data/test_${t}_vtln || exit 1
cp exp/tri2c/decode_${t}_bg_5k/final.warp data/test_${t}_vtln/spk2warp || exit 1
done

Просмотреть файл

@ -8,7 +8,9 @@ num_gauss=15000
# train linear vtln
steps/train_lvtln.sh --cmd "$train_cmd" $num_leaves $num_gauss \
data/train_si84 data/lang exp/tri2b exp/tri2c || exit 1
cp -rT data/train_si84 data/train_si84_vtln || exit 1
mkdir -p data/train_si84_vtln
cp -r data/train_si84/* data/train_si84_vtln || exit 1
cp exp/tri2c/final.warp data/train_si84_vtln/spk2warp || exit 1
utils/mkgraph.sh data/lang_test_bg_5k exp/tri2c exp/tri2c/graph_bg_5k || exit 1;
@ -19,7 +21,8 @@ for t in eval93 dev93 eval92; do
[ $t == eval92 ] && nj=8
steps/decode_lvtln.sh --nj $nj --cmd "$decode_cmd" \
exp/tri2c/graph_bg_5k data/test_$t exp/tri2c/decode_${t}_bg_5k || exit 1
cp -rT data/test_$t data/test_${t}_vtln || exit 1
mkdir -p data/test_${t}_vtln
cp -r data/test_$t/* data/test_${t}_vtln || exit 1
cp exp/tri2c/decode_${t}_bg_5k/final.warp data/test_${t}_vtln/spk2warp || exit 1
done

Просмотреть файл

@ -39,7 +39,8 @@ echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
cp -rT data/lang $test || exit 1;
mkdir -p $test
cp -r data/lang/* $test || exit 1;
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt

Просмотреть файл

@ -318,7 +318,8 @@ steps/train_quick.sh --cmd "$train_cmd" \
( # run decoding with larger dictionary and pron-probs. Need to get dict with
# pron-probs first. [This seems to help by about 0.1% absolute in general.]
cp -rT data/local/dict_larger data/local/dict_larger_pp
mkdir -p data/local/dict_larger_pp
cp -r data/local/dict_larger/* data/local/dict_larger_pp
rm -r data/local/dict_larger_pp/{b,f,*.gz,lexicon.txt}
steps/get_lexicon_probs.sh data/train_si284 data/lang exp/tri4b data/local/dict_larger/lexicon.txt \
exp/tri4b_lexprobs data/local/dict_larger_pp/lexiconp.txt || exit 1;
@ -326,7 +327,8 @@ steps/train_quick.sh --cmd "$train_cmd" \
data/local/dict_larger_pp "<SPOKEN_NOISE>" data/dict_larger/tmp data/lang_bd_pp
cmp data/lang_bd/words.txt data/lang_bd_pp/words.txt || exit 1;
for suffix in tg tgpr fg; do
cp -rT data/lang_bd_pp data/lang_test_bd_pp_${suffix}
mkdir -p data/lang_test_bd_pp_${suffix}
cp -r data/lang_bd_pp/* data/lang_test_bd_pp_${suffix}
cp data/lang_test_bd_${suffix}/G.fst data/lang_test_bd_pp_${suffix}/G.fst || exit 1;
done
utils/mkgraph.sh data/lang_test_bd_pp_tgpr exp/tri4b exp/tri4b/graph_bd_pp_tgpr || exit 1;

Просмотреть файл

@ -70,7 +70,8 @@ $cmd JOB=1:$nj $graph_dir/log/make_transcript_graph.JOB.log \
$model_dir $graph_dir/split$nj/JOB || exit 1;
# Copies files from lang directory.
cp -rT $lang $graph_dir
mkdir -p $graph_dir
cp -r $lang/* $graph_dir
am-info --print-args=false $model_dir/final.mdl |\
grep pdfs | awk '{print $NF}' > $graph_dir/num_pdfs

Просмотреть файл

@ -106,7 +106,8 @@ while read line; do
done < $text
# Copies files from lang directory.
cp -rT $lang $graph_dir
mkdir -p $graph_dir
cp -r $lang/* $graph_dir
am-info --print-args=false $model_dir/final.mdl |\
grep pdfs | awk '{print $NF}' > $graph_dir/num_pdfs

Просмотреть файл

@ -56,7 +56,8 @@ num_jobs_align=$(cat $alidir/num_jobs) || exit 1;
mkdir -p $dir/log
cp -rT $egs_in_dir/info $dir/info
mkdir -p $dir/info
cp -r $egs_in_dir/info/* $dir/info
alignments=`eval echo $alidir/ali.{$(seq -s ',' $num_jobs_align)}.gz`

Просмотреть файл

@ -116,7 +116,8 @@ fi
if [ $stage -le 3 ]; then
echo "$0: combining activations across jobs"
cp -rT $data $dir/data
mkdir -p $dir/data
cp -r $data/* $dir/data
for j in $(seq $nj); do cat $dir/feats/feats.$j.scp; done >$dir/data/feats.scp || exit 1;
fi

Просмотреть файл

@ -71,7 +71,8 @@ cp $nnet_src/tree $dir/ || exit 1;
# There are a bunch of files that we will need to copy from $online_src, because
# we're aiming to have one self-contained directory that has everything in it.
cp -rT $online_src/ivector_extractor/ $dir/ivector_extractor
mkdir -p $dir/ivector_extractor
cp -r $online_src/ivector_extractor/* $dir/ivector_extractor
[ ! -d $online_src/conf ] && \
echo "Expected directory $online_src/conf to exist" && exit 1;

Просмотреть файл

@ -52,7 +52,8 @@ cp $nnet_src/$iter.mdl $dir/ || exit 1;
# There are a bunch of files that we will need to copy from $online_src, because
# we're aiming to have one self-contained directory that has everything in it.
cp -rT $online_src/ivector_extractor/ $dir/ivector_extractor
mkdir -p $dir/ivector_extractor
cp -r $online_src/ivector_extractor/* $dir/ivector_extractor
[ ! -d $online_src/conf ] && \
echo "Expected directory $online_src/conf to exist" && exit 1;

Просмотреть файл

@ -85,7 +85,7 @@ if [ $stage -le 1 ]; then
# look for lines like: LOG (blah:blah.cc:95) Overall density is 153.3 over 164361 frames
grep -w Overall $prunedir/log/lattice_depth.*.log | \
awk -v nj=$nj '{num+=$6*$8; den+=$8; nl++} END{
if (nl != nj) { print "Error: expected " nj " lines, got " nl >"/dev/stderr"; }
if (nl != nj) { print "Error: expected " nj " lines, got " nl | "cat 1>&2"; }
printf("%.2f ( %d / %d )\n", num/den, num, den); }' > $prunedir/depth || exit 1;
echo -n "Depth is: "
cat $prunedir/depth
@ -103,7 +103,7 @@ if [ $stage -le 2 ]; then
# look for lines like: LOG (blah:blah.cc:95) Overall %WER 25.6 [ 1243 / 6331, ... ]
grep -w Overall $prunedir/log/lattice_oracle.*.log | \
awk -v nj=$nj '{num+=$7; den+=$9; ins+=$10; del+=$12; sb+=$14; nl++} END{
if (nl != nj) { print "Error: expected " nj " lines, got " nl >"/dev/stderr"; }
if (nl != nj) { print "Error: expected " nj " lines, got " nl | "cat 1>&2"; }
printf("%.2f%% [ %d / %d, %d insertions, %d deletions, %d substitutions ]\n", (100.0 * num/den), num, den, ins, del, sb); }' > \
$prunedir/oracle_wer || exit 1;
echo -n "Oracle WER is: "

Просмотреть файл

@ -30,7 +30,8 @@ new_lang=$3
mkdir -p $new_lang
cp -rT $old_lang $new_lang
mkdir -p $new_lang
cp -r $old_lang/* $new_lang
unk=`cat $new_lang/oov.int`

Просмотреть файл

@ -81,7 +81,8 @@ echo "Converting '$lm' to FST"
tmpdir=$(mktemp -d kaldi.XXXX);
trap 'rm -rf "$tmpdir"' EXIT
cp -rT $lang_dir $out_dir || exit 1;
mkdir -p $out_dir
cp -r $lang_dir/* $out_dir || exit 1;
lm_base=$(basename $lm '.gz')
gunzip -c $lm | utils/find_arpa_oovs.pl $out_dir/words.txt \

Просмотреть файл

@ -242,11 +242,11 @@ cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
}
{
if ($1 == "<s>") {
print "<s> is in the vocabulary!" > "/dev/stderr"
print "<s> is in the vocabulary!" | "cat 1>&2"
exit 1;
}
if ($1 == "</s>") {
print "</s> is in the vocabulary!" > "/dev/stderr"
print "</s> is in the vocabulary!" | "cat 1>&2"
exit 1;
}
printf("%s %d\n", $1, NR);

Просмотреть файл

@ -53,7 +53,7 @@ cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \
if (invoc[w]) { printf("%s ",w); } else {
printf("<RNN_UNK> ");
if (unkprob[w] != 0) { logprob += log(unkprob[w]); }
else { print "Warning: unknown word ", w >"/dev/stderr"; logprob += log(1.0e-07); }}}
else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}}
printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk
# OK, now we compute the scores on the text with OOVs replaced

Просмотреть файл

@ -778,7 +778,7 @@ if (-e "$lang/G.fst") {
# Check that G.fst does not have cycles with only disambiguation symbols or
# epsilons on the input, or the forbidden symbols <s> and </s>.
$cmd = ". ./path.sh; fstprint $lang/G.fst | awk -v disambig=$lang/phones/disambig.int -v words=$lang/words.txt 'BEGIN{while((getline<disambig)>0) is_disambig[$1]=1; is_disambig[0] = 1; while((getline<words)>0){ if(\$1==\"<s>\"||\$1==\"</s>\") is_forbidden[\$2]=1;}} {if(NF<3 || is_disambig[\$3]) print; else if(is_forbidden[\$3] || is_forbidden[\$4]) { print \"Error: line \" \$0 \" in G.fst contains forbidden symbol <s> or </s>\" >/dev/stderr; exit(1); }}' | fstcompile | fstinfo ";
$cmd = ". ./path.sh; fstprint $lang/G.fst | awk -v disambig=$lang/phones/disambig.int -v words=$lang/words.txt 'BEGIN{while((getline<disambig)>0) is_disambig[$1]=1; is_disambig[0] = 1; while((getline<words)>0){ if(\$1==\"<s>\"||\$1==\"</s>\") is_forbidden[\$2]=1;}} {if(NF<3 || is_disambig[\$3]) print; else if(is_forbidden[\$3] || is_forbidden[\$4]) { print \"Error: line \" \$0 \" in G.fst contains forbidden symbol <s> or </s>\" | \"cat 1>&2\"; exit(1); }}' | fstcompile | fstinfo ";
$output = `$cmd`;
if ($output !~ m/# of states\s+[1-9]/) { # fstinfo did not read a nonempty FST (there should be final probs at least)...
print "--> ERROR: failure running command to check for disambig-sym loops [possibly G.fst " .