зеркало из https://github.com/mozilla/kaldi.git
Remove attempts to install a rnnlm from all recipes
This commit is contained in:
Родитель
db871081cf
Коммит
8f93c2e9ae
|
@ -164,37 +164,6 @@
|
||||||
%WER 17.16 [ 8982 / 52343, 855 ins, 1421 del, 6706 sub ] exp/nnet2_online/nnet_a_online/decode_test_other_tgmed/wer_12
|
%WER 17.16 [ 8982 / 52343, 855 ins, 1421 del, 6706 sub ] exp/nnet2_online/nnet_a_online/decode_test_other_tgmed/wer_12
|
||||||
%WER 18.90 [ 9891 / 52343, 798 ins, 1786 del, 7307 sub ] exp/nnet2_online/nnet_a_online/decode_test_other_tgsmall/wer_13
|
%WER 18.90 [ 9891 / 52343, 798 ins, 1786 del, 7307 sub ] exp/nnet2_online/nnet_a_online/decode_test_other_tgsmall/wer_13
|
||||||
|
|
||||||
# RNNLM rescoring of tri6b (rnnlm-hs-0.1b hidden=150 direct=4.0Gb)
|
|
||||||
%WER 7.39 [ 4023 / 54402, 540 ins, 444 del, 3039 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_13_1.0
|
|
||||||
%WER 7.06 [ 3839 / 54402, 547 ins, 401 del, 2891 sub ] exp/tri6b/decode_tglarge_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.25/wer_13_1.0
|
|
||||||
%WER 7.14 [ 3883 / 54402, 579 ins, 364 del, 2940 sub ] exp/tri6b/decode_tglarge_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.5/wer_13_1.0
|
|
||||||
%WER 7.54 [ 4103 / 54402, 651 ins, 343 del, 3109 sub ] exp/tri6b/decode_tglarge_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.75/wer_12_1.0
|
|
||||||
|
|
||||||
%WER 21.31 [ 10858 / 50948, 1525 ins, 1151 del, 8182 sub ] exp/tri6b/decode_tglarge_dev_other/wer_17_0.0
|
|
||||||
%WER 20.77 [ 10583 / 50948, 1387 ins, 1222 del, 7974 sub ] exp/tri6b/decode_tglarge_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.25/wer_17_0.5
|
|
||||||
%WER 20.88 [ 10638 / 50948, 1456 ins, 1148 del, 8034 sub ] exp/tri6b/decode_tglarge_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.5/wer_16_0.5
|
|
||||||
%WER 21.37 [ 10890 / 50948, 1518 ins, 1126 del, 8246 sub ] exp/tri6b/decode_tglarge_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.75/wer_17_0.5
|
|
||||||
|
|
||||||
%WER 9.21 [ 5012 / 54402, 703 ins, 510 del, 3799 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_14_0.0
|
|
||||||
%WER 7.97 [ 4334 / 54402, 579 ins, 465 del, 3290 sub ] exp/tri6b/decode_tgmed_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.25/wer_14_0.5
|
|
||||||
%WER 7.77 [ 4226 / 54402, 615 ins, 394 del, 3217 sub ] exp/tri6b/decode_tgmed_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.5/wer_14_0.5
|
|
||||||
%WER 7.97 [ 4338 / 54402, 614 ins, 420 del, 3304 sub ] exp/tri6b/decode_tgmed_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.75/wer_13_1.0
|
|
||||||
|
|
||||||
%WER 24.27 [ 12365 / 50948, 1365 ins, 1591 del, 9409 sub ] exp/tri6b/decode_tgmed_dev_other/wer_17_0.0
|
|
||||||
%WER 22.76 [ 11596 / 50948, 1557 ins, 1239 del, 8800 sub ] exp/tri6b/decode_tgmed_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.25/wer_15_0.0
|
|
||||||
%WER 22.50 [ 11465 / 50948, 1586 ins, 1155 del, 8724 sub ] exp/tri6b/decode_tgmed_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.5/wer_15_0.0
|
|
||||||
%WER 22.56 [ 11495 / 50948, 1597 ins, 1143 del, 8755 sub ] exp/tri6b/decode_tgmed_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.75/wer_17_0.0
|
|
||||||
|
|
||||||
%WER 10.50 [ 5711 / 54402, 693 ins, 674 del, 4344 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_15_0.0
|
|
||||||
%WER 8.60 [ 4680 / 54402, 700 ins, 445 del, 3535 sub ] exp/tri6b/decode_tgsmall_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.25/wer_14_0.0
|
|
||||||
%WER 8.17 [ 4447 / 54402, 723 ins, 352 del, 3372 sub ] exp/tri6b/decode_tgsmall_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.5/wer_13_0.0
|
|
||||||
%WER 8.34 [ 4536 / 54402, 732 ins, 354 del, 3450 sub ] exp/tri6b/decode_tgsmall_dev_clean_rnnlm-hs-0.1b_h150-me5-1000_L0.75/wer_14_0.0
|
|
||||||
|
|
||||||
%WER 26.22 [ 13358 / 50948, 1330 ins, 1955 del, 10073 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_17_0.0
|
|
||||||
%WER 24.06 [ 12258 / 50948, 1526 ins, 1394 del, 9338 sub ] exp/tri6b/decode_tgsmall_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.25/wer_15_0.0
|
|
||||||
%WER 23.53 [ 11988 / 50948, 1585 ins, 1267 del, 9136 sub ] exp/tri6b/decode_tgsmall_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.5/wer_15_0.0
|
|
||||||
%WER 23.61 [ 12030 / 50948, 1571 ins, 1268 del, 9191 sub ] exp/tri6b/decode_tgsmall_dev_other_rnnlm-hs-0.1b_h150-me5-1000_L0.75/wer_18_0.0
|
|
||||||
|
|
||||||
# RNNLM rescoring of tri6b (faster-rnnlm hidden=150 direct=4.0Gb, Hierarchical Softmax)
|
# RNNLM rescoring of tri6b (faster-rnnlm hidden=150 direct=4.0Gb, Hierarchical Softmax)
|
||||||
%WER 7.39 [ 4023 / 54402, 540 ins, 444 del, 3039 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_13_1.0
|
%WER 7.39 [ 4023 / 54402, 540 ins, 444 del, 3039 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_13_1.0
|
||||||
%WER 7.03 [ 3823 / 54402, 608 ins, 343 del, 2872 sub ] exp/tri6b/decode_tglarge_dev_clean_faster-rnnlm_h150-me5-1000_L0.25/wer_13_0.5
|
%WER 7.03 [ 3823 / 54402, 608 ins, 343 del, 2872 sub ] exp/tri6b/decode_tglarge_dev_clean_faster-rnnlm_h150-me5-1000_L0.25/wer_13_0.5
|
||||||
|
|
|
@ -56,19 +56,8 @@ fi
|
||||||
|
|
||||||
if [ $stage -le 2 ]; then
|
if [ $stage -le 2 ]; then
|
||||||
echo "$0: Training RNNLM. It will probably take several hours."
|
echo "$0: Training RNNLM. It will probably take several hours."
|
||||||
|
$KALDI_ROOT/tools/extras/check_for_rnnlm.sh "$rnnlm_ver" || exit 1
|
||||||
rnnlm_path="$(readlink -f $KALDI_ROOT)/tools/$rnnlm_ver/rnnlm"
|
rnnlm_path="$(readlink -f $KALDI_ROOT)/tools/$rnnlm_ver/rnnlm"
|
||||||
if [ -f "$rnnlm_path" ]; then
|
|
||||||
echo "$0: Using binary $rnnlm_path"
|
|
||||||
else
|
|
||||||
if [ $rnnlm_ver == "rnnlm-hs-0.1b" ]; then
|
|
||||||
echo "$0: ERROR RNNLM-HS is not installed. Use extras/install_rnnlm_hs.sh to install it"
|
|
||||||
elif [ $rnnlm_ver == "faster-rnnlm" ]; then
|
|
||||||
echo "$0: ERROR Faster RNNLM is not installed. Use extras/install_faster_rnnlm.sh to install it"
|
|
||||||
else
|
|
||||||
echo "$0: ERROR Cannot find $rnnlm_path"
|
|
||||||
fi
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
cd $s5_dir
|
cd $s5_dir
|
||||||
mkdir -p $modeldir
|
mkdir -p $modeldir
|
||||||
echo "$0: Model file: $modeldir/rnnlm"
|
echo "$0: Model file: $modeldir/rnnlm"
|
||||||
|
|
|
@ -330,16 +330,8 @@ steps/train_quick.sh --cmd "$train_cmd" \
|
||||||
# steps/cleanup/debug_lexicon.sh --remove-stress true --nj 200 --cmd "$train_cmd" data/train_clean_100 \
|
# steps/cleanup/debug_lexicon.sh --remove-stress true --nj 200 --cmd "$train_cmd" data/train_clean_100 \
|
||||||
# data/lang exp/tri6b data/local/dict/lexicon.txt exp/debug_lexicon_100h
|
# data/lang exp/tri6b data/local/dict/lexicon.txt exp/debug_lexicon_100h
|
||||||
|
|
||||||
# #Perform rescoring of tri6b be means of rnnlm-hs
|
|
||||||
# #Attention: with default settings requires 4 GB of memory per rescoring job, so commenting this out by default
|
|
||||||
# wait && local/run_rnnlm.sh \
|
|
||||||
# --rnnlm-ver "rnnlm-hs-0.1b" \
|
|
||||||
# --rnnlm-options "-hidden 150 -direct 1000 -direct-order 5" \
|
|
||||||
# --rnnlm_tag "h150-me5-1000" $data data/local/lm
|
|
||||||
|
|
||||||
# #Perform rescoring of tri6b be means of faster-rnnlm
|
# #Perform rescoring of tri6b be means of faster-rnnlm
|
||||||
# #Results are identical to results obtained by rnnlm-hs (or better)
|
# #Attention: with default settings requires 4 GB of memory per rescoring job, so commenting this out by default
|
||||||
# #Hovewer, faster-rnnlm training is several times faster
|
|
||||||
# wait && local/run_rnnlm.sh \
|
# wait && local/run_rnnlm.sh \
|
||||||
# --rnnlm-ver "faster-rnnlm" \
|
# --rnnlm-ver "faster-rnnlm" \
|
||||||
# --rnnlm-options "-hidden 150 -direct 1000 -direct-order 5" \
|
# --rnnlm-options "-hidden 150 -direct 1000 -direct-order 5" \
|
||||||
|
|
|
@ -28,32 +28,10 @@ devtext=$2
|
||||||
dir=$3
|
dir=$3
|
||||||
mkdir -p $dir
|
mkdir -p $dir
|
||||||
|
|
||||||
|
$KALDI_ROOT/tools/extras/check_for_rnnlm.sh "$rnnlm_ver" || exit 1
|
||||||
export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH
|
export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH
|
||||||
|
|
||||||
|
|
||||||
( # First make sure the kaldi_lm toolkit is installed.
|
|
||||||
# Note: this didn't work out of the box for me, I had to
|
|
||||||
# change the g++ version to just "g++" (no cross-compilation
|
|
||||||
# needed for me as I ran on a machine that had been setup
|
|
||||||
# as 64 bit by default.
|
|
||||||
cd $KALDI_ROOT/tools || exit 1;
|
|
||||||
if [ -d $rnnlm_ver ]; then
|
|
||||||
echo Not installing the rnnlm toolkit since it is already there.
|
|
||||||
else
|
|
||||||
echo Downloading and installing the rnnlm tools
|
|
||||||
# http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz
|
|
||||||
if [ ! -f $rnnlm_ver.tgz ]; then
|
|
||||||
wget http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz || exit 1;
|
|
||||||
fi
|
|
||||||
mkdir $rnnlm_ver
|
|
||||||
cd $rnnlm_ver
|
|
||||||
tar -xvzf ../$rnnlm_ver.tgz || exit 1;
|
|
||||||
make CC=g++ || exit 1;
|
|
||||||
echo Done making the rnnlm tools
|
|
||||||
fi
|
|
||||||
) || exit 1;
|
|
||||||
|
|
||||||
|
|
||||||
if [ ! -f $srcdir/transcripts.uniq ] || [ ! -f $srcdir/lexicon.txt ]; then
|
if [ ! -f $srcdir/transcripts.uniq ] || [ ! -f $srcdir/lexicon.txt ]; then
|
||||||
echo "Expecting $srcdir/transcripts.uniq and $srcdir/lexicon.txt to exist";
|
echo "Expecting $srcdir/transcripts.uniq and $srcdir/lexicon.txt to exist";
|
||||||
exit 1;
|
exit 1;
|
||||||
|
|
|
@ -7,42 +7,42 @@ echo "$0 $@" # Print the command line for logging
|
||||||
|
|
||||||
. cmd.sh
|
. cmd.sh
|
||||||
# This step interpolates a small RNNLM (with weight 0.15) with the 4-gram LM.
|
# This step interpolates a small RNNLM (with weight 0.15) with the 4-gram LM.
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.15 data/lang${lang_suffix}_test_bd_fg \
|
0.15 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h30.voc10k data/test_eval92 \
|
data/local/rnnlm-hs.h30.voc10k data/test_eval92 \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs30_0.15 || exit 1;
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs30_0.15 || exit 1;
|
||||||
|
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.3 data/lang${lang_suffix}_test_bd_fg \
|
0.3 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h100.voc20k data/test_eval92 \
|
data/local/rnnlm-hs.h100.voc20k data/test_eval92 \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs100_0.3 || exit 1;
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs100_0.3 || exit 1;
|
||||||
|
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.3 data/lang${lang_suffix}_test_bd_fg \
|
0.3 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h300.voc30k data/test_eval92 \
|
data/local/rnnlm-hs.h300.voc30k data/test_eval92 \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs300_0.3 || exit 1;
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs300_0.3 || exit 1;
|
||||||
|
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.3 data/lang${lang_suffix}_test_bd_fg \
|
0.3 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3 || exit 1;
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3 || exit 1;
|
||||||
|
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.3 data/lang${lang_suffix}_test_bd_fg \
|
0.3 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000
|
||||||
|
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.3 data/lang${lang_suffix}_test_bd_fg \
|
0.3 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -53,7 +53,7 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.4_N1000
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.4_N1000
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.4 data/lang${lang_suffix}_test_bd_fg \
|
0.4 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -62,7 +62,7 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.4
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.4
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.4 data/lang${lang_suffix}_test_bd_fg \
|
0.4 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -71,13 +71,13 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.15
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.15
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.15 data/lang${lang_suffix}_test_bd_fg \
|
0.15 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg $dir
|
exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg $dir
|
||||||
|
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--N 10 --cmd "$decode_cmd" --inv-acwt 17 \
|
--N 10 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.3 data/lang${lang_suffix}_test_bd_fg \
|
0.3 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -88,7 +88,7 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.4_N1000
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.4_N1000
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.4 data/lang${lang_suffix}_test_bd_fg \
|
0.4 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -97,7 +97,7 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.15_N1000
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.15_N1000
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.15 data/lang${lang_suffix}_test_bd_fg \
|
0.15 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -106,7 +106,7 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.5_N1000
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.5_N1000
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.5 data/lang${lang_suffix}_test_bd_fg \
|
0.5 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
@ -115,7 +115,7 @@ steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
||||||
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.75_N1000
|
dir=exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.75_N1000
|
||||||
rm -rf $dir
|
rm -rf $dir
|
||||||
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
cp -r exp/tri3b/decode${lang_suffix}_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000 $dir
|
||||||
steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b \
|
steps/rnnlmrescore.sh --rnnlm_ver faster-rnnlm \
|
||||||
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
--stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
|
||||||
0.75 data/lang${lang_suffix}_test_bd_fg \
|
0.75 data/lang${lang_suffix}_test_bd_fg \
|
||||||
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
data/local/rnnlm-hs.h400.voc40k data/test_eval92 \
|
||||||
|
|
|
@ -38,39 +38,10 @@ dir=$1
|
||||||
srcdir=data/local/dict${dict_suffix}_larger
|
srcdir=data/local/dict${dict_suffix}_larger
|
||||||
mkdir -p $dir
|
mkdir -p $dir
|
||||||
|
|
||||||
|
$KALDI_ROOT/tools/extras/check_for_rnnlm.sh "$rnnlm_ver" || exit 1
|
||||||
export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH
|
export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH
|
||||||
|
|
||||||
|
|
||||||
( # First make sure the kaldi_lm toolkit is installed.
|
|
||||||
# Note: this didn't work out of the box for me, I had to
|
|
||||||
# change the g++ version to just "g++" (no cross-compilation
|
|
||||||
# needed for me as I ran on a machine that had been setup
|
|
||||||
# as 64 bit by default.
|
|
||||||
cd $KALDI_ROOT/tools || exit 1;
|
|
||||||
if [ -f $rnnlm_ver/rnnlm ]; then
|
|
||||||
echo Not installing the rnnlm toolkit since it is already there.
|
|
||||||
else
|
|
||||||
if [ $rnnlm_ver == "rnnlm-hs-0.1b" ]; then
|
|
||||||
extras/install_rnnlm_hs.sh
|
|
||||||
elif [ $rnnlm_ver == "faster-rnnlm" ]; then
|
|
||||||
echo "ERROR Faster RNNLM is not installed. Use extras/install_faster_rnnlm.sh to install it"
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
echo Downloading and installing the rnnlm tools
|
|
||||||
# http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz
|
|
||||||
if [ ! -f $rnnlm_ver.tgz ]; then
|
|
||||||
wget http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz || exit 1;
|
|
||||||
fi
|
|
||||||
mkdir $rnnlm_ver
|
|
||||||
cd $rnnlm_ver
|
|
||||||
tar -xvzf ../$rnnlm_ver.tgz || exit 1;
|
|
||||||
make CC=g++ || exit 1;
|
|
||||||
echo Done making the rnnlm tools
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
) || exit 1;
|
|
||||||
|
|
||||||
|
|
||||||
if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then
|
if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then
|
||||||
echo "Expecting files $srcdir/cleaned.gz and $srcdir/wordlist.final to exist";
|
echo "Expecting files $srcdir/cleaned.gz and $srcdir/wordlist.final to exist";
|
||||||
echo "You need to run local/wsj_extend_dict.sh before running this script."
|
echo "You need to run local/wsj_extend_dict.sh before running this script."
|
||||||
|
|
|
@ -80,19 +80,19 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
|
||||||
(
|
(
|
||||||
num_threads_rnnlm=8
|
num_threads_rnnlm=8
|
||||||
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
||||||
--rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
|
--rnnlm_ver faster-rnnlm --threads $num_threads_rnnlm \
|
||||||
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
||||||
--hidden 30 --nwords 10000 --direct 1000 data/local/rnnlm-hs.h30.voc10k
|
--hidden 30 --nwords 10000 --direct 1000 data/local/rnnlm-hs.h30.voc10k
|
||||||
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
||||||
--rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
|
--rnnlm_ver faster-rnnlm --threads $num_threads_rnnlm \
|
||||||
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
||||||
--hidden 100 --nwords 20000 --direct 1500 data/local/rnnlm-hs.h100.voc20k
|
--hidden 100 --nwords 20000 --direct 1500 data/local/rnnlm-hs.h100.voc20k
|
||||||
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
||||||
--rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
|
--rnnlm_ver faster-rnnlm --threads $num_threads_rnnlm \
|
||||||
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
||||||
--hidden 300 --nwords 30000 --direct 1500 data/local/rnnlm-hs.h300.voc30k
|
--hidden 300 --nwords 30000 --direct 1500 data/local/rnnlm-hs.h300.voc30k
|
||||||
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
|
||||||
--rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
|
--rnnlm_ver faster-rnnlm --threads $num_threads_rnnlm \
|
||||||
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
--cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
|
||||||
--hidden 400 --nwords 40000 --direct 2000 data/local/rnnlm-hs.h400.voc40k
|
--hidden 400 --nwords 40000 --direct 2000 data/local/rnnlm-hs.h400.voc40k
|
||||||
)
|
)
|
||||||
|
@ -479,4 +479,4 @@ local/nnet/run_dnn.sh
|
||||||
|
|
||||||
# # A couple of nnet3 recipes:
|
# # A couple of nnet3 recipes:
|
||||||
# local/nnet3/run_tdnn_baseline.sh # designed for exact comparison with nnet2 recipe
|
# local/nnet3/run_tdnn_baseline.sh # designed for exact comparison with nnet2 recipe
|
||||||
# local/nnet3/run_tdnn.sh # better absolute results
|
# local/nnet3/run_tdnn.sh # better absolute results
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
#!/usr/bin/env bash

# Checks whether the requested rnnlm binary is present at
# <tools>/<rnnlm_ver>/rnnlm, where <tools> is the parent directory of the
# directory containing this script.
#
# Arguments:
#   $1 - rnnlm version / directory name (e.g. "faster-rnnlm", "rnnlm-0.3e")
# Outputs:
#   A "Found binary ..." line on success, or an ERROR line with an
#   installation hint on failure (both on stdout).
# Exit status:
#   0 if the binary exists, 1 otherwise (including wrong argument count).

set -e

if [ $# -ne 1 ]; then
  echo "The scripts checks whether requested rnnlm binary exists in tools/<rnnlm_ver>/rnnlm"
  echo
  echo "Usage: $0 <rnnlm_ver>"
  exit 1
fi

rnnlm_ver=$1
rnnlm_path="$(readlink -f "$(dirname "$0")/../")/$rnnlm_ver/rnnlm"
scriptname="$(basename "$0")"

if [ -f "$rnnlm_path" ]; then
  echo "$scriptname: Found binary $rnnlm_path"
else
  # Use `case` so that "rnnlm-0.??" is matched as a glob pattern
  # (rnnlm-0.3e, rnnlm-0.4b, ...). Inside `[ ... ]` a quoted right-hand side
  # is compared literally, so that branch could never be selected. Quoting
  # the selector also keeps empty or whitespace-containing arguments from
  # breaking the test.
  case "$rnnlm_ver" in
    faster-rnnlm)
      echo "$scriptname: ERROR Faster RNNLM is not installed. Use extras/install_faster_rnnlm.sh to install it"
      ;;
    rnnlm-0.??)
      echo "$scriptname: ERROR Class based RNNLM is not installed. Use extras/install_class_rnnlm.sh to install it"
      ;;
    *)
      echo "$scriptname: ERROR Cannot find $rnnlm_path. Neither know how to install it"
      ;;
  esac
  exit 1
fi
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
#!/usr/bin/env bash

# Downloads the given rnnlm release tarball and builds it inside the tools/
# directory (the parent of the directory containing this script).
#
# Arguments:
#   $1 - rnnlm version, used both as the archive base name and as the
#        build directory name (e.g. "rnnlm-0.3e")
# Exit status:
#   0 on success; non-zero if the arguments are wrong, the tools/ directory
#   cannot be located, or the download, extraction or build fails.

set -e

if [ $# -ne 1 ]; then
  echo "Download and install given rnnlm version from rnnlm.org"
  echo
  echo "Usage: $0 <rnnlm_ver> # e.g. $0 rnnlm-0.3e"
  exit 1
fi

rnnlm_ver=$1
tools_dir="$(readlink -f "$(dirname "$0")/../")"

# Sanity check: the resolved parent directory must be named "tools".
if [ "$(basename "$tools_dir")" != "tools" ]; then
  echo "Cannot find tools/ dir. Am I in tools/extras?"
  exit 1
fi

cd "$tools_dir"
echo Downloading and installing the rnnlm tools
# Source archive: http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz
arc_file="$rnnlm_ver.tgz"
if [ ! -f "$arc_file" ]; then
  # `wget -O` creates the output file even when the transfer fails; remove
  # it on error so a later run does not mistake it for a complete archive.
  wget "http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz" -O "$arc_file" \
    || { rm -f "$arc_file"; exit 1; }
fi
# -p: a directory left behind by an earlier failed attempt must not abort
# the retry (plain mkdir would fail and `set -e` would stop the script).
mkdir -p "$rnnlm_ver"
cd "$rnnlm_ver"
tar -xvzf "../$arc_file" || exit 1
make CC=g++ || exit 1
echo Done making the rnnlm tools
|
Загрузка…
Ссылка в новой задаче