From e213cbb278c42d4eb7ebd92d0a86adc9a47aa675 Mon Sep 17 00:00:00 2001
From: Minhua Wu
Date: Tue, 24 Mar 2015 17:41:23 +0000
Subject: [PATCH] trunk: Update RM LSTM recipe, thanks to Jiayu Du

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4966 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
---
 egs/rm/s5/RESULTS                        |  3 +--
 egs/rm/s5/local/nnet/run_lstm.sh         |  8 ++++----
 egs/wsj/s5/steps/nnet/train.sh           |  2 +-
 egs/wsj/s5/utils/nnet/make_lstm_proto.py | 16 ++++++++++++++--
 4 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/egs/rm/s5/RESULTS b/egs/rm/s5/RESULTS
index 398e6df9d..0762f537c 100644
--- a/egs/rm/s5/RESULTS
+++ b/egs/rm/s5/RESULTS
@@ -248,7 +248,6 @@ for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/
 
 # LSTM result
 for x in exp/lstm4f/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
-%WER 3.05 [ 382 / 12533, 23 ins, 86 del, 273 sub ] exp/lstm4f_0.0001_0.7/decode/wer_5_0.5
-%WER 12.02 [ 1506 / 12533, 130 ins, 251 del, 1125 sub ] exp/lstm4f_0.0001_0.7/decode_ug/wer_8_0.0
+%WER 2.04 [ 256 / 12533, 18 ins, 60 del, 178 sub ] exp/lstm4f_c512_r200_c512_r200_lr0.0001_mmt0.9_clip50/decode/wer_4_0.5
 
 
diff --git a/egs/rm/s5/local/nnet/run_lstm.sh b/egs/rm/s5/local/nnet/run_lstm.sh
index 410badaab..f992dc04c 100755
--- a/egs/rm/s5/local/nnet/run_lstm.sh
+++ b/egs/rm/s5/local/nnet/run_lstm.sh
@@ -45,16 +45,16 @@ if [ $stage -le 1 ]; then
   $cuda_cmd $dir/log/train_nnet.log \
     steps/nnet/train.sh --network-type lstm --learn-rate 0.0001 \
       --cmvn-opts "--norm-means=true --norm-vars=true" --feat-type plain --splice 0 \
-      --train-opts "--momentum 0.7 --halving-factor 0.8" \
+      --train-opts "--momentum 0.9 --halving-factor 0.5" \
       --train-tool "nnet-train-lstm-streams --num-stream=4 --targets-delay=5" \
+      --proto-opts "--num-cells 512 --num-recurrent 200 --num-layers 2 --clip-gradient 50.0" \
     ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
 
   # Decode (reuse HCLG graph)
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
     $gmm/graph $dev $dir/decode || exit 1;
-  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
-    --nnet-forward-opts "--no-softmax=true --prior-scale=1.0 --time-shift=5" \
-    $gmm/graph $dev $dir/decode_time-shift5 || exit 1;
+  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
+    $gmm/graph_ug $dev $dir/decode_ug || exit 1;
 fi
 
 # TODO : sequence training,
diff --git a/egs/wsj/s5/steps/nnet/train.sh b/egs/wsj/s5/steps/nnet/train.sh
index 38c73214c..b669f99ca 100755
--- a/egs/wsj/s5/steps/nnet/train.sh
+++ b/egs/wsj/s5/steps/nnet/train.sh
@@ -188,7 +188,7 @@ if [ ! -z $feature_transform ]; then
 fi
 
 # read the features,
-feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
+feats_tr="ark:shuf $dir/train.scp | copy-feats scp:- ark:- |"
 feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |"
 # optionally add per-speaker CMVN,
 if [ ! -z "$cmvn_opts" ]; then
diff --git a/egs/wsj/s5/utils/nnet/make_lstm_proto.py b/egs/wsj/s5/utils/nnet/make_lstm_proto.py
index 4d341ac1f..063d9ad9e 100755
--- a/egs/wsj/s5/utils/nnet/make_lstm_proto.py
+++ b/egs/wsj/s5/utils/nnet/make_lstm_proto.py
@@ -30,6 +30,8 @@ parser.add_option('--num-cells', dest='num_cells', type='int', default=800,
                    help='Number of LSTM cells [default: %default]');
 parser.add_option('--num-recurrent', dest='num_recurrent', type='int', default=512,
                    help='Number of LSTM recurrent units [default: %default]');
+parser.add_option('--num-layers', dest='num_layers', type='int', default=2,
+                   help='Number of LSTM layers [default: %default]');
 parser.add_option('--lstm-stddev-factor', dest='lstm_stddev_factor', type='float', default=0.01,
                    help='Standard deviation of initialization [default: %default]');
 parser.add_option('--param-stddev-factor', dest='param_stddev_factor', type='float', default=0.04,
@@ -53,8 +55,18 @@ if len(args) != 2 :
 
 #
 print "<NnetProto>"
-print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
-    (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+# normally we won't use more than 2 layers of LSTM
+if o.num_layers == 1:
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+elif o.num_layers == 2:
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (o.num_recurrent, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+else:
+    sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n")
+    sys.exit(1)
 print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" % \
     (o.num_recurrent, num_leaves, o.param_stddev_factor)
 print "<Softmax> <InputDim> %d <OutputDim> %d" % \