trunk: Update RM LSTM recipe, thanks to Jiayu Du

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4966 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Minhua Wu 2015-03-24 17:41:23 +00:00
Parent d21b8a35d0
Commit e213cbb278
4 changed files: 20 additions and 9 deletions

View file

@@ -248,7 +248,6 @@ for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/
 # LSTM result
 for x in exp/lstm4f/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
-%WER 3.05 [ 382 / 12533, 23 ins, 86 del, 273 sub ] exp/lstm4f_0.0001_0.7/decode/wer_5_0.5
-%WER 12.02 [ 1506 / 12533, 130 ins, 251 del, 1125 sub ] exp/lstm4f_0.0001_0.7/decode_ug/wer_8_0.0
+%WER 2.04 [ 256 / 12533, 18 ins, 60 del, 178 sub ] exp/lstm4f_c512_r200_c512_r200_lr0.0001_mmt0.9_clip50/decode/wer_4_0.5

View file

@@ -45,16 +45,16 @@ if [ $stage -le 1 ]; then
   $cuda_cmd $dir/log/train_nnet.log \
     steps/nnet/train.sh --network-type lstm --learn-rate 0.0001 \
       --cmvn-opts "--norm-means=true --norm-vars=true" --feat-type plain --splice 0 \
-      --train-opts "--momentum 0.7 --halving-factor 0.8" \
+      --train-opts "--momentum 0.9 --halving-factor 0.5" \
       --train-tool "nnet-train-lstm-streams --num-stream=4 --targets-delay=5" \
+      --proto-opts "--num-cells 512 --num-recurrent 200 --num-layers 2 --clip-gradient 50.0" \
       ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
   # Decode (reuse HCLG graph)
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
     $gmm/graph $dev $dir/decode || exit 1;
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
     --nnet-forward-opts "--no-softmax=true --prior-scale=1.0 --time-shift=5" \
     $gmm/graph $dev $dir/decode_time-shift5 || exit 1;
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
     $gmm/graph_ug $dev $dir/decode_ug || exit 1;
 fi
 # TODO : sequence training,
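The --targets-delay=5 / --time-shift=5 pairing in this hunk is easier to see in a toy sketch. What follows is not Kaldi code: delay_targets, shift_back, and the label values are hypothetical stand-ins, and the real shifting happens inside nnet-train-lstm-streams and nnet-forward. The idea is that the targets are delayed by 5 frames during training, so the unidirectional LSTM has seen 5 future frames before it must emit each label; at decode time the outputs are shifted back to realign with the features.

# A minimal sketch (not Kaldi code) of the targets-delay idea.
def delay_targets(labels, delay, pad_label):
    # The label of frame t is asked for at frame t+delay, so the net
    # has seen `delay` extra future frames when it must answer.
    return [pad_label] * delay + labels[:-delay]

def shift_back(outputs, delay):
    # Roughly the realignment that --time-shift=delay performs at decode
    # time: drop the first `delay` output frames and pad at the end.
    return outputs[delay:] + [outputs[-1]] * delay

labels = [1, 2, 3, 4, 5, 6, 7, 8]
print(delay_targets(labels, delay=5, pad_label=1))  # [1, 1, 1, 1, 1, 1, 2, 3]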

View file

@@ -188,7 +188,7 @@ if [ ! -z $feature_transform ]; then
 fi
 # read the features,
-feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
+feats_tr="ark:shuf $dir/train.scp | copy-feats scp:- ark:- |"
 feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |"
 # optionally add per-speaker CMVN,
 if [ ! -z "$cmvn_opts" ]; then
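The feats_tr change above swaps a fixed reading order for a shuffled one: shuf re-randomizes the utterance list in train.scp every time the feature pipeline is opened, so each pass the parallel LSTM streams (--num-stream=4 in the recipe) are filled with a different interleaving of utterances. Below is a minimal Python sketch of the same idea, assuming nothing about Kaldi beyond the scp line format ("utt-id rxspecifier"); the function name is hypothetical:

import random

def open_shuffled_scp(scp_path):
    # Equivalent in spirit to: ark:shuf train.scp | copy-feats scp:- ark:- |
    with open(scp_path) as f:
        lines = f.readlines()
    random.shuffle(lines)   # new behaviour; the old pipeline kept file order
    for line in lines:      # stand-in for copy-feats reading each entry
        utt_id, rxspecifier = line.split(None, 1)
        yield utt_id, rxspecifier.strip()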

View file

@@ -30,6 +30,8 @@ parser.add_option('--num-cells', dest='num_cells', type='int', default=800,
                   help='Number of LSTM cells [default: %default]');
 parser.add_option('--num-recurrent', dest='num_recurrent', type='int', default=512,
                   help='Number of LSTM recurrent units [default: %default]');
+parser.add_option('--num-layers', dest='num_layers', type='int', default=2,
+                  help='Number of LSTM layers [default: %default]');
 parser.add_option('--lstm-stddev-factor', dest='lstm_stddev_factor', type='float', default=0.01,
                   help='Standard deviation of initialization [default: %default]');
 parser.add_option('--param-stddev-factor', dest='param_stddev_factor', type='float', default=0.04,
@@ -53,8 +55,18 @@ if len(args) != 2 :
 #</NnetProto>
 print "<NnetProto>"
-print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
-    (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+# normally we won't use more than 2 layers of LSTM
+if o.num_layers == 1:
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+elif o.num_layers == 2:
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (o.num_recurrent, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+else:
+    sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n")
+    sys.exit(1)
 print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" % \
     (o.num_recurrent, num_leaves, o.param_stddev_factor)
 print "<Softmax> <InputDim> %d <OutputDim> %d" % \
     (num_leaves, num_leaves)