trunk: Update RM LSTM recipe, thanks to Jiayu Du

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4966 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Minhua Wu 2015-03-24 17:41:23 +00:00
Parent d21b8a35d0
Commit e213cbb278
4 changed files: 20 additions and 9 deletions

View file

@@ -248,7 +248,6 @@ for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/
 # LSTM result
 for x in exp/lstm4f/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
-%WER 3.05 [ 382 / 12533, 23 ins, 86 del, 273 sub ] exp/lstm4f_0.0001_0.7/decode/wer_5_0.5
-%WER 12.02 [ 1506 / 12533, 130 ins, 251 del, 1125 sub ] exp/lstm4f_0.0001_0.7/decode_ug/wer_8_0.0
+%WER 2.04 [ 256 / 12533, 18 ins, 60 del, 178 sub ] exp/lstm4f_c512_r200_c512_r200_lr0.0001_mmt0.9_clip50/decode/wer_4_0.5

View file

@@ -45,16 +45,16 @@ if [ $stage -le 1 ]; then
   $cuda_cmd $dir/log/train_nnet.log \
     steps/nnet/train.sh --network-type lstm --learn-rate 0.0001 \
       --cmvn-opts "--norm-means=true --norm-vars=true" --feat-type plain --splice 0 \
-      --train-opts "--momentum 0.7 --halving-factor 0.8" \
+      --train-opts "--momentum 0.9 --halving-factor 0.5" \
       --train-tool "nnet-train-lstm-streams --num-stream=4 --targets-delay=5" \
+      --proto-opts "--num-cells 512 --num-recurrent 200 --num-layers 2 --clip-gradient 50.0" \
       ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
   # Decode (reuse HCLG graph)
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
     $gmm/graph $dev $dir/decode || exit 1;
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
     --nnet-forward-opts "--no-softmax=true --prior-scale=1.0 --time-shift=5" \
     $gmm/graph $dev $dir/decode_time-shift5 || exit 1;
   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
     $gmm/graph_ug $dev $dir/decode_ug || exit 1;
 fi
 # TODO : sequence training,
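The --targets-delay=5 / --time-shift=5 pairing in this hunk is easier to see in a toy sketch. What follows is not Kaldi code: delay_targets, shift_back, and the label values are hypothetical stand-ins, and the real shifting happens inside nnet-train-lstm-streams and nnet-forward. The idea is that the targets are delayed by 5 frames during training, so the unidirectional LSTM has seen 5 future frames before it must emit each label; at decode time the outputs are shifted back to realign with the features.

# A minimal sketch (not Kaldi code) of the targets-delay idea.
def delay_targets(labels, delay, pad_label):
    # The label of frame t is asked for at frame t+delay, so the net
    # has seen `delay` extra future frames when it must answer.
    return [pad_label] * delay + labels[:-delay]

def shift_back(outputs, delay):
    # Roughly the realignment that --time-shift=delay performs at decode
    # time: drop the first `delay` output frames and pad at the end.
    return outputs[delay:] + [outputs[-1]] * delay

labels = [1, 2, 3, 4, 5, 6, 7, 8]
print(delay_targets(labels, delay=5, pad_label=1))  # [1, 1, 1, 1, 1, 1, 2, 3]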

View file

@@ -188,7 +188,7 @@ if [ ! -z $feature_transform ]; then
 fi
 # read the features,
-feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
+feats_tr="ark:shuf $dir/train.scp | copy-feats scp:- ark:- |"
 feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |"
 # optionally add per-speaker CMVN,
 if [ ! -z "$cmvn_opts" ]; then
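The feats_tr change above swaps a fixed reading order for a shuffled one: shuf re-randomizes the utterance list in train.scp every time the feature pipeline is opened, so each pass the parallel LSTM streams (--num-stream=4 in the recipe) are filled with a different interleaving of utterances. Below is a minimal Python sketch of the same idea, assuming nothing about Kaldi beyond the scp line format ("utt-id rxspecifier"); the function name is hypothetical:

import random

def open_shuffled_scp(scp_path):
    # Equivalent in spirit to: ark:shuf train.scp | copy-feats scp:- ark:- |
    with open(scp_path) as f:
        lines = f.readlines()
    random.shuffle(lines)   # new behaviour; the old pipeline kept file order
    for line in lines:      # stand-in for copy-feats reading each entry
        utt_id, rxspecifier = line.split(None, 1)
        yield utt_id, rxspecifier.strip()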

View file

@@ -30,6 +30,8 @@ parser.add_option('--num-cells', dest='num_cells', type='int', default=800,
                   help='Number of LSTM cells [default: %default]');
 parser.add_option('--num-recurrent', dest='num_recurrent', type='int', default=512,
                   help='Number of LSTM recurrent units [default: %default]');
+parser.add_option('--num-layers', dest='num_layers', type='int', default=2,
+                  help='Number of LSTM layers [default: %default]');
 parser.add_option('--lstm-stddev-factor', dest='lstm_stddev_factor', type='float', default=0.01,
                   help='Standard deviation of initialization [default: %default]');
 parser.add_option('--param-stddev-factor', dest='param_stddev_factor', type='float', default=0.04,
@@ -53,8 +55,18 @@ if len(args) != 2 :
 #</NnetProto>
 print "<NnetProto>"
-print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
-    (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+# normally we won't use more than 2 layers of LSTM
+if o.num_layers == 1:
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+elif o.num_layers == 2:
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+    print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
+        (o.num_recurrent, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
+else:
+    sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n")
+    sys.exit(1)
 print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" % \
     (o.num_recurrent, num_leaves, o.param_stddev_factor)
 print "<Softmax> <InputDim> %d <OutputDim> %d" % \
     (num_leaves, num_leaves)