From f7c89eae5f0f7987a6e886f1b42ffcf3b5a68eed Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Wed, 4 Dec 2013 06:53:24 +0000 Subject: [PATCH] sandbox/dan2: Improvements to discriminative training script (RE modify-learning-rates) and recipe for Switchboard. git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3259 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8 --- egs/swbd/s5b/RESULTS | 11 ++++++++-- egs/swbd/s5b/local/nnet2/run_6c_gpu.sh | 20 ++++--------------- .../s5/steps/nnet2/train_discriminative.sh | 10 +++++++++- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/egs/swbd/s5b/RESULTS b/egs/swbd/s5b/RESULTS index 51dc417d4..e196c578e 100644 --- a/egs/swbd/s5b/RESULTS +++ b/egs/swbd/s5b/RESULTS @@ -99,6 +99,15 @@ for x in exp/{mono,tri,sgmm,nnet}*/decode*; do [ -d $x ] && grep WER $x/wer_* | %WER 17.9 | 1831 21395 | 83.8 10.8 5.5 1.7 17.9 55.8 | exp/nnet6a_gpu/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys %WER 18.4 | 1831 21395 | 83.6 11.4 5.0 2.1 18.4 56.1 | exp/nnet6a_gpu/decode_eval2000_sw1_tg/score_11/eval2000.ctm.swbd.filt.sys +# Discriminative training on top of 5c (14.6 still not as good as Karel's 14.1 number with this LM, but getting closer). 
+%WER 15.0 | 1831 21395 | 87.3 9.0 3.7 2.3 15.0 52.2 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_fsh_tgpr_epoch1/score_15/eval2000.ctm.swbd.filt.sys +%WER 14.8 | 1831 21395 | 87.3 8.9 3.8 2.2 14.8 51.9 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_fsh_tgpr_epoch2/score_16/eval2000.ctm.swbd.filt.sys +%WER 14.7 | 1831 21395 | 87.5 8.9 3.6 2.2 14.7 51.7 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_fsh_tgpr_epoch3/score_16/eval2000.ctm.swbd.filt.sys +%WER 14.6 | 1831 21395 | 87.6 8.9 3.6 2.2 14.6 51.5 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_fsh_tgpr_epoch4/score_16/eval2000.ctm.swbd.filt.sys +%WER 15.2 | 1831 21395 | 87.1 9.1 3.8 2.3 15.2 53.1 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_tg_epoch1/score_15/eval2000.ctm.swbd.filt.sys +%WER 15.1 | 1831 21395 | 87.4 9.1 3.5 2.5 15.1 52.6 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_tg_epoch2/score_13/eval2000.ctm.swbd.filt.sys +%WER 15.0 | 1831 21395 | 87.5 9.0 3.5 2.5 15.0 52.4 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_tg_epoch3/score_14/eval2000.ctm.swbd.filt.sys +%WER 14.9 | 1831 21395 | 87.4 9.1 3.5 2.4 14.9 52.3 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_tg_epoch4/score_15/eval2000.ctm.swbd.filt.sys @@ -113,5 +122,3 @@ for x in exp/{mono,tri,sgmm,nnet}*/decode*; do [ -d $x ] && grep WER $x/wer_* | # Final system rescored by sw1_fsh trigram (unpruned) %WER 13.4 | 1831 21395 | 88.4 8.2 3.4 1.8 13.4 49.2 | exp/tri4b_pretrain-dbn_dnn_smbr_iter1-lats/decode_eval2000_sw1_fsh_tg.3_it2/score_14/eval2000.ctm.swbd.filt.sys - - diff --git a/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh b/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh index f2a56d777..735510dfc 100755 --- a/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh @@ -28,24 +28,12 @@ set -e # exit on error. # likely generate very thin lattices. Note: the transform-dir is important to # specify, since this system is on top of fMLLR features. 
-nj=$(cat exp/tri4b/num_jobs) -if [ $stage -le 0 ]; then - steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G" \ - --nj $nj --sub-split 20 --num-threads 6 --parallel-opts "-pe smp 6" \ - --transform-dir exp/tri4b \ - data/train_nodup data/lang exp/nnet5c_gpu exp/nnet5c_gpu_denlats -fi - -if [ $stage -le 1 ]; then - steps/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu yes \ - --transform-dir exp/tri4b \ - --nj $nj data/train_nodup data/lang exp/nnet5c_gpu exp/nnet5c_gpu_ali -fi if [ $stage -le 2 ]; then steps/nnet2/train_discriminative.sh --cmd "$decode_cmd" --learning-rate 0.000002 \ - --num-epochs 2 \ + --modify-learning-rates true --last-layer-factor 0.1 \ + --num-epochs 4 --cleanup false \ --num-jobs-nnet 4 --stage $train_stage \ --transform-dir exp/tri4b \ --num-threads 1 --parallel-opts "$gpu_opts" data/train data/lang \ @@ -53,7 +41,7 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then - for epoch in 1 2; do + for epoch in 1 2 3 4; do for lm_suffix in tg fsh_tgpr; do steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 30 --iter epoch$epoch \ --config conf/decode.config --transform-dir exp/tri4b/decode_eval2000_sw1_${lm_suffix} \ @@ -63,5 +51,5 @@ if [ $stage -le 3 ]; then fi -exit 0; +exit 0; diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative.sh b/egs/wsj/s5/steps/nnet2/train_discriminative.sh index d22b8f223..9f04a0a90 100755 --- a/egs/wsj/s5/steps/nnet2/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet2/train_discriminative.sh @@ -22,7 +22,8 @@ num_jobs_nnet=4 # Number of neural net jobs to run in parallel. Note: this samples_per_iter=400000 # measured in frames, not in "examples" spk_vecs_dir= - +modify_learning_rates=false +last_layer_factor=1.0 # relates to modify-learning-rates shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples # on each iter. 
You could set it to 0 or to a large value for complete # randomization, but this would both consume memory and cause spikes in @@ -78,6 +79,8 @@ if [ $# != 6 ]; then echo " # the middle." echo " --criterion # Training criterion: may be smbr, mmi or mpfe" echo " --boost # Boosting factor for MMI (e.g., 0.1)" + echo " --modify-learning-rates # If true, modify learning rates to try to equalize relative" + echo " # changes across layers." echo " --degs-dir # Directory for discriminative examples, e.g. exp/foo/degs" exit 1; fi @@ -283,6 +286,11 @@ while [ $x -lt $num_iters ]; do $cmd $dir/log/average.$x.log \ nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1; + if $modify_learning_rates; then + $cmd $dir/log/modify_learning_rates.$x.log \ + nnet-modify-learning-rates --last-layer-factor=$last_layer_factor $dir/$x.mdl \ + $dir/$[$x+1].mdl $dir/$[$x+1].mdl + fi rm $nnets_list fi