mirror of https://github.com/mozilla/kaldi.git
add fisher_swbd nnet3 and chain recipe
This commit is contained in:
Parent
c823bd87c1
Commit
02cf52a48e
@@ -42,8 +42,71 @@ for x in exp/nnet2_online/nnet_ms_a_online/decode_eval2000*_fg; do grep Sum $x/
%WER 12.3 | 1831 21395 | 89.2 7.2 3.5 1.5 12.3 50.8 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_utt_fsh_sw1_fg/score_13/eval2000.ctm.swbd.filt.sys
%WER 11.8 | 1831 21395 | 89.6 7.2 3.2 1.4 11.8 49.0 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_utt_offline_fsh_sw1_fg/score_11/eval2000.ctm.swbd.filt.sys
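# (a quick key, assuming the standard sclite .sys layout: the first pair of
# numbers is #sentences and #words scored; the six that follow are %correct,
# %substitutions, %deletions, %insertions, overall %WER, and sentence error rate)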

# nnet3 result on eval2000
# BLSTM ran for about 760 hours, command:
# local/nnet3/run_lstm.sh --affix bidirectional --lstm-delay " [-1,1] [-2,2] [-3,3] " --label-delay 0 \
#   --cell-dim 1024 --recurrent-projection-dim 128 --non-recurrent-projection-dim 128 \
#   --chunk-left-context 40 --chunk-right-context 40 \
#   --extra-left-context 50 --extra-right-context 50

# use tri-gram
for x in exp/nnet3/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 15.8 | 4459 42989 | 86.1 9.7 4.1 1.9 15.8 52.6 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys
%WER 14.8 | 4459 42989 | 86.6 9.2 4.3 1.4 14.8 54.3 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys

# rescore with four-gram
for x in exp/nnet3/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 15.4 | 4459 42989 | 86.4 9.5 4.0 1.8 15.4 51.6 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.filt.sys
%WER 14.5 | 4459 42989 | 87.0 9.0 4.0 1.5 14.5 53.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.filt.sys

# nnet3 result on eval2000 for swbd subset
# use tri-gram
for x in exp/nnet3/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 11.6 | 1831 21395 | 89.7 7.3 3.0 1.3 11.6 47.7 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys
%WER 10.7 | 1831 21395 | 90.3 6.7 3.0 1.0 10.7 45.9 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys

# rescore with four-gram
for x in exp/nnet3/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 11.1 | 1831 21395 | 90.2 7.0 2.8 1.3 11.1 46.2 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys
%WER 10.4 | 1831 21395 | 90.6 6.5 2.9 1.0 10.4 45.3 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys

# nnet3 result on eval2000 for callhm subset
# use tri-gram
for x in exp/nnet3/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
%WER 19.9 | 2628 21594 | 82.6 12.1 5.3 2.6 19.9 56.0 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys
%WER 18.8 | 2628 21594 | 83.1 11.7 5.2 1.9 18.8 60.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_tg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys

# rescore with four-gram
for x in exp/nnet3/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
%WER 19.7 | 2628 21594 | 82.7 12.1 5.2 2.4 19.7 55.3 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys
%WER 18.6 | 2628 21594 | 83.3 11.5 5.2 1.9 18.6 59.6 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys

# chain result on eval2000
# BLSTM ran for about 380 hours

# use tri-gram
for x in exp/chain/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 13.6 | 4459 42989 | 88.2 7.9 3.9 1.8 13.6 51.0 | exp/chain/tdnn_7b_sp/decode_eval2000_fsh_sw1_tg/score_8_0.0/eval2000_hires.ctm.filt.sys
%WER 12.1 | 4459 42989 | 89.7 6.8 3.5 1.8 12.1 50.2 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_tg/score_7_0.0/eval2000_hires.ctm.filt.sys

# rescore with four-gram
for x in exp/chain/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 13.3 | 4459 42989 | 88.4 7.8 3.8 1.8 13.3 50.1 | exp/chain/tdnn_7b_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.filt.sys
%WER 12.0 | 4459 42989 | 89.6 6.5 3.8 1.7 12.0 49.3 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_fg/score_8_0.5/eval2000_hires.ctm.filt.sys

# chain result on eval2000 for swbd subset
# use tri-gram
for x in exp/chain/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 9.4 | 1831 21395 | 91.7 5.4 2.9 1.2 9.4 43.9 | exp/chain/tdnn_7b_sp/decode_eval2000_fsh_sw1_tg/score_10_0.5/eval2000_hires.ctm.swbd.filt.sys
%WER 8.8 | 1831 21395 | 92.5 5.3 2.2 1.4 8.8 46.9 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_tg/score_7_1.0/eval2000_hires.ctm.swbd.filt.sys

# rescore with four-gram
for x in exp/chain/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 9.2 | 1831 21395 | 92.1 5.6 2.3 1.3 9.2 42.4 | exp/chain/tdnn_7b_relu_sp/decode_eval2000_fsh_sw1_fg/score_9_0.0/eval2000_hires.ctm.swbd.filt.sys
%WER 8.5 | 1831 21395 | 92.6 4.9 2.4 1.2 8.5 44.1 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_fg/score_9_1.0/eval2000_hires.ctm.swbd.filt.sys

# chain result on eval2000 for callhm subset
# use tri-gram
for x in exp/chain/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
%WER 17.4 | 2628 21594 | 84.7 9.8 5.5 2.1 17.4 55.3 | exp/chain/tdnn_7b_relu_sp/decode_eval2000_fsh_sw1_tg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys
%WER 15.3 | 2628 21594 | 86.9 8.3 4.8 2.2 15.3 52.4 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_tg/score_7_0.0/eval2000_hires.ctm.callhm.filt.sys

# rescore with four-gram
for x in exp/chain/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
%WER 17.3 | 2628 21594 | 84.9 9.7 5.5 2.1 17.3 55.0 | exp/chain/tdnn_7b_relu_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys
%WER 15.3 | 2628 21594 | 87.0 8.6 4.4 2.4 15.3 52.1 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_fg/score_6_0.5/eval2000_hires.ctm.callhm.filt.sys

# GMM and SGMM numbers reported on rt03
for x in exp/*/decode_rt03*; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
@@ -89,3 +152,63 @@ for x in exp/nnet2_online/nnet_ms_a_online/decode_rt03*_fg; do grep Sum $x/scor
%WER 20.2 | 3970 36721 | 88.3 8.1 3.6 8.5 20.2 74.3 | exp/nnet2_online/nnet_ms_a_online/decode_rt03_utt_fsh_sw1_fg/score_11/rt03.ctm.swbd.filt.sys
%WER 19.1 | 3970 36721 | 88.8 7.8 3.4 7.9 19.1 72.2 | exp/nnet2_online/nnet_ms_a_online/decode_rt03_utt_offline_fsh_sw1_fg/score_11/rt03.ctm.swbd.filt.sys

# nnet3 result on rt03
# use tri-gram
for x in exp/nnet3/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 14.7 | 8420 76157 | 86.8 8.9 4.3 1.5 14.7 45.9 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_tg/score_11_0.0/rt03_hires.ctm.filt.sys
%WER 14.2 | 8420 76157 | 87.0 8.7 4.3 1.2 14.2 46.9 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_tg/score_8_0.0/rt03_hires.ctm.filt.sys

# rescore with four-gram
for x in exp/nnet3/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 14.4 | 8420 76157 | 87.1 8.8 4.2 1.5 14.4 45.2 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_fg/score_11_0.0/rt03_hires.ctm.filt.sys
%WER 13.9 | 8420 76157 | 87.2 8.4 4.3 1.2 13.9 46.0 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_fg/score_9_0.0/rt03_hires.ctm.filt.sys

# nnet3 result on rt03 for swbd subset
# use tri-gram
for x in exp/nnet3/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 17.4 | 4450 39436 | 84.3 10.6 5.1 1.8 17.4 48.9 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_tg/score_11_0.5/rt03_hires.ctm.swbd.filt.sys
%WER 16.6 | 4450 39436 | 84.7 10.0 5.3 1.3 16.6 49.6 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_tg/score_10_0.5/rt03_hires.ctm.swbd.filt.sys

# rescore with four-gram
for x in exp/nnet3/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 17.1 | 4450 39436 | 84.6 10.3 5.1 1.8 17.1 48.2 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_fg/score_12_0.0/rt03_hires.ctm.swbd.filt.sys
%WER 16.3 | 4450 39436 | 85.0 9.8 5.1 1.3 16.3 49.0 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_fg/score_10_0.0/rt03_hires.ctm.swbd.filt.sys

# nnet3 result on rt03 for fsh subset
# use tri-gram
for x in exp/nnet3/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
%WER 11.8 | 3970 36721 | 89.4 7.2 3.5 1.2 11.8 42.5 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_tg/score_11_0.0/rt03_hires.ctm.fsh.filt.sys
%WER 11.6 | 3970 36721 | 89.4 7.1 3.5 1.0 11.6 43.6 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_tg/score_7_0.0/rt03_hires.ctm.fsh.filt.sys

# rescore with four-gram
for x in exp/nnet3/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
%WER 11.4 | 3970 36721 | 89.7 6.9 3.4 1.1 11.4 41.5 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_fg/score_11_0.0/rt03_hires.ctm.fsh.filt.sys
%WER 11.4 | 3970 36721 | 89.5 6.7 3.8 1.0 11.4 42.6 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_fg/score_10_0.0/rt03_hires.ctm.fsh.filt.sys

# chain result on rt03
# BLSTM ran for about 380 hours

# use tri-gram
for x in exp/chain/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 12.7 | 8420 76157 | 88.5 7.2 4.2 1.3 12.7 43.2 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_tg/score_9_0.0/rt03_hires.ctm.filt.sys
%WER 11.7 | 8420 76157 | 89.8 6.6 3.6 1.5 11.7 43.7 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_tg/score_7_0.0/rt03_hires.ctm.filt.sys

# rescore with four-gram
for x in exp/chain/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
%WER 12.4 | 8420 76157 | 88.9 7.0 4.1 1.3 12.4 42.7 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_fg/score_9_0.0/rt03_hires.ctm.filt.sys
%WER 11.4 | 8420 76157 | 89.9 6.1 3.9 1.3 11.4 43.4 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_fg/score_8_0.0/rt03_hires.ctm.filt.sys

# chain result on rt03 for swbd subset
# use tri-gram
for x in exp/chain/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 15.0 | 4450 39436 | 86.4 8.6 5.0 1.4 15.0 45.8 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_tg/score_9_0.0/rt03_hires.ctm.swbd.filt.sys
%WER 13.3 | 4450 39436 | 88.3 7.5 4.2 1.6 13.3 45.2 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_tg/score_8_0.0/rt03_hires.ctm.swbd.filt.sys

# rescore with four-gram
for x in exp/chain/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
%WER 14.8 | 4450 39436 | 86.5 8.0 5.5 1.3 14.8 45.5 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_fg/score_10_0.0/rt03_hires.ctm.swbd.filt.sys
%WER 13.0 | 4450 39436 | 88.5 7.3 4.2 1.6 13.0 44.8 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_fg/score_8_0.0/rt03_hires.ctm.swbd.filt.sys

# chain result on rt03 for fsh subset
# use tri-gram
for x in exp/chain/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
%WER 10.2 | 3970 36721 | 91.1 6.0 3.0 1.2 10.2 40.2 | exp/chain/tdnn_7b_relu_sp/decode_rt03_fsh_sw1_tg/score_8_0.0/rt03_hires.ctm.fsh.filt.sys
%WER 9.8 | 3970 36721 | 91.4 5.3 3.3 1.2 9.8 42.0 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_tg/score_7_0.0/rt03_hires.ctm.fsh.filt.sys

# rescore with four-gram
for x in exp/chain/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
%WER 9.8 | 3970 36721 | 91.4 5.8 2.8 1.2 9.8 39.6 | exp/chain/tdnn_7b_relu_sp/decode_rt03_fsh_sw1_fg/score_8_0.0/rt03_hires.ctm.fsh.filt.sys
%WER 9.6 | 3970 36721 | 91.6 5.2 3.3 1.2 9.6 41.4 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_fg/score_7_0.0/rt03_hires.ctm.fsh.filt.sys

@@ -0,0 +1,181 @@
#!/bin/bash

# based on run_tdnn_6h.sh

set -e

# configs for 'chain'
stage=12
train_stage=-10
get_egs_stage=-10
dir=exp/chain/blstm_6h
decode_iter=
decode_dir_affix=

# training options
num_epochs=4
remove_egs=false
common_egs_dir=
affix=
chunk_width=150
chunk_left_context=40
chunk_right_context=40

# End configuration section.
echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

dir=$dir${affix:+_$affix}
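# (e.g. --affix ld5 gives exp/chain/blstm_6h_ld5; an empty affix leaves the path unchanged)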
train_set=train_nodup_sp
ali_dir=exp/tri5a_ali_nodup
treedir=exp/chain/tri6_tree_11000
lang=data/lang_chain

# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet3 setup, and you can skip them by setting "--stage 8" if you have already
# run those things.
local/nnet3/run_ivector_common.sh --stage $stage \
  --speed-perturb true \
  --generate-alignments false || exit 1;

if [ $stage -le 9 ]; then
  # Get the alignments as lattices (gives the chain training more freedom).
  # use the same num-jobs as the alignments
  nj=$(cat $ali_dir/num_jobs) || exit 1;
  steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \
    data/lang exp/tri5a exp/tri5a_lats_nodup_sp
  rm exp/tri5a_lats_nodup_sp/fsts.*.gz # save space
fi

if [ $stage -le 10 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file.  [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  # Use our special topology... note that later on may have to tune this
  # topology.
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 11 ]; then
  # Build a tree using our new topology.
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
    --leftmost-questions-truncate -1 \
    --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir
fi
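# (the factor of 3 matches the chain models' reduced output rate: they emit one
# output every 30 ms instead of the usual 10 ms frame rate)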

if [ $stage -le 12 ]; then
  echo "$0: creating neural net configs";

  steps/nnet3/lstm/make_configs.py \
    --feat-dir data/${train_set}_hires \
    --ivector-dir exp/nnet3/ivectors_${train_set} \
    --tree-dir $treedir \
    --splice-indexes="-2,-1,0,1,2 0 0" \
    --lstm-delay=" [-3,3] [-3,3] [-3,3] " \
    --xent-regularize 0.1 \
    --include-log-softmax false \
    --num-lstm-layers 3 \
    --cell-dim 1024 \
    --hidden-dim 1024 \
    --recurrent-projection-dim 256 \
    --non-recurrent-projection-dim 256 \
    --label-delay 0 \
    --self-repair-scale 0.00001 \
    $dir/configs || exit 1;
fi

if [ $stage -le 13 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
  fi

  touch $dir/egs/.nodelete # keep egs around when that run dies.

  steps/nnet3/chain/train.py --stage $train_stage \
    --cmd "$decode_cmd" \
    --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize 0.1 \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.00005 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=2000" \
    --chain.left-deriv-truncate 0 \
    --trainer.num-chunk-per-minibatch 64 \
    --trainer.frames-per-iter 1200000 \
    --trainer.max-param-change 1.414 \
    --trainer.num-epochs $num_epochs \
    --trainer.optimization.shrink-value 0.99 \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.optimization.momentum 0.0 \
    --egs.stage $get_egs_stage \
    --egs.opts "--frames-overlap-per-eg 0" \
    --egs.chunk-width $chunk_width \
    --egs.chunk-left-context $chunk_left_context \
    --egs.chunk-right-context $chunk_right_context \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs $remove_egs \
    --feat-dir data/${train_set}_hires \
    --tree-dir $treedir \
    --lat-dir exp/tri5a_lats_nodup_sp \
    --dir $dir || exit 1;
fi

if [ $stage -le 14 ]; then
  # Note: it might appear that this $lang directory is mismatched, and it is as
  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg
fi

decode_suff=fsh_sw1_tg
graph_dir=$dir/graph_fsh_sw1_tg
if [ $stage -le 15 ]; then
  iter_opts=
  if [ ! -z $decode_iter ]; then
    iter_opts=" --iter $decode_iter "
  fi

  # decoding options
  extra_left_context=$[$chunk_left_context+10]
  extra_right_context=$[$chunk_right_context+10]
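  # the +10 frames give the decoder a little more recurrent context than was
  # seen in training; the exact size of the margin is a judgment call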

  for decode_set in eval2000 rt03; do
      (
      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
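      # num_jobs is the number of distinct speakers, so each decode job handles
      # whole speakers and online iVector estimation stays within one job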
      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
          --nj $num_jobs --cmd "$decode_cmd" $iter_opts \
          --extra-left-context $extra_left_context \
          --extra-right-context $extra_right_context \
          --frames-per-chunk $chunk_width \
          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
          $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1;
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
          data/lang_fsh_sw1_{tg,fg} data/${decode_set}_hires \
          $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_fsh_sw1_{tg,fg} || exit 1;
      ) &
  done
fi

wait;
exit 0;

@@ -0,0 +1,162 @@
#!/bin/bash

set -e

# based on run_tdnn_7b.sh in the swbd recipe

# configs for 'chain'
affix=
stage=12
train_stage=-10
get_egs_stage=-10
dir=exp/chain/tdnn_7b
decode_iter=

# training options
num_epochs=4
remove_egs=false
common_egs_dir=

# End configuration section.
echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

dir=${dir}${affix:+_$affix}
train_set=train_nodup_sp
ali_dir=exp/tri5a_ali_nodup
treedir=exp/chain/tri6_tree_11000
lang=data/lang_chain

# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet3 setup, and you can skip them by setting "--stage 8" if you have already
# run those things.
local/nnet3/run_ivector_common.sh --stage $stage \
  --speed-perturb true \
  --generate-alignments false || exit 1;

if [ $stage -le 9 ]; then
  # Get the alignments as lattices (gives the chain training more freedom).
  # use the same num-jobs as the alignments
  nj=$(cat $ali_dir/num_jobs) || exit 1;
  steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \
    data/lang exp/tri5a exp/tri5a_lats_nodup_sp
  rm exp/tri5a_lats_nodup_sp/fsts.*.gz # save space
fi

if [ $stage -le 10 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file.  [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  # Use our special topology... note that later on may have to tune this
  # topology.
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 11 ]; then
  # Build a tree using our new topology.
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
    --leftmost-questions-truncate -1 \
    --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir
fi

if [ $stage -le 12 ]; then
  echo "$0: creating neural net configs";

  # create the config files for nnet initialization
  steps/nnet3/tdnn/make_configs.py \
    --self-repair-scale 0.00001 \
    --feat-dir data/${train_set}_hires \
    --ivector-dir exp/nnet3/ivectors_${train_set} \
    --tree-dir $treedir \
    --relu-dim 725 \
    --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \
    --use-presoftmax-prior-scale false \
    --xent-regularize 0.1 \
    --xent-separate-forward-affine true \
    --include-log-softmax false \
    --final-layer-normalize-target 0.5 \
    $dir/configs || exit 1;
fi
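# (each space-separated group in --splice-indexes above is one layer's spliced
# frame offsets: the first layer sees frames -1,0,1 of its input, and deeper
# layers widen the context with offsets like -3,0,3 and -6,-3,0, which is what
# gives the TDNN its overall temporal context)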

if [ $stage -le 13 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
  fi

  touch $dir/egs/.nodelete # keep egs around when that run dies.

  steps/nnet3/chain/train.py --stage $train_stage \
    --egs.dir "$common_egs_dir" \
    --cmd "$decode_cmd" \
    --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize 0.1 \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.00005 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=2000" \
    --egs.stage $get_egs_stage \
    --egs.opts "--frames-overlap-per-eg 0" \
    --egs.chunk-width 150 \
    --trainer.num-chunk-per-minibatch 128 \
    --trainer.frames-per-iter 1500000 \
    --trainer.num-epochs $num_epochs \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.max-param-change 2.0 \
    --cleanup.remove-egs $remove_egs \
    --feat-dir data/${train_set}_hires \
    --tree-dir $treedir \
    --lat-dir exp/tri5a_lats_nodup_sp \
    --dir $dir || exit 1;
fi

if [ $stage -le 14 ]; then
  # Note: it might appear that this $lang directory is mismatched, and it is as
  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg
fi

decode_suff=fsh_sw1_tg
graph_dir=$dir/graph_fsh_sw1_tg
if [ $stage -le 15 ]; then
  iter_opts=
  if [ ! -z $decode_iter ]; then
    iter_opts=" --iter $decode_iter "
  fi
  for decode_set in eval2000 rt03; do
      (
      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
          --nj $num_jobs --cmd "$decode_cmd" $iter_opts \
          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
          $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_${decode_suff} || exit 1;
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
          data/lang_fsh_sw1_{tg,fg} data/${decode_set}_hires \
          $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_fsh_sw1_{tg,fg} || exit 1;
      ) &
  done
fi
wait;
exit 0;

@@ -0,0 +1,141 @@
#!/bin/bash

. ./cmd.sh
set -e
stage=1
train_stage=-10
generate_alignments=true # false if doing chain training
speed_perturb=true

. ./path.sh
. ./utils/parse_options.sh

# perturbed data preparation
train_set=train_nodup
if [ "$speed_perturb" == "true" ]; then
  if [ $stage -le 1 ]; then
    # Although the nnet will be trained on high-resolution data, we still have
    # to perturb the normal-resolution data to get the alignments.
    # _sp stands for speed-perturbed

    for datadir in train_nodup; do
      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
      rm -r data/temp1 data/temp2

      mfccdir=mfcc_perturbed
      steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
        data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
      utils/fix_data_dir.sh data/${datadir}_tmp

      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
      utils/fix_data_dir.sh data/${datadir}_sp
      rm -r data/temp0 data/${datadir}_tmp
    done
  fi
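  # (after this, train_nodup_sp holds three copies of every utterance, with
  # ids prefixed sp0.9-, sp1.1- and sp1.0- by the scripts above)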

  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
    # obtain the alignment of the perturbed data
    steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
      data/train_nodup_sp data/lang_nosp exp/tri5a exp/tri5a_ali_nodup_sp || exit 1
  fi
  train_set=train_nodup_sp
fi

if [ $stage -le 3 ]; then
  mfccdir=mfcc_hires
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
    date=$(date +'%m_%d_%H_%M')
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/fisher_swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
  fi

  # the 100k_nodup directory is copied separately, as
  # we want to use exp/tri1b_ali_100k_nodup for lda_mllt training;
  # the main train directory might be speed-perturbed
  for dataset in $train_set train_100k_nodup; do
    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires

    # scale the waveforms, this is useful as we don't use CMVN
    data_dir=data/${dataset}_hires
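    # (note: the inline scaler below uses a python 2 print statement, so the
    # "python" on your PATH needs to be python 2)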
    cat $data_dir/wav.scp | python -c "
import sys, os, subprocess, re, random
scale_low = 1.0/8
scale_high = 2.0
for line in sys.stdin.readlines():
  if len(line.strip()) == 0:
    continue
  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
    mv $data_dir/wav.scp_scaled $data_dir/wav.scp

    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
      --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;

    # Remove the small number of utterances that couldn't be extracted for some
    # reason (e.g. too short; no such file).
    utils/fix_data_dir.sh data/${dataset}_hires;
  done

  for dataset in eval2000 rt03; do
    # Create MFCCs for the eval set
    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
    steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
      data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
    utils/fix_data_dir.sh data/${dataset}_hires  # remove segments with problems
  done

  # Take the first 30k utterances (about 1/8th of the data); this will be used
  # for the diagubm training
  utils/subset_data_dir.sh --first data/${train_set}_hires 30000 data/${train_set}_30k_hires
  local/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires  # 33hr
fi

# ivector extractor training
if [ $stage -le 5 ]; then
  # We need to build a small system just because we need the LDA+MLLT transform
  # to train the diag-UBM on top of.  We use --num-iters 13 because after we get
  # the transform (12th iter is the last), any further training is pointless.
  # this decision is based on fisher_english
  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
    --splice-opts "--left-context=3 --right-context=3" \
    5500 90000 data/train_100k_nodup_hires \
    data/lang_nosp exp/tri1b_ali exp/nnet3/tri2b
fi

if [ $stage -le 6 ]; then
  # To train a diagonal UBM we don't need very much data, so use the smallest subset.
  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
    data/${train_set}_30k_nodup_hires 512 exp/nnet3/tri2b exp/nnet3/diag_ubm
fi

if [ $stage -le 7 ]; then
  # iVector extractors can be sensitive to the amount of data, but this one has a
  # fairly small dim (defaults to 100) so we don't use all of it, we use just the
  # 100k subset (just under half the data).
  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
    data/train_100k_nodup_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
fi

if [ $stage -le 8 ]; then
  # We extract iVectors on all the train_nodup data, which will be what we
  # train the system on.

  # having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires

  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
    data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1;

  for data_set in eval2000 rt03; do
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
      data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_$data_set || exit 1;
  done
fi

exit 0;

@@ -0,0 +1,158 @@
#!/bin/bash

# Copyright 2015 Johns Hopkins University (Author: Daniel Povey).
#           2015 Vijayaditya Peddinti
#           2015 Xingyu Na
#           2015 Pegah Ghahrmani
# Apache 2.0.

# this is a basic lstm script
# LSTM script runs for more epochs than the TDNN script
# and each epoch takes twice the time

# At this script level we don't support not running on GPU, as it would be painfully slow.
# If you want to run without GPU you'd have to call lstm/train.sh with --gpu false

stage=0
train_stage=-10
affix=
common_egs_dir=
reporting_email=

# LSTM options
splice_indexes="-2,-1,0,1,2 0 0"
lstm_delay=" -1 -2 -3 "
label_delay=5
num_lstm_layers=3
cell_dim=1024
hidden_dim=1024
recurrent_projection_dim=256
non_recurrent_projection_dim=256
chunk_width=20
chunk_left_context=40
chunk_right_context=0

# training options
num_epochs=8
initial_effective_lrate=0.0003
final_effective_lrate=0.00003
num_jobs_initial=3
num_jobs_final=15
momentum=0.5
num_chunk_per_minibatch=100
samples_per_iter=20000
remove_egs=true

# decode options
extra_left_context=
extra_right_context=
frames_per_chunk=

# End configuration section.

echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

dir=exp/nnet3/lstm
dir=$dir${affix:+_$affix}
if [ $label_delay -gt 0 ]; then dir=${dir}_ld$label_delay; fi
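# (e.g. with the default label_delay=5 this becomes exp/nnet3/lstm_ld5)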
train_set=train_nodup_sp
ali_dir=exp/tri5a_ali_nodup_sp

local/nnet3/run_ivector_common.sh --stage $stage \
  --speed-perturb true || exit 1;

if [ $stage -le 9 ]; then
  echo "$0: creating neural net configs";
  config_extra_opts=()
  [ ! -z "$lstm_delay" ] && config_extra_opts+=(--lstm-delay "$lstm_delay")
  steps/nnet3/lstm/make_configs.py "${config_extra_opts[@]}" \
    --feat-dir data/${train_set}_hires \
    --ivector-dir exp/nnet3/ivectors_${train_set} \
    --ali-dir $ali_dir \
    --num-lstm-layers $num_lstm_layers \
    --splice-indexes "$splice_indexes " \
    --cell-dim $cell_dim \
    --hidden-dim $hidden_dim \
    --recurrent-projection-dim $recurrent_projection_dim \
    --non-recurrent-projection-dim $non_recurrent_projection_dim \
    --label-delay $label_delay \
    --self-repair-scale 0.00001 \
    $dir/configs || exit 1;
fi

if [ $stage -le 10 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet3/train_rnn.py --stage=$train_stage \
    --cmd="$decode_cmd" \
    --feat.online-ivector-dir=exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --trainer.num-epochs=$num_epochs \
    --trainer.samples-per-iter=$samples_per_iter \
    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
    --trainer.optimization.num-jobs-final=$num_jobs_final \
    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
    --trainer.optimization.shrink-value 0.99 \
    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
    --trainer.optimization.momentum=$momentum \
    --egs.chunk-width=$chunk_width \
    --egs.chunk-left-context=$chunk_left_context \
    --egs.chunk-right-context=$chunk_right_context \
    --egs.dir="$common_egs_dir" \
    --cleanup.remove-egs=$remove_egs \
    --cleanup.preserve-model-interval=500 \
    --use-gpu=true \
    --feat-dir=data/${train_set}_hires \
    --ali-dir=$ali_dir \
    --lang=data/lang \
    --reporting.email="$reporting_email" \
    --dir=$dir || exit 1;
fi

graph_dir=exp/tri5a/graph_fsh_sw1_tg
if [ $stage -le 11 ]; then
  if [ -z $extra_left_context ]; then
    extra_left_context=$chunk_left_context
  fi
  if [ -z $extra_right_context ]; then
    extra_right_context=$chunk_right_context
  fi
  if [ -z $frames_per_chunk ]; then
    frames_per_chunk=$chunk_width
  fi
  for decode_set in eval2000 rt03; do
    (
    num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
    steps/nnet3/lstm/decode.sh --nj $num_jobs --cmd "$decode_cmd" \
        --extra-left-context $extra_left_context \
        --extra-right-context $extra_right_context \
        --frames-per-chunk "$frames_per_chunk" \
        --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
        $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_fsh_sw1_tg || exit 1;
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
        data/lang_fsh_sw1_{tg,fg} data/${decode_set}_hires \
        $dir/decode_${decode_set}_fsh_sw1_{tg,fg} || exit 1;
    ) &
  done
fi

wait;
exit 0;

@@ -0,0 +1,99 @@
#!/bin/bash

# this is the standard "tdnn" system, built in nnet3; it's what we used to
# call multi-splice.

. ./cmd.sh

# At this script level we don't support not running on GPU, as it would be painfully slow.
# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.

stage=0
affix=
train_stage=-10
common_egs_dir=
reporting_email=
remove_egs=true

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

dir=exp/nnet3/tdnn
dir=$dir${affix:+_$affix}
train_set=train_nodup_sp
ali_dir=exp/tri5a_ali_nodup_sp

local/nnet3/run_ivector_common.sh --stage $stage \
  --speed-perturb true || exit 1;

if [ $stage -le 9 ]; then
  echo "$0: creating neural net configs";

  # create the config files for nnet initialization
  python steps/nnet3/tdnn/make_configs.py \
    --feat-dir data/${train_set}_hires \
    --ivector-dir exp/nnet3/ivectors_${train_set} \
    --ali-dir $ali_dir \
    --relu-dim 1024 \
    --splice-indexes "-2,-1,0,1,2 -1,2 -3,3 -3,3 -7,2 0" \
    --use-presoftmax-prior-scale true \
    $dir/configs || exit 1;
fi

if [ $stage -le 10 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet3/train_dnn.py --stage=$train_stage \
    --cmd="$decode_cmd" \
    --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --trainer.num-epochs 4 \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.0017 \
    --trainer.optimization.final-effective-lrate 0.00017 \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs $remove_egs \
    --cleanup.preserve-model-interval 500 \
    --use-gpu true \
    --feat-dir=data/${train_set}_hires \
    --ali-dir $ali_dir \
    --lang data/lang \
    --reporting.email="$reporting_email" \
    --dir=$dir || exit 1;
fi
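# (in nnet3, the actual learning rate on each iteration is the effective rate
# times the current number of jobs, so the schedule above stays comparable as
# the job count grows from 3 to 16)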

graph_dir=exp/tri5a/graph_fsh_sw1_tg
if [ $stage -le 11 ]; then
  for decode_set in eval2000 rt03; do
    (
    num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
    steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \
        --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
        $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_fsh_sw1_tg || exit 1;
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
        data/lang_fsh_sw1_{tg,fg} data/${decode_set}_hires \
        $dir/decode_${decode_set}_fsh_sw1_{tg,fg} || exit 1;
    ) &
  done
fi

wait;
exit 0;

@@ -0,0 +1,188 @@
#!/bin/bash

set -o pipefail

# this is run_tdnn_discriminative.sh

# This script does discriminative training on top of the CE nnet3 system.
# note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
# since the lattice generation runs in about real-time, so takes of the order of
# 1000 hours of CPU time.
#
. ./cmd.sh

stage=0
train_stage=-10 # can be used to start training in the middle.
get_egs_stage=-10
use_gpu=true # for training
cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats,
              # alignments and degs).

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

srcdir=exp/nnet3/tdnn
train_data_dir=data/train_nodup_sp_hires
online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp
degs_dir= # If provided, will skip the degs directory creation
lats_dir= # If provided, will skip denlats creation

## Objective options
criterion=smbr
one_silence_class=true
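# (besides smbr, steps/nnet3/train_discriminative.sh also accepts mmi and mpfe
# as the criterion here)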

dir=${srcdir}_${criterion}

## Egs options
frames_per_eg=150
frames_overlap_per_eg=30
truncate_deriv_weights=10

## Nnet training options
effective_learning_rate=0.0000125
max_param_change=1
num_jobs_nnet=4
num_epochs=4
regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options
minibatch_size=64
adjust_priors=true # May need to be set to false
                   # because it does not help in some setups
modify_learning_rates=true
last_layer_factor=0.1

## Decode options
decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more.

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.  Otherwise, call this script with --use-gpu false
EOF
  fi
  num_threads=1
else
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slow.
  num_threads=16
fi

if [ ! -f ${srcdir}/final.mdl ]; then
  echo "$0: expected ${srcdir}/final.mdl to exist; first run run_tdnn.sh or run_lstm.sh"
  exit 1;
fi

if [ $stage -le 1 ]; then
  # hardcode no-GPU for alignment, although you could use GPU [you wouldn't
  # get excellent GPU utilization though.]
  nj=100 # have a high number of jobs because this could take a while, and we might
         # have some stragglers.
  steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \
    --online-ivector-dir $online_ivector_dir \
    --nj $nj $train_data_dir data/lang $srcdir ${srcdir}_ali ;
fi

if [ -z "$lats_dir" ]; then
  lats_dir=${srcdir}_denlats
  if [ $stage -le 2 ]; then
    nj=100
    # this doesn't really affect anything strongly, except the num-jobs for one of
    # the phases of get_egs_discriminative.sh below.
    num_threads_denlats=6
    subsplit=40 # number of jobs that run per job (but 2 run at a time, so total jobs is 80, giving
                # total slots = 80 * 6 = 480.
    steps/nnet3/make_denlats.sh --cmd "$decode_cmd" --determinize true \
      --online-ivector-dir $online_ivector_dir \
      --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.config \
      $train_data_dir data/lang $srcdir ${lats_dir} ;
  fi
fi

model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'`
model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'`

# no extra context is needed for a feedforward TDNN; these would need to be
# increased for recurrent models
extra_left_context=0
extra_right_context=0
left_context=$[model_left_context + extra_left_context]
right_context=$[model_right_context + extra_right_context]

valid_left_context=$[left_context + frames_per_eg]
valid_right_context=$[right_context + frames_per_eg]

frame_subsampling_opt=
if [ -f $srcdir/frame_subsampling_factor ]; then
  frame_subsampling_opt="--frame-subsampling-factor $(cat $srcdir/frame_subsampling_factor)"
fi

cmvn_opts=`cat $srcdir/cmvn_opts`

if [ -z "$degs_dir" ]; then
  degs_dir=${srcdir}_degs

  if [ $stage -le 3 ]; then
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then
      utils/create_split_dir.pl \
        /export/b0{1,2,12,13}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
    fi
    # have a higher maximum num-jobs if the degs are spread over several filesystems
    if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi

    degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true"

    steps/nnet3/get_egs_discriminative.sh \
      --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \
      --adjust-priors $adjust_priors \
      --online-ivector-dir $online_ivector_dir \
      --left-context $left_context --right-context $right_context \
      --valid-left-context $valid_left_context --valid-right-context $valid_right_context \
      --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \
      --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \
      $train_data_dir data/lang ${srcdir}_ali $lats_dir $srcdir/final.mdl $degs_dir ;
  fi
fi

if [ $stage -le 4 ]; then
  steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \
    --stage $train_stage \
    --effective-lrate $effective_learning_rate --max-param-change $max_param_change \
    --criterion $criterion --drop-frames true \
    --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \
    --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
    --regularization-opts "$regularization_opts" \
    --truncate-deriv-weights $truncate_deriv_weights --adjust-priors $adjust_priors \
    --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \
    ${degs_dir} $dir
fi

graph_dir=exp/tri5a/graph_fsh_sw1_tg
if [ $stage -le 5 ]; then
  for x in `seq $decode_start_epoch $num_epochs`; do
    for decode_set in eval2000 rt03; do
      (
      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
      iter=epoch$x.adj

      steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
        --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
        $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_fsh_sw1_tg_$iter ;
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
        data/lang_fsh_sw1_{tg,fg} data/${decode_set}_hires \
        $dir/decode_${decode_set}_fsh_sw1_{tg,fg}_$iter ;
      ) &
    done
  done
fi
wait;

if [ $stage -le 6 ] && $cleanup; then
  # if you run with "--cleanup true --stage 6" you can clean up.
  rm ${lats_dir}/lat.*.gz || true
  rm ${srcdir}_ali/ali.*.gz || true
  steps/nnet2/remove_egs.sh ${srcdir}_degs || true
fi

exit 0;