trunk: removed minor bugs from swbd/s5b and added multisplice results to RESULTS

git-svn-id: 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Vijayaditya Peddinti 2015-01-30 20:00:16 +00:00
Родитель bfb0489168
Коммит 41a8f9b3cb
5 изменённых файлов: 14 добавлений и 220 удалений

Просмотреть файл

@ -130,7 +130,13 @@ $WER 15.6 | 1831 21395 | 86.2 9.2 4.5 1.9 15.6 53.0 | exp/nnet5f_gpu/decode_eva
%WER 14.9 | 1831 21395 | 87.4 9.1 3.5 2.4 14.9 52.3 | exp/nnet6c_mpe_gpu/decode_eval2000_sw1_tg_epoch4/score_15/eval2000.ctm.swbd.filt.sys
# Multi-splice recipe
%WER 13.8 | 1831 21395 | 87.4 8.0 4.6 1.2 13.8 51.9 | exp/nnet2_online/nnet_ms_a/decode_eval2000_hires_sw1_fsh_tgpr/score_12/eval2000_hires.ctm.swbd.filt.sys
%WER 14.5 | 1831 21395 | 86.8 8.4 4.8 1.3 14.5 51.9 | exp/nnet2_online/nnet_ms_a/decode_eval2000_hires_sw1_tg/score_12/eval2000_hires.ctm.swbd.filt.sys
%WER 13.9 | 1831 21395 | 87.3 8.1 4.6 1.2 13.9 51.8 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_hires_sw1_fsh_tgpr/score_12/eval2000_hires.ctm.swbd.filt.sys
%WER 14.2 | 1831 21395 | 87.1 8.3 4.6 1.3 14.2 51.5 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_hires_sw1_fsh_tgpr_per_utt/score_12/eval2000_hires.ctm.swbd.filt.sys
%WER 14.5 | 1831 21395 | 86.9 8.6 4.6 1.4 14.5 52.2 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_hires_sw1_tg/score_11/eval2000_hires.ctm.swbd.filt.sys
%WER 14.7 | 1831 21395 | 86.7 8.7 4.6 1.4 14.7 51.7 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_hires_sw1_tg_per_utt/score_12/eval2000_hires.ctm.swbd.filt.sys
# Below are the results from Karel's DNN recipe (Oct 11 2013) from 'local/'
# DNN with RBM pre-training, 6 hidden layers, 2048 neurons each, ~9k outputs
@ -141,3 +147,5 @@ $WER 15.6 | 1831 21395 | 86.2 9.2 4.5 1.9 15.6 53.0 | exp/nnet5f_gpu/decode_eva
%WER 14.1 | 1831 21395 | 87.8 8.7 3.5 1.9 14.1 50.4 | exp/tri4b_pretrain-dbn_dnn_smbr_iter1-lats/decode_eval2000_sw1_fsh_tgpr_it2/score_14/eval2000.ctm.swbd.filt.sys
# Final system rescored by sw1_fsh trigram (unpruned)
%WER 13.4 | 1831 21395 | 88.4 8.2 3.4 1.8 13.4 49.2 | exp/tri4b_pretrain-dbn_dnn_smbr_iter1-lats/decode_eval2000_sw1_fsh_tg.3_it2/score_14/eval2000.ctm.swbd.filt.sys

Просмотреть файл

@ -88,7 +88,7 @@ if [ $stage -le 5 ]; then
steps/online/nnet2/ --utts-per-spk-max 2 data/train_hires_nodup data/train_hires_nodup_max2
steps/online/nnet2/ --cmd "$train_cmd" --nj 30 \
data/train_hires_nodup_max2 exp/nnet2_online/extractor exp/nnet2_online/ivectors_train_nodup2 || exit 1;
data/train_hires_nodup_max2 exp/nnet2_online/extractor exp/nnet2_online/ivectors_train_hires_nodup2 || exit 1;

Просмотреть файл

@ -43,7 +43,7 @@ if [ $stage -le 6 ]; then
--num-epochs 5 --num-jobs-initial 3 --num-jobs-final 18 \
--num-hidden-layers 6 --splice-indexes "$splice_indexes" \
--feat-type raw \
--online-ivector-dir exp/nnet2_online/ivectors_train_nodup2 \
--online-ivector-dir exp/nnet2_online/ivectors_train_hires_nodup2 \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
@ -94,7 +94,7 @@ if [ $stage -le 10 ]; then
for lm_suffix in tg fsh_tgpr; do
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/ --config conf/decode.config
steps/online/nnet2/ --config conf/decode.config \
--cmd "$decode_cmd" --nj 30 \
"$graph_dir" data/${data} \
${dir}_online/decode_${data}_sw1_${lm_suffix} || exit 1;

Просмотреть файл

@ -137,10 +137,9 @@ if [ $stage -le 5 ]; then
for epoch in $(seq $decode_start_epoch $num_epochs); do
for test in eval2000_hires train_hires_dev; do
steps/online/nnet2/ --config conf/decode.config --cmd "$decode_cmd" --nj 50 \
steps/online/nnet2/ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true \
--iter epoch$epoch "$graph_dir" data/${test} $dir/decode_epoch${epoch}_${test}_sw1_${lm_suffix}_per_utt || exit 1
) &

Просмотреть файл

@ -1,213 +0,0 @@
. ./
set -e
splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-3:3"
. ./
. ./utils/
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false
parallel_opts="-l gpu=1"
# the _a is in case I want to change the parameters.
# Use 4 nnet jobs just like so the results should be
# almost the same, but this may be a little bit slow.
parallel_opts="-pe smp $num_threads"
mkdir -p exp/$nnet2_online
if [ $stage -le 1 ]; then
if [[ $(hostname -f) == * ]] && [ ! -d $mfccdir/storage ]; then
date=$(date +'%m_%d_%H_%M')
utils/ /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
utils/ data/train data/train_hires
steps/ --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir;
steps/ data/train_hires exp/make_hires/train $mfccdir;
# Remove the small number of utterances that couldn't be extracted for some
# reason (e.g. too short; no such file).
utils/ data/train_hires;
# Create MFCCs for the eval set
utils/ data/eval2000 data/eval2000_hires
steps/ --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
data/eval2000_hires exp/make_hires/eval2000 $mfccdir;
steps/ data/eval2000_hires exp/make_hires/eval2000 $mfccdir;
utils/ data/eval2000_hires # remove segments with problems
# Use the first 4k sentences as dev set. Note: when we trained the LM, we used
# the 1st 10k sentences as dev set, so the 1st 4k won't have been used in the
# LM training data. However, they will be in the lexicon, plus speakers
# may overlap, so it's still not quite equivalent to a test set.
utils/ --first data/train_hires 4000 data/train_hires_dev ;# 5hr 6min
n=$[`cat data/train/segments | wc -l` - 4000]
utils/ --last data/train_hires $n data/train_hires_nodev ;
# Take the first 30k utterances (about 1/8th of the data) this will be used
# for the diag-UBM training
utils/ --first data/train_hires_nodev 30000 data/train_hires_30k
local/ 200 data/train_hires_30k data/train_hires_30k_nodup # 33hr
# create a 100k subset for the lda+mllt training
utils/ --first data/train_hires_nodev 100000 data/train_hires_100k;
local/ 200 data/train_hires_100k data/train_hires_100k_nodup;
local/ 300 data/train_hires_nodev data/train_hires_nodup # 286hr
if [ $stage -le 2 ]; then
# We need to build a small system just because we need the LDA+MLLT transform
# to train the diag-UBM on top of. We use --num-iters 13 because after we get
# the transform (12th iter is the last), any further training is pointless.
# this decision is based on fisher_english
steps/ --cmd "$train_cmd" --num-iters 13 \
--splice-opts "--left-context=3 --right-context=3" \
5500 90000 data/train_hires_100k_nodup data/lang exp/tri2_ali_100k_nodup exp/$nnet2_online/tri3b
if [ $stage -le 3 ]; then
# To train a diagonal UBM we don't need very much data, so use the smallest subset.
steps/online/nnet2/ --cmd "$train_cmd" --nj 30 --num-frames 200000 \
data/train_hires_30k_nodup 512 exp/$nnet2_online/tri3b exp/$nnet2_online/diag_ubm
if [ $stage -le 4 ]; then
# iVector extractors can be sensitive to the amount of data, but this one has a
# fairly small dim (defaults to 100) so we don't use all of it, we use just the
# 100k subset (just under half the data).
steps/online/nnet2/ --cmd "$train_cmd" --nj 10 \
data/train_hires_100k_nodup exp/$nnet2_online/diag_ubm exp/$nnet2_online/extractor || exit 1;
if [ $stage -le 5 ]; then
# We extract iVectors on all the train_nodup data, which will be what we
# train the system on.
# having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (iVector starts at zero).
steps/online/nnet2/ --utts-per-spk-max 2 data/train_hires_nodup data/train_hires_nodup_max2
steps/online/nnet2/ --cmd "$train_cmd" --nj 30 \
data/train_hires_nodup_max2 exp/$nnet2_online/extractor exp/$nnet2_online/ivectors_train_nodup2 || exit 1;
if [ $stage -le 6 ]; then
# Because we have a lot of data here and we don't want the training to take
# too long, we reduce the number of epochs from the defaults (15 + 5) to (5
# + 2), and the (initial,final) learning rate from the defaults (0.04, 0.004)
# to (0.01, 0.001).
# decided to let others run their jobs too (we only have 10 GPUs on our queue
# at JHU). The number of parameters is smaller than the baseline system we had in
# mind (../nnet2/, which had pnorm input/output dim 3000/300 and
# 5 hidden layers, versus our 3000/300 and 5 hidden layers), even though we're
# training on more data than the baseline system. The motivation here is that we
# want to demonstrate the capability of doing real-time decoding, and if the
# network was too big we wouldn't be able to decode in real-time using a CPU.
steps/nnet2/ --stage $train_stage \
--splice-indexes "$splice_indexes" \
--feat-type raw \
--online-ivector-dir exp/$nnet2_online/ivectors_train_nodup2 \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
--num-jobs-nnet 6 \
--num-epochs 5 \
--add-layers-period 1 \
--num-hidden-layers 4 \
--mix-up 4000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--cmd "$decode_cmd" \
--egs-dir "$common_egs_dir" \
--pnorm-input-dim 3000 \
--pnorm-output-dim 300 \
data/train_hires_nodup data/lang exp/tri4b_ali_nodup $dir || exit 1;
if [ $stage -le 7 ]; then
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/ --cmd "$train_cmd" --nj 20 \
data/${data} exp/$nnet2_online/extractor exp/$nnet2_online/ivectors_${data} || exit 1;
if [ $stage -le 8 ]; then
# this does offline decoding that should give the same results as the real
# online decoding (the one with --per-utt true)
for lm_suffix in tg fsh_tgpr; do
# use already-built graphs.
for data in eval2000_hires train_hires_dev; do
steps/nnet2/ --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/$nnet2_online/ivectors_${data} \
$graph_dir data/${data} $dir/decode_${data}_sw1_${lm_suffix} || exit 1;
if [ $stage -le 9 ]; then
# If this setup used PLP features, we'd have to give the option --feature-type plp
# to the script below.
steps/online/nnet2/ --mfcc-config conf/mfcc_hires.conf \
data/lang exp/$nnet2_online/extractor "$dir" ${dir}_online || exit 1;
if [ $stage -le 10 ]; then
# do the actual online decoding with iVectors, carrying info forward from
# previous utterances of the same speaker.
for lm_suffix in tg fsh_tgpr; do
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
"$graph_dir" data/${data} ${dir}_online/decode_${data}_sw1_${lm_suffix} || exit 1;
if [ $stage -le 11 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information.
for lm_suffix in tg fsh_tgpr; do
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/ --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true \
"$graph_dir" data/${data} ${dir}_online/decode_${data}_sw1_${lm_suffix}_per_utt || exit 1;
exit 0;
# get results on Dev with this command:
for x in exp/$nnet2_online/nnet_a/decode_train_dev_sw1_*; do grep WER $x/wer_* | utils/; done
# and results on eval2000 with this command:
for x in exp/$nnet2_online/nnet_a/decode_eval2000_*; do grep Sum $x/score_*/*sys | utils/; done